From 50bdcb8d9ce0cf7531c428c8bc565e7cead01b33 Mon Sep 17 00:00:00 2001 From: Stenzek Date: Wed, 30 Jan 2019 00:47:01 +1000 Subject: [PATCH 1/4] TextureCache: Bind textures/samplers after loading all textures Since loading textures can result in rendering, e.g. partial copies, we don't want to disturb partially-bound GX state. --- Source/Core/VideoCommon/TextureCacheBase.cpp | 72 +++++++++++++++++- Source/Core/VideoCommon/VertexManagerBase.cpp | 74 +------------------ 2 files changed, 71 insertions(+), 75 deletions(-) diff --git a/Source/Core/VideoCommon/TextureCacheBase.cpp b/Source/Core/VideoCommon/TextureCacheBase.cpp index a42438e0b1..d0355b724b 100644 --- a/Source/Core/VideoCommon/TextureCacheBase.cpp +++ b/Source/Core/VideoCommon/TextureCacheBase.cpp @@ -33,6 +33,7 @@ #include "VideoCommon/Debugger.h" #include "VideoCommon/FramebufferManagerBase.h" #include "VideoCommon/HiresTextures.h" +#include "VideoCommon/PixelShaderManager.h" #include "VideoCommon/RenderBase.h" #include "VideoCommon/SamplerCommon.h" #include "VideoCommon/Statistics.h" @@ -482,12 +483,79 @@ static u32 CalculateLevelSize(u32 level_0_size, u32 level) return std::max(level_0_size >> level, 1u); } +static void SetSamplerState(u32 index, float custom_tex_scale, bool custom_tex, + bool has_arbitrary_mips) +{ + const FourTexUnits& tex = bpmem.tex[index / 4]; + const TexMode0& tm0 = tex.texMode0[index % 4]; + + SamplerState state = {}; + state.Generate(bpmem, index); + + // Force texture filtering config option. + if (g_ActiveConfig.bForceFiltering) + { + state.min_filter = SamplerState::Filter::Linear; + state.mag_filter = SamplerState::Filter::Linear; + state.mipmap_filter = SamplerCommon::AreBpTexMode0MipmapsEnabled(tm0) ? + SamplerState::Filter::Linear : + SamplerState::Filter::Point; + } + + // Custom textures may have a greater number of mips + if (custom_tex) + state.max_lod = 255; + + // Anisotropic filtering option. 
+ if (g_ActiveConfig.iMaxAnisotropy != 0 && !SamplerCommon::IsBpTexMode0PointFiltering(tm0)) + { + // https://www.opengl.org/registry/specs/EXT/texture_filter_anisotropic.txt + // For predictable results on all hardware/drivers, only use one of: + // GL_LINEAR + GL_LINEAR (No Mipmaps [Bilinear]) + // GL_LINEAR + GL_LINEAR_MIPMAP_LINEAR (w/ Mipmaps [Trilinear]) + // Letting the game set other combinations will have varying arbitrary results; + // possibly being interpreted as equal to bilinear/trilinear, implicitly + // disabling anisotropy, or changing the anisotropic algorithm employed. + state.min_filter = SamplerState::Filter::Linear; + state.mag_filter = SamplerState::Filter::Linear; + if (SamplerCommon::AreBpTexMode0MipmapsEnabled(tm0)) + state.mipmap_filter = SamplerState::Filter::Linear; + state.anisotropic_filtering = 1; + } + else + { + state.anisotropic_filtering = 0; + } + + if (has_arbitrary_mips && SamplerCommon::AreBpTexMode0MipmapsEnabled(tm0)) + { + // Apply a secondary bias calculated from the IR scale to pull inwards mipmaps + // that have arbitrary contents, eg. are used for fog effects where the + // distance they kick in at is important to preserve at any resolution. + // Correct this with the upscaling factor of custom textures. 
+ s64 lod_offset = std::log2(g_renderer->GetEFBScale() / custom_tex_scale) * 256.f; + state.lod_bias = MathUtil::Clamp(state.lod_bias + lod_offset, -32768, 32767); + + // Anisotropic also pushes mips farther away so it cannot be used either + state.anisotropic_filtering = 0; + } + + g_renderer->SetSamplerState(index, state); +} + void TextureCacheBase::BindTextures() { for (u32 i = 0; i < bound_textures.size(); i++) { - if (IsValidBindPoint(i) && bound_textures[i]) - g_renderer->SetTexture(i, bound_textures[i]->texture.get()); + const TCacheEntry* tentry = bound_textures[i]; + if (IsValidBindPoint(i) && tentry) + { + g_renderer->SetTexture(i, tentry->texture.get()); + PixelShaderManager::SetTexDims(i, tentry->native_width, tentry->native_height); + + const float custom_tex_scale = tentry->GetWidth() / float(tentry->native_width); + SetSamplerState(i, custom_tex_scale, tentry->is_custom_tex, tentry->has_arbitrary_mips); + } } } diff --git a/Source/Core/VideoCommon/VertexManagerBase.cpp b/Source/Core/VideoCommon/VertexManagerBase.cpp index 0e178e4d9b..e50cc1ceed 100644 --- a/Source/Core/VideoCommon/VertexManagerBase.cpp +++ b/Source/Core/VideoCommon/VertexManagerBase.cpp @@ -232,66 +232,6 @@ void VertexManagerBase::UploadUtilityVertices(const void* vertices, u32 vertex_s CommitBuffer(num_vertices, vertex_stride, num_indices, out_base_vertex, out_base_index); } -static void SetSamplerState(u32 index, float custom_tex_scale, bool custom_tex, - bool has_arbitrary_mips) -{ - const FourTexUnits& tex = bpmem.tex[index / 4]; - const TexMode0& tm0 = tex.texMode0[index % 4]; - - SamplerState state = {}; - state.Generate(bpmem, index); - - // Force texture filtering config option. - if (g_ActiveConfig.bForceFiltering) - { - state.min_filter = SamplerState::Filter::Linear; - state.mag_filter = SamplerState::Filter::Linear; - state.mipmap_filter = SamplerCommon::AreBpTexMode0MipmapsEnabled(tm0) ? 
- SamplerState::Filter::Linear : - SamplerState::Filter::Point; - } - - // Custom textures may have a greater number of mips - if (custom_tex) - state.max_lod = 255; - - // Anisotropic filtering option. - if (g_ActiveConfig.iMaxAnisotropy != 0 && !SamplerCommon::IsBpTexMode0PointFiltering(tm0)) - { - // https://www.opengl.org/registry/specs/EXT/texture_filter_anisotropic.txt - // For predictable results on all hardware/drivers, only use one of: - // GL_LINEAR + GL_LINEAR (No Mipmaps [Bilinear]) - // GL_LINEAR + GL_LINEAR_MIPMAP_LINEAR (w/ Mipmaps [Trilinear]) - // Letting the game set other combinations will have varying arbitrary results; - // possibly being interpreted as equal to bilinear/trilinear, implicitly - // disabling anisotropy, or changing the anisotropic algorithm employed. - state.min_filter = SamplerState::Filter::Linear; - state.mag_filter = SamplerState::Filter::Linear; - if (SamplerCommon::AreBpTexMode0MipmapsEnabled(tm0)) - state.mipmap_filter = SamplerState::Filter::Linear; - state.anisotropic_filtering = 1; - } - else - { - state.anisotropic_filtering = 0; - } - - if (has_arbitrary_mips && SamplerCommon::AreBpTexMode0MipmapsEnabled(tm0)) - { - // Apply a secondary bias calculated from the IR scale to pull inwards mipmaps - // that have arbitrary contents, eg. are used for fog effects where the - // distance they kick in at is important to preserve at any resolution. - // Correct this with the upscaling factor of custom textures. 
- s64 lod_offset = std::log2(g_renderer->GetEFBScale() / custom_tex_scale) * 256.f; - state.lod_bias = MathUtil::Clamp(state.lod_bias + lod_offset, -32768, 32767); - - // Anisotropic also pushes mips farther away so it cannot be used either - state.anisotropic_filtering = 0; - } - - g_renderer->SetSamplerState(index, state); -} - void VertexManagerBase::Flush() { if (m_is_flushed) @@ -355,20 +295,8 @@ void VertexManagerBase::Flush() usedtextures[bpmem.tevindref.getTexMap(bpmem.tevind[i].bt)] = true; for (unsigned int i : usedtextures) - { - const auto* tentry = g_texture_cache->Load(i); + g_texture_cache->Load(i); - if (tentry) - { - float custom_tex_scale = tentry->GetWidth() / float(tentry->native_width); - SetSamplerState(i, custom_tex_scale, tentry->is_custom_tex, tentry->has_arbitrary_mips); - PixelShaderManager::SetTexDims(i, tentry->native_width, tentry->native_height); - } - else - { - ERROR_LOG(VIDEO, "error loading texture"); - } - } g_texture_cache->BindTextures(); } From 2165523fdcdaf5a27110f92ae86d773293458343 Mon Sep 17 00:00:00 2001 From: Stenzek Date: Sat, 2 Feb 2019 14:14:58 +1000 Subject: [PATCH 2/4] TextureConverterShader: Write EFB2Tex XFB copies with alpha value of 1 This way we don't end up with artifacts of the EFB's alpha values in frame dumps. XFB copies loaded from RAM also set the alpha to 1, so this will match. 
--- Source/Core/VideoCommon/TextureConverterShaderGen.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Source/Core/VideoCommon/TextureConverterShaderGen.cpp b/Source/Core/VideoCommon/TextureConverterShaderGen.cpp index 5cfb3e7c76..ccb01c20ed 100644 --- a/Source/Core/VideoCommon/TextureConverterShaderGen.cpp +++ b/Source/Core/VideoCommon/TextureConverterShaderGen.cpp @@ -268,8 +268,8 @@ ShaderCode GenerateShader(APIType api_type, const UidData* uid_data) break; case EFBCopyFormat::XFB: - out.Write(" ocol0 = float4(pow(texcol.rgb, float3(gamma_rcp, gamma_rcp, gamma_rcp)), " - "texcol.a);\n"); + out.Write( + " ocol0 = float4(pow(texcol.rgb, float3(gamma_rcp, gamma_rcp, gamma_rcp)), 1.0f);\n"); break; default: From 933f3ba008942cba96ed623c25e61d6ea86582fe Mon Sep 17 00:00:00 2001 From: Stenzek Date: Fri, 15 Feb 2019 11:58:59 +1000 Subject: [PATCH 3/4] TextureCache: Don't copy out-of-range rectangles when stitching textures This can cause driver crashes or GPU hangs if we do. --- Source/Core/VideoCommon/TextureCacheBase.cpp | 22 ++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/Source/Core/VideoCommon/TextureCacheBase.cpp b/Source/Core/VideoCommon/TextureCacheBase.cpp index d0355b724b..90abc01c65 100644 --- a/Source/Core/VideoCommon/TextureCacheBase.cpp +++ b/Source/Core/VideoCommon/TextureCacheBase.cpp @@ -384,6 +384,17 @@ TextureCacheBase::DoPartialTextureUpdates(TCacheEntry* entry_to_update, u8* pale dst_y = 0; } + // If the source rectangle is outside of what we actually have in VRAM, skip the copy. + // The backend doesn't do any clamping, so if we don't, we'd pass out-of-range coordinates + // to the graphics driver, which can cause GPU resets. 
+ if (static_cast<u32>(src_x) >= entry->native_width || + static_cast<u32>(src_y) >= entry->native_height || + static_cast<u32>(dst_x) >= entry_to_update->native_width || + static_cast<u32>(dst_y) >= entry_to_update->native_height) + { + continue; + } + u32 copy_width = std::min(entry->native_width - src_x, entry_to_update->native_width - dst_x); u32 copy_height = @@ -1453,6 +1464,17 @@ TextureCacheBase::GetTextureFromOverlappingTextures(const TextureLookupInformati dst_y = 0; } + // If the source rectangle is outside of what we actually have in VRAM, skip the copy. + // The backend doesn't do any clamping, so if we don't, we'd pass out-of-range coordinates + // to the graphics driver, which can cause GPU resets. + if (static_cast<u32>(src_x) >= entry->native_width || + static_cast<u32>(src_y) >= entry->native_height || + static_cast<u32>(dst_x) >= stitched_entry->native_width || + static_cast<u32>(dst_y) >= stitched_entry->native_height) + { + continue; + } + u32 copy_width = std::min(entry->native_width - src_x, stitched_entry->native_width - dst_x); u32 copy_height = std::min(entry->native_height - src_y, stitched_entry->native_height - dst_y); From f039149198657c1891e1c6462ed30c31ed4b8486 Mon Sep 17 00:00:00 2001 From: Stenzek Date: Fri, 15 Feb 2019 11:59:50 +1000 Subject: [PATCH 4/4] Move most backend functionality to VideoCommon --- Source/Core/Common/GL/GLUtil.h | 3 + Source/Core/Core/Config/GraphicsSettings.cpp | 2 - Source/Core/Core/Config/GraphicsSettings.h | 1 - .../Core/ConfigLoaders/IsSettingSaveable.cpp | 1 - .../Config/Graphics/EnhancementsWidget.cpp | 11 +- .../Graphics/PostProcessingConfigWindow.cpp | 7 +- .../Graphics/PostProcessingConfigWindow.h | 6 +- Source/Core/VideoBackends/D3D/BoundingBox.cpp | 4 +- Source/Core/VideoBackends/D3D/CMakeLists.txt | 20 - Source/Core/VideoBackends/D3D/D3D.vcxproj | 20 - .../VideoBackends/D3D/D3D.vcxproj.filters | 60 - Source/Core/VideoBackends/D3D/D3DBase.cpp | 62 +- Source/Core/VideoBackends/D3D/D3DBase.h | 6 +-
Source/Core/VideoBackends/D3D/D3DBlob.cpp | 60 - Source/Core/VideoBackends/D3D/D3DBlob.h | 39 - Source/Core/VideoBackends/D3D/D3DShader.cpp | 304 ----- Source/Core/VideoBackends/D3D/D3DShader.h | 78 -- Source/Core/VideoBackends/D3D/D3DState.cpp | 175 ++- Source/Core/VideoBackends/D3D/D3DState.h | 76 +- Source/Core/VideoBackends/D3D/D3DTexture.cpp | 105 -- Source/Core/VideoBackends/D3D/D3DTexture.h | 48 - Source/Core/VideoBackends/D3D/D3DUtil.cpp | 407 ------ Source/Core/VideoBackends/D3D/D3DUtil.h | 32 - Source/Core/VideoBackends/D3D/DXPipeline.cpp | 21 +- Source/Core/VideoBackends/D3D/DXPipeline.h | 6 +- Source/Core/VideoBackends/D3D/DXShader.cpp | 179 +-- Source/Core/VideoBackends/D3D/DXShader.h | 22 +- Source/Core/VideoBackends/D3D/DXTexture.cpp | 374 ++--- Source/Core/VideoBackends/D3D/DXTexture.h | 30 +- .../VideoBackends/D3D/FramebufferManager.cpp | 303 ----- .../VideoBackends/D3D/FramebufferManager.h | 96 -- .../VideoBackends/D3D/GeometryShaderCache.cpp | 113 -- .../VideoBackends/D3D/GeometryShaderCache.h | 27 - .../VideoBackends/D3D/NativeVertexFormat.cpp | 28 +- .../VideoBackends/D3D/PSTextureEncoder.cpp | 160 --- .../Core/VideoBackends/D3D/PSTextureEncoder.h | 53 - .../VideoBackends/D3D/PixelShaderCache.cpp | 315 ----- .../Core/VideoBackends/D3D/PixelShaderCache.h | 34 - Source/Core/VideoBackends/D3D/Render.cpp | 649 ++------- Source/Core/VideoBackends/D3D/Render.h | 49 +- .../Core/VideoBackends/D3D/TextureCache.cpp | 318 ----- Source/Core/VideoBackends/D3D/TextureCache.h | 49 - .../Core/VideoBackends/D3D/VertexManager.cpp | 212 ++- Source/Core/VideoBackends/D3D/VertexManager.h | 37 +- .../VideoBackends/D3D/VertexShaderCache.cpp | 136 -- .../VideoBackends/D3D/VertexShaderCache.h | 32 - Source/Core/VideoBackends/D3D/main.cpp | 39 +- .../Core/VideoBackends/Null/NullBackend.cpp | 19 +- .../Core/VideoBackends/Null/NullTexture.cpp | 20 +- Source/Core/VideoBackends/Null/NullTexture.h | 10 +- Source/Core/VideoBackends/Null/Render.cpp | 21 +- 
Source/Core/VideoBackends/Null/Render.h | 10 +- Source/Core/VideoBackends/Null/TextureCache.h | 13 +- .../Core/VideoBackends/Null/VertexManager.cpp | 42 +- .../Core/VideoBackends/Null/VertexManager.h | 15 +- Source/Core/VideoBackends/OGL/BoundingBox.cpp | 172 +-- Source/Core/VideoBackends/OGL/BoundingBox.h | 13 +- Source/Core/VideoBackends/OGL/CMakeLists.txt | 4 - .../VideoBackends/OGL/FramebufferManager.cpp | 634 --------- .../VideoBackends/OGL/FramebufferManager.h | 127 -- .../VideoBackends/OGL/NativeVertexFormat.cpp | 21 +- Source/Core/VideoBackends/OGL/OGL.vcxproj | 8 - .../VideoBackends/OGL/OGL.vcxproj.filters | 24 - Source/Core/VideoBackends/OGL/OGLShader.cpp | 15 +- Source/Core/VideoBackends/OGL/OGLShader.h | 14 +- Source/Core/VideoBackends/OGL/OGLTexture.cpp | 254 ++-- Source/Core/VideoBackends/OGL/OGLTexture.h | 23 +- .../Core/VideoBackends/OGL/PostProcessing.cpp | 273 ---- .../Core/VideoBackends/OGL/PostProcessing.h | 44 - .../VideoBackends/OGL/ProgramShaderCache.cpp | 54 +- .../VideoBackends/OGL/ProgramShaderCache.h | 15 +- Source/Core/VideoBackends/OGL/Render.cpp | 894 +++++------- Source/Core/VideoBackends/OGL/Render.h | 64 +- Source/Core/VideoBackends/OGL/StreamBuffer.h | 4 +- .../Core/VideoBackends/OGL/TextureCache.cpp | 574 -------- Source/Core/VideoBackends/OGL/TextureCache.h | 108 -- .../VideoBackends/OGL/TextureConverter.cpp | 170 --- .../Core/VideoBackends/OGL/TextureConverter.h | 33 - .../Core/VideoBackends/OGL/VertexManager.cpp | 190 ++- Source/Core/VideoBackends/OGL/VertexManager.h | 27 +- Source/Core/VideoBackends/OGL/main.cpp | 39 +- .../VideoBackends/Software/SWRenderer.cpp | 32 +- .../Core/VideoBackends/Software/SWRenderer.h | 18 +- .../Core/VideoBackends/Software/SWTexture.cpp | 58 +- .../Core/VideoBackends/Software/SWTexture.h | 10 +- .../VideoBackends/Software/SWVertexLoader.cpp | 50 +- .../VideoBackends/Software/SWVertexLoader.h | 12 - Source/Core/VideoBackends/Software/SWmain.cpp | 27 +- .../VideoBackends/Software/TextureCache.h | 14 
+- .../Core/VideoBackends/Vulkan/BoundingBox.cpp | 21 +- .../Core/VideoBackends/Vulkan/BoundingBox.h | 3 - .../Core/VideoBackends/Vulkan/CMakeLists.txt | 7 - .../Vulkan/CommandBufferManager.cpp | 140 +- .../Vulkan/CommandBufferManager.h | 63 +- Source/Core/VideoBackends/Vulkan/Constants.h | 87 +- .../Vulkan/FramebufferManager.cpp | 1198 ----------------- .../VideoBackends/Vulkan/FramebufferManager.h | 161 --- .../Core/VideoBackends/Vulkan/ObjectCache.cpp | 357 +++-- .../Core/VideoBackends/Vulkan/ObjectCache.h | 49 +- .../Core/VideoBackends/Vulkan/PerfQuery.cpp | 56 +- Source/Core/VideoBackends/Vulkan/PerfQuery.h | 6 +- .../VideoBackends/Vulkan/PostProcessing.cpp | 316 ----- .../VideoBackends/Vulkan/PostProcessing.h | 44 - Source/Core/VideoBackends/Vulkan/Renderer.cpp | 531 ++------ Source/Core/VideoBackends/Vulkan/Renderer.h | 45 +- .../Core/VideoBackends/Vulkan/ShaderCache.cpp | 859 ------------ .../Core/VideoBackends/Vulkan/ShaderCache.h | 145 -- .../VideoBackends/Vulkan/ShaderCompiler.cpp | 10 +- .../VideoBackends/Vulkan/StagingBuffer.cpp | 35 +- .../Core/VideoBackends/Vulkan/StagingBuffer.h | 9 +- .../VideoBackends/Vulkan/StateTracker.cpp | 855 +++++------- .../Core/VideoBackends/Vulkan/StateTracker.h | 156 +-- .../VideoBackends/Vulkan/StreamBuffer.cpp | 124 +- .../Core/VideoBackends/Vulkan/StreamBuffer.h | 33 +- .../Core/VideoBackends/Vulkan/SwapChain.cpp | 96 +- Source/Core/VideoBackends/Vulkan/SwapChain.h | 27 +- .../Core/VideoBackends/Vulkan/Texture2D.cpp | 404 ------ Source/Core/VideoBackends/Vulkan/Texture2D.h | 79 -- .../VideoBackends/Vulkan/TextureCache.cpp | 316 ----- .../Core/VideoBackends/Vulkan/TextureCache.h | 65 - .../VideoBackends/Vulkan/TextureConverter.cpp | 634 --------- .../VideoBackends/Vulkan/TextureConverter.h | 109 -- Source/Core/VideoBackends/Vulkan/Util.cpp | 925 ------------- Source/Core/VideoBackends/Vulkan/Util.h | 235 ---- .../Core/VideoBackends/Vulkan/VKPipeline.cpp | 361 ++++- Source/Core/VideoBackends/Vulkan/VKShader.cpp | 43 +- 
.../Core/VideoBackends/Vulkan/VKTexture.cpp | 812 +++++++---- Source/Core/VideoBackends/Vulkan/VKTexture.h | 76 +- .../VideoBackends/Vulkan/VertexFormat.cpp | 48 +- .../Core/VideoBackends/Vulkan/VertexFormat.h | 4 +- .../VideoBackends/Vulkan/VertexManager.cpp | 362 ++++- .../Core/VideoBackends/Vulkan/VertexManager.h | 37 +- .../Core/VideoBackends/Vulkan/Vulkan.vcxproj | 14 - .../VideoBackends/Vulkan/VulkanContext.cpp | 28 +- .../Core/VideoBackends/Vulkan/VulkanContext.h | 4 - Source/Core/VideoBackends/Vulkan/main.cpp | 64 +- .../Core/VideoCommon/AbstractFramebuffer.cpp | 9 +- Source/Core/VideoCommon/AbstractFramebuffer.h | 7 +- Source/Core/VideoCommon/AbstractPipeline.h | 19 +- .../Core/VideoCommon/AbstractStagingTexture.h | 8 + Source/Core/VideoCommon/AbstractTexture.cpp | 42 +- Source/Core/VideoCommon/AbstractTexture.h | 15 +- Source/Core/VideoCommon/BPFunctions.cpp | 93 +- Source/Core/VideoCommon/CMakeLists.txt | 4 +- Source/Core/VideoCommon/Debugger.cpp | 163 --- Source/Core/VideoCommon/Debugger.h | 83 -- .../Core/VideoCommon/FramebufferManager.cpp | 764 +++++++++++ Source/Core/VideoCommon/FramebufferManager.h | 171 +++ .../VideoCommon/FramebufferManagerBase.cpp | 28 - .../Core/VideoCommon/FramebufferManagerBase.h | 30 - .../Core/VideoCommon/FramebufferShaderGen.cpp | 464 +++++++ .../Core/VideoCommon/FramebufferShaderGen.h | 32 + Source/Core/VideoCommon/NativeVertexFormat.h | 10 +- Source/Core/VideoCommon/PixelShaderGen.cpp | 15 +- .../Core/VideoCommon/PixelShaderManager.cpp | 4 +- Source/Core/VideoCommon/PostProcessing.cpp | 501 +++++-- Source/Core/VideoCommon/PostProcessing.h | 59 +- Source/Core/VideoCommon/RenderBase.cpp | 463 +++++-- Source/Core/VideoCommon/RenderBase.h | 111 +- Source/Core/VideoCommon/RenderState.cpp | 60 +- Source/Core/VideoCommon/RenderState.h | 27 +- Source/Core/VideoCommon/ShaderCache.cpp | 224 ++- Source/Core/VideoCommon/ShaderCache.h | 86 +- Source/Core/VideoCommon/ShaderGenCommon.cpp | 1 + Source/Core/VideoCommon/ShaderGenCommon.h | 
5 +- Source/Core/VideoCommon/TextureCacheBase.cpp | 556 ++++++-- Source/Core/VideoCommon/TextureCacheBase.h | 109 +- Source/Core/VideoCommon/TextureConfig.cpp | 9 +- Source/Core/VideoCommon/TextureConfig.h | 19 +- .../VideoCommon/TextureConversionShader.cpp | 466 ++++--- .../VideoCommon/TextureConversionShader.h | 20 +- .../VideoCommon/TextureConverterShaderGen.cpp | 119 +- .../VideoCommon/TextureConverterShaderGen.h | 3 +- Source/Core/VideoCommon/UberShaderPixel.cpp | 7 +- .../Core/VideoCommon/VertexLoaderManager.cpp | 7 +- Source/Core/VideoCommon/VertexManagerBase.cpp | 269 +++- Source/Core/VideoCommon/VertexManagerBase.h | 90 +- Source/Core/VideoCommon/VideoBackendBase.cpp | 1 - Source/Core/VideoCommon/VideoCommon.vcxproj | 8 +- .../VideoCommon/VideoCommon.vcxproj.filters | 28 +- Source/Core/VideoCommon/VideoConfig.cpp | 2 - Source/Core/VideoCommon/VideoConfig.h | 9 +- 182 files changed, 8334 insertions(+), 15917 deletions(-) delete mode 100644 Source/Core/VideoBackends/D3D/D3DBlob.cpp delete mode 100644 Source/Core/VideoBackends/D3D/D3DBlob.h delete mode 100644 Source/Core/VideoBackends/D3D/D3DShader.cpp delete mode 100644 Source/Core/VideoBackends/D3D/D3DShader.h delete mode 100644 Source/Core/VideoBackends/D3D/D3DTexture.cpp delete mode 100644 Source/Core/VideoBackends/D3D/D3DTexture.h delete mode 100644 Source/Core/VideoBackends/D3D/D3DUtil.cpp delete mode 100644 Source/Core/VideoBackends/D3D/D3DUtil.h delete mode 100644 Source/Core/VideoBackends/D3D/FramebufferManager.cpp delete mode 100644 Source/Core/VideoBackends/D3D/FramebufferManager.h delete mode 100644 Source/Core/VideoBackends/D3D/GeometryShaderCache.cpp delete mode 100644 Source/Core/VideoBackends/D3D/GeometryShaderCache.h delete mode 100644 Source/Core/VideoBackends/D3D/PSTextureEncoder.cpp delete mode 100644 Source/Core/VideoBackends/D3D/PSTextureEncoder.h delete mode 100644 Source/Core/VideoBackends/D3D/PixelShaderCache.cpp delete mode 100644 Source/Core/VideoBackends/D3D/PixelShaderCache.h 
delete mode 100644 Source/Core/VideoBackends/D3D/TextureCache.cpp delete mode 100644 Source/Core/VideoBackends/D3D/TextureCache.h delete mode 100644 Source/Core/VideoBackends/D3D/VertexShaderCache.cpp delete mode 100644 Source/Core/VideoBackends/D3D/VertexShaderCache.h delete mode 100644 Source/Core/VideoBackends/OGL/FramebufferManager.cpp delete mode 100644 Source/Core/VideoBackends/OGL/FramebufferManager.h delete mode 100644 Source/Core/VideoBackends/OGL/PostProcessing.cpp delete mode 100644 Source/Core/VideoBackends/OGL/PostProcessing.h delete mode 100644 Source/Core/VideoBackends/OGL/TextureCache.cpp delete mode 100644 Source/Core/VideoBackends/OGL/TextureCache.h delete mode 100644 Source/Core/VideoBackends/OGL/TextureConverter.cpp delete mode 100644 Source/Core/VideoBackends/OGL/TextureConverter.h delete mode 100644 Source/Core/VideoBackends/Vulkan/FramebufferManager.cpp delete mode 100644 Source/Core/VideoBackends/Vulkan/FramebufferManager.h delete mode 100644 Source/Core/VideoBackends/Vulkan/PostProcessing.cpp delete mode 100644 Source/Core/VideoBackends/Vulkan/PostProcessing.h delete mode 100644 Source/Core/VideoBackends/Vulkan/ShaderCache.cpp delete mode 100644 Source/Core/VideoBackends/Vulkan/ShaderCache.h delete mode 100644 Source/Core/VideoBackends/Vulkan/Texture2D.cpp delete mode 100644 Source/Core/VideoBackends/Vulkan/Texture2D.h delete mode 100644 Source/Core/VideoBackends/Vulkan/TextureCache.cpp delete mode 100644 Source/Core/VideoBackends/Vulkan/TextureCache.h delete mode 100644 Source/Core/VideoBackends/Vulkan/TextureConverter.cpp delete mode 100644 Source/Core/VideoBackends/Vulkan/TextureConverter.h delete mode 100644 Source/Core/VideoBackends/Vulkan/Util.cpp delete mode 100644 Source/Core/VideoBackends/Vulkan/Util.h delete mode 100644 Source/Core/VideoCommon/Debugger.cpp delete mode 100644 Source/Core/VideoCommon/Debugger.h create mode 100644 Source/Core/VideoCommon/FramebufferManager.cpp create mode 100644 
Source/Core/VideoCommon/FramebufferManager.h delete mode 100644 Source/Core/VideoCommon/FramebufferManagerBase.cpp delete mode 100644 Source/Core/VideoCommon/FramebufferManagerBase.h create mode 100644 Source/Core/VideoCommon/FramebufferShaderGen.cpp create mode 100644 Source/Core/VideoCommon/FramebufferShaderGen.h diff --git a/Source/Core/Common/GL/GLUtil.h b/Source/Core/Common/GL/GLUtil.h index 1ec63ebad0..2d595e5286 100644 --- a/Source/Core/Common/GL/GLUtil.h +++ b/Source/Core/Common/GL/GLUtil.h @@ -10,6 +10,9 @@ class GLContext; +// Texture which we use to not disturb the other bindings. +constexpr GLenum GL_MUTABLE_TEXTURE_INDEX = GL_TEXTURE10; + namespace GLUtil { GLuint CompileProgram(const std::string& vertexShader, const std::string& fragmentShader); diff --git a/Source/Core/Core/Config/GraphicsSettings.cpp b/Source/Core/Core/Config/GraphicsSettings.cpp index a59b646618..3418e97629 100644 --- a/Source/Core/Core/Config/GraphicsSettings.cpp +++ b/Source/Core/Core/Config/GraphicsSettings.cpp @@ -137,8 +137,6 @@ const ConfigInfo GFX_STEREO_DEPTH_PERCENTAGE{ const ConfigInfo GFX_HACK_EFB_ACCESS_ENABLE{{System::GFX, "Hacks", "EFBAccessEnable"}, true}; const ConfigInfo GFX_HACK_BBOX_ENABLE{{System::GFX, "Hacks", "BBoxEnable"}, false}; -const ConfigInfo GFX_HACK_BBOX_PREFER_STENCIL_IMPLEMENTATION{ - {System::GFX, "Hacks", "BBoxPreferStencilImplementation"}, false}; const ConfigInfo GFX_HACK_FORCE_PROGRESSIVE{{System::GFX, "Hacks", "ForceProgressive"}, true}; const ConfigInfo GFX_HACK_SKIP_EFB_COPY_TO_RAM{{System::GFX, "Hacks", "EFBToTextureEnable"}, true}; diff --git a/Source/Core/Core/Config/GraphicsSettings.h b/Source/Core/Core/Config/GraphicsSettings.h index 73edbe8cf4..8e29c13a77 100644 --- a/Source/Core/Core/Config/GraphicsSettings.h +++ b/Source/Core/Core/Config/GraphicsSettings.h @@ -102,7 +102,6 @@ extern const ConfigInfo GFX_STEREO_DEPTH_PERCENTAGE; extern const ConfigInfo GFX_HACK_EFB_ACCESS_ENABLE; extern const ConfigInfo GFX_HACK_BBOX_ENABLE; -extern 
const ConfigInfo GFX_HACK_BBOX_PREFER_STENCIL_IMPLEMENTATION; extern const ConfigInfo GFX_HACK_FORCE_PROGRESSIVE; extern const ConfigInfo GFX_HACK_SKIP_EFB_COPY_TO_RAM; extern const ConfigInfo GFX_HACK_SKIP_XFB_COPY_TO_RAM; diff --git a/Source/Core/Core/ConfigLoaders/IsSettingSaveable.cpp b/Source/Core/Core/ConfigLoaders/IsSettingSaveable.cpp index 146fdd1120..b1021f7827 100644 --- a/Source/Core/Core/ConfigLoaders/IsSettingSaveable.cpp +++ b/Source/Core/Core/ConfigLoaders/IsSettingSaveable.cpp @@ -114,7 +114,6 @@ bool IsSettingSaveable(const Config::ConfigLocation& config_location) Config::GFX_HACK_EFB_ACCESS_ENABLE.location, Config::GFX_HACK_BBOX_ENABLE.location, - Config::GFX_HACK_BBOX_PREFER_STENCIL_IMPLEMENTATION.location, Config::GFX_HACK_FORCE_PROGRESSIVE.location, Config::GFX_HACK_SKIP_EFB_COPY_TO_RAM.location, Config::GFX_HACK_SKIP_XFB_COPY_TO_RAM.location, diff --git a/Source/Core/DolphinQt/Config/Graphics/EnhancementsWidget.cpp b/Source/Core/DolphinQt/Config/Graphics/EnhancementsWidget.cpp index 9909dad3ae..950d647f3c 100644 --- a/Source/Core/DolphinQt/Config/Graphics/EnhancementsWidget.cpp +++ b/Source/Core/DolphinQt/Config/Graphics/EnhancementsWidget.cpp @@ -152,10 +152,9 @@ void EnhancementsWidget::ConnectWidgets() void EnhancementsWidget::LoadPPShaders() { const bool anaglyph = g_Config.stereo_mode == StereoMode::Anaglyph; - std::vector shaders = - anaglyph ? PostProcessingShaderImplementation::GetAnaglyphShaderList( - g_Config.backend_info.api_type) : - PostProcessingShaderImplementation::GetShaderList(g_Config.backend_info.api_type); + std::vector shaders = anaglyph ? 
+ VideoCommon::PostProcessing::GetAnaglyphShaderList() : + VideoCommon::PostProcessing::GetShaderList(); m_pp_effect->clear(); @@ -187,7 +186,7 @@ void EnhancementsWidget::LoadPPShaders() tr("%1 doesn't support this feature.") .arg(tr(g_video_backend->GetDisplayName().c_str()))); - PostProcessingShaderConfiguration pp_shader; + VideoCommon::PostProcessingConfiguration pp_shader; if (selected_shader != "(off)" && supports_postprocessing) { pp_shader.LoadShader(selected_shader); @@ -266,7 +265,7 @@ void EnhancementsWidget::SaveSettings() "(off)" : m_pp_effect->currentText().toStdString()); - PostProcessingShaderConfiguration pp_shader; + VideoCommon::PostProcessingConfiguration pp_shader; if (Config::Get(Config::GFX_ENHANCE_POST_SHADER) != "(off)") { pp_shader.LoadShader(Config::Get(Config::GFX_ENHANCE_POST_SHADER)); diff --git a/Source/Core/DolphinQt/Config/Graphics/PostProcessingConfigWindow.cpp b/Source/Core/DolphinQt/Config/Graphics/PostProcessingConfigWindow.cpp index 8de48e7cf3..3a5bfcad28 100644 --- a/Source/Core/DolphinQt/Config/Graphics/PostProcessingConfigWindow.cpp +++ b/Source/Core/DolphinQt/Config/Graphics/PostProcessingConfigWindow.cpp @@ -25,7 +25,7 @@ #include "VideoCommon/RenderBase.h" #include "VideoCommon/VideoConfig.h" -using ConfigurationOption = PostProcessingShaderConfiguration::ConfigurationOption; +using ConfigurationOption = VideoCommon::PostProcessingConfiguration::ConfigurationOption; using OptionType = ConfigurationOption::OptionType; PostProcessingConfigWindow::PostProcessingConfigWindow(EnhancementsWidget* parent, @@ -38,7 +38,7 @@ PostProcessingConfigWindow::PostProcessingConfigWindow(EnhancementsWidget* paren } else { - m_post_processor = new PostProcessingShaderConfiguration(); + m_post_processor = new VideoCommon::PostProcessingConfiguration(); m_post_processor->LoadShader(m_shader); } @@ -61,7 +61,8 @@ PostProcessingConfigWindow::~PostProcessingConfigWindow() void PostProcessingConfigWindow::PopulateGroups() { - const 
PostProcessingShaderConfiguration::ConfigMap& config_map = m_post_processor->GetOptions(); + const VideoCommon::PostProcessingConfiguration::ConfigMap& config_map = + m_post_processor->GetOptions(); auto config_groups = std::vector>(); for (const auto& it : config_map) diff --git a/Source/Core/DolphinQt/Config/Graphics/PostProcessingConfigWindow.h b/Source/Core/DolphinQt/Config/Graphics/PostProcessingConfigWindow.h index 4d78404981..1e54620401 100644 --- a/Source/Core/DolphinQt/Config/Graphics/PostProcessingConfigWindow.h +++ b/Source/Core/DolphinQt/Config/Graphics/PostProcessingConfigWindow.h @@ -35,7 +35,7 @@ private: { public: explicit ConfigGroup( - const PostProcessingShaderConfiguration::ConfigurationOption* config_option); + const VideoCommon::PostProcessingConfiguration::ConfigurationOption* config_option); const std::string& GetGUIName() const noexcept; const std::string& GetParent() const noexcept; @@ -57,7 +57,7 @@ private: std::vector m_sliders; std::vector m_value_boxes; - const PostProcessingShaderConfiguration::ConfigurationOption* m_config_option; + const VideoCommon::PostProcessingConfiguration::ConfigurationOption* m_config_option; std::vector> m_subgroups; }; void Create(); @@ -72,7 +72,7 @@ private: QDialogButtonBox* m_buttons; const std::string& m_shader; - PostProcessingShaderConfiguration* m_post_processor; + VideoCommon::PostProcessingConfiguration* m_post_processor; std::unordered_map m_config_map; std::vector> m_config_groups; }; diff --git a/Source/Core/VideoBackends/D3D/BoundingBox.cpp b/Source/Core/VideoBackends/D3D/BoundingBox.cpp index e52ebb9c6d..abbd7036a1 100644 --- a/Source/Core/VideoBackends/D3D/BoundingBox.cpp +++ b/Source/Core/VideoBackends/D3D/BoundingBox.cpp @@ -5,6 +5,7 @@ #include "VideoBackends/D3D/BoundingBox.h" #include "Common/CommonTypes.h" #include "Common/MsgHandler.h" +#include "VideoBackends/D3D/D3DState.h" #include "VideoCommon/VideoConfig.h" namespace DX11 @@ -54,6 +55,7 @@ void BBox::Init() hr = 
D3D::device->CreateUnorderedAccessView(s_bbox_buffer, &UAVdesc, &s_bbox_uav); CHECK(SUCCEEDED(hr), "Create BoundingBox UAV."); D3D::SetDebugObjectName(s_bbox_uav, "BoundingBox UAV"); + D3D::stateman->SetOMUAV(s_bbox_uav); } } @@ -83,4 +85,4 @@ int BBox::Get(int index) D3D::context->Unmap(s_bbox_staging_buffer, 0); return data; } -}; +}; // namespace DX11 diff --git a/Source/Core/VideoBackends/D3D/CMakeLists.txt b/Source/Core/VideoBackends/D3D/CMakeLists.txt index 33976f93d7..facd2b07e3 100644 --- a/Source/Core/VideoBackends/D3D/CMakeLists.txt +++ b/Source/Core/VideoBackends/D3D/CMakeLists.txt @@ -3,42 +3,22 @@ add_library(videod3d BoundingBox.h D3DBase.cpp D3DBase.h - D3DBlob.cpp - D3DBlob.h - D3DShader.cpp - D3DShader.h D3DState.cpp D3DState.h - D3DTexture.cpp - D3DTexture.h - D3DUtil.cpp - D3DUtil.h DXPipeline.cpp DXPipeline.h DXShader.cpp DXShader.h DXTexture.cpp DXTexture.h - FramebufferManager.cpp - FramebufferManager.h - GeometryShaderCache.cpp - GeometryShaderCache.h main.cpp NativeVertexFormat.cpp PerfQuery.cpp PerfQuery.h - PixelShaderCache.cpp - PixelShaderCache.h - PSTextureEncoder.cpp - PSTextureEncoder.h Render.cpp Render.h - TextureCache.cpp - TextureCache.h VertexManager.cpp VertexManager.h - VertexShaderCache.cpp - VertexShaderCache.h VideoBackend.h ) diff --git a/Source/Core/VideoBackends/D3D/D3D.vcxproj b/Source/Core/VideoBackends/D3D/D3D.vcxproj index 54549d2f00..5721e4d3ce 100644 --- a/Source/Core/VideoBackends/D3D/D3D.vcxproj +++ b/Source/Core/VideoBackends/D3D/D3D.vcxproj @@ -38,46 +38,26 @@ - - - - - - - - - - - - - - - - - - - - diff --git a/Source/Core/VideoBackends/D3D/D3D.vcxproj.filters b/Source/Core/VideoBackends/D3D/D3D.vcxproj.filters index d3f7f7b01d..4ca2cfa179 100644 --- a/Source/Core/VideoBackends/D3D/D3D.vcxproj.filters +++ b/Source/Core/VideoBackends/D3D/D3D.vcxproj.filters @@ -12,51 +12,21 @@ D3D - - D3D - - - D3D - - - D3D - - - D3D - D3D - - Render - - - Render - Render Render - - Render - - - Render - Render - - Render - 
Render - - Render - Render @@ -75,48 +45,18 @@ D3D - - D3D - - - D3D - - - D3D - - - D3D - D3D - - Render - - - Render - Render - - Render - - - Render - Render - - Render - Render - - Render - Render diff --git a/Source/Core/VideoBackends/D3D/D3DBase.cpp b/Source/Core/VideoBackends/D3D/D3DBase.cpp index 654cf17c95..9a86cd3c0c 100644 --- a/Source/Core/VideoBackends/D3D/D3DBase.cpp +++ b/Source/Core/VideoBackends/D3D/D3DBase.cpp @@ -12,7 +12,7 @@ #include "Core/ConfigManager.h" #include "VideoBackends/D3D/D3DBase.h" #include "VideoBackends/D3D/D3DState.h" -#include "VideoBackends/D3D/D3DTexture.h" +#include "VideoBackends/D3D/DXTexture.h" #include "VideoCommon/VideoConfig.h" namespace DX11 @@ -42,7 +42,8 @@ IDXGISwapChain1* swapchain = nullptr; static IDXGIFactory2* s_dxgi_factory; static ID3D11Debug* s_debug; static D3D_FEATURE_LEVEL s_featlevel; -static D3DTexture2D* s_backbuf; +static std::unique_ptr s_swap_chain_texture; +static std::unique_ptr s_swap_chain_framebuffer; static std::vector s_aa_modes; // supported AA modes of the current adapter @@ -244,18 +245,40 @@ static bool SupportsBPTCTextures(ID3D11Device* dev) return (bc7_support & D3D11_FORMAT_SUPPORT_TEXTURE2D) != 0; } -static bool CreateSwapChainTextures() +static bool CreateSwapChainFramebuffer() { - ID3D11Texture2D* buf; - HRESULT hr = swapchain->GetBuffer(0, IID_ID3D11Texture2D, (void**)&buf); + ID3D11Texture2D* texture; + HRESULT hr = swapchain->GetBuffer(0, IID_ID3D11Texture2D, (void**)&texture); CHECK(SUCCEEDED(hr), "GetBuffer for swap chain failed with HRESULT %08X", hr); if (FAILED(hr)) return false; - s_backbuf = new D3DTexture2D(buf, D3D11_BIND_RENDER_TARGET); - SAFE_RELEASE(buf); - SetDebugObjectName(s_backbuf->GetTex(), "backbuffer texture"); - SetDebugObjectName(s_backbuf->GetRTV(), "backbuffer render target view"); + D3D11_TEXTURE2D_DESC desc; + texture->GetDesc(&desc); + + s_swap_chain_texture = std::make_unique( + TextureConfig(desc.Width, desc.Height, desc.MipLevels, desc.ArraySize, 
desc.SampleDesc.Count, + AbstractTextureFormat::RGBA8, AbstractTextureFlag_RenderTarget), + texture, nullptr, nullptr); + + ID3D11RenderTargetView* rtv; + CD3D11_RENDER_TARGET_VIEW_DESC rtv_desc(texture, D3D11_RTV_DIMENSION_TEXTURE2DARRAY, desc.Format, + 0, 0, desc.ArraySize); + hr = device->CreateRenderTargetView(texture, &rtv_desc, &rtv); + CHECK(SUCCEEDED(hr), "Create render target view for swap chain"); + if (FAILED(hr)) + { + s_swap_chain_texture.reset(); + return false; + } + + SetDebugObjectName(texture, "backbuffer texture"); + SetDebugObjectName(rtv, "backbuffer render target view"); + s_swap_chain_framebuffer = std::make_unique( + s_swap_chain_texture.get(), nullptr, AbstractTextureFormat::RGBA8, + AbstractTextureFormat::Undefined, desc.Width, desc.Height, desc.ArraySize, + desc.SampleDesc.Count, rtv, nullptr, nullptr); + return true; } @@ -300,7 +323,7 @@ static bool CreateSwapChain(HWND hWnd) return false; } - if (!CreateSwapChainTextures()) + if (!CreateSwapChainFramebuffer()) { SAFE_RELEASE(swapchain); return false; @@ -451,7 +474,8 @@ void Close() // release all bound resources context->ClearState(); - SAFE_RELEASE(s_backbuf); + s_swap_chain_framebuffer.reset(); + s_swap_chain_texture.reset(); SAFE_RELEASE(swapchain); SAFE_DELETE(stateman); context->Flush(); // immediately destroy device objects @@ -527,9 +551,13 @@ const char* ComputeShaderVersionString() return "cs_4_0"; } -D3DTexture2D* GetBackBuffer() +DXTexture* GetSwapChainTexture() { - return s_backbuf; + return s_swap_chain_texture.get(); +} +DXFramebuffer* GetSwapChainFramebuffer() +{ + return s_swap_chain_framebuffer.get(); } bool BGRATexturesSupported() { @@ -568,7 +596,8 @@ u32 GetMaxTextureSize(D3D_FEATURE_LEVEL feature_level) void Reset(HWND new_wnd) { - SAFE_RELEASE(s_backbuf); + s_swap_chain_framebuffer.reset(); + s_swap_chain_texture.reset(); if (swapchain) { @@ -583,10 +612,11 @@ void Reset(HWND new_wnd) void ResizeSwapChain() { - SAFE_RELEASE(s_backbuf); + 
s_swap_chain_framebuffer.reset(); + s_swap_chain_texture.reset(); const UINT swap_chain_flags = AllowTearingSupported() ? DXGI_SWAP_CHAIN_FLAG_ALLOW_TEARING : 0; swapchain->ResizeBuffers(0, 0, 0, DXGI_FORMAT_R8G8B8A8_UNORM, swap_chain_flags); - if (!CreateSwapChainTextures()) + if (!CreateSwapChainFramebuffer()) { PanicAlert("Failed to get swap chain textures"); SAFE_RELEASE(swapchain); diff --git a/Source/Core/VideoBackends/D3D/D3DBase.h b/Source/Core/VideoBackends/D3D/D3DBase.h index f17ebd54eb..fedb2fe0c4 100644 --- a/Source/Core/VideoBackends/D3D/D3DBase.h +++ b/Source/Core/VideoBackends/D3D/D3DBase.h @@ -38,7 +38,8 @@ namespace DX11 PanicAlert("%s failed in %s at line %d: " Message, __func__, __FILE__, __LINE__, __VA_ARGS__); \ } -class D3DTexture2D; +class DXTexture; +class DXFramebuffer; namespace D3D { @@ -64,7 +65,8 @@ void Reset(HWND new_wnd); void ResizeSwapChain(); void Present(); -D3DTexture2D* GetBackBuffer(); +DXTexture* GetSwapChainTexture(); +DXFramebuffer* GetSwapChainFramebuffer(); const char* PixelShaderVersionString(); const char* GeometryShaderVersionString(); const char* VertexShaderVersionString(); diff --git a/Source/Core/VideoBackends/D3D/D3DBlob.cpp b/Source/Core/VideoBackends/D3D/D3DBlob.cpp deleted file mode 100644 index c0991825b6..0000000000 --- a/Source/Core/VideoBackends/D3D/D3DBlob.cpp +++ /dev/null @@ -1,60 +0,0 @@ -// Copyright 2010 Dolphin Emulator Project -// Licensed under GPLv2+ -// Refer to the license.txt file included. 
- -#include - -#include "VideoBackends/D3D/D3DBlob.h" - -namespace DX11 -{ -D3DBlob::D3DBlob(unsigned int blob_size, const u8* init_data) - : ref(1), size(blob_size), blob(nullptr) -{ - data = new u8[blob_size]; - if (init_data) - memcpy(data, init_data, size); -} - -D3DBlob::D3DBlob(ID3D10Blob* d3dblob) : ref(1) -{ - blob = d3dblob; - data = (u8*)blob->GetBufferPointer(); - size = (unsigned int)blob->GetBufferSize(); - d3dblob->AddRef(); -} - -D3DBlob::~D3DBlob() -{ - if (blob) - blob->Release(); - else - delete[] data; -} - -void D3DBlob::AddRef() -{ - ++ref; -} - -unsigned int D3DBlob::Release() -{ - if (--ref == 0) - { - delete this; - return 0; - } - return ref; -} - -unsigned int D3DBlob::Size() const -{ - return size; -} - -u8* D3DBlob::Data() -{ - return data; -} - -} // namespace DX11 diff --git a/Source/Core/VideoBackends/D3D/D3DBlob.h b/Source/Core/VideoBackends/D3D/D3DBlob.h deleted file mode 100644 index c332b0c517..0000000000 --- a/Source/Core/VideoBackends/D3D/D3DBlob.h +++ /dev/null @@ -1,39 +0,0 @@ -// Copyright 2008 Dolphin Emulator Project -// Licensed under GPLv2+ -// Refer to the license.txt file included. 
- -#pragma once - -#include "Common/CommonTypes.h" - -struct ID3D10Blob; - -namespace DX11 -{ -// use this class instead ID3D10Blob or ID3D11Blob whenever possible -class D3DBlob -{ -public: - // memory will be copied into an own buffer - D3DBlob(unsigned int blob_size, const u8* init_data = nullptr); - - // d3dblob will be AddRef'd - D3DBlob(ID3D10Blob* d3dblob); - - void AddRef(); - unsigned int Release(); - - unsigned int Size() const; - u8* Data(); - -private: - ~D3DBlob(); - - unsigned int ref; - unsigned int size; - - u8* data; - ID3D10Blob* blob; -}; - -} // namespace diff --git a/Source/Core/VideoBackends/D3D/D3DShader.cpp b/Source/Core/VideoBackends/D3D/D3DShader.cpp deleted file mode 100644 index f47b428904..0000000000 --- a/Source/Core/VideoBackends/D3D/D3DShader.cpp +++ /dev/null @@ -1,304 +0,0 @@ -// Copyright 2010 Dolphin Emulator Project -// Licensed under GPLv2+ -// Refer to the license.txt file included. - -#include -#include - -#include "Common/FileUtil.h" -#include "Common/Logging/Log.h" -#include "Common/MsgHandler.h" -#include "Common/StringUtil.h" -#include "VideoBackends/D3D/D3DBase.h" -#include "VideoBackends/D3D/D3DShader.h" -#include "VideoCommon/VideoConfig.h" - -namespace DX11 -{ -namespace D3D -{ -// bytecode->shader -ID3D11VertexShader* CreateVertexShaderFromByteCode(const void* bytecode, size_t len) -{ - ID3D11VertexShader* v_shader; - HRESULT hr = D3D::device->CreateVertexShader(bytecode, len, nullptr, &v_shader); - if (FAILED(hr)) - return nullptr; - - return v_shader; -} - -// code->bytecode -bool CompileVertexShader(const std::string& code, D3DBlob** blob) -{ - ID3D10Blob* shaderBuffer = nullptr; - ID3D10Blob* errorBuffer = nullptr; - -#if defined(_DEBUG) || defined(DEBUGFAST) - UINT flags = D3D10_SHADER_ENABLE_BACKWARDS_COMPATIBILITY | D3D10_SHADER_DEBUG; -#else - UINT flags = D3D10_SHADER_ENABLE_BACKWARDS_COMPATIBILITY | D3D10_SHADER_OPTIMIZATION_LEVEL3 | - D3D10_SHADER_SKIP_VALIDATION; -#endif - HRESULT hr = 
PD3DCompile(code.c_str(), code.length(), nullptr, nullptr, nullptr, "main", - D3D::VertexShaderVersionString(), flags, 0, &shaderBuffer, &errorBuffer); - if (errorBuffer) - { - INFO_LOG(VIDEO, "Vertex shader compiler messages:\n%s", - (const char*)errorBuffer->GetBufferPointer()); - } - - if (FAILED(hr)) - { - static int num_failures = 0; - std::string filename = StringFromFormat("%sbad_vs_%04i.txt", - File::GetUserPath(D_DUMP_IDX).c_str(), num_failures++); - std::ofstream file; - File::OpenFStream(file, filename, std::ios_base::out); - file << code; - file.close(); - - PanicAlert("Failed to compile vertex shader: %s\nDebug info (%s):\n%s", filename.c_str(), - D3D::VertexShaderVersionString(), (const char*)errorBuffer->GetBufferPointer()); - - *blob = nullptr; - errorBuffer->Release(); - } - else - { - *blob = new D3DBlob(shaderBuffer); - shaderBuffer->Release(); - } - return SUCCEEDED(hr); -} - -// bytecode->shader -ID3D11GeometryShader* CreateGeometryShaderFromByteCode(const void* bytecode, size_t len) -{ - ID3D11GeometryShader* g_shader; - HRESULT hr = D3D::device->CreateGeometryShader(bytecode, len, nullptr, &g_shader); - if (FAILED(hr)) - return nullptr; - - return g_shader; -} - -// code->bytecode -bool CompileGeometryShader(const std::string& code, D3DBlob** blob, - const D3D_SHADER_MACRO* pDefines) -{ - ID3D10Blob* shaderBuffer = nullptr; - ID3D10Blob* errorBuffer = nullptr; - -#if defined(_DEBUG) || defined(DEBUGFAST) - UINT flags = D3D10_SHADER_ENABLE_BACKWARDS_COMPATIBILITY | D3D10_SHADER_DEBUG; -#else - UINT flags = D3D10_SHADER_ENABLE_BACKWARDS_COMPATIBILITY | D3D10_SHADER_OPTIMIZATION_LEVEL3 | - D3D10_SHADER_SKIP_VALIDATION; -#endif - HRESULT hr = - PD3DCompile(code.c_str(), code.length(), nullptr, pDefines, nullptr, "main", - D3D::GeometryShaderVersionString(), flags, 0, &shaderBuffer, &errorBuffer); - - if (errorBuffer) - { - INFO_LOG(VIDEO, "Geometry shader compiler messages:\n%s", - (const char*)errorBuffer->GetBufferPointer()); - } - - if 
(FAILED(hr)) - { - static int num_failures = 0; - std::string filename = StringFromFormat("%sbad_gs_%04i.txt", - File::GetUserPath(D_DUMP_IDX).c_str(), num_failures++); - std::ofstream file; - File::OpenFStream(file, filename, std::ios_base::out); - file << code; - file.close(); - - PanicAlert("Failed to compile geometry shader: %s\nDebug info (%s):\n%s", filename.c_str(), - D3D::GeometryShaderVersionString(), (const char*)errorBuffer->GetBufferPointer()); - - *blob = nullptr; - errorBuffer->Release(); - } - else - { - *blob = new D3DBlob(shaderBuffer); - shaderBuffer->Release(); - } - return SUCCEEDED(hr); -} - -// bytecode->shader -ID3D11PixelShader* CreatePixelShaderFromByteCode(const void* bytecode, size_t len) -{ - ID3D11PixelShader* p_shader; - HRESULT hr = D3D::device->CreatePixelShader(bytecode, len, nullptr, &p_shader); - if (FAILED(hr)) - { - PanicAlert("CreatePixelShaderFromByteCode failed at %s %d\n", __FILE__, __LINE__); - p_shader = nullptr; - } - return p_shader; -} - -// code->bytecode -bool CompilePixelShader(const std::string& code, D3DBlob** blob, const D3D_SHADER_MACRO* pDefines) -{ - ID3D10Blob* shaderBuffer = nullptr; - ID3D10Blob* errorBuffer = nullptr; - -#if defined(_DEBUG) || defined(DEBUGFAST) - UINT flags = D3D10_SHADER_DEBUG; -#else - UINT flags = D3D10_SHADER_OPTIMIZATION_LEVEL3; -#endif - HRESULT hr = PD3DCompile(code.c_str(), code.length(), nullptr, pDefines, nullptr, "main", - D3D::PixelShaderVersionString(), flags, 0, &shaderBuffer, &errorBuffer); - - if (errorBuffer) - { - INFO_LOG(VIDEO, "Pixel shader compiler messages:\n%s", - (const char*)errorBuffer->GetBufferPointer()); - } - - if (FAILED(hr)) - { - static int num_failures = 0; - std::string filename = StringFromFormat("%sbad_ps_%04i.txt", - File::GetUserPath(D_DUMP_IDX).c_str(), num_failures++); - std::ofstream file; - File::OpenFStream(file, filename, std::ios_base::out); - file << code; - file.close(); - - PanicAlert("Failed to compile pixel shader: %s\nDebug info 
(%s):\n%s", filename.c_str(), - D3D::PixelShaderVersionString(), (const char*)errorBuffer->GetBufferPointer()); - - *blob = nullptr; - errorBuffer->Release(); - } - else - { - *blob = new D3DBlob(shaderBuffer); - shaderBuffer->Release(); - } - - return SUCCEEDED(hr); -} - -// bytecode->shader -ID3D11ComputeShader* CreateComputeShaderFromByteCode(const void* bytecode, size_t len) -{ - ID3D11ComputeShader* shader; - HRESULT hr = D3D::device->CreateComputeShader(bytecode, len, nullptr, &shader); - if (FAILED(hr)) - { - PanicAlert("CreateComputeShaderFromByteCode failed at %s %d\n", __FILE__, __LINE__); - return nullptr; - } - return shader; -} - -// code->bytecode -bool CompileComputeShader(const std::string& code, D3DBlob** blob, const D3D_SHADER_MACRO* pDefines) -{ - ID3D10Blob* shaderBuffer = nullptr; - ID3D10Blob* errorBuffer = nullptr; - -#if defined(_DEBUG) || defined(DEBUGFAST) - UINT flags = D3D10_SHADER_DEBUG; -#else - UINT flags = D3D10_SHADER_OPTIMIZATION_LEVEL3; -#endif - HRESULT hr = - PD3DCompile(code.c_str(), code.length(), nullptr, pDefines, nullptr, "main", - D3D::ComputeShaderVersionString(), flags, 0, &shaderBuffer, &errorBuffer); - - if (errorBuffer) - { - INFO_LOG(VIDEO, "Compute shader compiler messages:\n%s", - (const char*)errorBuffer->GetBufferPointer()); - } - - if (FAILED(hr)) - { - static int num_failures = 0; - std::string filename = StringFromFormat("%sbad_cs_%04i.txt", - File::GetUserPath(D_DUMP_IDX).c_str(), num_failures++); - std::ofstream file; - File::OpenFStream(file, filename, std::ios_base::out); - file << code; - file.close(); - - PanicAlert("Failed to compile compute shader: %s\nDebug info (%s):\n%s", filename.c_str(), - D3D::ComputeShaderVersionString(), - reinterpret_cast(errorBuffer->GetBufferPointer())); - - *blob = nullptr; - errorBuffer->Release(); - } - else - { - *blob = new D3DBlob(shaderBuffer); - shaderBuffer->Release(); - } - - return SUCCEEDED(hr); -} - -ID3D11VertexShader* CompileAndCreateVertexShader(const 
std::string& code) -{ - D3DBlob* blob = nullptr; - if (CompileVertexShader(code, &blob)) - { - ID3D11VertexShader* v_shader = CreateVertexShaderFromByteCode(blob); - blob->Release(); - return v_shader; - } - return nullptr; -} - -ID3D11GeometryShader* CompileAndCreateGeometryShader(const std::string& code, - const D3D_SHADER_MACRO* pDefines) -{ - D3DBlob* blob = nullptr; - if (CompileGeometryShader(code, &blob, pDefines)) - { - ID3D11GeometryShader* g_shader = CreateGeometryShaderFromByteCode(blob); - blob->Release(); - return g_shader; - } - return nullptr; -} - -ID3D11PixelShader* CompileAndCreatePixelShader(const std::string& code) -{ - D3DBlob* blob = nullptr; - CompilePixelShader(code, &blob); - if (blob) - { - ID3D11PixelShader* p_shader = CreatePixelShaderFromByteCode(blob); - blob->Release(); - return p_shader; - } - return nullptr; -} - -ID3D11ComputeShader* CompileAndCreateComputeShader(const std::string& code) -{ - D3DBlob* blob = nullptr; - CompileComputeShader(code, &blob); - if (blob) - { - ID3D11ComputeShader* shader = CreateComputeShaderFromByteCode(blob); - blob->Release(); - return shader; - } - return nullptr; -} - -} // namespace - -} // namespace DX11 diff --git a/Source/Core/VideoBackends/D3D/D3DShader.h b/Source/Core/VideoBackends/D3D/D3DShader.h deleted file mode 100644 index 4212847e63..0000000000 --- a/Source/Core/VideoBackends/D3D/D3DShader.h +++ /dev/null @@ -1,78 +0,0 @@ -// Copyright 2010 Dolphin Emulator Project -// Licensed under GPLv2+ -// Refer to the license.txt file included. 
- -#pragma once - -#include - -#include "VideoBackends/D3D/D3DBase.h" -#include "VideoBackends/D3D/D3DBlob.h" - -struct ID3D11PixelShader; -struct ID3D11VertexShader; - -namespace DX11 -{ -namespace D3D -{ -ID3D11VertexShader* CreateVertexShaderFromByteCode(const void* bytecode, size_t len); -ID3D11GeometryShader* CreateGeometryShaderFromByteCode(const void* bytecode, size_t len); -ID3D11PixelShader* CreatePixelShaderFromByteCode(const void* bytecode, size_t len); -ID3D11ComputeShader* CreateComputeShaderFromByteCode(const void* bytecode, size_t len); - -// The returned bytecode buffers should be Release()d. -bool CompileVertexShader(const std::string& code, D3DBlob** blob); -bool CompileGeometryShader(const std::string& code, D3DBlob** blob, - const D3D_SHADER_MACRO* pDefines = nullptr); -bool CompilePixelShader(const std::string& code, D3DBlob** blob, - const D3D_SHADER_MACRO* pDefines = nullptr); -bool CompileComputeShader(const std::string& code, D3DBlob** blob, - const D3D_SHADER_MACRO* pDefines = nullptr); - -// Utility functions -ID3D11VertexShader* CompileAndCreateVertexShader(const std::string& code); -ID3D11GeometryShader* CompileAndCreateGeometryShader(const std::string& code, - const D3D_SHADER_MACRO* pDefines = nullptr); -ID3D11PixelShader* CompileAndCreatePixelShader(const std::string& code); -ID3D11ComputeShader* CompileAndCreateComputeShader(const std::string& code); - -inline ID3D11VertexShader* CreateVertexShaderFromByteCode(D3DBlob* bytecode) -{ - return CreateVertexShaderFromByteCode(bytecode->Data(), bytecode->Size()); -} -inline ID3D11GeometryShader* CreateGeometryShaderFromByteCode(D3DBlob* bytecode) -{ - return CreateGeometryShaderFromByteCode(bytecode->Data(), bytecode->Size()); -} -inline ID3D11PixelShader* CreatePixelShaderFromByteCode(D3DBlob* bytecode) -{ - return CreatePixelShaderFromByteCode(bytecode->Data(), bytecode->Size()); -} -inline ID3D11ComputeShader* CreateComputeShaderFromByteCode(D3DBlob* bytecode) -{ - return 
CreateComputeShaderFromByteCode(bytecode->Data(), bytecode->Size()); -} - -inline ID3D11VertexShader* CompileAndCreateVertexShader(D3DBlob* code) -{ - return CompileAndCreateVertexShader(reinterpret_cast(code->Data())); -} - -inline ID3D11GeometryShader* -CompileAndCreateGeometryShader(D3DBlob* code, const D3D_SHADER_MACRO* pDefines = nullptr) -{ - return CompileAndCreateGeometryShader(reinterpret_cast(code->Data()), pDefines); -} - -inline ID3D11PixelShader* CompileAndCreatePixelShader(D3DBlob* code) -{ - return CompileAndCreatePixelShader(reinterpret_cast(code->Data())); -} -inline ID3D11ComputeShader* CompileAndCreateComputeShader(D3DBlob* code) -{ - return CompileAndCreateComputeShader(reinterpret_cast(code->Data())); -} -} - -} // namespace DX11 diff --git a/Source/Core/VideoBackends/D3D/D3DState.cpp b/Source/Core/VideoBackends/D3D/D3DState.cpp index 25742d2a65..72528e51d6 100644 --- a/Source/Core/VideoBackends/D3D/D3DState.cpp +++ b/Source/Core/VideoBackends/D3D/D3DState.cpp @@ -12,6 +12,7 @@ #include "VideoBackends/D3D/D3DBase.h" #include "VideoBackends/D3D/D3DState.h" +#include "VideoBackends/D3D/DXTexture.h" #include "VideoCommon/VideoConfig.h" namespace DX11 @@ -28,19 +29,31 @@ void StateManager::Apply() if (!m_dirtyFlags) return; - const int textureMaskShift = Common::LeastSignificantSetBit((u32)DirtyFlag_Texture0); - const int samplerMaskShift = Common::LeastSignificantSetBit((u32)DirtyFlag_Sampler0); + // Framebuffer changes must occur before texture changes, otherwise the D3D runtime messes with + // our bindings and sets them to null to prevent hazards. + if (m_dirtyFlags & DirtyFlag_Framebuffer) + { + if (g_ActiveConfig.backend_info.bSupportsBBox) + { + D3D::context->OMSetRenderTargetsAndUnorderedAccessViews( + m_pending.framebuffer->GetNumRTVs(), + m_pending.use_integer_rtv ? 
m_pending.framebuffer->GetIntegerRTVArray() : + m_pending.framebuffer->GetRTVArray(), + m_pending.framebuffer->GetDSV(), 2, 1, &m_pending.uav, nullptr); + } + else + { + D3D::context->OMSetRenderTargets(m_pending.framebuffer->GetNumRTVs(), + m_pending.use_integer_rtv ? + m_pending.framebuffer->GetIntegerRTVArray() : + m_pending.framebuffer->GetRTVArray(), + m_pending.framebuffer->GetDSV()); + } + m_current.framebuffer = m_pending.framebuffer; + m_current.uav = m_pending.uav; + m_current.use_integer_rtv = m_pending.use_integer_rtv; + } - u32 dirtyTextures = - (m_dirtyFlags & - (DirtyFlag_Texture0 | DirtyFlag_Texture1 | DirtyFlag_Texture2 | DirtyFlag_Texture3 | - DirtyFlag_Texture4 | DirtyFlag_Texture5 | DirtyFlag_Texture6 | DirtyFlag_Texture7)) >> - textureMaskShift; - u32 dirtySamplers = - (m_dirtyFlags & - (DirtyFlag_Sampler0 | DirtyFlag_Sampler1 | DirtyFlag_Sampler2 | DirtyFlag_Sampler3 | - DirtyFlag_Sampler4 | DirtyFlag_Sampler5 | DirtyFlag_Sampler6 | DirtyFlag_Sampler7)) >> - samplerMaskShift; u32 dirtyConstants = m_dirtyFlags & (DirtyFlag_PixelConstants | DirtyFlag_VertexConstants | DirtyFlag_GeometryConstants); u32 dirtyShaders = @@ -103,30 +116,6 @@ void StateManager::Apply() } } - while (dirtyTextures) - { - const int index = Common::LeastSignificantSetBit(dirtyTextures); - if (m_current.textures[index] != m_pending.textures[index]) - { - D3D::context->PSSetShaderResources(index, 1, &m_pending.textures[index]); - m_current.textures[index] = m_pending.textures[index]; - } - - dirtyTextures &= ~(1 << index); - } - - while (dirtySamplers) - { - const int index = Common::LeastSignificantSetBit(dirtySamplers); - if (m_current.samplers[index] != m_pending.samplers[index]) - { - D3D::context->PSSetSamplers(index, 1, &m_pending.samplers[index]); - m_current.samplers[index] = m_pending.samplers[index]; - } - - dirtySamplers &= ~(1 << index); - } - if (dirtyShaders) { if (m_current.pixelShader != m_pending.pixelShader) @@ -164,9 +153,51 @@ void StateManager::Apply() 
m_current.rasterizerState = m_pending.rasterizerState; } + ApplyTextures(); + m_dirtyFlags = 0; } +void StateManager::ApplyTextures() +{ + const int textureMaskShift = Common::LeastSignificantSetBit((u32)DirtyFlag_Texture0); + const int samplerMaskShift = Common::LeastSignificantSetBit((u32)DirtyFlag_Sampler0); + + u32 dirtyTextures = + (m_dirtyFlags & + (DirtyFlag_Texture0 | DirtyFlag_Texture1 | DirtyFlag_Texture2 | DirtyFlag_Texture3 | + DirtyFlag_Texture4 | DirtyFlag_Texture5 | DirtyFlag_Texture6 | DirtyFlag_Texture7)) >> + textureMaskShift; + u32 dirtySamplers = + (m_dirtyFlags & + (DirtyFlag_Sampler0 | DirtyFlag_Sampler1 | DirtyFlag_Sampler2 | DirtyFlag_Sampler3 | + DirtyFlag_Sampler4 | DirtyFlag_Sampler5 | DirtyFlag_Sampler6 | DirtyFlag_Sampler7)) >> + samplerMaskShift; + while (dirtyTextures) + { + const int index = Common::LeastSignificantSetBit(dirtyTextures); + if (m_current.textures[index] != m_pending.textures[index]) + { + D3D::context->PSSetShaderResources(index, 1, &m_pending.textures[index]); + m_current.textures[index] = m_pending.textures[index]; + } + + dirtyTextures &= ~(1 << index); + } + + while (dirtySamplers) + { + const int index = Common::LeastSignificantSetBit(dirtySamplers); + if (m_current.samplers[index] != m_pending.samplers[index]) + { + D3D::context->PSSetSamplers(index, 1, &m_pending.samplers[index]); + m_current.samplers[index] = m_pending.samplers[index]; + } + + dirtySamplers &= ~(1 << index); + } +} + u32 StateManager::UnsetTexture(ID3D11ShaderResourceView* srv) { u32 mask = 0; @@ -193,6 +224,78 @@ void StateManager::SetTextureByMask(u32 textureSlotMask, ID3D11ShaderResourceVie } } +void StateManager::SetComputeUAV(ID3D11UnorderedAccessView* uav) +{ + if (m_compute_image == uav) + return; + + m_compute_image = uav; + D3D::context->CSSetUnorderedAccessViews(0, 1, &uav, nullptr); +} + +void StateManager::SetComputeShader(ID3D11ComputeShader* shader) +{ + if (m_compute_shader == shader) + return; + + m_compute_shader = shader; + 
D3D::context->CSSetShader(shader, nullptr, 0); +} + +void StateManager::SyncComputeBindings() +{ + if (m_compute_constants != m_pending.pixelConstants[0]) + { + m_compute_constants = m_pending.pixelConstants[0]; + D3D::context->CSSetConstantBuffers(0, 1, &m_compute_constants); + } + + for (u32 start = 0; start < static_cast(m_compute_textures.size());) + { + if (m_compute_textures[start] == m_pending.textures[start]) + { + start++; + continue; + } + + m_compute_textures[start] = m_pending.textures[start]; + + u32 end = start + 1; + for (; end < static_cast(m_compute_textures.size()); end++) + { + if (m_compute_textures[end] == m_pending.textures[end]) + break; + + m_compute_textures[end] = m_pending.textures[end]; + } + + D3D::context->CSSetShaderResources(start, end - start, &m_compute_textures[start]); + start = end; + } + + for (u32 start = 0; start < static_cast(m_compute_samplers.size());) + { + if (m_compute_samplers[start] == m_pending.samplers[start]) + { + start++; + continue; + } + + m_compute_samplers[start] = m_pending.samplers[start]; + + u32 end = start + 1; + for (; end < static_cast(m_compute_samplers.size()); end++) + { + if (m_compute_samplers[end] == m_pending.samplers[end]) + break; + + m_compute_samplers[end] = m_pending.samplers[end]; + } + + D3D::context->CSSetSamplers(start, end - start, &m_compute_samplers[start]); + start = end; + } +} } // namespace D3D StateCache::~StateCache() diff --git a/Source/Core/VideoBackends/D3D/D3DState.h b/Source/Core/VideoBackends/D3D/D3DState.h index db48e7f18b..66542c6250 100644 --- a/Source/Core/VideoBackends/D3D/D3DState.h +++ b/Source/Core/VideoBackends/D3D/D3DState.h @@ -16,6 +16,8 @@ namespace DX11 { +class DXFramebuffer; + class StateCache { public: @@ -112,14 +114,6 @@ public: m_pending.geometryConstants = buffer; } - void SetComputeConstants(ID3D11Buffer* buffer) - { - if (m_current.computeConstants != buffer) - m_dirtyFlags |= DirtyFlag_ComputeConstants; - - m_pending.computeConstants = buffer; - } 
- void SetVertexBuffer(ID3D11Buffer* buffer, u32 stride, u32 offset) { if (m_current.vertexBuffer != buffer || m_current.vertexBufferStride != stride || @@ -187,22 +181,45 @@ public: m_pending.geometryShader = shader; } - void SetComputeShader(ID3D11ComputeShader* shader) + void SetFramebuffer(DXFramebuffer* fb) { - if (m_current.computeShader != shader) - m_dirtyFlags |= DirtyFlag_ComputeShader; + if (m_current.framebuffer != fb) + m_dirtyFlags |= DirtyFlag_Framebuffer; - m_pending.computeShader = shader; + m_pending.framebuffer = fb; + } + + void SetOMUAV(ID3D11UnorderedAccessView* uav) + { + if (m_current.uav != uav) + m_dirtyFlags |= DirtyFlag_Framebuffer; + + m_pending.uav = uav; + } + + void SetIntegerRTV(bool enable) + { + if (m_current.use_integer_rtv != enable) + m_dirtyFlags |= DirtyFlag_Framebuffer; + + m_pending.use_integer_rtv = enable; } // removes currently set texture from all slots, returns mask of previously bound slots u32 UnsetTexture(ID3D11ShaderResourceView* srv); void SetTextureByMask(u32 textureSlotMask, ID3D11ShaderResourceView* srv); + void ApplyTextures(); // call this immediately before any drawing operation or to explicitly apply pending resource // state changes void Apply(); + // Binds constant buffers/textures/samplers to the compute shader stage. + // We don't track these explicitly because it's not often-used. 
+ void SetComputeUAV(ID3D11UnorderedAccessView* uav); + void SetComputeShader(ID3D11ComputeShader* shader); + void SyncComputeBindings(); + private: enum DirtyFlags { @@ -227,20 +244,19 @@ private: DirtyFlag_PixelConstants = 1 << 16, DirtyFlag_VertexConstants = 1 << 17, DirtyFlag_GeometryConstants = 1 << 18, - DirtyFlag_ComputeConstants = 1 << 19, - DirtyFlag_VertexBuffer = 1 << 20, - DirtyFlag_IndexBuffer = 1 << 21, + DirtyFlag_VertexBuffer = 1 << 19, + DirtyFlag_IndexBuffer = 1 << 20, - DirtyFlag_PixelShader = 1 << 22, - DirtyFlag_VertexShader = 1 << 23, - DirtyFlag_GeometryShader = 1 << 24, - DirtyFlag_ComputeShader = 1 << 25, + DirtyFlag_PixelShader = 1 << 21, + DirtyFlag_VertexShader = 1 << 22, + DirtyFlag_GeometryShader = 1 << 23, - DirtyFlag_InputAssembler = 1 << 26, - DirtyFlag_BlendState = 1 << 27, - DirtyFlag_DepthState = 1 << 28, - DirtyFlag_RasterizerState = 1 << 29, + DirtyFlag_InputAssembler = 1 << 24, + DirtyFlag_BlendState = 1 << 25, + DirtyFlag_DepthState = 1 << 26, + DirtyFlag_RasterizerState = 1 << 27, + DirtyFlag_Framebuffer = 1 << 28 }; u32 m_dirtyFlags = ~0u; @@ -252,7 +268,6 @@ private: std::array pixelConstants; ID3D11Buffer* vertexConstants; ID3D11Buffer* geometryConstants; - ID3D11Buffer* computeConstants; ID3D11Buffer* vertexBuffer; ID3D11Buffer* indexBuffer; u32 vertexBufferStride; @@ -262,18 +277,27 @@ private: ID3D11PixelShader* pixelShader; ID3D11VertexShader* vertexShader; ID3D11GeometryShader* geometryShader; - ID3D11ComputeShader* computeShader; ID3D11BlendState* blendState; ID3D11DepthStencilState* depthState; ID3D11RasterizerState* rasterizerState; + DXFramebuffer* framebuffer; + ID3D11UnorderedAccessView* uav; + bool use_integer_rtv; }; Resources m_pending = {}; Resources m_current = {}; + + // Compute resources are synced with the graphics resources when we need them. 
+ ID3D11Buffer* m_compute_constants = nullptr; + std::array m_compute_textures{}; + std::array m_compute_samplers{}; + ID3D11UnorderedAccessView* m_compute_image = nullptr; + ID3D11ComputeShader* m_compute_shader = nullptr; }; extern StateManager* stateman; -} // namespace +} // namespace D3D } // namespace DX11 diff --git a/Source/Core/VideoBackends/D3D/D3DTexture.cpp b/Source/Core/VideoBackends/D3D/D3DTexture.cpp deleted file mode 100644 index 6ca2247a47..0000000000 --- a/Source/Core/VideoBackends/D3D/D3DTexture.cpp +++ /dev/null @@ -1,105 +0,0 @@ -// Copyright 2010 Dolphin Emulator Project -// Licensed under GPLv2+ -// Refer to the license.txt file included. - -#include - -#include "Common/CommonTypes.h" -#include "Common/MsgHandler.h" -#include "VideoBackends/D3D/D3DBase.h" -#include "VideoBackends/D3D/D3DTexture.h" - -namespace DX11 -{ -D3DTexture2D* D3DTexture2D::Create(unsigned int width, unsigned int height, D3D11_BIND_FLAG bind, - D3D11_USAGE usage, DXGI_FORMAT fmt, unsigned int levels, - unsigned int slices, D3D11_SUBRESOURCE_DATA* data) -{ - ID3D11Texture2D* pTexture = nullptr; - HRESULT hr; - - D3D11_CPU_ACCESS_FLAG cpuflags; - if (usage == D3D11_USAGE_STAGING) - cpuflags = (D3D11_CPU_ACCESS_FLAG)((int)D3D11_CPU_ACCESS_WRITE | (int)D3D11_CPU_ACCESS_READ); - else if (usage == D3D11_USAGE_DYNAMIC) - cpuflags = D3D11_CPU_ACCESS_WRITE; - else - cpuflags = (D3D11_CPU_ACCESS_FLAG)0; - D3D11_TEXTURE2D_DESC texdesc = - CD3D11_TEXTURE2D_DESC(fmt, width, height, slices, levels, bind, usage, cpuflags); - hr = D3D::device->CreateTexture2D(&texdesc, data, &pTexture); - if (FAILED(hr)) - { - PanicAlert("Failed to create texture at %s, line %d: hr=%#x\n", __FILE__, __LINE__, hr); - return nullptr; - } - - D3DTexture2D* ret = new D3DTexture2D(pTexture, bind); - SAFE_RELEASE(pTexture); - return ret; -} - -void D3DTexture2D::AddRef() -{ - ++ref; -} - -UINT D3DTexture2D::Release() -{ - --ref; - if (ref == 0) - { - delete this; - return 0; - } - return ref; -} - 
-ID3D11Texture2D*& D3DTexture2D::GetTex() -{ - return tex; -} -ID3D11ShaderResourceView*& D3DTexture2D::GetSRV() -{ - return srv; -} -ID3D11RenderTargetView*& D3DTexture2D::GetRTV() -{ - return rtv; -} -ID3D11DepthStencilView*& D3DTexture2D::GetDSV() -{ - return dsv; -} - -D3DTexture2D::D3DTexture2D(ID3D11Texture2D* texptr, D3D11_BIND_FLAG bind, DXGI_FORMAT srv_format, - DXGI_FORMAT dsv_format, DXGI_FORMAT rtv_format, bool multisampled) - : tex{texptr} -{ - D3D11_SRV_DIMENSION srv_dim = - multisampled ? D3D11_SRV_DIMENSION_TEXTURE2DMSARRAY : D3D11_SRV_DIMENSION_TEXTURE2DARRAY; - D3D11_DSV_DIMENSION dsv_dim = - multisampled ? D3D11_DSV_DIMENSION_TEXTURE2DMSARRAY : D3D11_DSV_DIMENSION_TEXTURE2DARRAY; - D3D11_RTV_DIMENSION rtv_dim = - multisampled ? D3D11_RTV_DIMENSION_TEXTURE2DMSARRAY : D3D11_RTV_DIMENSION_TEXTURE2DARRAY; - D3D11_SHADER_RESOURCE_VIEW_DESC srv_desc = CD3D11_SHADER_RESOURCE_VIEW_DESC(srv_dim, srv_format); - D3D11_DEPTH_STENCIL_VIEW_DESC dsv_desc = CD3D11_DEPTH_STENCIL_VIEW_DESC(dsv_dim, dsv_format); - D3D11_RENDER_TARGET_VIEW_DESC rtv_desc = CD3D11_RENDER_TARGET_VIEW_DESC(rtv_dim, rtv_format); - if (bind & D3D11_BIND_SHADER_RESOURCE) - D3D::device->CreateShaderResourceView(tex, &srv_desc, &srv); - if (bind & D3D11_BIND_RENDER_TARGET) - D3D::device->CreateRenderTargetView(tex, &rtv_desc, &rtv); - if (bind & D3D11_BIND_DEPTH_STENCIL) - D3D::device->CreateDepthStencilView(tex, &dsv_desc, &dsv); - tex->AddRef(); -} - -D3DTexture2D::~D3DTexture2D() -{ - SAFE_RELEASE(srv); - SAFE_RELEASE(rtv); - SAFE_RELEASE(dsv); - SAFE_RELEASE(tex); -} - -} // namespace DX11 diff --git a/Source/Core/VideoBackends/D3D/D3DTexture.h b/Source/Core/VideoBackends/D3D/D3DTexture.h deleted file mode 100644 index 609ba877c8..0000000000 --- a/Source/Core/VideoBackends/D3D/D3DTexture.h +++ /dev/null @@ -1,48 +0,0 @@ -// Copyright 2010 Dolphin Emulator Project -// Licensed under GPLv2+ -// Refer to the license.txt file included. 
- -#pragma once - -#include -#include "Common/CommonTypes.h" - -namespace DX11 -{ -class D3DTexture2D -{ -public: - // there are two ways to create a D3DTexture2D object: - // either create an ID3D11Texture2D object, pass it to the constructor and specify what views - // to create - // or let the texture automatically be created by D3DTexture2D::Create - - D3DTexture2D(ID3D11Texture2D* texptr, D3D11_BIND_FLAG bind, - DXGI_FORMAT srv_format = DXGI_FORMAT_UNKNOWN, - DXGI_FORMAT dsv_format = DXGI_FORMAT_UNKNOWN, - DXGI_FORMAT rtv_format = DXGI_FORMAT_UNKNOWN, bool multisampled = false); - static D3DTexture2D* Create(unsigned int width, unsigned int height, D3D11_BIND_FLAG bind, - D3D11_USAGE usage, DXGI_FORMAT, unsigned int levels = 1, - unsigned int slices = 1, D3D11_SUBRESOURCE_DATA* data = nullptr); - - // reference counting, use AddRef() when creating a new reference and Release() it when you don't - // need it anymore - void AddRef(); - UINT Release(); - - ID3D11Texture2D*& GetTex(); - ID3D11ShaderResourceView*& GetSRV(); - ID3D11RenderTargetView*& GetRTV(); - ID3D11DepthStencilView*& GetDSV(); - -private: - ~D3DTexture2D(); - - ID3D11Texture2D* tex; - ID3D11ShaderResourceView* srv = nullptr; - ID3D11RenderTargetView* rtv = nullptr; - ID3D11DepthStencilView* dsv = nullptr; - UINT ref = 1; -}; - -} // namespace DX11 diff --git a/Source/Core/VideoBackends/D3D/D3DUtil.cpp b/Source/Core/VideoBackends/D3D/D3DUtil.cpp deleted file mode 100644 index 0ad02c1ca4..0000000000 --- a/Source/Core/VideoBackends/D3D/D3DUtil.cpp +++ /dev/null @@ -1,407 +0,0 @@ -// Copyright 2010 Dolphin Emulator Project -// Licensed under GPLv2+ -// Refer to the license.txt file included. 
- -#include "VideoBackends/D3D/D3DUtil.h" - -#include -#include -#include - -#include "Common/Align.h" -#include "Common/Assert.h" -#include "Common/Logging/Log.h" -#include "VideoBackends/D3D/D3DBase.h" -#include "VideoBackends/D3D/D3DShader.h" -#include "VideoBackends/D3D/D3DState.h" -#include "VideoBackends/D3D/GeometryShaderCache.h" -#include "VideoBackends/D3D/PixelShaderCache.h" -#include "VideoBackends/D3D/VertexShaderCache.h" -#include "VideoCommon/VideoBackendBase.h" - -namespace DX11 -{ -namespace D3D -{ -// Ring buffer class, shared between the draw* functions -class UtilVertexBuffer -{ -public: - UtilVertexBuffer(unsigned int size) : max_size(size) - { - D3D11_BUFFER_DESC desc = CD3D11_BUFFER_DESC(max_size, D3D11_BIND_VERTEX_BUFFER, - D3D11_USAGE_DYNAMIC, D3D11_CPU_ACCESS_WRITE); - device->CreateBuffer(&desc, nullptr, &buf); - } - ~UtilVertexBuffer() { buf->Release(); } - int GetSize() const { return max_size; } - // returns vertex offset to the new data - int AppendData(void* data, unsigned int size, unsigned int vertex_size) - { - D3D11_MAPPED_SUBRESOURCE map; - if (offset + size >= max_size) - { - // wrap buffer around and notify observers - offset = 0; - context->Map(buf, 0, D3D11_MAP_WRITE_DISCARD, 0, &map); - - for (bool* observer : observers) - *observer = true; - } - else - { - context->Map(buf, 0, D3D11_MAP_WRITE_NO_OVERWRITE, 0, &map); - } - offset = Common::AlignUp(offset, vertex_size); - memcpy((u8*)map.pData + offset, data, size); - context->Unmap(buf, 0); - - offset += size; - return (offset - size) / vertex_size; - } - - int BeginAppendData(void** write_ptr, unsigned int size, unsigned int vertex_size) - { - DEBUG_ASSERT(size < max_size); - - D3D11_MAPPED_SUBRESOURCE map; - unsigned int aligned_offset = Common::AlignUp(offset, vertex_size); - if (aligned_offset + size > max_size) - { - // wrap buffer around and notify observers - offset = 0; - aligned_offset = 0; - context->Map(buf, 0, D3D11_MAP_WRITE_DISCARD, 0, &map); - - for (bool* 
observer : observers) - *observer = true; - } - else - { - context->Map(buf, 0, D3D11_MAP_WRITE_NO_OVERWRITE, 0, &map); - } - - *write_ptr = reinterpret_cast(map.pData) + aligned_offset; - offset = aligned_offset + size; - return aligned_offset / vertex_size; - } - - void EndAppendData() { context->Unmap(buf, 0); } - void AddWrapObserver(bool* observer) { observers.push_back(observer); } - inline ID3D11Buffer*& GetBuffer() { return buf; } - -private: - ID3D11Buffer* buf = nullptr; - unsigned int offset = 0; - unsigned int max_size; - - std::list observers; -}; - -static UtilVertexBuffer* util_vbuf = nullptr; -static ID3D11SamplerState* linear_copy_sampler = nullptr; -static ID3D11SamplerState* point_copy_sampler = nullptr; - -struct STQVertex -{ - float x, y, z, u, v, w; -}; -struct ClearVertex -{ - float x, y, z; - u32 col; -}; -struct ColVertex -{ - float x, y, z; - u32 col; -}; - -struct TexQuadData -{ - float u1, v1, u2, v2, S, G; -}; -static TexQuadData tex_quad_data; - -struct DrawQuadData -{ - float x1, y1, x2, y2, z; - u32 col; -}; -static DrawQuadData draw_quad_data; - -struct ClearQuadData -{ - u32 col; - float z; -}; -static ClearQuadData clear_quad_data; - -// ring buffer offsets -static int stq_offset, cq_offset, clearq_offset; - -// observer variables for ring buffer wraps -static bool stq_observer, cq_observer, clearq_observer; - -void InitUtils() -{ - util_vbuf = new UtilVertexBuffer(65536); // 64KiB - - float border[4] = {0.f, 0.f, 0.f, 0.f}; - D3D11_SAMPLER_DESC samDesc = CD3D11_SAMPLER_DESC( - D3D11_FILTER_MIN_MAG_MIP_POINT, D3D11_TEXTURE_ADDRESS_BORDER, D3D11_TEXTURE_ADDRESS_BORDER, - D3D11_TEXTURE_ADDRESS_BORDER, 0.f, 1, D3D11_COMPARISON_ALWAYS, border, 0.f, 0.f); - HRESULT hr = D3D::device->CreateSamplerState(&samDesc, &point_copy_sampler); - if (FAILED(hr)) - PanicAlert("Failed to create sampler state at %s %d\n", __FILE__, __LINE__); - else - SetDebugObjectName(point_copy_sampler, "point copy sampler state"); - - samDesc = 
CD3D11_SAMPLER_DESC(D3D11_FILTER_MIN_MAG_MIP_LINEAR, D3D11_TEXTURE_ADDRESS_BORDER, - D3D11_TEXTURE_ADDRESS_BORDER, D3D11_TEXTURE_ADDRESS_BORDER, 0.f, 1, - D3D11_COMPARISON_ALWAYS, border, 0.f, 0.f); - hr = D3D::device->CreateSamplerState(&samDesc, &linear_copy_sampler); - if (FAILED(hr)) - PanicAlert("Failed to create sampler state at %s %d\n", __FILE__, __LINE__); - else - SetDebugObjectName(linear_copy_sampler, "linear copy sampler state"); - - // cached data used to avoid unnecessarily reloading the vertex buffers - memset(&tex_quad_data, 0, sizeof(tex_quad_data)); - memset(&draw_quad_data, 0, sizeof(draw_quad_data)); - memset(&clear_quad_data, 0, sizeof(clear_quad_data)); - - // make sure to properly load the vertex data whenever the corresponding functions get called the - // first time - stq_observer = cq_observer = clearq_observer = true; - util_vbuf->AddWrapObserver(&stq_observer); - util_vbuf->AddWrapObserver(&cq_observer); - util_vbuf->AddWrapObserver(&clearq_observer); -} - -void ShutdownUtils() -{ - SAFE_RELEASE(point_copy_sampler); - SAFE_RELEASE(linear_copy_sampler); - SAFE_DELETE(util_vbuf); -} - -void SetPointCopySampler() -{ - D3D::stateman->SetSampler(0, point_copy_sampler); -} - -void SetLinearCopySampler() -{ - D3D::stateman->SetSampler(0, linear_copy_sampler); -} - -void drawShadedTexQuad(ID3D11ShaderResourceView* texture, const D3D11_RECT* rSource, - int SourceWidth, int SourceHeight, ID3D11PixelShader* PShader, - ID3D11VertexShader* VShader, ID3D11InputLayout* layout, - ID3D11GeometryShader* GShader, u32 slice) -{ - float sw = 1.0f / (float)SourceWidth; - float sh = 1.0f / (float)SourceHeight; - float u1 = ((float)rSource->left) * sw; - float u2 = ((float)rSource->right) * sw; - float v1 = ((float)rSource->top) * sh; - float v2 = ((float)rSource->bottom) * sh; - float S = (float)slice; - - STQVertex coords[4] = { - {-1.0f, 1.0f, 0.0f, u1, v1, S}, - {1.0f, 1.0f, 0.0f, u2, v1, S}, - {-1.0f, -1.0f, 0.0f, u1, v2, S}, - {1.0f, -1.0f, 0.0f, u2, v2, 
S}, - }; - - // only upload the data to VRAM if it changed - if (stq_observer || tex_quad_data.u1 != u1 || tex_quad_data.v1 != v1 || tex_quad_data.u2 != u2 || - tex_quad_data.v2 != v2 || tex_quad_data.S != S) - { - stq_offset = util_vbuf->AppendData(coords, sizeof(coords), sizeof(STQVertex)); - stq_observer = false; - - tex_quad_data.u1 = u1; - tex_quad_data.v1 = v1; - tex_quad_data.u2 = u2; - tex_quad_data.v2 = v2; - tex_quad_data.S = S; - } - UINT stride = sizeof(STQVertex); - UINT offset = 0; - - D3D::stateman->SetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP); - D3D::stateman->SetInputLayout(layout); - D3D::stateman->SetVertexBuffer(util_vbuf->GetBuffer(), stride, offset); - D3D::stateman->SetPixelShader(PShader); - D3D::stateman->SetTexture(0, texture); - D3D::stateman->SetVertexShader(VShader); - D3D::stateman->SetGeometryShader(GShader); - - D3D::stateman->Apply(); - D3D::context->Draw(4, stq_offset); - - D3D::stateman->SetTexture(0, nullptr); // immediately unbind the texture - D3D::stateman->Apply(); - - D3D::stateman->SetGeometryShader(nullptr); -} - -// Fills a certain area of the current render target with the specified color -// destination coordinates normalized to (-1;1) -void drawColorQuad(u32 Color, float z, float x1, float y1, float x2, float y2) -{ - ColVertex coords[4] = { - {x1, y1, z, Color}, - {x2, y1, z, Color}, - {x1, y2, z, Color}, - {x2, y2, z, Color}, - }; - - if (cq_observer || draw_quad_data.x1 != x1 || draw_quad_data.y1 != y1 || - draw_quad_data.x2 != x2 || draw_quad_data.y2 != y2 || draw_quad_data.col != Color || - draw_quad_data.z != z) - { - cq_offset = util_vbuf->AppendData(coords, sizeof(coords), sizeof(ColVertex)); - cq_observer = false; - - draw_quad_data.x1 = x1; - draw_quad_data.y1 = y1; - draw_quad_data.x2 = x2; - draw_quad_data.y2 = y2; - draw_quad_data.col = Color; - draw_quad_data.z = z; - } - - stateman->SetVertexShader(VertexShaderCache::GetClearVertexShader()); - 
stateman->SetGeometryShader(GeometryShaderCache::GetClearGeometryShader()); - stateman->SetPixelShader(PixelShaderCache::GetClearProgram()); - stateman->SetInputLayout(VertexShaderCache::GetClearInputLayout()); - - UINT stride = sizeof(ColVertex); - UINT offset = 0; - stateman->SetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP); - stateman->SetVertexBuffer(util_vbuf->GetBuffer(), stride, offset); - - stateman->Apply(); - context->Draw(4, cq_offset); - - stateman->SetGeometryShader(nullptr); -} - -void drawClearQuad(u32 Color, float z) -{ - ClearVertex coords[4] = { - {-1.0f, 1.0f, z, Color}, - {1.0f, 1.0f, z, Color}, - {-1.0f, -1.0f, z, Color}, - {1.0f, -1.0f, z, Color}, - }; - - if (clearq_observer || clear_quad_data.col != Color || clear_quad_data.z != z) - { - clearq_offset = util_vbuf->AppendData(coords, sizeof(coords), sizeof(ClearVertex)); - clearq_observer = false; - - clear_quad_data.col = Color; - clear_quad_data.z = z; - } - - stateman->SetVertexShader(VertexShaderCache::GetClearVertexShader()); - stateman->SetGeometryShader(GeometryShaderCache::GetClearGeometryShader()); - stateman->SetPixelShader(PixelShaderCache::GetClearProgram()); - stateman->SetInputLayout(VertexShaderCache::GetClearInputLayout()); - - UINT stride = sizeof(ClearVertex); - UINT offset = 0; - stateman->SetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP); - stateman->SetVertexBuffer(util_vbuf->GetBuffer(), stride, offset); - - stateman->Apply(); - context->Draw(4, clearq_offset); - - stateman->SetGeometryShader(nullptr); -} - -static void InitColVertex(ColVertex* vert, float x, float y, float z, u32 col) -{ - vert->x = x; - vert->y = y; - vert->z = z; - vert->col = col; -} - -void DrawEFBPokeQuads(EFBAccessType type, const EfbPokeData* points, size_t num_points) -{ - const size_t COL_QUAD_SIZE = sizeof(ColVertex) * 6; - - // Set common state - stateman->SetVertexShader(VertexShaderCache::GetClearVertexShader()); - 
stateman->SetGeometryShader(GeometryShaderCache::GetClearGeometryShader()); - stateman->SetPixelShader(PixelShaderCache::GetClearProgram()); - stateman->SetInputLayout(VertexShaderCache::GetClearInputLayout()); - stateman->SetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST); - stateman->SetVertexBuffer(util_vbuf->GetBuffer(), sizeof(ColVertex), 0); - stateman->Apply(); - - // if drawing a large number of points at once, this will have to be split into multiple passes. - size_t points_per_draw = util_vbuf->GetSize() / COL_QUAD_SIZE; - size_t current_point_index = 0; - while (current_point_index < num_points) - { - size_t points_to_draw = std::min(num_points - current_point_index, points_per_draw); - size_t required_bytes = COL_QUAD_SIZE * points_to_draw; - - // map and reserve enough buffer space for this draw - void* buffer_ptr; - int base_vertex_index = - util_vbuf->BeginAppendData(&buffer_ptr, (int)required_bytes, sizeof(ColVertex)); - - // generate quads for each efb point - ColVertex* base_vertex_ptr = reinterpret_cast(buffer_ptr); - for (size_t i = 0; i < points_to_draw; i++) - { - // generate quad from the single point (clip-space coordinates) - const EfbPokeData* point = &points[current_point_index]; - float x1 = float(point->x) * 2.0f / EFB_WIDTH - 1.0f; - float y1 = -float(point->y) * 2.0f / EFB_HEIGHT + 1.0f; - float x2 = float(point->x + 1) * 2.0f / EFB_WIDTH - 1.0f; - float y2 = -float(point->y + 1) * 2.0f / EFB_HEIGHT + 1.0f; - float z = 0.0f; - u32 col = 0; - - if (type == EFBAccessType::PokeZ) - { - z = 1.0f - static_cast(point->data & 0xFFFFFF) / 16777216.0f; - } - else - { - col = ((point->data & 0xFF00FF00) | ((point->data >> 16) & 0xFF) | - ((point->data << 16) & 0xFF0000)); - } - - current_point_index++; - - // quad -> triangles - ColVertex* vertex = &base_vertex_ptr[i * 6]; - InitColVertex(&vertex[0], x1, y1, z, col); - InitColVertex(&vertex[1], x2, y1, z, col); - InitColVertex(&vertex[2], x1, y2, z, col); - InitColVertex(&vertex[3], 
x1, y2, z, col); - InitColVertex(&vertex[4], x2, y1, z, col); - InitColVertex(&vertex[5], x2, y2, z, col); - } - - // unmap the util buffer, and issue the draw - util_vbuf->EndAppendData(); - context->Draw(6 * (UINT)points_to_draw, base_vertex_index); - } - - stateman->SetGeometryShader(GeometryShaderCache::GetClearGeometryShader()); -} - -} // namespace D3D - -} // namespace DX11 diff --git a/Source/Core/VideoBackends/D3D/D3DUtil.h b/Source/Core/VideoBackends/D3D/D3DUtil.h deleted file mode 100644 index cfbe2e4889..0000000000 --- a/Source/Core/VideoBackends/D3D/D3DUtil.h +++ /dev/null @@ -1,32 +0,0 @@ -// Copyright 2010 Dolphin Emulator Project -// Licensed under GPLv2+ -// Refer to the license.txt file included. - -#pragma once - -#include -#include - -#include "Common/CommonTypes.h" -#include "VideoCommon/RenderBase.h" - -namespace DX11 -{ -namespace D3D -{ -void InitUtils(); -void ShutdownUtils(); - -void SetPointCopySampler(); -void SetLinearCopySampler(); - -void drawShadedTexQuad(ID3D11ShaderResourceView* texture, const D3D11_RECT* rSource, - int SourceWidth, int SourceHeight, ID3D11PixelShader* PShader, - ID3D11VertexShader* VShader, ID3D11InputLayout* layout, - ID3D11GeometryShader* GShader = nullptr, u32 slice = 0); -void drawClearQuad(u32 Color, float z); -void drawColorQuad(u32 Color, float z, float x1, float y1, float x2, float y2); - -void DrawEFBPokeQuads(EFBAccessType type, const EfbPokeData* points, size_t num_points); -} -} diff --git a/Source/Core/VideoBackends/D3D/DXPipeline.cpp b/Source/Core/VideoBackends/D3D/DXPipeline.cpp index df827e1fba..9c694d20cd 100644 --- a/Source/Core/VideoBackends/D3D/DXPipeline.cpp +++ b/Source/Core/VideoBackends/D3D/DXPipeline.cpp @@ -22,11 +22,11 @@ DXPipeline::DXPipeline(ID3D11InputLayout* input_layout, ID3D11VertexShader* vert ID3D11GeometryShader* geometry_shader, ID3D11PixelShader* pixel_shader, ID3D11RasterizerState* rasterizer_state, ID3D11DepthStencilState* depth_state, ID3D11BlendState* blend_state, - 
D3D11_PRIMITIVE_TOPOLOGY primitive_topology) + D3D11_PRIMITIVE_TOPOLOGY primitive_topology, bool use_logic_op) : m_input_layout(input_layout), m_vertex_shader(vertex_shader), m_geometry_shader(geometry_shader), m_pixel_shader(pixel_shader), m_rasterizer_state(rasterizer_state), m_depth_state(depth_state), m_blend_state(blend_state), - m_primitive_topology(primitive_topology) + m_primitive_topology(primitive_topology), m_use_logic_op(use_logic_op) { if (m_input_layout) m_input_layout->AddRef(); @@ -84,13 +84,16 @@ std::unique_ptr DXPipeline::Create(const AbstractPipelineConfig& con ASSERT(vertex_shader != nullptr && pixel_shader != nullptr); ID3D11InputLayout* input_layout = - const_cast(static_cast(config.vertex_format)) - ->GetInputLayout(vertex_shader->GetByteCode()); + config.vertex_format ? + const_cast(static_cast(config.vertex_format)) + ->GetInputLayout(vertex_shader->GetByteCode().data(), + vertex_shader->GetByteCode().size()) : + nullptr; - return std::make_unique(input_layout, vertex_shader->GetD3DVertexShader(), - geometry_shader ? geometry_shader->GetD3DGeometryShader() : - nullptr, - pixel_shader->GetD3DPixelShader(), rasterizer_state, - depth_state, blend_state, primitive_topology); + return std::make_unique( + input_layout, vertex_shader->GetD3DVertexShader(), + geometry_shader ? 
geometry_shader->GetD3DGeometryShader() : nullptr, + pixel_shader->GetD3DPixelShader(), rasterizer_state, depth_state, blend_state, + primitive_topology, config.blending_state.logicopenable); } } // namespace DX11 diff --git a/Source/Core/VideoBackends/D3D/DXPipeline.h b/Source/Core/VideoBackends/D3D/DXPipeline.h index f7b02a7d47..3a03bf948d 100644 --- a/Source/Core/VideoBackends/D3D/DXPipeline.h +++ b/Source/Core/VideoBackends/D3D/DXPipeline.h @@ -16,7 +16,8 @@ public: DXPipeline(ID3D11InputLayout* input_layout, ID3D11VertexShader* vertex_shader, ID3D11GeometryShader* geometry_shader, ID3D11PixelShader* pixel_shader, ID3D11RasterizerState* rasterizer_state, ID3D11DepthStencilState* depth_state, - ID3D11BlendState* blend_state, D3D11_PRIMITIVE_TOPOLOGY primitive_topology); + ID3D11BlendState* blend_state, D3D11_PRIMITIVE_TOPOLOGY primitive_topology, + bool use_logic_op); ~DXPipeline() override; ID3D11InputLayout* GetInputLayout() const { return m_input_layout; } @@ -28,6 +29,8 @@ public: ID3D11BlendState* GetBlendState() const { return m_blend_state; } D3D11_PRIMITIVE_TOPOLOGY GetPrimitiveTopology() const { return m_primitive_topology; } bool HasGeometryShader() const { return m_geometry_shader != nullptr; } + bool UseLogicOp() const { return m_use_logic_op; } + static std::unique_ptr Create(const AbstractPipelineConfig& config); private: @@ -39,5 +42,6 @@ private: ID3D11DepthStencilState* m_depth_state; ID3D11BlendState* m_blend_state; D3D11_PRIMITIVE_TOPOLOGY m_primitive_topology; + bool m_use_logic_op; }; } // namespace DX11 diff --git a/Source/Core/VideoBackends/D3D/DXShader.cpp b/Source/Core/VideoBackends/D3D/DXShader.cpp index 588cd2b631..18f9aa0d3d 100644 --- a/Source/Core/VideoBackends/D3D/DXShader.cpp +++ b/Source/Core/VideoBackends/D3D/DXShader.cpp @@ -2,43 +2,28 @@ // Licensed under GPLv2+ // Refer to the license.txt file included. 
+#include + #include "Common/Assert.h" +#include "Common/FileUtil.h" +#include "Common/Logging/Log.h" +#include "Common/MsgHandler.h" +#include "Common/StringUtil.h" #include "VideoBackends/D3D/D3DBase.h" -#include "VideoBackends/D3D/D3DShader.h" #include "VideoBackends/D3D/DXShader.h" +#include "VideoCommon/VideoConfig.h" namespace DX11 { -DXShader::DXShader(D3DBlob* bytecode, ID3D11VertexShader* vs) - : AbstractShader(ShaderStage::Vertex), m_bytecode(bytecode), m_shader(vs) -{ -} - -DXShader::DXShader(D3DBlob* bytecode, ID3D11GeometryShader* gs) - : AbstractShader(ShaderStage::Geometry), m_bytecode(bytecode), m_shader(gs) -{ -} - -DXShader::DXShader(D3DBlob* bytecode, ID3D11PixelShader* ps) - : AbstractShader(ShaderStage::Pixel), m_bytecode(bytecode), m_shader(ps) -{ -} - -DXShader::DXShader(D3DBlob* bytecode, ID3D11ComputeShader* cs) - : AbstractShader(ShaderStage::Compute), m_bytecode(bytecode), m_shader(cs) +DXShader::DXShader(ShaderStage stage, BinaryData bytecode, ID3D11DeviceChild* shader) + : AbstractShader(stage), m_bytecode(bytecode), m_shader(shader) { } DXShader::~DXShader() { m_shader->Release(); - m_bytecode->Release(); -} - -D3DBlob* DXShader::GetByteCode() const -{ - return m_bytecode; } ID3D11VertexShader* DXShader::GetD3DVertexShader() const @@ -67,48 +52,62 @@ ID3D11ComputeShader* DXShader::GetD3DComputeShader() const bool DXShader::HasBinary() const { - ASSERT(m_bytecode); return true; } AbstractShader::BinaryData DXShader::GetBinary() const { - return BinaryData(m_bytecode->Data(), m_bytecode->Data() + m_bytecode->Size()); + return m_bytecode; } -std::unique_ptr DXShader::CreateFromBlob(ShaderStage stage, D3DBlob* bytecode) +std::unique_ptr DXShader::CreateFromBytecode(ShaderStage stage, BinaryData bytecode) { switch (stage) { case ShaderStage::Vertex: { - ID3D11VertexShader* vs = D3D::CreateVertexShaderFromByteCode(bytecode); - if (vs) - return std::make_unique(bytecode, vs); + ID3D11VertexShader* vs; + HRESULT hr = 
D3D::device->CreateVertexShader(bytecode.data(), bytecode.size(), nullptr, &vs); + CHECK(SUCCEEDED(hr), "Create vertex shader"); + if (FAILED(hr)) + return nullptr; + + return std::make_unique(ShaderStage::Vertex, std::move(bytecode), vs); } - break; case ShaderStage::Geometry: { - ID3D11GeometryShader* gs = D3D::CreateGeometryShaderFromByteCode(bytecode); - if (gs) - return std::make_unique(bytecode, gs); + ID3D11GeometryShader* gs; + HRESULT hr = D3D::device->CreateGeometryShader(bytecode.data(), bytecode.size(), nullptr, &gs); + CHECK(SUCCEEDED(hr), "Create geometry shader"); + if (FAILED(hr)) + return nullptr; + + return std::make_unique(ShaderStage::Geometry, std::move(bytecode), gs); } break; case ShaderStage::Pixel: { - ID3D11PixelShader* ps = D3D::CreatePixelShaderFromByteCode(bytecode); - if (ps) - return std::make_unique(bytecode, ps); + ID3D11PixelShader* ps; + HRESULT hr = D3D::device->CreatePixelShader(bytecode.data(), bytecode.size(), nullptr, &ps); + CHECK(SUCCEEDED(hr), "Create pixel shader"); + if (FAILED(hr)) + return nullptr; + + return std::make_unique(ShaderStage::Pixel, std::move(bytecode), ps); } break; case ShaderStage::Compute: { - ID3D11ComputeShader* cs = D3D::CreateComputeShaderFromByteCode(bytecode); - if (cs) - return std::make_unique(bytecode, cs); + ID3D11ComputeShader* cs; + HRESULT hr = D3D::device->CreateComputeShader(bytecode.data(), bytecode.size(), nullptr, &cs); + CHECK(SUCCEEDED(hr), "Create compute shader"); + if (FAILED(hr)) + return nullptr; + + return std::make_unique(ShaderStage::Compute, std::move(bytecode), cs); } break; @@ -119,65 +118,85 @@ std::unique_ptr DXShader::CreateFromBlob(ShaderStage stage, D3DBlob* b return nullptr; } -std::unique_ptr DXShader::CreateFromSource(ShaderStage stage, const char* source, - size_t length) +static const char* GetCompileTarget(ShaderStage stage) { - D3DBlob* bytecode; switch (stage) { case ShaderStage::Vertex: - { - if (!D3D::CompileVertexShader(std::string(source, length), 
&bytecode)) - return nullptr; - } - break; - + return D3D::VertexShaderVersionString(); case ShaderStage::Geometry: - { - if (!D3D::CompileGeometryShader(std::string(source, length), &bytecode)) - return nullptr; - } - break; - + return D3D::GeometryShaderVersionString(); case ShaderStage::Pixel: - { - if (!D3D::CompilePixelShader(std::string(source, length), &bytecode)) - return nullptr; - } - break; - + return D3D::PixelShaderVersionString(); case ShaderStage::Compute: - { - if (!D3D::CompileComputeShader(std::string(source, length), &bytecode)) - return nullptr; - } - + return D3D::ComputeShaderVersionString(); default: - return nullptr; + return ""; } +} - std::unique_ptr shader = CreateFromBlob(stage, bytecode); - if (!shader) +bool DXShader::CompileShader(BinaryData* out_bytecode, ShaderStage stage, const char* source, + size_t length) +{ + static constexpr D3D_SHADER_MACRO macros[] = {{"API_D3D", "1"}, {nullptr, nullptr}}; + const UINT flags = g_ActiveConfig.bEnableValidationLayer ? 
+ (D3DCOMPILE_DEBUG | D3DCOMPILE_SKIP_OPTIMIZATION) : + (D3DCOMPILE_OPTIMIZATION_LEVEL3 | D3DCOMPILE_SKIP_VALIDATION); + const char* target = GetCompileTarget(stage); + + ID3DBlob* code = nullptr; + ID3DBlob* errors = nullptr; + HRESULT hr = PD3DCompile(source, length, nullptr, macros, nullptr, "main", target, flags, 0, + &code, &errors); + if (FAILED(hr)) { - bytecode->Release(); - return nullptr; + static int num_failures = 0; + std::string filename = StringFromFormat( + "%sbad_%s_%04i.txt", File::GetUserPath(D_DUMP_IDX).c_str(), target, num_failures++); + std::ofstream file; + File::OpenFStream(file, filename, std::ios_base::out); + file.write(source, length); + file << "\n"; + file.write(static_cast(errors->GetBufferPointer()), errors->GetBufferSize()); + file.close(); + + PanicAlert("Failed to compile %s:\nDebug info (%s):\n%s", filename.c_str(), target, + static_cast(errors->GetBufferPointer())); + errors->Release(); + return false; } - return shader; + if (errors && errors->GetBufferSize() > 0) + { + WARN_LOG(VIDEO, "%s compilation succeeded with warnings:\n%s", target, + static_cast(errors->GetBufferPointer())); + } + SAFE_RELEASE(errors); + + out_bytecode->resize(code->GetBufferSize()); + std::memcpy(out_bytecode->data(), code->GetBufferPointer(), code->GetBufferSize()); + code->Release(); + return true; +} + +std::unique_ptr DXShader::CreateFromSource(ShaderStage stage, const char* source, + size_t length) +{ + BinaryData bytecode; + if (!CompileShader(&bytecode, stage, source, length)) + return nullptr; + + return CreateFromBytecode(stage, std::move(bytecode)); } std::unique_ptr DXShader::CreateFromBinary(ShaderStage stage, const void* data, size_t length) { - D3DBlob* bytecode = new D3DBlob(static_cast(length), static_cast(data)); - std::unique_ptr shader = CreateFromBlob(stage, bytecode); - if (!shader) - { - bytecode->Release(); + if (length == 0) return nullptr; - } - return shader; + BinaryData bytecode(length); + std::memcpy(bytecode.data(), data, 
length); + return CreateFromBytecode(stage, std::move(bytecode)); } - } // namespace DX11 diff --git a/Source/Core/VideoBackends/D3D/DXShader.h b/Source/Core/VideoBackends/D3D/DXShader.h index d39e638dac..a86a993b80 100644 --- a/Source/Core/VideoBackends/D3D/DXShader.h +++ b/Source/Core/VideoBackends/D3D/DXShader.h @@ -3,13 +3,9 @@ // Refer to the license.txt file included. #pragma once - -#include #include #include -#include "Common/CommonTypes.h" -#include "VideoBackends/D3D/D3DBlob.h" #include "VideoCommon/AbstractShader.h" namespace DX11 @@ -17,14 +13,11 @@ namespace DX11 class DXShader final : public AbstractShader { public: - // Note: vs/gs/ps/cs references are transferred. - DXShader(D3DBlob* bytecode, ID3D11VertexShader* vs); - DXShader(D3DBlob* bytecode, ID3D11GeometryShader* gs); - DXShader(D3DBlob* bytecode, ID3D11PixelShader* ps); - DXShader(D3DBlob* bytecode, ID3D11ComputeShader* cs); + DXShader(ShaderStage stage, BinaryData bytecode, ID3D11DeviceChild* shader); ~DXShader() override; - D3DBlob* GetByteCode() const; + const BinaryData& GetByteCode() const { return m_bytecode; } + ID3D11VertexShader* GetD3DVertexShader() const; ID3D11GeometryShader* GetD3DGeometryShader() const; ID3D11PixelShader* GetD3DPixelShader() const; @@ -33,8 +26,11 @@ public: bool HasBinary() const override; BinaryData GetBinary() const override; - // Creates a new shader object. The reference to bytecode is not transfered upon failure. - static std::unique_ptr CreateFromBlob(ShaderStage stage, D3DBlob* bytecode); + // Creates a new shader object. 
+ static std::unique_ptr CreateFromBytecode(ShaderStage stage, BinaryData bytecode); + static bool CompileShader(BinaryData* out_bytecode, ShaderStage stage, const char* source, + size_t length); + static std::unique_ptr CreateFromBinary(ShaderStage stage, const void* data, size_t length); static std::unique_ptr CreateFromSource(ShaderStage stage, const char* source, @@ -42,7 +38,7 @@ public: private: ID3D11DeviceChild* m_shader; - D3DBlob* m_bytecode; + BinaryData m_bytecode; }; } // namespace DX11 diff --git a/Source/Core/VideoBackends/D3D/DXTexture.cpp b/Source/Core/VideoBackends/D3D/DXTexture.cpp index 127a922146..dc1c04991a 100644 --- a/Source/Core/VideoBackends/D3D/DXTexture.cpp +++ b/Source/Core/VideoBackends/D3D/DXTexture.cpp @@ -9,25 +9,47 @@ #include "Common/CommonTypes.h" #include "Common/Logging/Log.h" -#include "VideoBackends/D3D/D3DBase.h" #include "VideoBackends/D3D/D3DState.h" -#include "VideoBackends/D3D/D3DTexture.h" -#include "VideoBackends/D3D/D3DUtil.h" #include "VideoBackends/D3D/DXTexture.h" -#include "VideoBackends/D3D/FramebufferManager.h" -#include "VideoBackends/D3D/GeometryShaderCache.h" -#include "VideoBackends/D3D/PixelShaderCache.h" -#include "VideoBackends/D3D/TextureCache.h" -#include "VideoBackends/D3D/VertexShaderCache.h" - -#include "VideoCommon/ImageWrite.h" -#include "VideoCommon/TextureConfig.h" namespace DX11 { namespace { -DXGI_FORMAT GetDXGIFormatForHostFormat(AbstractTextureFormat format) +DXGI_FORMAT GetDXGIFormatForHostFormat(AbstractTextureFormat format, bool typeless) +{ + switch (format) + { + case AbstractTextureFormat::DXT1: + return DXGI_FORMAT_BC1_UNORM; + case AbstractTextureFormat::DXT3: + return DXGI_FORMAT_BC2_UNORM; + case AbstractTextureFormat::DXT5: + return DXGI_FORMAT_BC3_UNORM; + case AbstractTextureFormat::BPTC: + return DXGI_FORMAT_BC7_UNORM; + case AbstractTextureFormat::RGBA8: + return typeless ? 
DXGI_FORMAT_R8G8B8A8_TYPELESS : DXGI_FORMAT_R8G8B8A8_UNORM; + case AbstractTextureFormat::BGRA8: + return typeless ? DXGI_FORMAT_B8G8R8A8_TYPELESS : DXGI_FORMAT_B8G8R8A8_UNORM; + case AbstractTextureFormat::R16: + return typeless ? DXGI_FORMAT_R16_TYPELESS : DXGI_FORMAT_R16_UNORM; + case AbstractTextureFormat::R32F: + return typeless ? DXGI_FORMAT_R32_TYPELESS : DXGI_FORMAT_R32_FLOAT; + case AbstractTextureFormat::D16: + return DXGI_FORMAT_R16_TYPELESS; + case AbstractTextureFormat::D24_S8: + return DXGI_FORMAT_R24G8_TYPELESS; + case AbstractTextureFormat::D32F: + return DXGI_FORMAT_R32_TYPELESS; + case AbstractTextureFormat::D32F_S8: + return DXGI_FORMAT_R32_FLOAT_X8X24_TYPELESS; + default: + PanicAlert("Unhandled texture format."); + return DXGI_FORMAT_R8G8B8A8_UNORM; + } +} +DXGI_FORMAT GetSRVFormatForHostFormat(AbstractTextureFormat format) { switch (format) { @@ -47,23 +69,6 @@ DXGI_FORMAT GetDXGIFormatForHostFormat(AbstractTextureFormat format) return DXGI_FORMAT_R16_UNORM; case AbstractTextureFormat::R32F: return DXGI_FORMAT_R32_FLOAT; - case AbstractTextureFormat::D16: - return DXGI_FORMAT_R16_TYPELESS; - case AbstractTextureFormat::D24_S8: - return DXGI_FORMAT_R24G8_TYPELESS; - case AbstractTextureFormat::D32F: - return DXGI_FORMAT_R32_TYPELESS; - case AbstractTextureFormat::D32F_S8: - return DXGI_FORMAT_R32_FLOAT_X8X24_TYPELESS; - default: - PanicAlert("Unhandled texture format."); - return DXGI_FORMAT_R8G8B8A8_UNORM; - } -} -DXGI_FORMAT GetSRVFormatForHostFormat(AbstractTextureFormat format) -{ - switch (format) - { case AbstractTextureFormat::D16: return DXGI_FORMAT_R16_UNORM; case AbstractTextureFormat::D24_S8: @@ -73,7 +78,25 @@ DXGI_FORMAT GetSRVFormatForHostFormat(AbstractTextureFormat format) case AbstractTextureFormat::D32F_S8: return DXGI_FORMAT_R32_FLOAT_X8X24_TYPELESS; default: - return GetDXGIFormatForHostFormat(format); + PanicAlert("Unhandled SRV format"); + return DXGI_FORMAT_UNKNOWN; + } +} +DXGI_FORMAT 
GetRTVFormatForHostFormat(AbstractTextureFormat format, bool integer) +{ + switch (format) + { + case AbstractTextureFormat::RGBA8: + return integer ? DXGI_FORMAT_R8G8B8A8_UINT : DXGI_FORMAT_R8G8B8A8_UNORM; + case AbstractTextureFormat::BGRA8: + return DXGI_FORMAT_B8G8R8A8_UNORM; + case AbstractTextureFormat::R16: + return integer ? DXGI_FORMAT_R16_UINT : DXGI_FORMAT_R16_UNORM; + case AbstractTextureFormat::R32F: + return DXGI_FORMAT_R32_FLOAT; + default: + PanicAlert("Unhandled RTV format"); + return DXGI_FORMAT_UNKNOWN; } } DXGI_FORMAT GetDSVFormatForHostFormat(AbstractTextureFormat format) @@ -89,55 +112,87 @@ DXGI_FORMAT GetDSVFormatForHostFormat(AbstractTextureFormat format) case AbstractTextureFormat::D32F_S8: return DXGI_FORMAT_D32_FLOAT_S8X24_UINT; default: - return GetDXGIFormatForHostFormat(format); + PanicAlert("Unhandled DSV format"); + return DXGI_FORMAT_UNKNOWN; } } } // Anonymous namespace -DXTexture::DXTexture(const TextureConfig& tex_config) : AbstractTexture(tex_config) +DXTexture::DXTexture(const TextureConfig& tex_config, ID3D11Texture2D* d3d_texture, + ID3D11ShaderResourceView* d3d_srv, ID3D11UnorderedAccessView* d3d_uav) + : AbstractTexture(tex_config), m_d3d_texture(d3d_texture), m_d3d_srv(d3d_srv), + m_d3d_uav(d3d_uav) { - DXGI_FORMAT tex_format = GetDXGIFormatForHostFormat(m_config.format); - DXGI_FORMAT srv_format = GetSRVFormatForHostFormat(m_config.format); - DXGI_FORMAT rtv_format = DXGI_FORMAT_UNKNOWN; - DXGI_FORMAT dsv_format = DXGI_FORMAT_UNKNOWN; - UINT bind_flags = D3D11_BIND_SHADER_RESOURCE; - if (tex_config.rendertarget) - { - if (IsDepthFormat(tex_config.format)) - { - bind_flags |= D3D11_BIND_DEPTH_STENCIL; - dsv_format = GetDSVFormatForHostFormat(m_config.format); - } - else - { - bind_flags |= D3D11_BIND_RENDER_TARGET; - rtv_format = tex_format; - } - } - - CD3D11_TEXTURE2D_DESC texdesc(tex_format, tex_config.width, tex_config.height, tex_config.layers, - tex_config.levels, bind_flags, D3D11_USAGE_DEFAULT, 0, - 
tex_config.samples, 0, 0); - - ID3D11Texture2D* pTexture; - HRESULT hr = D3D::device->CreateTexture2D(&texdesc, nullptr, &pTexture); - CHECK(SUCCEEDED(hr), "Create backing DXTexture"); - - m_texture = new D3DTexture2D(pTexture, static_cast(bind_flags), srv_format, - dsv_format, rtv_format, tex_config.samples > 1); - - SAFE_RELEASE(pTexture); } DXTexture::~DXTexture() { - g_renderer->UnbindTexture(this); - m_texture->Release(); + if (m_d3d_uav) + m_d3d_uav->Release(); + + if (m_d3d_srv) + { + if (D3D::stateman->UnsetTexture(m_d3d_srv) != 0) + D3D::stateman->ApplyTextures(); + + m_d3d_srv->Release(); + } + m_d3d_texture->Release(); } -D3DTexture2D* DXTexture::GetRawTexIdentifier() const +std::unique_ptr DXTexture::Create(const TextureConfig& config) { - return m_texture; + // Use typeless to create the texture when it's a render target, so we can alias it with an + // integer format (for EFB). + const DXGI_FORMAT tex_format = GetDXGIFormatForHostFormat(config.format, config.IsRenderTarget()); + const DXGI_FORMAT srv_format = GetSRVFormatForHostFormat(config.format); + UINT bindflags = D3D11_BIND_SHADER_RESOURCE; + if (config.IsRenderTarget()) + bindflags |= IsDepthFormat(config.format) ? D3D11_BIND_DEPTH_STENCIL : D3D11_BIND_RENDER_TARGET; + if (config.IsComputeImage()) + bindflags |= D3D11_BIND_UNORDERED_ACCESS; + + CD3D11_TEXTURE2D_DESC desc(tex_format, config.width, config.height, config.layers, config.levels, + bindflags, D3D11_USAGE_DEFAULT, 0, config.samples, 0, 0); + ID3D11Texture2D* d3d_texture; + HRESULT hr = D3D::device->CreateTexture2D(&desc, nullptr, &d3d_texture); + if (FAILED(hr)) + { + PanicAlert("Failed to create %ux%ux%u D3D backing texture", config.width, config.height, + config.layers); + return nullptr; + } + + ID3D11ShaderResourceView* d3d_srv; + const CD3D11_SHADER_RESOURCE_VIEW_DESC srv_desc(d3d_texture, + config.IsMultisampled() ? 
+ D3D11_SRV_DIMENSION_TEXTURE2DMSARRAY : + D3D11_SRV_DIMENSION_TEXTURE2DARRAY, + srv_format, 0, config.levels, 0, config.layers); + hr = D3D::device->CreateShaderResourceView(d3d_texture, &srv_desc, &d3d_srv); + if (FAILED(hr)) + { + PanicAlert("Failed to create %ux%ux%u D3D SRV", config.width, config.height, config.layers); + d3d_texture->Release(); + return nullptr; + } + + ID3D11UnorderedAccessView* d3d_uav = nullptr; + if (config.IsComputeImage()) + { + const CD3D11_UNORDERED_ACCESS_VIEW_DESC uav_desc( + d3d_texture, D3D11_UAV_DIMENSION_TEXTURE2DARRAY, srv_format, 0, 0, config.layers); + hr = D3D::device->CreateUnorderedAccessView(d3d_texture, &uav_desc, &d3d_uav); + if (FAILED(hr)) + { + PanicAlert("Failed to create %ux%ux%u D3D UAV", config.width, config.height, config.layers); + d3d_srv->Release(); + d3d_texture->Release(); + return nullptr; + } + } + + return std::make_unique(config, d3d_texture, d3d_srv, d3d_uav); } void DXTexture::CopyRectangleFromTexture(const AbstractTexture* src, @@ -158,42 +213,11 @@ void DXTexture::CopyRectangleFromTexture(const AbstractTexture* src, src_box.back = 1; D3D::context->CopySubresourceRegion( - m_texture->GetTex(), D3D11CalcSubresource(dst_level, dst_layer, m_config.levels), - dst_rect.left, dst_rect.top, 0, srcentry->m_texture->GetTex(), + m_d3d_texture, D3D11CalcSubresource(dst_level, dst_layer, m_config.levels), dst_rect.left, + dst_rect.top, 0, srcentry->m_d3d_texture, D3D11CalcSubresource(src_level, src_layer, srcentry->m_config.levels), &src_box); } -void DXTexture::ScaleRectangleFromTexture(const AbstractTexture* source, - const MathUtil::Rectangle& srcrect, - const MathUtil::Rectangle& dstrect) -{ - const DXTexture* srcentry = static_cast(source); - ASSERT(m_config.rendertarget); - - g_renderer->ResetAPIState(); // reset any game specific settings - - const D3D11_VIEWPORT vp = CD3D11_VIEWPORT(float(dstrect.left), float(dstrect.top), - float(dstrect.GetWidth()), float(dstrect.GetHeight())); - - 
D3D::stateman->UnsetTexture(m_texture->GetSRV()); - D3D::stateman->Apply(); - - D3D::context->OMSetRenderTargets(1, &m_texture->GetRTV(), nullptr); - D3D::context->RSSetViewports(1, &vp); - D3D::SetLinearCopySampler(); - D3D11_RECT srcRC; - srcRC.left = srcrect.left; - srcRC.right = srcrect.right; - srcRC.top = srcrect.top; - srcRC.bottom = srcrect.bottom; - D3D::drawShadedTexQuad( - srcentry->m_texture->GetSRV(), &srcRC, srcentry->m_config.width, srcentry->m_config.height, - PixelShaderCache::GetColorCopyProgram(false), VertexShaderCache::GetSimpleVertexShader(), - VertexShaderCache::GetSimpleInputLayout(), GeometryShaderCache::GetCopyGeometryShader(), 0); - - g_renderer->RestoreAPIState(); -} - void DXTexture::ResolveFromTexture(const AbstractTexture* src, const MathUtil::Rectangle& rect, u32 layer, u32 level) { @@ -204,16 +228,16 @@ void DXTexture::ResolveFromTexture(const AbstractTexture* src, const MathUtil::R rect.top + rect.GetHeight() <= static_cast(srcentry->m_config.height)); D3D::context->ResolveSubresource( - m_texture->GetTex(), D3D11CalcSubresource(level, layer, m_config.levels), - srcentry->m_texture->GetTex(), D3D11CalcSubresource(level, layer, srcentry->m_config.levels), - GetDXGIFormatForHostFormat(m_config.format)); + m_d3d_texture, D3D11CalcSubresource(level, layer, m_config.levels), srcentry->m_d3d_texture, + D3D11CalcSubresource(level, layer, srcentry->m_config.levels), + GetDXGIFormatForHostFormat(m_config.format, false)); } void DXTexture::Load(u32 level, u32 width, u32 height, u32 row_length, const u8* buffer, size_t buffer_size) { size_t src_pitch = CalculateStrideForFormat(m_config.format, row_length); - D3D::context->UpdateSubresource(m_texture->GetTex(), level, nullptr, buffer, + D3D::context->UpdateSubresource(m_d3d_texture, level, nullptr, buffer, static_cast(src_pitch), 0); } @@ -251,8 +275,8 @@ std::unique_ptr DXStagingTexture::Create(StagingTextureType ty cpu_flags = D3D11_CPU_ACCESS_READ | D3D11_CPU_ACCESS_WRITE; } - 
CD3D11_TEXTURE2D_DESC desc(GetDXGIFormatForHostFormat(config.format), config.width, config.height, - 1, 1, 0, usage, cpu_flags); + CD3D11_TEXTURE2D_DESC desc(GetDXGIFormatForHostFormat(config.format, false), config.width, + config.height, 1, 1, 0, usage, cpu_flags); ID3D11Texture2D* texture; HRESULT hr = D3D::device->CreateTexture2D(&desc, nullptr, &texture); @@ -267,22 +291,33 @@ void DXStagingTexture::CopyFromTexture(const AbstractTexture* src, const MathUtil::Rectangle& src_rect, u32 src_layer, u32 src_level, const MathUtil::Rectangle& dst_rect) { - ASSERT(m_type == StagingTextureType::Readback); + ASSERT(m_type == StagingTextureType::Readback || m_type == StagingTextureType::Mutable); ASSERT(src_rect.GetWidth() == dst_rect.GetWidth() && src_rect.GetHeight() == dst_rect.GetHeight()); - ASSERT(src_rect.left >= 0 && static_cast(src_rect.right) <= src->GetConfig().width && - src_rect.top >= 0 && static_cast(src_rect.bottom) <= src->GetConfig().height); + ASSERT(src_rect.left >= 0 && static_cast(src_rect.right) <= src->GetWidth() && + src_rect.top >= 0 && static_cast(src_rect.bottom) <= src->GetHeight()); ASSERT(dst_rect.left >= 0 && static_cast(dst_rect.right) <= m_config.width && dst_rect.top >= 0 && static_cast(dst_rect.bottom) <= m_config.height); if (IsMapped()) DXStagingTexture::Unmap(); - CD3D11_BOX src_box(src_rect.left, src_rect.top, 0, src_rect.right, src_rect.bottom, 1); - D3D::context->CopySubresourceRegion( - m_tex, 0, static_cast(dst_rect.left), static_cast(dst_rect.top), 0, - static_cast(src)->GetRawTexIdentifier()->GetTex(), - D3D11CalcSubresource(src_level, src_layer, src->GetConfig().levels), &src_box); + if (static_cast(src_rect.GetWidth()) == GetWidth() && + static_cast(src_rect.GetHeight()) == GetHeight()) + { + // Copy whole resource, needed for depth textures. 
+ D3D::context->CopySubresourceRegion( + m_tex, 0, 0, 0, 0, static_cast(src)->GetD3DTexture(), + D3D11CalcSubresource(src_level, src_layer, src->GetLevels()), nullptr); + } + else + { + CD3D11_BOX src_box(src_rect.left, src_rect.top, 0, src_rect.right, src_rect.bottom, 1); + D3D::context->CopySubresourceRegion( + m_tex, 0, static_cast(dst_rect.left), static_cast(dst_rect.top), 0, + static_cast(src)->GetD3DTexture(), + D3D11CalcSubresource(src_level, src_layer, src->GetLevels()), &src_box); + } m_needs_flush = true; } @@ -294,19 +329,29 @@ void DXStagingTexture::CopyToTexture(const MathUtil::Rectangle& src_rect, A ASSERT(m_type == StagingTextureType::Upload); ASSERT(src_rect.GetWidth() == dst_rect.GetWidth() && src_rect.GetHeight() == dst_rect.GetHeight()); - ASSERT(src_rect.left >= 0 && static_cast(src_rect.right) <= m_config.width && - src_rect.top >= 0 && static_cast(src_rect.bottom) <= m_config.height); - ASSERT(dst_rect.left >= 0 && static_cast(dst_rect.right) <= dst->GetConfig().width && - dst_rect.top >= 0 && static_cast(dst_rect.bottom) <= dst->GetConfig().height); + ASSERT(src_rect.left >= 0 && static_cast(src_rect.right) <= GetWidth() && + src_rect.top >= 0 && static_cast(src_rect.bottom) <= GetHeight()); + ASSERT(dst_rect.left >= 0 && static_cast(dst_rect.right) <= dst->GetWidth() && + dst_rect.top >= 0 && static_cast(dst_rect.bottom) <= dst->GetHeight()); if (IsMapped()) DXStagingTexture::Unmap(); - CD3D11_BOX src_box(src_rect.left, src_rect.top, 0, src_rect.right, src_rect.bottom, 1); - D3D::context->CopySubresourceRegion( - static_cast(dst)->GetRawTexIdentifier()->GetTex(), - D3D11CalcSubresource(dst_level, dst_layer, dst->GetConfig().levels), - static_cast(dst_rect.left), static_cast(dst_rect.top), 0, m_tex, 0, &src_box); + if (static_cast(src_rect.GetWidth()) == dst->GetWidth() && + static_cast(src_rect.GetHeight()) == dst->GetHeight()) + { + D3D::context->CopySubresourceRegion( + static_cast(dst)->GetD3DTexture(), + D3D11CalcSubresource(dst_level, 
dst_layer, dst->GetLevels()), 0, 0, 0, m_tex, 0, nullptr); + } + else + { + CD3D11_BOX src_box(src_rect.left, src_rect.top, 0, src_rect.right, src_rect.bottom, 1); + D3D::context->CopySubresourceRegion( + static_cast(dst)->GetD3DTexture(), + D3D11CalcSubresource(dst_level, dst_layer, dst->GetLevels()), + static_cast(dst_rect.left), static_cast(dst_rect.top), 0, m_tex, 0, &src_box); + } } bool DXStagingTexture::Map() @@ -348,11 +393,14 @@ void DXStagingTexture::Flush() m_needs_flush = false; } -DXFramebuffer::DXFramebuffer(AbstractTextureFormat color_format, AbstractTextureFormat depth_format, +DXFramebuffer::DXFramebuffer(AbstractTexture* color_attachment, AbstractTexture* depth_attachment, + AbstractTextureFormat color_format, AbstractTextureFormat depth_format, u32 width, u32 height, u32 layers, u32 samples, - ID3D11RenderTargetView* rtv, ID3D11DepthStencilView* dsv) - : AbstractFramebuffer(color_format, depth_format, width, height, layers, samples), m_rtv(rtv), - m_dsv(dsv) + ID3D11RenderTargetView* rtv, ID3D11RenderTargetView* integer_rtv, + ID3D11DepthStencilView* dsv) + : AbstractFramebuffer(color_attachment, depth_attachment, color_format, depth_format, width, + height, layers, samples), + m_rtv(rtv), m_integer_rtv(integer_rtv), m_dsv(dsv) { } @@ -360,12 +408,14 @@ DXFramebuffer::~DXFramebuffer() { if (m_rtv) m_rtv->Release(); + if (m_integer_rtv) + m_integer_rtv->Release(); if (m_dsv) m_dsv->Release(); } -std::unique_ptr DXFramebuffer::Create(const DXTexture* color_attachment, - const DXTexture* depth_attachment) +std::unique_ptr DXFramebuffer::Create(DXTexture* color_attachment, + DXTexture* depth_attachment) { if (!ValidateConfig(color_attachment, depth_attachment)) return nullptr; @@ -381,55 +431,45 @@ std::unique_ptr DXFramebuffer::Create(const DXTexture* color_atta const u32 samples = either_attachment->GetSamples(); ID3D11RenderTargetView* rtv = nullptr; + ID3D11RenderTargetView* integer_rtv = nullptr; if (color_attachment) { - 
D3D11_RENDER_TARGET_VIEW_DESC desc; - desc.Format = GetDXGIFormatForHostFormat(color_attachment->GetConfig().format); - if (color_attachment->GetConfig().IsMultisampled()) - { - desc.ViewDimension = D3D11_RTV_DIMENSION_TEXTURE2DMSARRAY; - desc.Texture2DMSArray.ArraySize = color_attachment->GetConfig().layers; - desc.Texture2DMSArray.FirstArraySlice = 0; - } - else - { - desc.ViewDimension = D3D11_RTV_DIMENSION_TEXTURE2DARRAY; - desc.Texture2DArray.ArraySize = color_attachment->GetConfig().layers; - desc.Texture2DArray.FirstArraySlice = 0; - desc.Texture2DArray.MipSlice = 0; - } - - HRESULT hr = D3D::device->CreateRenderTargetView( - color_attachment->GetRawTexIdentifier()->GetTex(), &desc, &rtv); + CD3D11_RENDER_TARGET_VIEW_DESC desc( + color_attachment->IsMultisampled() ? D3D11_RTV_DIMENSION_TEXTURE2DMSARRAY : + D3D11_RTV_DIMENSION_TEXTURE2DARRAY, + GetRTVFormatForHostFormat(color_attachment->GetFormat(), false), 0, 0, + color_attachment->GetLayers()); + HRESULT hr = + D3D::device->CreateRenderTargetView(color_attachment->GetD3DTexture(), &desc, &rtv); CHECK(SUCCEEDED(hr), "Create render target view for framebuffer"); + + // Only create the integer RTV on Win8+. 
+ DXGI_FORMAT integer_format = GetRTVFormatForHostFormat(color_attachment->GetFormat(), true); + if (D3D::device1 && integer_format != desc.Format) + { + desc.Format = integer_format; + hr = D3D::device->CreateRenderTargetView(color_attachment->GetD3DTexture(), &desc, + &integer_rtv); + CHECK(SUCCEEDED(hr), "Create integer render target view for framebuffer"); + } } ID3D11DepthStencilView* dsv = nullptr; if (depth_attachment) { - D3D11_DEPTH_STENCIL_VIEW_DESC desc; - desc.Format = GetDXGIFormatForHostFormat(depth_attachment->GetConfig().format); - if (depth_attachment->GetConfig().IsMultisampled()) - { - desc.ViewDimension = D3D11_DSV_DIMENSION_TEXTURE2DMSARRAY; - desc.Texture2DMSArray.ArraySize = depth_attachment->GetConfig().layers; - desc.Texture2DMSArray.FirstArraySlice = 0; - } - else - { - desc.ViewDimension = D3D11_DSV_DIMENSION_TEXTURE2DARRAY; - desc.Texture2DArray.ArraySize = depth_attachment->GetConfig().layers; - desc.Texture2DArray.FirstArraySlice = 0; - desc.Texture2DArray.MipSlice = 0; - } - - HRESULT hr = D3D::device->CreateDepthStencilView( - depth_attachment->GetRawTexIdentifier()->GetTex(), &desc, &dsv); + const CD3D11_DEPTH_STENCIL_VIEW_DESC desc( + depth_attachment->GetConfig().IsMultisampled() ? 
D3D11_DSV_DIMENSION_TEXTURE2DMSARRAY : + D3D11_DSV_DIMENSION_TEXTURE2DARRAY, + GetDSVFormatForHostFormat(depth_attachment->GetFormat()), 0, 0, + depth_attachment->GetLayers(), 0); + HRESULT hr = + D3D::device->CreateDepthStencilView(depth_attachment->GetD3DTexture(), &desc, &dsv); CHECK(SUCCEEDED(hr), "Create depth stencil view for framebuffer"); } - return std::make_unique(color_format, depth_format, width, height, layers, samples, - rtv, dsv); + return std::make_unique(color_attachment, depth_attachment, color_format, + depth_format, width, height, layers, samples, rtv, + integer_rtv, dsv); } } // namespace DX11 diff --git a/Source/Core/VideoBackends/D3D/DXTexture.h b/Source/Core/VideoBackends/D3D/DXTexture.h index 96d8f13919..0a4e0ace48 100644 --- a/Source/Core/VideoBackends/D3D/DXTexture.h +++ b/Source/Core/VideoBackends/D3D/DXTexture.h @@ -4,6 +4,7 @@ #pragma once +#include #include #include "Common/CommonTypes.h" @@ -11,32 +12,34 @@ #include "VideoCommon/AbstractStagingTexture.h" #include "VideoCommon/AbstractTexture.h" -class D3DTexture2D; - namespace DX11 { class DXTexture final : public AbstractTexture { public: - explicit DXTexture(const TextureConfig& tex_config); + explicit DXTexture(const TextureConfig& tex_config, ID3D11Texture2D* d3d_texture, + ID3D11ShaderResourceView* d3d_srv, ID3D11UnorderedAccessView* d3d_uav); ~DXTexture(); + static std::unique_ptr Create(const TextureConfig& config); + void CopyRectangleFromTexture(const AbstractTexture* src, const MathUtil::Rectangle& src_rect, u32 src_layer, u32 src_level, const MathUtil::Rectangle& dst_rect, u32 dst_layer, u32 dst_level) override; - void ScaleRectangleFromTexture(const AbstractTexture* source, - const MathUtil::Rectangle& srcrect, - const MathUtil::Rectangle& dstrect) override; void ResolveFromTexture(const AbstractTexture* src, const MathUtil::Rectangle& rect, u32 layer, u32 level) override; void Load(u32 level, u32 width, u32 height, u32 row_length, const u8* buffer, size_t buffer_size) 
override; - D3DTexture2D* GetRawTexIdentifier() const; + ID3D11Texture2D* GetD3DTexture() const { return m_d3d_texture; } + ID3D11ShaderResourceView* GetD3DSRV() const { return m_d3d_srv; } + ID3D11UnorderedAccessView* GetD3DUAV() const { return m_d3d_uav; } private: - D3DTexture2D* m_texture; + ID3D11Texture2D* m_d3d_texture; + ID3D11ShaderResourceView* m_d3d_srv; + ID3D11UnorderedAccessView* m_d3d_uav; }; class DXStagingTexture final : public AbstractStagingTexture @@ -68,19 +71,22 @@ private: class DXFramebuffer final : public AbstractFramebuffer { public: - DXFramebuffer(AbstractTextureFormat color_format, AbstractTextureFormat depth_format, u32 width, + DXFramebuffer(AbstractTexture* color_attachment, AbstractTexture* depth_attachment, + AbstractTextureFormat color_format, AbstractTextureFormat depth_format, u32 width, u32 height, u32 layers, u32 samples, ID3D11RenderTargetView* rtv, - ID3D11DepthStencilView* dsv); + ID3D11RenderTargetView* integer_rtv, ID3D11DepthStencilView* dsv); ~DXFramebuffer() override; ID3D11RenderTargetView* const* GetRTVArray() const { return &m_rtv; } + ID3D11RenderTargetView* const* GetIntegerRTVArray() const { return &m_integer_rtv; } UINT GetNumRTVs() const { return m_rtv ? 1 : 0; } ID3D11DepthStencilView* GetDSV() const { return m_dsv; } - static std::unique_ptr Create(const DXTexture* color_attachment, - const DXTexture* depth_attachment); + static std::unique_ptr Create(DXTexture* color_attachment, + DXTexture* depth_attachment); protected: ID3D11RenderTargetView* m_rtv; + ID3D11RenderTargetView* m_integer_rtv; ID3D11DepthStencilView* m_dsv; }; diff --git a/Source/Core/VideoBackends/D3D/FramebufferManager.cpp b/Source/Core/VideoBackends/D3D/FramebufferManager.cpp deleted file mode 100644 index 5a2088f40d..0000000000 --- a/Source/Core/VideoBackends/D3D/FramebufferManager.cpp +++ /dev/null @@ -1,303 +0,0 @@ -// Copyright 2010 Dolphin Emulator Project -// Licensed under GPLv2+ -// Refer to the license.txt file included. 
- -#include "VideoBackends/D3D/FramebufferManager.h" - -#include -#include - -#include "Common/CommonTypes.h" -#include "Core/HW/Memmap.h" -#include "VideoBackends/D3D/D3DBase.h" -#include "VideoBackends/D3D/D3DState.h" -#include "VideoBackends/D3D/D3DUtil.h" -#include "VideoBackends/D3D/GeometryShaderCache.h" -#include "VideoBackends/D3D/PixelShaderCache.h" -#include "VideoBackends/D3D/Render.h" -#include "VideoBackends/D3D/VertexShaderCache.h" -#include "VideoCommon/VideoConfig.h" - -namespace DX11 -{ -static bool s_integer_efb_render_target = false; - -FramebufferManager::Efb FramebufferManager::m_efb; -unsigned int FramebufferManager::m_target_width; -unsigned int FramebufferManager::m_target_height; - -D3DTexture2D*& FramebufferManager::GetEFBColorTexture() -{ - return m_efb.color_tex; -} - -D3DTexture2D*& FramebufferManager::GetEFBColorReadTexture() -{ - return m_efb.color_read_texture; -} -ID3D11Texture2D*& FramebufferManager::GetEFBColorStagingBuffer() -{ - return m_efb.color_staging_buf; -} - -D3DTexture2D*& FramebufferManager::GetEFBDepthTexture() -{ - return m_efb.depth_tex; -} -D3DTexture2D*& FramebufferManager::GetEFBDepthReadTexture() -{ - return m_efb.depth_read_texture; -} -ID3D11Texture2D*& FramebufferManager::GetEFBDepthStagingBuffer() -{ - return m_efb.depth_staging_buf; -} - -D3DTexture2D*& FramebufferManager::GetResolvedEFBColorTexture() -{ - if (g_ActiveConfig.iMultisamples > 1) - { - for (int i = 0; i < m_efb.slices; i++) - D3D::context->ResolveSubresource(m_efb.resolved_color_tex->GetTex(), - D3D11CalcSubresource(0, i, 1), m_efb.color_tex->GetTex(), - D3D11CalcSubresource(0, i, 1), DXGI_FORMAT_R8G8B8A8_UNORM); - return m_efb.resolved_color_tex; - } - else - { - return m_efb.color_tex; - } -} - -D3DTexture2D*& FramebufferManager::GetResolvedEFBDepthTexture() -{ - if (g_ActiveConfig.iMultisamples > 1) - { - // ResolveSubresource does not work with depth textures. - // Instead, we use a shader that selects the minimum depth from all samples. 
- g_renderer->ResetAPIState(); - - CD3D11_VIEWPORT viewport(0.f, 0.f, (float)m_target_width, (float)m_target_height); - D3D::context->RSSetViewports(1, &viewport); - D3D::context->OMSetRenderTargets(1, &m_efb.resolved_depth_tex->GetRTV(), nullptr); - - const D3D11_RECT source_rect = CD3D11_RECT(0, 0, m_target_width, m_target_height); - D3D::drawShadedTexQuad( - m_efb.depth_tex->GetSRV(), &source_rect, m_target_width, m_target_height, - PixelShaderCache::GetDepthResolveProgram(), VertexShaderCache::GetSimpleVertexShader(), - VertexShaderCache::GetSimpleInputLayout(), GeometryShaderCache::GetCopyGeometryShader()); - - g_renderer->RestoreAPIState(); - return m_efb.resolved_depth_tex; - } - else - { - return m_efb.depth_tex; - } -} - -void FramebufferManager::SwapReinterpretTexture() -{ - std::swap(m_efb.color_tex, m_efb.color_temp_tex); - std::swap(m_efb.color_int_rtv, m_efb.color_temp_int_rtv); -} - -void FramebufferManager::SetIntegerEFBRenderTarget(bool enabled) -{ - if (s_integer_efb_render_target == enabled) - return; - - // We only use UINT render targets for logic ops, which is only supported with D3D11.1. - if (!D3D::device1) - return; - - s_integer_efb_render_target = enabled; - BindEFBRenderTarget(); -} - -void FramebufferManager::BindEFBRenderTarget(bool bind_depth) -{ - ID3D11RenderTargetView* rtv = - s_integer_efb_render_target ? m_efb.color_int_rtv : m_efb.color_tex->GetRTV(); - ID3D11DepthStencilView* dsv = bind_depth ? 
m_efb.depth_tex->GetDSV() : nullptr; - D3D::context->OMSetRenderTargets(1, &rtv, dsv); -} - -FramebufferManager::FramebufferManager(int target_width, int target_height) -{ - static constexpr std::array clear_color = {0.0f, 0.0f, 0.0f, 1.0f}; - m_target_width = static_cast(std::max(target_width, 1)); - m_target_height = static_cast(std::max(target_height, 1)); - DXGI_SAMPLE_DESC sample_desc; - sample_desc.Count = g_ActiveConfig.iMultisamples; - sample_desc.Quality = 0; - - ID3D11Texture2D* buf; - D3D11_TEXTURE2D_DESC texdesc; - HRESULT hr; - - m_EFBLayers = m_efb.slices = (g_ActiveConfig.stereo_mode != StereoMode::Off) ? 2 : 1; - - // EFB color texture - primary render target - texdesc = - CD3D11_TEXTURE2D_DESC(DXGI_FORMAT_R8G8B8A8_TYPELESS, m_target_width, m_target_height, - m_efb.slices, 1, D3D11_BIND_SHADER_RESOURCE | D3D11_BIND_RENDER_TARGET, - D3D11_USAGE_DEFAULT, 0, sample_desc.Count, sample_desc.Quality); - hr = D3D::device->CreateTexture2D(&texdesc, nullptr, &buf); - CHECK(hr == S_OK, "create EFB color texture (size: %dx%d; hr=%#x)", m_target_width, - m_target_height, hr); - m_efb.color_tex = new D3DTexture2D( - buf, (D3D11_BIND_FLAG)(D3D11_BIND_SHADER_RESOURCE | D3D11_BIND_RENDER_TARGET), - DXGI_FORMAT_R8G8B8A8_UNORM, DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_R8G8B8A8_UNORM, - (sample_desc.Count > 1)); - - SAFE_RELEASE(buf); - D3D::SetDebugObjectName(m_efb.color_tex->GetTex(), "EFB color texture"); - D3D::SetDebugObjectName(m_efb.color_tex->GetSRV(), "EFB color texture shader resource view"); - D3D::SetDebugObjectName(m_efb.color_tex->GetRTV(), "EFB color texture render target view"); - D3D::context->ClearRenderTargetView(m_efb.color_tex->GetRTV(), clear_color.data()); - - // Temporary EFB color texture - used in ReinterpretPixelData - texdesc = - CD3D11_TEXTURE2D_DESC(DXGI_FORMAT_R8G8B8A8_TYPELESS, m_target_width, m_target_height, - m_efb.slices, 1, D3D11_BIND_SHADER_RESOURCE | D3D11_BIND_RENDER_TARGET, - D3D11_USAGE_DEFAULT, 0, sample_desc.Count, 
sample_desc.Quality); - hr = D3D::device->CreateTexture2D(&texdesc, nullptr, &buf); - CHECK(hr == S_OK, "create EFB color temp texture (size: %dx%d; hr=%#x)", m_target_width, - m_target_height, hr); - m_efb.color_temp_tex = new D3DTexture2D( - buf, (D3D11_BIND_FLAG)(D3D11_BIND_SHADER_RESOURCE | D3D11_BIND_RENDER_TARGET), - DXGI_FORMAT_R8G8B8A8_UNORM, DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_R8G8B8A8_UNORM, - (sample_desc.Count > 1)); - SAFE_RELEASE(buf); - D3D::SetDebugObjectName(m_efb.color_temp_tex->GetTex(), "EFB color temp texture"); - D3D::SetDebugObjectName(m_efb.color_temp_tex->GetSRV(), - "EFB color temp texture shader resource view"); - D3D::SetDebugObjectName(m_efb.color_temp_tex->GetRTV(), - "EFB color temp texture render target view"); - D3D::context->ClearRenderTargetView(m_efb.color_temp_tex->GetRTV(), clear_color.data()); - - // Integer render targets for EFB, used for logic op - CD3D11_RENDER_TARGET_VIEW_DESC int_rtv_desc(m_efb.color_tex->GetTex(), - g_ActiveConfig.iMultisamples > 1 ? 
- D3D11_RTV_DIMENSION_TEXTURE2DMS : - D3D11_RTV_DIMENSION_TEXTURE2D, - DXGI_FORMAT_R8G8B8A8_UINT); - hr = D3D::device->CreateRenderTargetView(m_efb.color_tex->GetTex(), &int_rtv_desc, - &m_efb.color_int_rtv); - CHECK(hr == S_OK, "create EFB integer RTV(hr=%#x)", hr); - hr = D3D::device->CreateRenderTargetView(m_efb.color_temp_tex->GetTex(), &int_rtv_desc, - &m_efb.color_temp_int_rtv); - CHECK(hr == S_OK, "create EFB integer RTV(hr=%#x)", hr); - - // Render buffer for AccessEFB (color data) - texdesc = CD3D11_TEXTURE2D_DESC(DXGI_FORMAT_R8G8B8A8_UNORM, 1, 1, 1, 1, D3D11_BIND_RENDER_TARGET); - hr = D3D::device->CreateTexture2D(&texdesc, nullptr, &buf); - CHECK(hr == S_OK, "create EFB color read texture (hr=%#x)", hr); - m_efb.color_read_texture = new D3DTexture2D(buf, D3D11_BIND_RENDER_TARGET); - SAFE_RELEASE(buf); - D3D::SetDebugObjectName(m_efb.color_read_texture->GetTex(), - "EFB color read texture (used in Renderer::AccessEFB)"); - D3D::SetDebugObjectName( - m_efb.color_read_texture->GetRTV(), - "EFB color read texture render target view (used in Renderer::AccessEFB)"); - - // AccessEFB - Sysmem buffer used to retrieve the pixel data from depth_read_texture - texdesc = CD3D11_TEXTURE2D_DESC(DXGI_FORMAT_R8G8B8A8_UNORM, 1, 1, 1, 1, 0, D3D11_USAGE_STAGING, - D3D11_CPU_ACCESS_READ); - hr = D3D::device->CreateTexture2D(&texdesc, nullptr, &m_efb.color_staging_buf); - CHECK(hr == S_OK, "create EFB color staging buffer (hr=%#x)", hr); - D3D::SetDebugObjectName(m_efb.color_staging_buf, - "EFB color staging texture (used for Renderer::AccessEFB)"); - - // EFB depth buffer - primary depth buffer - texdesc = - CD3D11_TEXTURE2D_DESC(DXGI_FORMAT_R32_TYPELESS, m_target_width, m_target_height, m_efb.slices, - 1, D3D11_BIND_DEPTH_STENCIL | D3D11_BIND_SHADER_RESOURCE, - D3D11_USAGE_DEFAULT, 0, sample_desc.Count, sample_desc.Quality); - hr = D3D::device->CreateTexture2D(&texdesc, nullptr, &buf); - CHECK(hr == S_OK, "create EFB depth texture (size: %dx%d; hr=%#x)", m_target_width, - 
m_target_height, hr); - m_efb.depth_tex = new D3DTexture2D( - buf, (D3D11_BIND_FLAG)(D3D11_BIND_DEPTH_STENCIL | D3D11_BIND_SHADER_RESOURCE), - DXGI_FORMAT_R32_FLOAT, DXGI_FORMAT_D32_FLOAT, DXGI_FORMAT_UNKNOWN, (sample_desc.Count > 1)); - SAFE_RELEASE(buf); - D3D::SetDebugObjectName(m_efb.depth_tex->GetTex(), "EFB depth texture"); - D3D::SetDebugObjectName(m_efb.depth_tex->GetDSV(), "EFB depth texture depth stencil view"); - D3D::SetDebugObjectName(m_efb.depth_tex->GetSRV(), "EFB depth texture shader resource view"); - D3D::context->ClearDepthStencilView(m_efb.depth_tex->GetDSV(), D3D11_CLEAR_DEPTH, 0.0f, 0); - - // Render buffer for AccessEFB (depth data) - texdesc = CD3D11_TEXTURE2D_DESC(DXGI_FORMAT_R32_FLOAT, 1, 1, 1, 1, D3D11_BIND_RENDER_TARGET); - hr = D3D::device->CreateTexture2D(&texdesc, nullptr, &buf); - CHECK(hr == S_OK, "create EFB depth read texture (hr=%#x)", hr); - m_efb.depth_read_texture = new D3DTexture2D(buf, D3D11_BIND_RENDER_TARGET); - SAFE_RELEASE(buf); - D3D::SetDebugObjectName(m_efb.depth_read_texture->GetTex(), - "EFB depth read texture (used in Renderer::AccessEFB)"); - D3D::SetDebugObjectName( - m_efb.depth_read_texture->GetRTV(), - "EFB depth read texture render target view (used in Renderer::AccessEFB)"); - - // AccessEFB - Sysmem buffer used to retrieve the pixel data from depth_read_texture - texdesc = CD3D11_TEXTURE2D_DESC(DXGI_FORMAT_R32_FLOAT, 1, 1, 1, 1, 0, D3D11_USAGE_STAGING, - D3D11_CPU_ACCESS_READ); - hr = D3D::device->CreateTexture2D(&texdesc, nullptr, &m_efb.depth_staging_buf); - CHECK(hr == S_OK, "create EFB depth staging buffer (hr=%#x)", hr); - D3D::SetDebugObjectName(m_efb.depth_staging_buf, - "EFB depth staging texture (used for Renderer::AccessEFB)"); - - if (g_ActiveConfig.iMultisamples > 1) - { - // Framebuffer resolve textures (color+depth) - texdesc = CD3D11_TEXTURE2D_DESC(DXGI_FORMAT_R8G8B8A8_UNORM, m_target_width, m_target_height, - m_efb.slices, 1, D3D11_BIND_SHADER_RESOURCE, - D3D11_USAGE_DEFAULT, 0, 1); - hr = 
D3D::device->CreateTexture2D(&texdesc, nullptr, &buf); - CHECK(hr == S_OK, "create EFB color resolve texture (size: %dx%d; hr=%#x)", m_target_width, - m_target_height, hr); - m_efb.resolved_color_tex = - new D3DTexture2D(buf, D3D11_BIND_SHADER_RESOURCE, DXGI_FORMAT_R8G8B8A8_UNORM); - SAFE_RELEASE(buf); - D3D::SetDebugObjectName(m_efb.resolved_color_tex->GetTex(), "EFB color resolve texture"); - D3D::SetDebugObjectName(m_efb.resolved_color_tex->GetSRV(), - "EFB color resolve texture shader resource view"); - - texdesc = - CD3D11_TEXTURE2D_DESC(DXGI_FORMAT_R32_FLOAT, m_target_width, m_target_height, m_efb.slices, - 1, D3D11_BIND_SHADER_RESOURCE | D3D11_BIND_RENDER_TARGET); - hr = D3D::device->CreateTexture2D(&texdesc, nullptr, &buf); - CHECK(hr == S_OK, "create EFB depth resolve texture (size: %dx%d; hr=%#x)", m_target_width, - m_target_height, hr); - m_efb.resolved_depth_tex = new D3DTexture2D( - buf, (D3D11_BIND_FLAG)(D3D11_BIND_SHADER_RESOURCE | D3D11_BIND_RENDER_TARGET), - DXGI_FORMAT_R32_FLOAT, DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_R32_FLOAT); - SAFE_RELEASE(buf); - D3D::SetDebugObjectName(m_efb.resolved_depth_tex->GetTex(), "EFB depth resolve texture"); - D3D::SetDebugObjectName(m_efb.resolved_depth_tex->GetSRV(), - "EFB depth resolve texture shader resource view"); - } - else - { - m_efb.resolved_color_tex = nullptr; - m_efb.resolved_depth_tex = nullptr; - } - s_integer_efb_render_target = false; -} - -FramebufferManager::~FramebufferManager() -{ - SAFE_RELEASE(m_efb.color_tex); - SAFE_RELEASE(m_efb.color_int_rtv); - SAFE_RELEASE(m_efb.color_temp_tex); - SAFE_RELEASE(m_efb.color_temp_int_rtv); - SAFE_RELEASE(m_efb.color_staging_buf); - SAFE_RELEASE(m_efb.color_read_texture); - SAFE_RELEASE(m_efb.resolved_color_tex); - SAFE_RELEASE(m_efb.depth_tex); - SAFE_RELEASE(m_efb.depth_staging_buf); - SAFE_RELEASE(m_efb.depth_read_texture); - SAFE_RELEASE(m_efb.resolved_depth_tex); -} - -} // namespace DX11 diff --git a/Source/Core/VideoBackends/D3D/FramebufferManager.h 
b/Source/Core/VideoBackends/D3D/FramebufferManager.h deleted file mode 100644 index f8767a2c5d..0000000000 --- a/Source/Core/VideoBackends/D3D/FramebufferManager.h +++ /dev/null @@ -1,96 +0,0 @@ -// Copyright 2009 Dolphin Emulator Project -// Licensed under GPLv2+ -// Refer to the license.txt file included. - -#pragma once - -#include -#include -#include - -#include "Common/CommonTypes.h" -#include "VideoBackends/D3D/D3DTexture.h" -#include "VideoCommon/FramebufferManagerBase.h" - -namespace DX11 -{ -// On the GameCube, the game sends a request for the graphics processor to -// transfer its internal EFB (Embedded Framebuffer) to an area in GameCube RAM -// called the XFB (External Framebuffer). The size and location of the XFB is -// decided at the time of the copy, and the format is always YUYV. The video -// interface is given a pointer to the XFB, which will be decoded and -// displayed on the TV. -// -// There are two ways for Dolphin to emulate this: -// -// Real XFB mode: -// -// Dolphin will behave like the GameCube and encode the EFB to -// a portion of GameCube RAM. The emulated video interface will decode the data -// for output to the screen. -// -// Advantages: Behaves exactly like the GameCube. -// Disadvantages: Resolution will be limited. -// -// Virtual XFB mode: -// -// When a request is made to copy the EFB to an XFB, Dolphin -// will remember the RAM location and size of the XFB in a Virtual XFB list. -// The video interface will look up the XFB in the list and use the enhanced -// data stored there, if available. -// -// Advantages: Enables high resolution graphics, better than real hardware. -// Disadvantages: If the GameCube CPU writes directly to the XFB (which is -// possible but uncommon), the Virtual XFB will not capture this information. - -// There may be multiple XFBs in GameCube RAM. This is the maximum number to -// virtualize. 
- -class FramebufferManager : public FramebufferManagerBase -{ -public: - FramebufferManager(int target_width, int target_height); - ~FramebufferManager(); - - static D3DTexture2D*& GetEFBColorTexture(); - static D3DTexture2D*& GetEFBColorReadTexture(); - static ID3D11Texture2D*& GetEFBColorStagingBuffer(); - - static D3DTexture2D*& GetEFBDepthTexture(); - static D3DTexture2D*& GetEFBDepthReadTexture(); - static ID3D11Texture2D*& GetEFBDepthStagingBuffer(); - - static D3DTexture2D*& GetResolvedEFBColorTexture(); - static D3DTexture2D*& GetResolvedEFBDepthTexture(); - - static D3DTexture2D*& GetEFBColorTempTexture() { return m_efb.color_temp_tex; } - static void SwapReinterpretTexture(); - static void SetIntegerEFBRenderTarget(bool enabled); - static void BindEFBRenderTarget(bool bind_depth = true); - -private: - static struct Efb - { - D3DTexture2D* color_tex; - ID3D11RenderTargetView* color_int_rtv; - ID3D11Texture2D* color_staging_buf; - D3DTexture2D* color_read_texture; - - D3DTexture2D* depth_tex; - ID3D11Texture2D* depth_staging_buf; - D3DTexture2D* depth_read_texture; - - D3DTexture2D* color_temp_tex; - ID3D11RenderTargetView* color_temp_int_rtv; - - D3DTexture2D* resolved_color_tex; - D3DTexture2D* resolved_depth_tex; - - int slices; - } m_efb; - - static unsigned int m_target_width; - static unsigned int m_target_height; -}; - -} // namespace DX11 diff --git a/Source/Core/VideoBackends/D3D/GeometryShaderCache.cpp b/Source/Core/VideoBackends/D3D/GeometryShaderCache.cpp deleted file mode 100644 index baa444bf47..0000000000 --- a/Source/Core/VideoBackends/D3D/GeometryShaderCache.cpp +++ /dev/null @@ -1,113 +0,0 @@ -// Copyright 2014 Dolphin Emulator Project -// Licensed under GPLv2+ -// Refer to the license.txt file included. 
- -#include - -#include "Common/FileUtil.h" -#include "Common/StringUtil.h" - -#include "Core/ConfigManager.h" - -#include "VideoBackends/D3D/D3DBase.h" -#include "VideoBackends/D3D/D3DShader.h" -#include "VideoBackends/D3D/D3DState.h" -#include "VideoBackends/D3D/FramebufferManager.h" -#include "VideoBackends/D3D/GeometryShaderCache.h" - -#include "VideoCommon/Debugger.h" -#include "VideoCommon/GeometryShaderGen.h" -#include "VideoCommon/VideoConfig.h" - -namespace DX11 -{ -ID3D11GeometryShader* ClearGeometryShader = nullptr; -ID3D11GeometryShader* CopyGeometryShader = nullptr; - -ID3D11GeometryShader* GeometryShaderCache::GetClearGeometryShader() -{ - return (g_ActiveConfig.stereo_mode != StereoMode::Off) ? ClearGeometryShader : nullptr; -} -ID3D11GeometryShader* GeometryShaderCache::GetCopyGeometryShader() -{ - return (g_ActiveConfig.stereo_mode != StereoMode::Off) ? CopyGeometryShader : nullptr; -} - -const char clear_shader_code[] = { - "struct VSOUTPUT\n" - "{\n" - " float4 vPosition : POSITION;\n" - " float4 vColor0 : COLOR0;\n" - "};\n" - "struct GSOUTPUT\n" - "{\n" - " float4 vPosition : POSITION;\n" - " float4 vColor0 : COLOR0;\n" - " uint slice : SV_RenderTargetArrayIndex;\n" - "};\n" - "[maxvertexcount(6)]\n" - "void main(triangle VSOUTPUT o[3], inout TriangleStream Output)\n" - "{\n" - "for(int slice = 0; slice < 2; slice++)\n" - "{\n" - " for(int i = 0; i < 3; i++)\n" - " {\n" - " GSOUTPUT OUT;\n" - " OUT.vPosition = o[i].vPosition;\n" - " OUT.vColor0 = o[i].vColor0;\n" - " OUT.slice = slice;\n" - " Output.Append(OUT);\n" - " }\n" - " Output.RestartStrip();\n" - "}\n" - "}\n"}; - -const char copy_shader_code[] = { - "struct VSOUTPUT\n" - "{\n" - " float4 vPosition : POSITION;\n" - " float3 vTexCoord : TEXCOORD0;\n" - "};\n" - "struct GSOUTPUT\n" - "{\n" - " float4 vPosition : POSITION;\n" - " float3 vTexCoord : TEXCOORD0;\n" - " uint slice : SV_RenderTargetArrayIndex;\n" - "};\n" - "[maxvertexcount(6)]\n" - "void main(triangle VSOUTPUT o[3], inout 
TriangleStream Output)\n" - "{\n" - "for(int slice = 0; slice < 2; slice++)\n" - "{\n" - " for(int i = 0; i < 3; i++)\n" - " {\n" - " GSOUTPUT OUT;\n" - " OUT.vPosition = o[i].vPosition;\n" - " OUT.vTexCoord = o[i].vTexCoord;\n" - " OUT.vTexCoord.z = float(slice);\n" - " OUT.slice = slice;\n" - " Output.Append(OUT);\n" - " }\n" - " Output.RestartStrip();\n" - "}\n" - "}\n"}; - -void GeometryShaderCache::Init() -{ - // used when drawing clear quads - ClearGeometryShader = D3D::CompileAndCreateGeometryShader(clear_shader_code); - CHECK(ClearGeometryShader != nullptr, "Create clear geometry shader"); - D3D::SetDebugObjectName(ClearGeometryShader, "clear geometry shader"); - - // used for buffer copy - CopyGeometryShader = D3D::CompileAndCreateGeometryShader(copy_shader_code); - CHECK(CopyGeometryShader != nullptr, "Create copy geometry shader"); - D3D::SetDebugObjectName(CopyGeometryShader, "copy geometry shader"); -} - -void GeometryShaderCache::Shutdown() -{ - SAFE_RELEASE(ClearGeometryShader); - SAFE_RELEASE(CopyGeometryShader); -} -} // DX11 diff --git a/Source/Core/VideoBackends/D3D/GeometryShaderCache.h b/Source/Core/VideoBackends/D3D/GeometryShaderCache.h deleted file mode 100644 index 38ffde1b51..0000000000 --- a/Source/Core/VideoBackends/D3D/GeometryShaderCache.h +++ /dev/null @@ -1,27 +0,0 @@ -// Copyright 2014 Dolphin Emulator Project -// Licensed under GPLv2+ -// Refer to the license.txt file included. 
- -#pragma once - -#include -#include - -#include "VideoCommon/GeometryShaderGen.h" - -namespace DX11 -{ -class GeometryShaderCache -{ -public: - static void Init(); - static void Shutdown(); - - static ID3D11GeometryShader* GetClearGeometryShader(); - static ID3D11GeometryShader* GetCopyGeometryShader(); - - static ID3D11Buffer* GetConstantBuffer(); - static void UpdateConstantBuffer(const void* data, u32 data_size); -}; - -} // namespace DX11 diff --git a/Source/Core/VideoBackends/D3D/NativeVertexFormat.cpp b/Source/Core/VideoBackends/D3D/NativeVertexFormat.cpp index 421a1019bd..4d3407ed04 100644 --- a/Source/Core/VideoBackends/D3D/NativeVertexFormat.cpp +++ b/Source/Core/VideoBackends/D3D/NativeVertexFormat.cpp @@ -5,10 +5,10 @@ #include #include "VideoBackends/D3D/D3DBase.h" -#include "VideoBackends/D3D/D3DBlob.h" #include "VideoBackends/D3D/D3DState.h" +#include "VideoBackends/D3D/DXShader.h" +#include "VideoBackends/D3D/Render.h" #include "VideoBackends/D3D/VertexManager.h" -#include "VideoBackends/D3D/VertexShaderCache.h" #include "VideoCommon/NativeVertexFormat.h" namespace DX11 @@ -16,7 +16,7 @@ namespace DX11 std::mutex s_input_layout_lock; std::unique_ptr -VertexManager::CreateNativeVertexFormat(const PortableVertexDeclaration& vtx_decl) +Renderer::CreateNativeVertexFormat(const PortableVertexDeclaration& vtx_decl) { return std::make_unique(vtx_decl); } @@ -77,11 +77,11 @@ DXGI_FORMAT VarToD3D(VarType t, int size, bool integer) return retval; } -D3DVertexFormat::D3DVertexFormat(const PortableVertexDeclaration& _vtx_decl) -{ - this->vtx_decl = _vtx_decl; +D3DVertexFormat::D3DVertexFormat(const PortableVertexDeclaration& vtx_decl) + : NativeVertexFormat(vtx_decl) - const AttributeFormat* format = &_vtx_decl.position; +{ + const AttributeFormat* format = &vtx_decl.position; if (format->enable) { m_elems[m_num_elems].SemanticName = "POSITION"; @@ -93,7 +93,7 @@ D3DVertexFormat::D3DVertexFormat(const PortableVertexDeclaration& _vtx_decl) for (int i = 0; i < 
3; i++) { - format = &_vtx_decl.normals[i]; + format = &vtx_decl.normals[i]; if (format->enable) { m_elems[m_num_elems].SemanticName = "NORMAL"; @@ -107,7 +107,7 @@ D3DVertexFormat::D3DVertexFormat(const PortableVertexDeclaration& _vtx_decl) for (int i = 0; i < 2; i++) { - format = &_vtx_decl.colors[i]; + format = &vtx_decl.colors[i]; if (format->enable) { m_elems[m_num_elems].SemanticName = "COLOR"; @@ -121,7 +121,7 @@ D3DVertexFormat::D3DVertexFormat(const PortableVertexDeclaration& _vtx_decl) for (int i = 0; i < 8; i++) { - format = &_vtx_decl.texcoords[i]; + format = &vtx_decl.texcoords[i]; if (format->enable) { m_elems[m_num_elems].SemanticName = "TEXCOORD"; @@ -133,7 +133,7 @@ D3DVertexFormat::D3DVertexFormat(const PortableVertexDeclaration& _vtx_decl) } } - format = &_vtx_decl.posmtx; + format = &vtx_decl.posmtx; if (format->enable) { m_elems[m_num_elems].SemanticName = "BLENDINDICES"; @@ -150,7 +150,7 @@ D3DVertexFormat::~D3DVertexFormat() SAFE_RELEASE(layout); } -ID3D11InputLayout* D3DVertexFormat::GetInputLayout(D3DBlob* vs_bytecode) +ID3D11InputLayout* D3DVertexFormat::GetInputLayout(const void* vs_bytecode, size_t vs_bytecode_size) { // CreateInputLayout requires a shader input, but it only looks at the signature of the shader, // so we don't need to recompute it if the shader changes. 
@@ -158,8 +158,8 @@ ID3D11InputLayout* D3DVertexFormat::GetInputLayout(D3DBlob* vs_bytecode) if (layout) return layout; - HRESULT hr = DX11::D3D::device->CreateInputLayout( - m_elems.data(), m_num_elems, vs_bytecode->Data(), vs_bytecode->Size(), &layout); + HRESULT hr = D3D::device->CreateInputLayout(m_elems.data(), m_num_elems, vs_bytecode, + vs_bytecode_size, &layout); if (FAILED(hr)) PanicAlert("Failed to create input layout, %s %d\n", __FILE__, __LINE__); DX11::D3D::SetDebugObjectName(m_layout, "input layout used to emulate the GX pipeline"); diff --git a/Source/Core/VideoBackends/D3D/PSTextureEncoder.cpp b/Source/Core/VideoBackends/D3D/PSTextureEncoder.cpp deleted file mode 100644 index 231f317488..0000000000 --- a/Source/Core/VideoBackends/D3D/PSTextureEncoder.cpp +++ /dev/null @@ -1,160 +0,0 @@ -// Copyright 2011 Dolphin Emulator Project -// Licensed under GPLv2+ -// Refer to the license.txt file included. - -#include "VideoBackends/D3D/PSTextureEncoder.h" - -#include "Common/Assert.h" -#include "Common/Logging/Log.h" -#include "Core/HW/Memmap.h" -#include "VideoBackends/D3D/D3DBase.h" -#include "VideoBackends/D3D/D3DShader.h" -#include "VideoBackends/D3D/D3DState.h" -#include "VideoBackends/D3D/D3DUtil.h" -#include "VideoBackends/D3D/DXTexture.h" -#include "VideoBackends/D3D/FramebufferManager.h" -#include "VideoBackends/D3D/Render.h" -#include "VideoBackends/D3D/TextureCache.h" -#include "VideoBackends/D3D/VertexShaderCache.h" - -#include "VideoCommon/AbstractStagingTexture.h" -#include "VideoCommon/AbstractTexture.h" -#include "VideoCommon/TextureConversionShader.h" -#include "VideoCommon/VideoCommon.h" - -namespace DX11 -{ -struct EFBEncodeParams -{ - s32 SrcLeft; - s32 SrcTop; - u32 DestWidth; - u32 ScaleFactor; - float y_scale; - float gamma_rcp; - float clamp_top; - float clamp_bottom; - float filter_coefficients[3]; - u32 padding; -}; - -PSTextureEncoder::PSTextureEncoder() -{ -} - -PSTextureEncoder::~PSTextureEncoder() = default; - -void 
PSTextureEncoder::Init() -{ - m_encoding_render_texture = g_renderer->CreateTexture(TextureCache::GetEncodingTextureConfig()); - ASSERT(m_encoding_render_texture); - - // Create constant buffer for uploading data to shaders - D3D11_BUFFER_DESC bd = CD3D11_BUFFER_DESC(sizeof(EFBEncodeParams), D3D11_BIND_CONSTANT_BUFFER); - HRESULT hr = D3D::device->CreateBuffer(&bd, nullptr, &m_encode_params); - CHECK(SUCCEEDED(hr), "create efb encode params buffer"); - D3D::SetDebugObjectName(m_encode_params, "efb encoder params buffer"); -} - -void PSTextureEncoder::Shutdown() -{ - for (auto& it : m_encoding_shaders) - SAFE_RELEASE(it.second); - m_encoding_shaders.clear(); - - SAFE_RELEASE(m_encode_params); -} - -void PSTextureEncoder::Encode( - AbstractStagingTexture* dst, const EFBCopyParams& params, u32 native_width, u32 bytes_per_row, - u32 num_blocks_y, u32 memory_stride, const EFBRectangle& src_rect, bool scale_by_half, - float y_scale, float gamma, bool clamp_top, bool clamp_bottom, - const TextureCacheBase::CopyFilterCoefficientArray& filter_coefficients) -{ - // Resolve MSAA targets before copying. - // FIXME: Instead of resolving EFB, it would be better to pick out a - // single sample from each pixel. The game may break if it isn't - // expecting the blurred edges around multisampled shapes. - ID3D11ShaderResourceView* pEFB = params.depth ? 
- FramebufferManager::GetResolvedEFBDepthTexture()->GetSRV() : - FramebufferManager::GetResolvedEFBColorTexture()->GetSRV(); - - // Reset API - g_renderer->ResetAPIState(); - - // Set up all the state for EFB encoding - { - const u32 words_per_row = bytes_per_row / sizeof(u32); - - D3D11_VIEWPORT vp = CD3D11_VIEWPORT(0.f, 0.f, FLOAT(words_per_row), FLOAT(num_blocks_y)); - D3D::context->RSSetViewports(1, &vp); - - constexpr EFBRectangle fullSrcRect(0, 0, EFB_WIDTH, EFB_HEIGHT); - TargetRectangle targetRect = g_renderer->ConvertEFBRectangle(fullSrcRect); - - D3D::context->OMSetRenderTargets( - 1, - &static_cast(m_encoding_render_texture.get())->GetRawTexIdentifier()->GetRTV(), - nullptr); - - EFBEncodeParams encode_params; - encode_params.SrcLeft = src_rect.left; - encode_params.SrcTop = src_rect.top; - encode_params.DestWidth = native_width; - encode_params.ScaleFactor = scale_by_half ? 2 : 1; - encode_params.y_scale = y_scale; - encode_params.gamma_rcp = 1.0f / gamma; - encode_params.clamp_top = clamp_top ? src_rect.top / float(EFB_HEIGHT) : 0.0f; - encode_params.clamp_bottom = clamp_bottom ? src_rect.bottom / float(EFB_HEIGHT) : 1.0f; - for (size_t i = 0; i < filter_coefficients.size(); i++) - encode_params.filter_coefficients[i] = filter_coefficients[i]; - - D3D::context->UpdateSubresource(m_encode_params, 0, nullptr, &encode_params, 0, 0); - D3D::stateman->SetPixelConstants(m_encode_params); - - // We also linear filtering for both box filtering and downsampling higher resolutions to 1x - // TODO: This only produces perfect downsampling for 2x IR, other resolutions will need more - // complex down filtering to average all pixels and produce the correct result. 
- // Also, box filtering won't be correct for anything other than 1x IR - if (scale_by_half || g_renderer->GetEFBScale() != 1 || y_scale > 1.0f) - D3D::SetLinearCopySampler(); - else - D3D::SetPointCopySampler(); - - D3D::drawShadedTexQuad(pEFB, targetRect.AsRECT(), g_renderer->GetTargetWidth(), - g_renderer->GetTargetHeight(), GetEncodingPixelShader(params), - VertexShaderCache::GetSimpleVertexShader(), - VertexShaderCache::GetSimpleInputLayout()); - - // Copy to staging buffer - MathUtil::Rectangle copy_rect(0, 0, words_per_row, num_blocks_y); - dst->CopyFromTexture(m_encoding_render_texture.get(), copy_rect, 0, 0, copy_rect); - } - - g_renderer->RestoreAPIState(); -} - -ID3D11PixelShader* PSTextureEncoder::GetEncodingPixelShader(const EFBCopyParams& params) -{ - auto iter = m_encoding_shaders.find(params); - if (iter != m_encoding_shaders.end()) - return iter->second; - - D3DBlob* bytecode = nullptr; - const char* shader = TextureConversionShaderTiled::GenerateEncodingShader(params, APIType::D3D); - if (!D3D::CompilePixelShader(shader, &bytecode)) - { - PanicAlert("Failed to compile texture encoding shader."); - m_encoding_shaders[params] = nullptr; - return nullptr; - } - - ID3D11PixelShader* newShader; - HRESULT hr = - D3D::device->CreatePixelShader(bytecode->Data(), bytecode->Size(), nullptr, &newShader); - CHECK(SUCCEEDED(hr), "create efb encoder pixel shader"); - - m_encoding_shaders.emplace(params, newShader); - return newShader; -} -} // namespace DX11 diff --git a/Source/Core/VideoBackends/D3D/PSTextureEncoder.h b/Source/Core/VideoBackends/D3D/PSTextureEncoder.h deleted file mode 100644 index 43f153c4cf..0000000000 --- a/Source/Core/VideoBackends/D3D/PSTextureEncoder.h +++ /dev/null @@ -1,53 +0,0 @@ -// Copyright 2011 Dolphin Emulator Project -// Licensed under GPLv2+ -// Refer to the license.txt file included. 
- -#pragma once - -#include -#include - -#include "Common/CommonTypes.h" -#include "VideoCommon/TextureCacheBase.h" -#include "VideoCommon/TextureConversionShader.h" -#include "VideoCommon/VideoCommon.h" - -class AbstractTexture; -class AbstractStagingTexture; - -struct ID3D11Texture2D; -struct ID3D11RenderTargetView; -struct ID3D11Buffer; -struct ID3D11InputLayout; -struct ID3D11VertexShader; -struct ID3D11PixelShader; -struct ID3D11ClassLinkage; -struct ID3D11ClassInstance; -struct ID3D11BlendState; -struct ID3D11DepthStencilState; -struct ID3D11RasterizerState; -struct ID3D11SamplerState; - -namespace DX11 -{ -class PSTextureEncoder final -{ -public: - PSTextureEncoder(); - ~PSTextureEncoder(); - - void Init(); - void Shutdown(); - void Encode(AbstractStagingTexture* dst, const EFBCopyParams& params, u32 native_width, - u32 bytes_per_row, u32 num_blocks_y, u32 memory_stride, const EFBRectangle& src_rect, - bool scale_by_half, float y_scale, float gamma, bool clamp_top, bool clamp_bottom, - const TextureCacheBase::CopyFilterCoefficientArray& filter_coefficients); - -private: - ID3D11PixelShader* GetEncodingPixelShader(const EFBCopyParams& params); - - ID3D11Buffer* m_encode_params = nullptr; - std::unique_ptr m_encoding_render_texture; - std::map m_encoding_shaders; -}; -} diff --git a/Source/Core/VideoBackends/D3D/PixelShaderCache.cpp b/Source/Core/VideoBackends/D3D/PixelShaderCache.cpp deleted file mode 100644 index c86086614f..0000000000 --- a/Source/Core/VideoBackends/D3D/PixelShaderCache.cpp +++ /dev/null @@ -1,315 +0,0 @@ -// Copyright 2010 Dolphin Emulator Project -// Licensed under GPLv2+ -// Refer to the license.txt file included. 
- -#include - -#include "Common/CommonTypes.h" -#include "Common/FileUtil.h" -#include "Common/MsgHandler.h" -#include "Common/StringUtil.h" - -#include "Core/ConfigManager.h" -#include "Core/Host.h" - -#include "VideoBackends/D3D/D3DBase.h" -#include "VideoBackends/D3D/D3DShader.h" -#include "VideoBackends/D3D/D3DState.h" -#include "VideoBackends/D3D/PixelShaderCache.h" - -#include "VideoCommon/Debugger.h" -#include "VideoCommon/PixelShaderGen.h" -#include "VideoCommon/VideoConfig.h" - -namespace DX11 -{ -ID3D11PixelShader* s_ColorCopyProgram[2] = {nullptr}; -ID3D11PixelShader* s_ClearProgram = nullptr; -ID3D11PixelShader* s_AnaglyphProgram = nullptr; -ID3D11PixelShader* s_DepthResolveProgram = nullptr; -ID3D11PixelShader* s_rgba6_to_rgb8[2] = {nullptr}; -ID3D11PixelShader* s_rgb8_to_rgba6[2] = {nullptr}; - -const char clear_program_code[] = {"void main(\n" - "out float4 ocol0 : SV_Target,\n" - "in float4 pos : SV_Position,\n" - "in float4 incol0 : COLOR0){\n" - "ocol0 = incol0;\n" - "}\n"}; - -// TODO: Find some way to avoid having separate shaders for non-MSAA and MSAA... 
-const char color_copy_program_code[] = {"sampler samp0 : register(s0);\n" - "Texture2DArray Tex0 : register(t0);\n" - "void main(\n" - "out float4 ocol0 : SV_Target,\n" - "in float4 pos : SV_Position,\n" - "in float3 uv0 : TEXCOORD0){\n" - "ocol0 = Tex0.Sample(samp0,uv0);\n" - "}\n"}; - -// Anaglyph Red-Cyan shader based on Dubois algorithm -// Constants taken from the paper: -// "Conversion of a Stereo Pair to Anaglyph with -// the Least-Squares Projection Method" -// Eric Dubois, March 2009 -const char anaglyph_program_code[] = {"sampler samp0 : register(s0);\n" - "Texture2DArray Tex0 : register(t0);\n" - "void main(\n" - "out float4 ocol0 : SV_Target,\n" - "in float4 pos : SV_Position,\n" - "in float3 uv0 : TEXCOORD0){\n" - "float4 c0 = Tex0.Sample(samp0, float3(uv0.xy, 0.0));\n" - "float4 c1 = Tex0.Sample(samp0, float3(uv0.xy, 1.0));\n" - "float3x3 l = float3x3( 0.437, 0.449, 0.164,\n" - " -0.062,-0.062,-0.024,\n" - " -0.048,-0.050,-0.017);\n" - "float3x3 r = float3x3(-0.011,-0.032,-0.007,\n" - " 0.377, 0.761, 0.009,\n" - " -0.026,-0.093, 1.234);\n" - "ocol0 = float4(mul(l, c0.rgb) + mul(r, c1.rgb), c0.a);\n" - "}\n"}; - -// TODO: Improve sampling algorithm! 
-const char color_copy_program_code_msaa[] = { - "#define SAMPLES %d\n" - "sampler samp0 : register(s0);\n" - "Texture2DMSArray Tex0 : register(t0);\n" - "void main(\n" - "out float4 ocol0 : SV_Target,\n" - "in float4 pos : SV_Position,\n" - "in float3 uv0 : TEXCOORD0){\n" - "int width, height, slices, samples;\n" - "Tex0.GetDimensions(width, height, slices, samples);\n" - "ocol0 = 0;\n" - "for(int i = 0; i < SAMPLES; ++i)\n" - " ocol0 += Tex0.Load(int3(uv0.x*(width), uv0.y*(height), uv0.z), i);\n" - "ocol0 /= SAMPLES;\n" - "}\n"}; - -const char depth_resolve_program[] = { - "#define SAMPLES %d\n" - "Texture2DMSArray Tex0 : register(t0);\n" - "void main(\n" - " out float ocol0 : SV_Target,\n" - " in float4 pos : SV_Position,\n" - " in float3 uv0 : TEXCOORD0)\n" - "{\n" - " int width, height, slices, samples;\n" - " Tex0.GetDimensions(width, height, slices, samples);\n" - " ocol0 = Tex0.Load(int3(uv0.x*(width), uv0.y*(height), uv0.z), 0).x;\n" - " for(int i = 1; i < SAMPLES; ++i)\n" - " ocol0 = min(ocol0, Tex0.Load(int3(uv0.x*(width), uv0.y*(height), uv0.z), i).x);\n" - "}\n"}; - -const char reint_rgba6_to_rgb8[] = {"sampler samp0 : register(s0);\n" - "Texture2DArray Tex0 : register(t0);\n" - "void main(\n" - " out float4 ocol0 : SV_Target,\n" - " in float4 pos : SV_Position,\n" - " in float3 uv0 : TEXCOORD0)\n" - "{\n" - " int4 src6 = round(Tex0.Sample(samp0,uv0) * 63.f);\n" - " int4 dst8;\n" - " dst8.r = (src6.r << 2) | (src6.g >> 4);\n" - " dst8.g = ((src6.g & 0xF) << 4) | (src6.b >> 2);\n" - " dst8.b = ((src6.b & 0x3) << 6) | src6.a;\n" - " dst8.a = 255;\n" - " ocol0 = (float4)dst8 / 255.f;\n" - "}"}; - -const char reint_rgba6_to_rgb8_msaa[] = { - "#define SAMPLES %d\n" - "sampler samp0 : register(s0);\n" - "Texture2DMSArray Tex0 : register(t0);\n" - "void main(\n" - " out float4 ocol0 : SV_Target,\n" - " in float4 pos : SV_Position,\n" - " in float3 uv0 : TEXCOORD0)\n" - "{\n" - " int width, height, slices, samples;\n" - " Tex0.GetDimensions(width, height, 
slices, samples);\n" - " float4 texcol = 0;\n" - " for (int i = 0; i < SAMPLES; ++i)\n" - " texcol += Tex0.Load(int3(uv0.x*(width), uv0.y*(height), uv0.z), i);\n" - " texcol /= SAMPLES;\n" - " int4 src6 = round(texcol * 63.f);\n" - " int4 dst8;\n" - " dst8.r = (src6.r << 2) | (src6.g >> 4);\n" - " dst8.g = ((src6.g & 0xF) << 4) | (src6.b >> 2);\n" - " dst8.b = ((src6.b & 0x3) << 6) | src6.a;\n" - " dst8.a = 255;\n" - " ocol0 = (float4)dst8 / 255.f;\n" - "}"}; - -const char reint_rgb8_to_rgba6[] = {"sampler samp0 : register(s0);\n" - "Texture2DArray Tex0 : register(t0);\n" - "void main(\n" - " out float4 ocol0 : SV_Target,\n" - " in float4 pos : SV_Position,\n" - " in float3 uv0 : TEXCOORD0)\n" - "{\n" - " int4 src8 = round(Tex0.Sample(samp0,uv0) * 255.f);\n" - " int4 dst6;\n" - " dst6.r = src8.r >> 2;\n" - " dst6.g = ((src8.r & 0x3) << 4) | (src8.g >> 4);\n" - " dst6.b = ((src8.g & 0xF) << 2) | (src8.b >> 6);\n" - " dst6.a = src8.b & 0x3F;\n" - " ocol0 = (float4)dst6 / 63.f;\n" - "}\n"}; - -const char reint_rgb8_to_rgba6_msaa[] = { - "#define SAMPLES %d\n" - "sampler samp0 : register(s0);\n" - "Texture2DMSArray Tex0 : register(t0);\n" - "void main(\n" - " out float4 ocol0 : SV_Target,\n" - " in float4 pos : SV_Position,\n" - " in float3 uv0 : TEXCOORD0)\n" - "{\n" - " int width, height, slices, samples;\n" - " Tex0.GetDimensions(width, height, slices, samples);\n" - " float4 texcol = 0;\n" - " for (int i = 0; i < SAMPLES; ++i)\n" - " texcol += Tex0.Load(int3(uv0.x*(width), uv0.y*(height), uv0.z), i);\n" - " texcol /= SAMPLES;\n" - " int4 src8 = round(texcol * 255.f);\n" - " int4 dst6;\n" - " dst6.r = src8.r >> 2;\n" - " dst6.g = ((src8.r & 0x3) << 4) | (src8.g >> 4);\n" - " dst6.b = ((src8.g & 0xF) << 2) | (src8.b >> 6);\n" - " dst6.a = src8.b & 0x3F;\n" - " ocol0 = (float4)dst6 / 63.f;\n" - "}\n"}; - -ID3D11PixelShader* PixelShaderCache::ReinterpRGBA6ToRGB8(bool multisampled) -{ - if (!multisampled || g_ActiveConfig.iMultisamples <= 1) - { - if 
(!s_rgba6_to_rgb8[0]) - { - s_rgba6_to_rgb8[0] = D3D::CompileAndCreatePixelShader(reint_rgba6_to_rgb8); - CHECK(s_rgba6_to_rgb8[0], "Create RGBA6 to RGB8 pixel shader"); - D3D::SetDebugObjectName(s_rgba6_to_rgb8[0], "RGBA6 to RGB8 pixel shader"); - } - return s_rgba6_to_rgb8[0]; - } - else if (!s_rgba6_to_rgb8[1]) - { - // create MSAA shader for current AA mode - std::string buf = StringFromFormat(reint_rgba6_to_rgb8_msaa, g_ActiveConfig.iMultisamples); - s_rgba6_to_rgb8[1] = D3D::CompileAndCreatePixelShader(buf); - - CHECK(s_rgba6_to_rgb8[1], "Create RGBA6 to RGB8 MSAA pixel shader"); - D3D::SetDebugObjectName(s_rgba6_to_rgb8[1], "RGBA6 to RGB8 MSAA pixel shader"); - } - return s_rgba6_to_rgb8[1]; -} - -ID3D11PixelShader* PixelShaderCache::ReinterpRGB8ToRGBA6(bool multisampled) -{ - if (!multisampled || g_ActiveConfig.iMultisamples <= 1) - { - if (!s_rgb8_to_rgba6[0]) - { - s_rgb8_to_rgba6[0] = D3D::CompileAndCreatePixelShader(reint_rgb8_to_rgba6); - CHECK(s_rgb8_to_rgba6[0], "Create RGB8 to RGBA6 pixel shader"); - D3D::SetDebugObjectName(s_rgb8_to_rgba6[0], "RGB8 to RGBA6 pixel shader"); - } - return s_rgb8_to_rgba6[0]; - } - else if (!s_rgb8_to_rgba6[1]) - { - // create MSAA shader for current AA mode - std::string buf = StringFromFormat(reint_rgb8_to_rgba6_msaa, g_ActiveConfig.iMultisamples); - s_rgb8_to_rgba6[1] = D3D::CompileAndCreatePixelShader(buf); - - CHECK(s_rgb8_to_rgba6[1], "Create RGB8 to RGBA6 MSAA pixel shader"); - D3D::SetDebugObjectName(s_rgb8_to_rgba6[1], "RGB8 to RGBA6 MSAA pixel shader"); - } - return s_rgb8_to_rgba6[1]; -} - -ID3D11PixelShader* PixelShaderCache::GetColorCopyProgram(bool multisampled) -{ - if (!multisampled || g_ActiveConfig.iMultisamples <= 1) - { - return s_ColorCopyProgram[0]; - } - else if (s_ColorCopyProgram[1]) - { - return s_ColorCopyProgram[1]; - } - else - { - // create MSAA shader for current AA mode - std::string buf = StringFromFormat(color_copy_program_code_msaa, g_ActiveConfig.iMultisamples); - 
s_ColorCopyProgram[1] = D3D::CompileAndCreatePixelShader(buf); - CHECK(s_ColorCopyProgram[1] != nullptr, "Create color copy MSAA pixel shader"); - D3D::SetDebugObjectName(s_ColorCopyProgram[1], "color copy MSAA pixel shader"); - return s_ColorCopyProgram[1]; - } -} - -ID3D11PixelShader* PixelShaderCache::GetClearProgram() -{ - return s_ClearProgram; -} - -ID3D11PixelShader* PixelShaderCache::GetAnaglyphProgram() -{ - return s_AnaglyphProgram; -} - -ID3D11PixelShader* PixelShaderCache::GetDepthResolveProgram() -{ - if (s_DepthResolveProgram != nullptr) - return s_DepthResolveProgram; - - // create MSAA shader for current AA mode - std::string buf = StringFromFormat(depth_resolve_program, g_ActiveConfig.iMultisamples); - s_DepthResolveProgram = D3D::CompileAndCreatePixelShader(buf); - CHECK(s_DepthResolveProgram != nullptr, "Create depth matrix MSAA pixel shader"); - D3D::SetDebugObjectName(s_DepthResolveProgram, "depth resolve pixel shader"); - return s_DepthResolveProgram; -} - -void PixelShaderCache::Init() -{ - // used when drawing clear quads - s_ClearProgram = D3D::CompileAndCreatePixelShader(clear_program_code); - CHECK(s_ClearProgram != nullptr, "Create clear pixel shader"); - D3D::SetDebugObjectName(s_ClearProgram, "clear pixel shader"); - - // used for anaglyph stereoscopy - s_AnaglyphProgram = D3D::CompileAndCreatePixelShader(anaglyph_program_code); - CHECK(s_AnaglyphProgram != nullptr, "Create anaglyph pixel shader"); - D3D::SetDebugObjectName(s_AnaglyphProgram, "anaglyph pixel shader"); - - // used when copying/resolving the color buffer - s_ColorCopyProgram[0] = D3D::CompileAndCreatePixelShader(color_copy_program_code); - CHECK(s_ColorCopyProgram[0] != nullptr, "Create color copy pixel shader"); - D3D::SetDebugObjectName(s_ColorCopyProgram[0], "color copy pixel shader"); -} - -// Used in Swap() when AA mode has changed -void PixelShaderCache::InvalidateMSAAShaders() -{ - SAFE_RELEASE(s_ColorCopyProgram[1]); - SAFE_RELEASE(s_rgb8_to_rgba6[1]); - 
SAFE_RELEASE(s_rgba6_to_rgb8[1]); - SAFE_RELEASE(s_DepthResolveProgram); -} - -void PixelShaderCache::Shutdown() -{ - SAFE_RELEASE(s_ClearProgram); - SAFE_RELEASE(s_AnaglyphProgram); - SAFE_RELEASE(s_DepthResolveProgram); - for (int i = 0; i < 2; ++i) - { - SAFE_RELEASE(s_ColorCopyProgram[i]); - SAFE_RELEASE(s_rgba6_to_rgb8[i]); - SAFE_RELEASE(s_rgb8_to_rgba6[i]); - } -} -} // DX11 diff --git a/Source/Core/VideoBackends/D3D/PixelShaderCache.h b/Source/Core/VideoBackends/D3D/PixelShaderCache.h deleted file mode 100644 index da1b3b720d..0000000000 --- a/Source/Core/VideoBackends/D3D/PixelShaderCache.h +++ /dev/null @@ -1,34 +0,0 @@ -// Copyright 2008 Dolphin Emulator Project -// Licensed under GPLv2+ -// Refer to the license.txt file included. - -#pragma once - -#include -#include - -#include "VideoCommon/AsyncShaderCompiler.h" -#include "VideoCommon/PixelShaderGen.h" -#include "VideoCommon/UberShaderPixel.h" - -namespace DX11 -{ -class D3DBlob; - -class PixelShaderCache -{ -public: - static void Init(); - static void Shutdown(); - - static ID3D11PixelShader* GetColorCopyProgram(bool multisampled); - static ID3D11PixelShader* GetClearProgram(); - static ID3D11PixelShader* GetAnaglyphProgram(); - static ID3D11PixelShader* GetDepthResolveProgram(); - static ID3D11PixelShader* ReinterpRGBA6ToRGB8(bool multisampled); - static ID3D11PixelShader* ReinterpRGB8ToRGBA6(bool multisampled); - - static void InvalidateMSAAShaders(); -}; - -} // namespace DX11 diff --git a/Source/Core/VideoBackends/D3D/Render.cpp b/Source/Core/VideoBackends/D3D/Render.cpp index 2a82924708..a172425e3f 100644 --- a/Source/Core/VideoBackends/D3D/Render.cpp +++ b/Source/Core/VideoBackends/D3D/Render.cpp @@ -23,33 +23,19 @@ #include "VideoBackends/D3D/BoundingBox.h" #include "VideoBackends/D3D/D3DBase.h" #include "VideoBackends/D3D/D3DState.h" -#include "VideoBackends/D3D/D3DUtil.h" #include "VideoBackends/D3D/DXPipeline.h" #include "VideoBackends/D3D/DXShader.h" #include 
"VideoBackends/D3D/DXTexture.h" -#include "VideoBackends/D3D/FramebufferManager.h" -#include "VideoBackends/D3D/GeometryShaderCache.h" -#include "VideoBackends/D3D/PixelShaderCache.h" -#include "VideoBackends/D3D/TextureCache.h" -#include "VideoBackends/D3D/VertexShaderCache.h" #include "VideoCommon/BPFunctions.h" -#include "VideoCommon/OnScreenDisplay.h" -#include "VideoCommon/PixelEngine.h" +#include "VideoCommon/FramebufferManager.h" +#include "VideoCommon/PostProcessing.h" #include "VideoCommon/RenderState.h" -#include "VideoCommon/VideoBackendBase.h" -#include "VideoCommon/VideoCommon.h" #include "VideoCommon/VideoConfig.h" #include "VideoCommon/XFMemory.h" namespace DX11 { -// Reserve 512KB for vertices, and 64KB for uniforms. -// This should be sufficient for our usages, and if more is required, -// we split it into multiple draws. -constexpr u32 UTILITY_VBO_SIZE = 512 * 1024; -constexpr u32 UTILITY_UBO_SIZE = 64 * 1024; - // Nvidia stereo blitting struct defined in "nvstereo.h" from the Nvidia SDK typedef struct _Nv_Stereo_Image_Header { @@ -67,118 +53,9 @@ Renderer::Renderer(int backbuffer_width, int backbuffer_height, float backbuffer AbstractTextureFormat::RGBA8) { m_last_fullscreen_state = D3D::GetFullscreenState(); - g_framebuffer_manager = std::make_unique(m_target_width, m_target_height); - SetupDeviceObjects(); - - D3D11_VIEWPORT vp = CD3D11_VIEWPORT(0.f, 0.f, (float)m_target_width, (float)m_target_height); - D3D::context->RSSetViewports(1, &vp); - FramebufferManager::BindEFBRenderTarget(); - m_current_framebuffer_width = m_target_width; - m_current_framebuffer_height = m_target_height; } -Renderer::~Renderer() -{ - TeardownDeviceObjects(); -} - -void Renderer::SetupDeviceObjects() -{ - HRESULT hr; - - D3D11_DEPTH_STENCIL_DESC ddesc; - ddesc.DepthEnable = FALSE; - ddesc.DepthWriteMask = D3D11_DEPTH_WRITE_MASK_ZERO; - ddesc.DepthFunc = D3D11_COMPARISON_ALWAYS; - ddesc.StencilEnable = FALSE; - ddesc.StencilReadMask = D3D11_DEFAULT_STENCIL_READ_MASK; - 
ddesc.StencilWriteMask = D3D11_DEFAULT_STENCIL_WRITE_MASK; - hr = D3D::device->CreateDepthStencilState(&ddesc, &m_clear_depth_states[0]); - CHECK(hr == S_OK, "Create depth state for Renderer::ClearScreen"); - ddesc.DepthWriteMask = D3D11_DEPTH_WRITE_MASK_ALL; - ddesc.DepthEnable = TRUE; - hr = D3D::device->CreateDepthStencilState(&ddesc, &m_clear_depth_states[1]); - CHECK(hr == S_OK, "Create depth state for Renderer::ClearScreen"); - ddesc.DepthWriteMask = D3D11_DEPTH_WRITE_MASK_ZERO; - hr = D3D::device->CreateDepthStencilState(&ddesc, &m_clear_depth_states[2]); - CHECK(hr == S_OK, "Create depth state for Renderer::ClearScreen"); - D3D::SetDebugObjectName(m_clear_depth_states[0], - "depth state for Renderer::ClearScreen (depth buffer disabled)"); - D3D::SetDebugObjectName( - m_clear_depth_states[1], - "depth state for Renderer::ClearScreen (depth buffer enabled, writing enabled)"); - D3D::SetDebugObjectName( - m_clear_depth_states[2], - "depth state for Renderer::ClearScreen (depth buffer enabled, writing disabled)"); - - D3D11_BLEND_DESC blenddesc; - blenddesc.AlphaToCoverageEnable = FALSE; - blenddesc.IndependentBlendEnable = FALSE; - blenddesc.RenderTarget[0].BlendEnable = FALSE; - blenddesc.RenderTarget[0].RenderTargetWriteMask = D3D11_COLOR_WRITE_ENABLE_ALL; - blenddesc.RenderTarget[0].SrcBlend = D3D11_BLEND_ONE; - blenddesc.RenderTarget[0].DestBlend = D3D11_BLEND_ZERO; - blenddesc.RenderTarget[0].BlendOp = D3D11_BLEND_OP_ADD; - blenddesc.RenderTarget[0].SrcBlendAlpha = D3D11_BLEND_ONE; - blenddesc.RenderTarget[0].DestBlendAlpha = D3D11_BLEND_ZERO; - blenddesc.RenderTarget[0].BlendOpAlpha = D3D11_BLEND_OP_ADD; - hr = D3D::device->CreateBlendState(&blenddesc, &m_reset_blend_state); - CHECK(hr == S_OK, "Create blend state for Renderer::ResetAPIState"); - D3D::SetDebugObjectName(m_reset_blend_state, "blend state for Renderer::ResetAPIState"); - - m_clear_blend_states[0] = m_reset_blend_state; - m_reset_blend_state->AddRef(); - - 
blenddesc.RenderTarget[0].RenderTargetWriteMask = - D3D11_COLOR_WRITE_ENABLE_RED | D3D11_COLOR_WRITE_ENABLE_GREEN | D3D11_COLOR_WRITE_ENABLE_BLUE; - hr = D3D::device->CreateBlendState(&blenddesc, &m_clear_blend_states[1]); - CHECK(hr == S_OK, "Create blend state for Renderer::ClearScreen"); - - blenddesc.RenderTarget[0].RenderTargetWriteMask = D3D11_COLOR_WRITE_ENABLE_ALPHA; - hr = D3D::device->CreateBlendState(&blenddesc, &m_clear_blend_states[2]); - CHECK(hr == S_OK, "Create blend state for Renderer::ClearScreen"); - - blenddesc.RenderTarget[0].RenderTargetWriteMask = 0; - hr = D3D::device->CreateBlendState(&blenddesc, &m_clear_blend_states[3]); - CHECK(hr == S_OK, "Create blend state for Renderer::ClearScreen"); - - ddesc.DepthEnable = FALSE; - ddesc.DepthWriteMask = D3D11_DEPTH_WRITE_MASK_ZERO; - ddesc.DepthFunc = D3D11_COMPARISON_LESS; - ddesc.StencilEnable = FALSE; - ddesc.StencilReadMask = D3D11_DEFAULT_STENCIL_READ_MASK; - ddesc.StencilWriteMask = D3D11_DEFAULT_STENCIL_WRITE_MASK; - hr = D3D::device->CreateDepthStencilState(&ddesc, &m_reset_depth_state); - CHECK(hr == S_OK, "Create depth state for Renderer::ResetAPIState"); - D3D::SetDebugObjectName(m_reset_depth_state, "depth stencil state for Renderer::ResetAPIState"); - - D3D11_RASTERIZER_DESC rastdesc = CD3D11_RASTERIZER_DESC(D3D11_FILL_SOLID, D3D11_CULL_NONE, false, - 0, 0.f, 0.f, false, false, false, false); - hr = D3D::device->CreateRasterizerState(&rastdesc, &m_reset_rast_state); - CHECK(hr == S_OK, "Create rasterizer state for Renderer::ResetAPIState"); - D3D::SetDebugObjectName(m_reset_rast_state, "rasterizer state for Renderer::ResetAPIState"); - - m_screenshot_texture = nullptr; -} - -// Kill off all device objects -void Renderer::TeardownDeviceObjects() -{ - g_framebuffer_manager.reset(); - - SAFE_RELEASE(m_clear_blend_states[0]); - SAFE_RELEASE(m_clear_blend_states[1]); - SAFE_RELEASE(m_clear_blend_states[2]); - SAFE_RELEASE(m_clear_blend_states[3]); - SAFE_RELEASE(m_clear_depth_states[0]); - 
SAFE_RELEASE(m_clear_depth_states[1]); - SAFE_RELEASE(m_clear_depth_states[2]); - SAFE_RELEASE(m_reset_blend_state); - SAFE_RELEASE(m_reset_depth_state); - SAFE_RELEASE(m_reset_rast_state); - SAFE_RELEASE(m_screenshot_texture); - SAFE_RELEASE(m_3d_vision_texture); -} +Renderer::~Renderer() = default; void Renderer::Create3DVisionTexture(int width, int height) { @@ -200,9 +77,17 @@ void Renderer::Create3DVisionTexture(int width, int height) sys_data.SysMemPitch = pitch; sys_data.pSysMem = memory.get(); - m_3d_vision_texture = - D3DTexture2D::Create(width * 2, height + 1, D3D11_BIND_RENDER_TARGET, D3D11_USAGE_DEFAULT, - DXGI_FORMAT_R8G8B8A8_UNORM, 1, 1, &sys_data); + CD3D11_TEXTURE2D_DESC texture_desc(DXGI_FORMAT_R8G8B8A8_UNORM, width * 2, height + 1, 1, 1, + D3D11_BIND_RENDER_TARGET, D3D11_USAGE_DEFAULT, 0, 1, 0, 0); + ID3D11Texture2D* texture; + HRESULT hr = D3D::device->CreateTexture2D(&texture_desc, &sys_data, &texture); + CHECK(SUCCEEDED(hr), "Create 3D Vision Texture"); + m_3d_vision_texture = std::make_unique(TextureConfig(width * 2, height + 1, 1, 1, 1, + AbstractTextureFormat::RGBA8, + AbstractTextureFlag_RenderTarget), + texture, nullptr, nullptr); + m_3d_vision_framebuffer = + DXFramebuffer::Create(static_cast(m_3d_vision_texture.get()), nullptr); } bool Renderer::IsHeadless() const @@ -212,7 +97,7 @@ bool Renderer::IsHeadless() const std::unique_ptr Renderer::CreateTexture(const TextureConfig& config) { - return std::make_unique(config); + return DXTexture::Create(config); } std::unique_ptr Renderer::CreateStagingTexture(StagingTextureType type, @@ -221,12 +106,11 @@ std::unique_ptr Renderer::CreateStagingTexture(StagingTe return DXStagingTexture::Create(type, config); } -std::unique_ptr -Renderer::CreateFramebuffer(const AbstractTexture* color_attachment, - const AbstractTexture* depth_attachment) +std::unique_ptr Renderer::CreateFramebuffer(AbstractTexture* color_attachment, + AbstractTexture* depth_attachment) { - return 
DXFramebuffer::Create(static_cast(color_attachment), - static_cast(depth_attachment)); + return DXFramebuffer::Create(static_cast(color_attachment), + static_cast(depth_attachment)); } std::unique_ptr Renderer::CreateShaderFromSource(ShaderStage stage, @@ -249,220 +133,44 @@ std::unique_ptr Renderer::CreatePipeline(const AbstractPipelin void Renderer::SetPipeline(const AbstractPipeline* pipeline) { const DXPipeline* dx_pipeline = static_cast(pipeline); - if (!dx_pipeline) + if (m_current_pipeline == dx_pipeline) return; - D3D::stateman->SetRasterizerState(dx_pipeline->GetRasterizerState()); - D3D::stateman->SetDepthState(dx_pipeline->GetDepthState()); - D3D::stateman->SetBlendState(dx_pipeline->GetBlendState()); - D3D::stateman->SetPrimitiveTopology(dx_pipeline->GetPrimitiveTopology()); - D3D::stateman->SetInputLayout(dx_pipeline->GetInputLayout()); - D3D::stateman->SetVertexShader(dx_pipeline->GetVertexShader()); - D3D::stateman->SetGeometryShader(dx_pipeline->GetGeometryShader()); - D3D::stateman->SetPixelShader(dx_pipeline->GetPixelShader()); -} - -TargetRectangle Renderer::ConvertEFBRectangle(const EFBRectangle& rc) -{ - TargetRectangle result; - result.left = EFBToScaledX(rc.left); - result.top = EFBToScaledY(rc.top); - result.right = EFBToScaledX(rc.right); - result.bottom = EFBToScaledY(rc.bottom); - return result; + if (dx_pipeline) + { + D3D::stateman->SetRasterizerState(dx_pipeline->GetRasterizerState()); + D3D::stateman->SetDepthState(dx_pipeline->GetDepthState()); + D3D::stateman->SetBlendState(dx_pipeline->GetBlendState()); + D3D::stateman->SetPrimitiveTopology(dx_pipeline->GetPrimitiveTopology()); + D3D::stateman->SetInputLayout(dx_pipeline->GetInputLayout()); + D3D::stateman->SetVertexShader(dx_pipeline->GetVertexShader()); + D3D::stateman->SetGeometryShader(dx_pipeline->GetGeometryShader()); + D3D::stateman->SetPixelShader(dx_pipeline->GetPixelShader()); + D3D::stateman->SetIntegerRTV(dx_pipeline->UseLogicOp()); + } + else + { + // These will be 
destroyed at pipeline destruction. + D3D::stateman->SetInputLayout(nullptr); + D3D::stateman->SetVertexShader(nullptr); + D3D::stateman->SetGeometryShader(nullptr); + D3D::stateman->SetPixelShader(nullptr); + } } void Renderer::SetScissorRect(const MathUtil::Rectangle& rc) { - const RECT rect = {rc.left, rc.top, rc.right, rc.bottom}; + // TODO: Move to stateman + const CD3D11_RECT rect(rc.left, rc.top, std::max(rc.right, rc.left + 1), + std::max(rc.bottom, rc.top + 1)); D3D::context->RSSetScissorRects(1, &rect); } -// This function allows the CPU to directly access the EFB. -// There are EFB peeks (which will read the color or depth of a pixel) -// and EFB pokes (which will change the color or depth of a pixel). -// -// The behavior of EFB peeks can only be modified by: -// - GX_PokeAlphaRead -// The behavior of EFB pokes can be modified by: -// - GX_PokeAlphaMode (TODO) -// - GX_PokeAlphaUpdate (TODO) -// - GX_PokeBlendMode (TODO) -// - GX_PokeColorUpdate (TODO) -// - GX_PokeDither (TODO) -// - GX_PokeDstAlpha (TODO) -// - GX_PokeZMode (TODO) -u32 Renderer::AccessEFB(EFBAccessType type, u32 x, u32 y, u32 poke_data) -{ - // Convert EFB dimensions to the ones of our render target - EFBRectangle efbPixelRc; - efbPixelRc.left = x; - efbPixelRc.top = y; - efbPixelRc.right = x + 1; - efbPixelRc.bottom = y + 1; - TargetRectangle targetPixelRc = Renderer::ConvertEFBRectangle(efbPixelRc); - - // Take the mean of the resulting dimensions; TODO: Don't use the center pixel, compute the - // average color instead - D3D11_RECT RectToLock; - if (type == EFBAccessType::PeekColor || type == EFBAccessType::PeekZ) - { - RectToLock.left = (targetPixelRc.left + targetPixelRc.right) / 2; - RectToLock.top = (targetPixelRc.top + targetPixelRc.bottom) / 2; - RectToLock.right = RectToLock.left + 1; - RectToLock.bottom = RectToLock.top + 1; - } - else - { - RectToLock.left = targetPixelRc.left; - RectToLock.right = targetPixelRc.right; - RectToLock.top = targetPixelRc.top; - 
RectToLock.bottom = targetPixelRc.bottom; - } - - // Reset any game specific settings. - ResetAPIState(); - D3D11_VIEWPORT vp = CD3D11_VIEWPORT(0.f, 0.f, 1.f, 1.f); - D3D::context->RSSetViewports(1, &vp); - D3D::SetPointCopySampler(); - - // Select copy and read textures depending on if we are doing a color or depth read (since they - // are different formats). - D3DTexture2D* source_tex; - D3DTexture2D* read_tex; - ID3D11Texture2D* staging_tex; - if (type == EFBAccessType::PeekColor) - { - source_tex = FramebufferManager::GetEFBColorTexture(); - read_tex = FramebufferManager::GetEFBColorReadTexture(); - staging_tex = FramebufferManager::GetEFBColorStagingBuffer(); - } - else - { - source_tex = FramebufferManager::GetEFBDepthTexture(); - read_tex = FramebufferManager::GetEFBDepthReadTexture(); - staging_tex = FramebufferManager::GetEFBDepthStagingBuffer(); - } - - // Select pixel shader (we don't want to average depth samples, instead select the minimum). - ID3D11PixelShader* copy_pixel_shader; - if (type == EFBAccessType::PeekZ && g_ActiveConfig.iMultisamples > 1) - copy_pixel_shader = PixelShaderCache::GetDepthResolveProgram(); - else - copy_pixel_shader = PixelShaderCache::GetColorCopyProgram(true); - - // Draw a quad to grab the texel we want to read. - D3D::context->OMSetRenderTargets(1, &read_tex->GetRTV(), nullptr); - D3D::drawShadedTexQuad(source_tex->GetSRV(), &RectToLock, Renderer::GetTargetWidth(), - Renderer::GetTargetHeight(), copy_pixel_shader, - VertexShaderCache::GetSimpleVertexShader(), - VertexShaderCache::GetSimpleInputLayout()); - - // Restore expected game state. - RestoreAPIState(); - - // Copy the pixel from the renderable to cpu-readable buffer. 
- D3D11_BOX box = CD3D11_BOX(0, 0, 0, 1, 1, 1); - D3D::context->CopySubresourceRegion(staging_tex, 0, 0, 0, 0, read_tex->GetTex(), 0, &box); - D3D11_MAPPED_SUBRESOURCE map; - CHECK(D3D::context->Map(staging_tex, 0, D3D11_MAP_READ, 0, &map) == S_OK, - "Map staging buffer failed"); - - // Convert the framebuffer data to the format the game is expecting to receive. - u32 ret; - if (type == EFBAccessType::PeekColor) - { - u32 val; - memcpy(&val, map.pData, sizeof(val)); - - // our buffers are RGBA, yet a BGRA value is expected - val = ((val & 0xFF00FF00) | ((val >> 16) & 0xFF) | ((val << 16) & 0xFF0000)); - - // check what to do with the alpha channel (GX_PokeAlphaRead) - PixelEngine::UPEAlphaReadReg alpha_read_mode = PixelEngine::GetAlphaReadMode(); - - if (bpmem.zcontrol.pixel_format == PEControl::RGBA6_Z24) - { - val = RGBA8ToRGBA6ToRGBA8(val); - } - else if (bpmem.zcontrol.pixel_format == PEControl::RGB565_Z16) - { - val = RGBA8ToRGB565ToRGBA8(val); - } - if (bpmem.zcontrol.pixel_format != PEControl::RGBA6_Z24) - { - val |= 0xFF000000; - } - - if (alpha_read_mode.ReadMode == 2) - ret = val; // GX_READ_NONE - else if (alpha_read_mode.ReadMode == 1) - ret = (val | 0xFF000000); // GX_READ_FF - else /*if(alpha_read_mode.ReadMode == 0)*/ - ret = (val & 0x00FFFFFF); // GX_READ_00 - } - else // type == EFBAccessType::PeekZ - { - float val; - memcpy(&val, map.pData, sizeof(val)); - - // depth buffer is inverted in the d3d backend - val = 1.0f - val; - - if (bpmem.zcontrol.pixel_format == PEControl::RGB565_Z16) - { - // if Z is in 16 bit format you must return a 16 bit integer - ret = MathUtil::Clamp(static_cast(val * 65536.0f), 0, 0xFFFF); - } - else - { - ret = MathUtil::Clamp(static_cast(val * 16777216.0f), 0, 0xFFFFFF); - } - } - - D3D::context->Unmap(staging_tex, 0); - return ret; -} - -void Renderer::PokeEFB(EFBAccessType type, const EfbPokeData* points, size_t num_points) -{ - ResetAPIState(); - - if (type == EFBAccessType::PokeColor) - { - D3D11_VIEWPORT vp = - 
CD3D11_VIEWPORT(0.0f, 0.0f, (float)GetTargetWidth(), (float)GetTargetHeight()); - D3D::context->RSSetViewports(1, &vp); - } - else // if (type == EFBAccessType::PokeZ) - { - D3D::stateman->SetBlendState(m_clear_blend_states[3]); - D3D::stateman->SetDepthState(m_clear_depth_states[1]); - - D3D11_VIEWPORT vp = - CD3D11_VIEWPORT(0.0f, 0.0f, (float)GetTargetWidth(), (float)GetTargetHeight()); - - D3D::context->RSSetViewports(1, &vp); - } - - D3D::DrawEFBPokeQuads(type, points, num_points); - - RestoreAPIState(); -} - void Renderer::SetViewport(float x, float y, float width, float height, float near_depth, float far_depth) { - // In D3D, the viewport rectangle must fit within the render target. - D3D11_VIEWPORT vp; - vp.TopLeftX = MathUtil::Clamp(x, 0.0f, static_cast(m_current_framebuffer_width - 1)); - vp.TopLeftY = MathUtil::Clamp(y, 0.0f, static_cast(m_current_framebuffer_height - 1)); - vp.Width = - MathUtil::Clamp(width, 1.0f, static_cast(m_current_framebuffer_width) - vp.TopLeftX); - vp.Height = - MathUtil::Clamp(height, 1.0f, static_cast(m_current_framebuffer_height) - vp.TopLeftY); - vp.MinDepth = near_depth; - vp.MaxDepth = far_depth; + // TODO: Move to stateman + const CD3D11_VIEWPORT vp(x, y, width, height, near_depth, far_depth); D3D::context->RSSetViewports(1, &vp); } @@ -478,89 +186,19 @@ void Renderer::DrawIndexed(u32 base_index, u32 num_indices, u32 base_vertex) D3D::context->DrawIndexed(num_indices, base_index, base_vertex); } -void Renderer::ClearScreen(const EFBRectangle& rc, bool colorEnable, bool alphaEnable, bool zEnable, - u32 color, u32 z) +void Renderer::DispatchComputeShader(const AbstractShader* shader, u32 groups_x, u32 groups_y, + u32 groups_z) { - ResetAPIState(); - - if (colorEnable && alphaEnable) - D3D::stateman->SetBlendState(m_clear_blend_states[0]); - else if (colorEnable) - D3D::stateman->SetBlendState(m_clear_blend_states[1]); - else if (alphaEnable) - D3D::stateman->SetBlendState(m_clear_blend_states[2]); - else - 
D3D::stateman->SetBlendState(m_clear_blend_states[3]); - - // TODO: Should we enable Z testing here? - // if (!bpmem.zmode.testenable) D3D::stateman->PushDepthState(s_clear_depth_states[0]); - // else - if (zEnable) - D3D::stateman->SetDepthState(m_clear_depth_states[1]); - else /*if (!zEnable)*/ - D3D::stateman->SetDepthState(m_clear_depth_states[2]); - - // Update the view port for clearing the picture - TargetRectangle targetRc = Renderer::ConvertEFBRectangle(rc); - D3D11_VIEWPORT vp = - CD3D11_VIEWPORT((float)targetRc.left, (float)targetRc.top, (float)targetRc.GetWidth(), - (float)targetRc.GetHeight(), 0.f, 1.f); - D3D::context->RSSetViewports(1, &vp); - FramebufferManager::SetIntegerEFBRenderTarget(false); - - // Color is passed in bgra mode so we need to convert it to rgba - u32 rgbaColor = (color & 0xFF00FF00) | ((color >> 16) & 0xFF) | ((color << 16) & 0xFF0000); - D3D::drawClearQuad(rgbaColor, 1.0f - (z & 0xFFFFFF) / 16777216.0f); - - RestoreAPIState(); -} - -void Renderer::ReinterpretPixelData(unsigned int convtype) -{ - // TODO: MSAA support.. 
- D3D11_RECT source = CD3D11_RECT(0, 0, GetTargetWidth(), GetTargetHeight()); - - ID3D11PixelShader* pixel_shader; - if (convtype == 0) - pixel_shader = PixelShaderCache::ReinterpRGB8ToRGBA6(true); - else if (convtype == 2) - pixel_shader = PixelShaderCache::ReinterpRGBA6ToRGB8(true); - else - { - ERROR_LOG(VIDEO, "Trying to reinterpret pixel data with unsupported conversion type %d", - convtype); - return; - } - - // convert data and set the target texture as our new EFB - ResetAPIState(); - - D3D11_VIEWPORT vp = CD3D11_VIEWPORT(0.f, 0.f, static_cast(GetTargetWidth()), - static_cast(GetTargetHeight())); - D3D::context->RSSetViewports(1, &vp); - - D3D::context->OMSetRenderTargets(1, &FramebufferManager::GetEFBColorTempTexture()->GetRTV(), - nullptr); - D3D::SetPointCopySampler(); - D3D::drawShadedTexQuad( - FramebufferManager::GetEFBColorTexture()->GetSRV(), &source, GetTargetWidth(), - GetTargetHeight(), pixel_shader, VertexShaderCache::GetSimpleVertexShader(), - VertexShaderCache::GetSimpleInputLayout(), GeometryShaderCache::GetCopyGeometryShader()); - - FramebufferManager::SwapReinterpretTexture(); - RestoreAPIState(); + D3D::stateman->SetComputeShader(static_cast(shader)->GetD3DComputeShader()); + D3D::stateman->SyncComputeBindings(); + D3D::context->Dispatch(groups_x, groups_y, groups_z); } void Renderer::BindBackbuffer(const ClearColor& clear_color) { CheckForSurfaceChange(); CheckForSurfaceResize(); - - D3D::context->OMSetRenderTargets(1, &D3D::GetBackBuffer()->GetRTV(), nullptr); - D3D::context->ClearRenderTargetView(D3D::GetBackBuffer()->GetRTV(), clear_color.data()); - m_current_framebuffer = nullptr; - m_current_framebuffer_width = m_backbuffer_width; - m_current_framebuffer_height = m_backbuffer_height; + SetAndClearFramebuffer(D3D::GetSwapChainFramebuffer(), clear_color); } void Renderer::PresentBackbuffer() @@ -570,14 +208,6 @@ void Renderer::PresentBackbuffer() void Renderer::OnConfigChanged(u32 bits) { - // Resize the back buffers NOW to avoid 
flickering - if (bits & (CONFIG_CHANGE_BIT_TARGET_SIZE | CONFIG_CHANGE_BIT_MULTISAMPLES | - CONFIG_CHANGE_BIT_STEREO_MODE)) - { - PixelShaderCache::InvalidateMSAAShaders(); - g_framebuffer_manager.reset(); - g_framebuffer_manager = std::make_unique(m_target_width, m_target_height); - } } void Renderer::CheckForSurfaceChange() @@ -585,8 +215,8 @@ void Renderer::CheckForSurfaceChange() if (!m_surface_changed.TestAndClear()) return; - SAFE_RELEASE(m_screenshot_texture); - SAFE_RELEASE(m_3d_vision_texture); + m_3d_vision_framebuffer.reset(); + m_3d_vision_texture.reset(); D3D::Reset(reinterpret_cast(m_new_surface_handle)); m_new_surface_handle = nullptr; @@ -601,8 +231,9 @@ void Renderer::CheckForSurfaceResize() if (!m_surface_resized.TestAndClear() && !exclusive_fullscreen_changed) return; - SAFE_RELEASE(m_screenshot_texture); - SAFE_RELEASE(m_3d_vision_texture); + m_3d_vision_framebuffer.reset(); + m_3d_vision_texture.reset(); + m_last_fullscreen_state = fullscreen_state; if (D3D::swapchain) D3D::ResizeSwapChain(); @@ -625,43 +256,38 @@ void Renderer::UpdateBackbufferSize() } } -// ALWAYS call RestoreAPIState for each ResetAPIState call you're doing -void Renderer::ResetAPIState() +void Renderer::SetFramebuffer(AbstractFramebuffer* framebuffer) { - D3D::stateman->SetBlendState(m_reset_blend_state); - D3D::stateman->SetDepthState(m_reset_depth_state); - D3D::stateman->SetRasterizerState(m_reset_rast_state); -} + if (m_current_framebuffer == framebuffer) + return; -void Renderer::RestoreAPIState() -{ - // Gets us back into a more game-like state. - m_current_framebuffer = nullptr; - m_current_framebuffer_width = m_target_width; - m_current_framebuffer_height = m_target_height; - FramebufferManager::BindEFBRenderTarget(); - BPFunctions::SetViewport(); - BPFunctions::SetScissor(); -} + // We can't leave the framebuffer bound as a texture and a render target. 
+ DXFramebuffer* fb = static_cast(framebuffer); + if ((fb->GetColorAttachment() && + D3D::stateman->UnsetTexture( + static_cast(fb->GetColorAttachment())->GetD3DSRV()) != 0) || + (fb->GetDepthAttachment() && + D3D::stateman->UnsetTexture( + static_cast(fb->GetDepthAttachment())->GetD3DSRV()) != 0)) + { + D3D::stateman->ApplyTextures(); + } -void Renderer::SetFramebuffer(const AbstractFramebuffer* framebuffer) -{ - const DXFramebuffer* fb = static_cast(framebuffer); - D3D::context->OMSetRenderTargets(fb->GetNumRTVs(), fb->GetRTVArray(), fb->GetDSV()); + D3D::stateman->SetFramebuffer(fb); m_current_framebuffer = fb; - m_current_framebuffer_width = fb->GetWidth(); - m_current_framebuffer_height = fb->GetHeight(); } -void Renderer::SetAndDiscardFramebuffer(const AbstractFramebuffer* framebuffer) +void Renderer::SetAndDiscardFramebuffer(AbstractFramebuffer* framebuffer) { SetFramebuffer(framebuffer); } -void Renderer::SetAndClearFramebuffer(const AbstractFramebuffer* framebuffer, +void Renderer::SetAndClearFramebuffer(AbstractFramebuffer* framebuffer, const ClearColor& color_value, float depth_value) { SetFramebuffer(framebuffer); + D3D::stateman->Apply(); + if (framebuffer->GetColorFormat() != AbstractTextureFormat::Undefined) { D3D::context->ClearRenderTargetView( @@ -676,9 +302,8 @@ void Renderer::SetAndClearFramebuffer(const AbstractFramebuffer* framebuffer, void Renderer::SetTexture(u32 index, const AbstractTexture* texture) { - D3D::stateman->SetTexture( - index, - texture ? static_cast(texture)->GetRawTexIdentifier()->GetSRV() : nullptr); + D3D::stateman->SetTexture(index, texture ? 
static_cast(texture)->GetD3DSRV() : + nullptr); } void Renderer::SetSamplerState(u32 index, const SamplerState& state) @@ -686,15 +311,15 @@ void Renderer::SetSamplerState(u32 index, const SamplerState& state) D3D::stateman->SetSampler(index, m_state_cache.Get(state)); } -void Renderer::UnbindTexture(const AbstractTexture* texture) +void Renderer::SetComputeImageTexture(AbstractTexture* texture, bool read, bool write) { - D3D::stateman->UnsetTexture( - static_cast(texture)->GetRawTexIdentifier()->GetSRV()); + D3D::stateman->SetComputeUAV(texture ? static_cast(texture)->GetD3DUAV() : nullptr); } -void Renderer::SetInterlacingMode() +void Renderer::UnbindTexture(const AbstractTexture* texture) { - // TODO + if (D3D::stateman->UnsetTexture(static_cast(texture)->GetD3DSRV()) != 0) + D3D::stateman->ApplyTextures(); } u16 Renderer::BBoxRead(int index) @@ -736,93 +361,43 @@ void Renderer::BBoxWrite(int index, u16 _value) BBox::Set(index, value); } +void Renderer::Flush() +{ + D3D::context->Flush(); +} + +void Renderer::WaitForGPUIdle() +{ + // There is no glFinish() equivalent in D3D. 
+ D3D::context->Flush(); +} + void Renderer::RenderXFBToScreen(const AbstractTexture* texture, const EFBRectangle& rc) { - const CD3D11_RECT source_rc(rc.left, rc.top, rc.right, rc.bottom); - const TargetRectangle target_rc = GetTargetRectangle(); + if (g_ActiveConfig.stereo_mode != StereoMode::Nvidia3DVision) + return ::Renderer::RenderXFBToScreen(texture, rc); - // activate linear filtering for the buffer copies - D3D::SetLinearCopySampler(); + if (!m_3d_vision_texture) + Create3DVisionTexture(m_backbuffer_width, m_backbuffer_height); - if (g_ActiveConfig.stereo_mode == StereoMode::SBS || - g_ActiveConfig.stereo_mode == StereoMode::TAB) - { - TargetRectangle left_rc, right_rc; - std::tie(left_rc, right_rc) = ConvertStereoRectangle(target_rc); + // Render to staging texture which is double the width of the backbuffer + SetAndClearFramebuffer(m_3d_vision_framebuffer.get()); - SetViewport(static_cast(left_rc.left), static_cast(left_rc.top), - static_cast(left_rc.GetWidth()), static_cast(right_rc.GetHeight()), - 0.0f, 1.0f); - D3D::drawShadedTexQuad(static_cast(texture)->GetRawTexIdentifier()->GetSRV(), - &source_rc, texture->GetWidth(), texture->GetHeight(), - PixelShaderCache::GetColorCopyProgram(false), - VertexShaderCache::GetSimpleVertexShader(), - VertexShaderCache::GetSimpleInputLayout(), nullptr, 0); + const auto target_rc = GetTargetRectangle(); + m_post_processor->BlitFromTexture(target_rc, rc, texture, 0); + m_post_processor->BlitFromTexture( + MathUtil::Rectangle(target_rc.left + m_backbuffer_width, target_rc.top, + target_rc.right + m_backbuffer_width, target_rc.bottom), + rc, texture, 1); - SetViewport(static_cast(right_rc.left), static_cast(right_rc.top), - static_cast(right_rc.GetWidth()), static_cast(right_rc.GetHeight()), - 0.0f, 1.0f); - D3D::drawShadedTexQuad(static_cast(texture)->GetRawTexIdentifier()->GetSRV(), - &source_rc, texture->GetWidth(), texture->GetHeight(), - PixelShaderCache::GetColorCopyProgram(false), - 
VertexShaderCache::GetSimpleVertexShader(), - VertexShaderCache::GetSimpleInputLayout(), nullptr, 1); - } - else if (g_ActiveConfig.stereo_mode == StereoMode::Nvidia3DVision) - { - if (!m_3d_vision_texture) - Create3DVisionTexture(m_backbuffer_width, m_backbuffer_height); + // Copy the left eye to the backbuffer, if Nvidia 3D Vision is enabled it should + // recognize the signature and automatically include the right eye frame. + const CD3D11_BOX box(0, 0, 0, m_backbuffer_width, m_backbuffer_height, 1); + D3D::context->CopySubresourceRegion(D3D::GetSwapChainTexture()->GetD3DTexture(), 0, 0, 0, 0, + m_3d_vision_texture->GetD3DTexture(), 0, &box); - const CD3D11_VIEWPORT left_vp( - static_cast(target_rc.left), static_cast(target_rc.top), - static_cast(target_rc.GetWidth()), static_cast(target_rc.GetHeight())); - const CD3D11_VIEWPORT right_vp( - static_cast(target_rc.left + m_backbuffer_width), static_cast(target_rc.top), - static_cast(target_rc.GetWidth()), static_cast(target_rc.GetHeight())); - - // Render to staging texture which is double the width of the backbuffer - D3D::context->OMSetRenderTargets(1, &m_3d_vision_texture->GetRTV(), nullptr); - - D3D::context->RSSetViewports(1, &left_vp); - D3D::drawShadedTexQuad(static_cast(texture)->GetRawTexIdentifier()->GetSRV(), - &source_rc, texture->GetWidth(), texture->GetHeight(), - PixelShaderCache::GetColorCopyProgram(false), - VertexShaderCache::GetSimpleVertexShader(), - VertexShaderCache::GetSimpleInputLayout(), nullptr, 0); - - D3D::context->RSSetViewports(1, &right_vp); - D3D::drawShadedTexQuad(static_cast(texture)->GetRawTexIdentifier()->GetSRV(), - &source_rc, texture->GetWidth(), texture->GetHeight(), - PixelShaderCache::GetColorCopyProgram(false), - VertexShaderCache::GetSimpleVertexShader(), - VertexShaderCache::GetSimpleInputLayout(), nullptr, 1); - - // Copy the left eye to the backbuffer, if Nvidia 3D Vision is enabled it should - // recognize the signature and automatically include the right eye frame. 
- const CD3D11_BOX box(0, 0, 0, m_backbuffer_width, m_backbuffer_height, 1); - D3D::context->CopySubresourceRegion(D3D::GetBackBuffer()->GetTex(), 0, 0, 0, 0, - m_3d_vision_texture->GetTex(), 0, &box); - - // Restore render target to backbuffer - D3D::context->OMSetRenderTargets(1, &D3D::GetBackBuffer()->GetRTV(), nullptr); - } - else - { - SetViewport(static_cast(target_rc.left), static_cast(target_rc.top), - static_cast(target_rc.GetWidth()), static_cast(target_rc.GetHeight()), - 0.0f, 1.0f); - - ID3D11PixelShader* pixelShader = (g_Config.stereo_mode == StereoMode::Anaglyph) ? - PixelShaderCache::GetAnaglyphProgram() : - PixelShaderCache::GetColorCopyProgram(false); - ID3D11GeometryShader* geomShader = (g_ActiveConfig.stereo_mode == StereoMode::QuadBuffer) ? - GeometryShaderCache::GetCopyGeometryShader() : - nullptr; - D3D::drawShadedTexQuad(static_cast(texture)->GetRawTexIdentifier()->GetSRV(), - &source_rc, texture->GetWidth(), texture->GetHeight(), pixelShader, - VertexShaderCache::GetSimpleVertexShader(), - VertexShaderCache::GetSimpleInputLayout(), geomShader); - } + // Restore render target to backbuffer + SetFramebuffer(D3D::GetSwapChainFramebuffer()); } void Renderer::SetFullscreen(bool enable_fullscreen) diff --git a/Source/Core/VideoBackends/D3D/Render.h b/Source/Core/VideoBackends/D3D/Render.h index 399babffad..0f9b38761e 100644 --- a/Source/Core/VideoBackends/D3D/Render.h +++ b/Source/Core/VideoBackends/D3D/Render.h @@ -9,11 +9,10 @@ #include "VideoBackends/D3D/D3DState.h" #include "VideoCommon/RenderBase.h" -enum class EFBAccessType; - namespace DX11 { -class D3DTexture2D; +class DXTexture; +class DXFramebuffer; class Renderer : public ::Renderer { @@ -32,53 +31,43 @@ public: size_t length) override; std::unique_ptr CreateShaderFromBinary(ShaderStage stage, const void* data, size_t length) override; + std::unique_ptr + CreateNativeVertexFormat(const PortableVertexDeclaration& vtx_decl) override; std::unique_ptr CreatePipeline(const 
AbstractPipelineConfig& config) override; std::unique_ptr - CreateFramebuffer(const AbstractTexture* color_attachment, - const AbstractTexture* depth_attachment) override; + CreateFramebuffer(AbstractTexture* color_attachment, AbstractTexture* depth_attachment) override; void SetPipeline(const AbstractPipeline* pipeline) override; - void SetFramebuffer(const AbstractFramebuffer* framebuffer) override; - void SetAndDiscardFramebuffer(const AbstractFramebuffer* framebuffer) override; - void SetAndClearFramebuffer(const AbstractFramebuffer* framebuffer, - const ClearColor& color_value = {}, + void SetFramebuffer(AbstractFramebuffer* framebuffer) override; + void SetAndDiscardFramebuffer(AbstractFramebuffer* framebuffer) override; + void SetAndClearFramebuffer(AbstractFramebuffer* framebuffer, const ClearColor& color_value = {}, float depth_value = 0.0f) override; void SetScissorRect(const MathUtil::Rectangle& rc) override; void SetTexture(u32 index, const AbstractTexture* texture) override; void SetSamplerState(u32 index, const SamplerState& state) override; + void SetComputeImageTexture(AbstractTexture* texture, bool read, bool write) override; void UnbindTexture(const AbstractTexture* texture) override; - void SetInterlacingMode() override; void SetViewport(float x, float y, float width, float height, float near_depth, float far_depth) override; void Draw(u32 base_vertex, u32 num_vertices) override; void DrawIndexed(u32 base_index, u32 num_indices, u32 base_vertex) override; + void DispatchComputeShader(const AbstractShader* shader, u32 groups_x, u32 groups_y, + u32 groups_z) override; void BindBackbuffer(const ClearColor& clear_color = {}) override; void PresentBackbuffer() override; void SetFullscreen(bool enable_fullscreen) override; bool IsFullscreen() const override; - u32 AccessEFB(EFBAccessType type, u32 x, u32 y, u32 poke_data) override; - void PokeEFB(EFBAccessType type, const EfbPokeData* points, size_t num_points) override; - u16 BBoxRead(int index) 
override; void BBoxWrite(int index, u16 value) override; - void ResetAPIState() override; - void RestoreAPIState() override; - - TargetRectangle ConvertEFBRectangle(const EFBRectangle& rc) override; + void Flush() override; + void WaitForGPUIdle() override; void RenderXFBToScreen(const AbstractTexture* texture, const EFBRectangle& rc) override; void OnConfigChanged(u32 bits) override; - void ClearScreen(const EFBRectangle& rc, bool colorEnable, bool alphaEnable, bool zEnable, - u32 color, u32 z) override; - - void ReinterpretPixelData(unsigned int convtype) override; - private: - void SetupDeviceObjects(); - void TeardownDeviceObjects(); void Create3DVisionTexture(int width, int height); void CheckForSurfaceChange(); void CheckForSurfaceResize(); @@ -86,15 +75,9 @@ private: StateCache m_state_cache; - std::array m_clear_blend_states{}; - std::array m_clear_depth_states{}; - ID3D11BlendState* m_reset_blend_state = nullptr; - ID3D11DepthStencilState* m_reset_depth_state = nullptr; - ID3D11RasterizerState* m_reset_rast_state = nullptr; - - ID3D11Texture2D* m_screenshot_texture = nullptr; - D3DTexture2D* m_3d_vision_texture = nullptr; + std::unique_ptr m_3d_vision_texture; + std::unique_ptr m_3d_vision_framebuffer; bool m_last_fullscreen_state = false; }; -} +} // namespace DX11 diff --git a/Source/Core/VideoBackends/D3D/TextureCache.cpp b/Source/Core/VideoBackends/D3D/TextureCache.cpp deleted file mode 100644 index f17d12f4c3..0000000000 --- a/Source/Core/VideoBackends/D3D/TextureCache.cpp +++ /dev/null @@ -1,318 +0,0 @@ -// Copyright 2010 Dolphin Emulator Project -// Licensed under GPLv2+ -// Refer to the license.txt file included. 
- -#include "VideoBackends/D3D/TextureCache.h" - -#include -#include - -#include "Common/CommonTypes.h" -#include "Common/Logging/Log.h" - -#include "VideoBackends/D3D/D3DBase.h" -#include "VideoBackends/D3D/D3DShader.h" -#include "VideoBackends/D3D/D3DState.h" -#include "VideoBackends/D3D/D3DTexture.h" -#include "VideoBackends/D3D/D3DUtil.h" -#include "VideoBackends/D3D/DXTexture.h" -#include "VideoBackends/D3D/FramebufferManager.h" -#include "VideoBackends/D3D/GeometryShaderCache.h" -#include "VideoBackends/D3D/PSTextureEncoder.h" -#include "VideoBackends/D3D/PixelShaderCache.h" -#include "VideoBackends/D3D/VertexShaderCache.h" - -#include "VideoCommon/ImageWrite.h" -#include "VideoCommon/RenderBase.h" -#include "VideoCommon/TextureConfig.h" -#include "VideoCommon/VideoConfig.h" - -namespace DX11 -{ -static std::unique_ptr g_encoder; - -void TextureCache::CopyEFB(AbstractStagingTexture* dst, const EFBCopyParams& params, - u32 native_width, u32 bytes_per_row, u32 num_blocks_y, u32 memory_stride, - const EFBRectangle& src_rect, bool scale_by_half, float y_scale, - float gamma, bool clamp_top, bool clamp_bottom, - const CopyFilterCoefficientArray& filter_coefficients) -{ - g_encoder->Encode(dst, params, native_width, bytes_per_row, num_blocks_y, memory_stride, src_rect, - scale_by_half, y_scale, gamma, clamp_top, clamp_bottom, filter_coefficients); -} - -const char palette_shader[] = - R"HLSL( -sampler samp0 : register(s0); -Texture2DArray Tex0 : register(t0); -Buffer Tex1 : register(t1); -uniform float Multiply; - -uint Convert3To8(uint v) -{ - // Swizzle bits: 00000123 -> 12312312 - return (v << 5) | (v << 2) | (v >> 1); -} - -uint Convert4To8(uint v) -{ - // Swizzle bits: 00001234 -> 12341234 - return (v << 4) | v; -} - -uint Convert5To8(uint v) -{ - // Swizzle bits: 00012345 -> 12345123 - return (v << 3) | (v >> 2); -} - -uint Convert6To8(uint v) -{ - // Swizzle bits: 00123456 -> 12345612 - return (v << 2) | (v >> 4); -} - -float4 DecodePixel_RGB5A3(uint val) -{ 
- int r,g,b,a; - if ((val&0x8000)) - { - r=Convert5To8((val>>10) & 0x1f); - g=Convert5To8((val>>5 ) & 0x1f); - b=Convert5To8((val ) & 0x1f); - a=0xFF; - } - else - { - a=Convert3To8((val>>12) & 0x7); - r=Convert4To8((val>>8 ) & 0xf); - g=Convert4To8((val>>4 ) & 0xf); - b=Convert4To8((val ) & 0xf); - } - return float4(r, g, b, a) / 255; -} - -float4 DecodePixel_RGB565(uint val) -{ - int r, g, b, a; - r = Convert5To8((val >> 11) & 0x1f); - g = Convert6To8((val >> 5) & 0x3f); - b = Convert5To8((val) & 0x1f); - a = 0xFF; - return float4(r, g, b, a) / 255; -} - -float4 DecodePixel_IA8(uint val) -{ - int i = val & 0xFF; - int a = val >> 8; - return float4(i, i, i, a) / 255; -} - -void main( - out float4 ocol0 : SV_Target, - in float4 pos : SV_Position, - in float3 uv0 : TEXCOORD0) -{ - uint src = round(Tex0.Sample(samp0,uv0) * Multiply).r; - src = Tex1.Load(src); - src = ((src << 8) & 0xFF00) | (src >> 8); - ocol0 = DECODE(src); -} -)HLSL"; - -void TextureCache::ConvertTexture(TCacheEntry* destination, TCacheEntry* source, - const void* palette, TLUTFormat format) -{ - DXTexture* source_texture = static_cast(source->texture.get()); - DXTexture* destination_texture = static_cast(destination->texture.get()); - g_renderer->ResetAPIState(); - - // stretch picture with increased internal resolution - const D3D11_VIEWPORT vp = CD3D11_VIEWPORT(0.f, 0.f, static_cast(source->GetWidth()), - static_cast(source->GetHeight())); - D3D::context->RSSetViewports(1, &vp); - - D3D11_BOX box{0, 0, 0, 512, 1, 1}; - D3D::context->UpdateSubresource(palette_buf, 0, &box, palette, 0, 0); - - D3D::stateman->SetTexture(1, palette_buf_srv); - - // TODO: Add support for C14X2 format. (Different multiplier, more palette entries.) - float params[8] = {source->format == TextureFormat::I4 ? 
15.f : 255.f}; - D3D::context->UpdateSubresource(uniform_buffer, 0, nullptr, ¶ms, 0, 0); - D3D::stateman->SetPixelConstants(uniform_buffer); - - const D3D11_RECT sourcerect = CD3D11_RECT(0, 0, source->GetWidth(), source->GetHeight()); - - D3D::SetPointCopySampler(); - - // Make sure we don't draw with the texture set as both a source and target. - // (This can happen because we don't unbind textures when we free them.) - D3D::stateman->UnsetTexture(destination_texture->GetRawTexIdentifier()->GetSRV()); - D3D::stateman->Apply(); - - D3D::context->OMSetRenderTargets(1, &destination_texture->GetRawTexIdentifier()->GetRTV(), - nullptr); - - // Create texture copy - D3D::drawShadedTexQuad( - source_texture->GetRawTexIdentifier()->GetSRV(), &sourcerect, source->GetWidth(), - source->GetHeight(), palette_pixel_shader[static_cast(format)], - VertexShaderCache::GetSimpleVertexShader(), VertexShaderCache::GetSimpleInputLayout(), - GeometryShaderCache::GetCopyGeometryShader()); - - g_renderer->RestoreAPIState(); -} - -ID3D11PixelShader* GetConvertShader(const char* Type) -{ - std::string shader = "#define DECODE DecodePixel_"; - shader.append(Type); - shader.append("\n"); - shader.append(palette_shader); - return D3D::CompileAndCreatePixelShader(shader); -} - -TextureCache::TextureCache() -{ - // FIXME: Is it safe here? 
- g_encoder = std::make_unique(); - g_encoder->Init(); - - palette_buf = nullptr; - palette_buf_srv = nullptr; - uniform_buffer = nullptr; - palette_pixel_shader[static_cast(TLUTFormat::IA8)] = GetConvertShader("IA8"); - palette_pixel_shader[static_cast(TLUTFormat::RGB565)] = GetConvertShader("RGB565"); - palette_pixel_shader[static_cast(TLUTFormat::RGB5A3)] = GetConvertShader("RGB5A3"); - auto lutBd = CD3D11_BUFFER_DESC(sizeof(u16) * 256, D3D11_BIND_SHADER_RESOURCE); - HRESULT hr = D3D::device->CreateBuffer(&lutBd, nullptr, &palette_buf); - CHECK(SUCCEEDED(hr), "create palette decoder lut buffer"); - D3D::SetDebugObjectName(palette_buf, "texture decoder lut buffer"); - // TODO: C14X2 format. - auto outlutUavDesc = - CD3D11_SHADER_RESOURCE_VIEW_DESC(palette_buf, DXGI_FORMAT_R16_UINT, 0, 256, 0); - hr = D3D::device->CreateShaderResourceView(palette_buf, &outlutUavDesc, &palette_buf_srv); - CHECK(SUCCEEDED(hr), "create palette decoder lut srv"); - D3D::SetDebugObjectName(palette_buf_srv, "texture decoder lut srv"); - const D3D11_BUFFER_DESC cbdesc = - CD3D11_BUFFER_DESC(sizeof(float) * 8, D3D11_BIND_CONSTANT_BUFFER, D3D11_USAGE_DEFAULT); - hr = D3D::device->CreateBuffer(&cbdesc, nullptr, &uniform_buffer); - CHECK(SUCCEEDED(hr), "Create palette decoder constant buffer"); - D3D::SetDebugObjectName(uniform_buffer, - "a constant buffer used in TextureCache::CopyRenderTargetToTexture"); -} - -TextureCache::~TextureCache() -{ - g_encoder->Shutdown(); - g_encoder.reset(); - - SAFE_RELEASE(palette_buf); - SAFE_RELEASE(palette_buf_srv); - SAFE_RELEASE(uniform_buffer); - for (auto*& shader : palette_pixel_shader) - SAFE_RELEASE(shader); - for (auto& iter : m_efb_to_tex_pixel_shaders) - SAFE_RELEASE(iter.second); -} - -void TextureCache::CopyEFBToCacheEntry(TCacheEntry* entry, bool is_depth_copy, - const EFBRectangle& src_rect, bool scale_by_half, - EFBCopyFormat dst_format, bool is_intensity, float gamma, - bool clamp_top, bool clamp_bottom, - const CopyFilterCoefficientArray& 
filter_coefficients) -{ - auto* destination_texture = static_cast(entry->texture.get()); - - bool multisampled = g_ActiveConfig.iMultisamples > 1; - ID3D11ShaderResourceView* efb_tex_srv; - if (multisampled) - { - efb_tex_srv = is_depth_copy ? FramebufferManager::GetResolvedEFBDepthTexture()->GetSRV() : - FramebufferManager::GetResolvedEFBColorTexture()->GetSRV(); - } - else - { - efb_tex_srv = is_depth_copy ? FramebufferManager::GetEFBDepthTexture()->GetSRV() : - FramebufferManager::GetEFBColorTexture()->GetSRV(); - } - - auto uid = TextureConversionShaderGen::GetShaderUid(dst_format, is_depth_copy, is_intensity, - scale_by_half, - NeedsCopyFilterInShader(filter_coefficients)); - ID3D11PixelShader* pixel_shader = GetEFBToTexPixelShader(uid); - if (!pixel_shader) - return; - - g_renderer->ResetAPIState(); - - // stretch picture with increased internal resolution - const D3D11_VIEWPORT vp = - CD3D11_VIEWPORT(0.f, 0.f, static_cast(destination_texture->GetConfig().width), - static_cast(destination_texture->GetConfig().height)); - D3D::context->RSSetViewports(1, &vp); - - const TargetRectangle targetSource = g_renderer->ConvertEFBRectangle(src_rect); - // TODO: try targetSource.asRECT(); - const D3D11_RECT sourcerect = - CD3D11_RECT(targetSource.left, targetSource.top, targetSource.right, targetSource.bottom); - - // Use linear filtering if (bScaleByHalf), use point filtering otherwise - if (scale_by_half) - D3D::SetLinearCopySampler(); - else - D3D::SetPointCopySampler(); - - struct PixelConstants - { - float filter_coefficients[3]; - float gamma_rcp; - float clamp_top; - float clamp_bottom; - float pixel_height; - u32 padding; - }; - PixelConstants constants; - for (size_t i = 0; i < filter_coefficients.size(); i++) - constants.filter_coefficients[i] = filter_coefficients[i]; - constants.gamma_rcp = 1.0f / gamma; - constants.clamp_top = clamp_top ? src_rect.top / float(EFB_HEIGHT) : 0.0f; - constants.clamp_bottom = clamp_bottom ? 
src_rect.bottom / float(EFB_HEIGHT) : 1.0f; - constants.pixel_height = - g_ActiveConfig.bCopyEFBScaled ? 1.0f / g_renderer->GetTargetHeight() : 1.0f / EFB_HEIGHT; - constants.padding = 0; - D3D::context->UpdateSubresource(uniform_buffer, 0, nullptr, &constants, 0, 0); - D3D::stateman->SetPixelConstants(uniform_buffer); - - // Make sure we don't draw with the texture set as both a source and target. - // (This can happen because we don't unbind textures when we free them.) - D3D::stateman->UnsetTexture(destination_texture->GetRawTexIdentifier()->GetSRV()); - D3D::stateman->Apply(); - - D3D::context->OMSetRenderTargets(1, &destination_texture->GetRawTexIdentifier()->GetRTV(), - nullptr); - - // Create texture copy - D3D::drawShadedTexQuad( - efb_tex_srv, &sourcerect, g_renderer->GetTargetWidth(), g_renderer->GetTargetHeight(), - pixel_shader, VertexShaderCache::GetSimpleVertexShader(), - VertexShaderCache::GetSimpleInputLayout(), GeometryShaderCache::GetCopyGeometryShader()); - - g_renderer->RestoreAPIState(); -} - -ID3D11PixelShader* -TextureCache::GetEFBToTexPixelShader(const TextureConversionShaderGen::TCShaderUid& uid) -{ - auto iter = m_efb_to_tex_pixel_shaders.find(uid); - if (iter != m_efb_to_tex_pixel_shaders.end()) - return iter->second; - - ShaderCode code = TextureConversionShaderGen::GenerateShader(APIType::D3D, uid.GetUidData()); - ID3D11PixelShader* shader = D3D::CompileAndCreatePixelShader(code.GetBuffer()); - m_efb_to_tex_pixel_shaders.emplace(uid, shader); - return shader; -} -} // namespace DX11 diff --git a/Source/Core/VideoBackends/D3D/TextureCache.h b/Source/Core/VideoBackends/D3D/TextureCache.h deleted file mode 100644 index 1bfa34c045..0000000000 --- a/Source/Core/VideoBackends/D3D/TextureCache.h +++ /dev/null @@ -1,49 +0,0 @@ -// Copyright 2008 Dolphin Emulator Project -// Licensed under GPLv2+ -// Refer to the license.txt file included. 
- -#pragma once - -#include - -#include "VideoBackends/D3D/D3DTexture.h" -#include "VideoCommon/TextureCacheBase.h" -#include "VideoCommon/TextureConverterShaderGen.h" - -class AbstractTexture; -struct TextureConfig; - -namespace DX11 -{ -class TextureCache : public TextureCacheBase -{ -public: - TextureCache(); - ~TextureCache(); - -private: - void ConvertTexture(TCacheEntry* destination, TCacheEntry* source, const void* palette, - TLUTFormat format) override; - - void CopyEFB(AbstractStagingTexture* dst, const EFBCopyParams& params, u32 native_width, - u32 bytes_per_row, u32 num_blocks_y, u32 memory_stride, const EFBRectangle& src_rect, - bool scale_by_half, float y_scale, float gamma, bool clamp_top, bool clamp_bottom, - const CopyFilterCoefficientArray& filter_coefficients) override; - - void CopyEFBToCacheEntry(TCacheEntry* entry, bool is_depth_copy, const EFBRectangle& src_rect, - bool scale_by_half, EFBCopyFormat dst_format, bool is_intensity, - float gamma, bool clamp_top, bool clamp_bottom, - const CopyFilterCoefficientArray& filter_coefficients) override; - - bool CompileShaders() override { return true; } - void DeleteShaders() override {} - ID3D11PixelShader* GetEFBToTexPixelShader(const TextureConversionShaderGen::TCShaderUid& uid); - - ID3D11Buffer* palette_buf; - ID3D11ShaderResourceView* palette_buf_srv; - ID3D11Buffer* uniform_buffer; - ID3D11PixelShader* palette_pixel_shader[3]; - - std::map m_efb_to_tex_pixel_shaders; -}; -} diff --git a/Source/Core/VideoBackends/D3D/VertexManager.cpp b/Source/Core/VideoBackends/D3D/VertexManager.cpp index 8a84ebadc3..5159be74de 100644 --- a/Source/Core/VideoBackends/D3D/VertexManager.cpp +++ b/Source/Core/VideoBackends/D3D/VertexManager.cpp @@ -7,24 +7,19 @@ #include #include "Common/Align.h" +#include "Common/Assert.h" #include "Common/CommonTypes.h" #include "VideoBackends/D3D/BoundingBox.h" #include "VideoBackends/D3D/D3DBase.h" #include "VideoBackends/D3D/D3DState.h" -#include 
"VideoBackends/D3D/FramebufferManager.h" -#include "VideoBackends/D3D/GeometryShaderCache.h" -#include "VideoBackends/D3D/PixelShaderCache.h" #include "VideoBackends/D3D/Render.h" -#include "VideoBackends/D3D/VertexShaderCache.h" #include "VideoCommon/BoundingBox.h" -#include "VideoCommon/Debugger.h" #include "VideoCommon/GeometryShaderManager.h" #include "VideoCommon/IndexGenerator.h" #include "VideoCommon/NativeVertexFormat.h" #include "VideoCommon/PixelShaderManager.h" -#include "VideoCommon/RenderBase.h" #include "VideoCommon/Statistics.h" #include "VideoCommon/VertexLoaderManager.h" #include "VideoCommon/VertexShaderManager.h" @@ -32,11 +27,6 @@ namespace DX11 { -// TODO: Find sensible values for these two -const u32 MAX_IBUFFER_SIZE = VertexManager::MAXIBUFFERSIZE * sizeof(u16) * 8; -const u32 MAX_VBUFFER_SIZE = VertexManager::MAXVBUFFERSIZE; -const u32 MAX_BUFFER_SIZE = MAX_IBUFFER_SIZE + MAX_VBUFFER_SIZE; - static ID3D11Buffer* AllocateConstantBuffer(u32 size) { const u32 cbsize = Common::AlignUp(size, 16u); // must be a multiple of 16 @@ -59,71 +49,172 @@ static void UpdateConstantBuffer(ID3D11Buffer* const buffer, const void* data, u ADDSTAT(stats.thisFrame.bytesUniformStreamed, data_size); } -void VertexManager::CreateDeviceObjects() +static ID3D11ShaderResourceView* +CreateTexelBufferView(ID3D11Buffer* buffer, TexelBufferFormat format, DXGI_FORMAT srv_format) { - D3D11_BUFFER_DESC bufdesc = - CD3D11_BUFFER_DESC(MAX_BUFFER_SIZE, D3D11_BIND_INDEX_BUFFER | D3D11_BIND_VERTEX_BUFFER, - D3D11_USAGE_DYNAMIC, D3D11_CPU_ACCESS_WRITE); + ID3D11ShaderResourceView* srv; + CD3D11_SHADER_RESOURCE_VIEW_DESC srv_desc(buffer, srv_format, 0, + VertexManager::TEXEL_STREAM_BUFFER_SIZE / + VertexManager::GetTexelBufferElementSize(format)); + CHECK(SUCCEEDED(D3D::device->CreateShaderResourceView(buffer, &srv_desc, &srv)), + "Create SRV for texel buffer"); + return srv; +} - for (int i = 0; i < MAX_BUFFER_COUNT; i++) +VertexManager::VertexManager() = default; + 
+VertexManager::~VertexManager() +{ + for (auto& srv_ptr : m_texel_buffer_views) + SAFE_RELEASE(srv_ptr); + SAFE_RELEASE(m_texel_buffer); + SAFE_RELEASE(m_pixel_constant_buffer); + SAFE_RELEASE(m_geometry_constant_buffer); + SAFE_RELEASE(m_vertex_constant_buffer); + for (auto& buffer : m_buffers) + SAFE_RELEASE(buffer); +} + +bool VertexManager::Initialize() +{ + if (!VertexManagerBase::Initialize()) + return false; + + CD3D11_BUFFER_DESC bufdesc((VERTEX_STREAM_BUFFER_SIZE + INDEX_STREAM_BUFFER_SIZE) / BUFFER_COUNT, + D3D11_BIND_INDEX_BUFFER | D3D11_BIND_VERTEX_BUFFER, + D3D11_USAGE_DYNAMIC, D3D11_CPU_ACCESS_WRITE); + + for (int i = 0; i < BUFFER_COUNT; i++) { - m_buffers[i] = nullptr; CHECK(SUCCEEDED(D3D::device->CreateBuffer(&bufdesc, nullptr, &m_buffers[i])), "Failed to create buffer."); D3D::SetDebugObjectName(m_buffers[i], "Buffer of VertexManager"); } - m_buffer_cursor = MAX_BUFFER_SIZE; - m_vertex_constant_buffer = AllocateConstantBuffer(sizeof(VertexShaderConstants)); m_geometry_constant_buffer = AllocateConstantBuffer(sizeof(GeometryShaderConstants)); m_pixel_constant_buffer = AllocateConstantBuffer(sizeof(PixelShaderConstants)); -} + if (!m_vertex_constant_buffer || !m_geometry_constant_buffer || !m_pixel_constant_buffer) + return false; -void VertexManager::DestroyDeviceObjects() -{ - SAFE_RELEASE(m_pixel_constant_buffer); - SAFE_RELEASE(m_geometry_constant_buffer); - SAFE_RELEASE(m_vertex_constant_buffer); - for (int i = 0; i < MAX_BUFFER_COUNT; i++) + CD3D11_BUFFER_DESC texel_buf_desc(TEXEL_STREAM_BUFFER_SIZE, D3D11_BIND_SHADER_RESOURCE, + D3D11_USAGE_DYNAMIC, D3D11_CPU_ACCESS_WRITE); + CHECK(SUCCEEDED(D3D::device->CreateBuffer(&texel_buf_desc, nullptr, &m_texel_buffer)), + "Creating texel buffer failed"); + if (!m_texel_buffer) + return false; + + static constexpr std::array, NUM_TEXEL_BUFFER_FORMATS> + format_mapping = {{ + {TEXEL_BUFFER_FORMAT_R8_UINT, DXGI_FORMAT_R8_UINT}, + {TEXEL_BUFFER_FORMAT_R16_UINT, DXGI_FORMAT_R16_UINT}, + 
{TEXEL_BUFFER_FORMAT_RGBA8_UINT, DXGI_FORMAT_R8G8B8A8_UNORM}, + {TEXEL_BUFFER_FORMAT_R32G32_UINT, DXGI_FORMAT_R32G32_UINT}, + }}; + for (const auto& it : format_mapping) { - SAFE_RELEASE(m_buffers[i]); + m_texel_buffer_views[it.first] = CreateTexelBufferView(m_texel_buffer, it.first, it.second); + if (!m_texel_buffer_views[it.first]) + return false; } -} -VertexManager::VertexManager() -{ - m_staging_vertex_buffer.resize(MAXVBUFFERSIZE); - - m_cur_buffer_pointer = m_base_buffer_pointer = &m_staging_vertex_buffer[0]; - m_end_buffer_pointer = m_base_buffer_pointer + m_staging_vertex_buffer.size(); - - m_staging_index_buffer.resize(MAXIBUFFERSIZE); - - CreateDeviceObjects(); -} - -VertexManager::~VertexManager() -{ - DestroyDeviceObjects(); + return true; } void VertexManager::UploadUtilityUniforms(const void* uniforms, u32 uniforms_size) { // Just use the one buffer for all three. + InvalidateConstants(); UpdateConstantBuffer(m_vertex_constant_buffer, uniforms, uniforms_size); D3D::stateman->SetVertexConstants(m_vertex_constant_buffer); D3D::stateman->SetGeometryConstants(m_vertex_constant_buffer); D3D::stateman->SetPixelConstants(m_vertex_constant_buffer); - VertexShaderManager::dirty = true; - GeometryShaderManager::dirty = true; - PixelShaderManager::dirty = true; } -void VertexManager::ResetBuffer(u32 vertex_stride, bool cull_all) +bool VertexManager::MapTexelBuffer(u32 required_size, D3D11_MAPPED_SUBRESOURCE& sr) { + if ((m_texel_buffer_offset + required_size) > TEXEL_STREAM_BUFFER_SIZE) + { + // Restart buffer. + HRESULT hr = D3D::context->Map(m_texel_buffer, 0, D3D11_MAP_WRITE_DISCARD, 0, &sr); + CHECK(SUCCEEDED(hr), "Map texel buffer"); + if (FAILED(hr)) + return false; + + m_texel_buffer_offset = 0; + } + else + { + // Don't overwrite the earlier-used space. 
+ HRESULT hr = D3D::context->Map(m_texel_buffer, 0, D3D11_MAP_WRITE_NO_OVERWRITE, 0, &sr); + CHECK(SUCCEEDED(hr), "Map texel buffer"); + if (FAILED(hr)) + return false; + } + + return true; +} + +bool VertexManager::UploadTexelBuffer(const void* data, u32 data_size, TexelBufferFormat format, + u32* out_offset) +{ + if (data_size > TEXEL_STREAM_BUFFER_SIZE) + return false; + + const u32 elem_size = GetTexelBufferElementSize(format); + m_texel_buffer_offset = Common::AlignUp(m_texel_buffer_offset, elem_size); + + D3D11_MAPPED_SUBRESOURCE sr; + if (!MapTexelBuffer(data_size, sr)) + return false; + + *out_offset = m_texel_buffer_offset / elem_size; + std::memcpy(static_cast(sr.pData) + m_texel_buffer_offset, data, data_size); + ADDSTAT(stats.thisFrame.bytesUniformStreamed, data_size); + m_texel_buffer_offset += data_size; + + D3D::context->Unmap(m_texel_buffer, 0); + D3D::stateman->SetTexture(0, m_texel_buffer_views[static_cast(format)]); + return true; +} + +bool VertexManager::UploadTexelBuffer(const void* data, u32 data_size, TexelBufferFormat format, + u32* out_offset, const void* palette_data, u32 palette_size, + TexelBufferFormat palette_format, u32* out_palette_offset) +{ + const u32 elem_size = GetTexelBufferElementSize(format); + const u32 palette_elem_size = GetTexelBufferElementSize(palette_format); + const u32 reserve_size = data_size + palette_size + palette_elem_size; + if (reserve_size > TEXEL_STREAM_BUFFER_SIZE) + return false; + + m_texel_buffer_offset = Common::AlignUp(m_texel_buffer_offset, elem_size); + + D3D11_MAPPED_SUBRESOURCE sr; + if (!MapTexelBuffer(reserve_size, sr)) + return false; + + const u32 palette_byte_offset = Common::AlignUp(data_size, palette_elem_size); + std::memcpy(static_cast(sr.pData) + m_texel_buffer_offset, data, data_size); + std::memcpy(static_cast(sr.pData) + m_texel_buffer_offset + palette_byte_offset, + palette_data, palette_size); + ADDSTAT(stats.thisFrame.bytesUniformStreamed, palette_byte_offset + palette_size); + 
*out_offset = m_texel_buffer_offset / elem_size; + *out_palette_offset = (m_texel_buffer_offset + palette_byte_offset) / palette_elem_size; + m_texel_buffer_offset += palette_byte_offset + palette_size; + + D3D::context->Unmap(m_texel_buffer, 0); + D3D::stateman->SetTexture(0, m_texel_buffer_views[static_cast(format)]); + D3D::stateman->SetTexture(1, m_texel_buffer_views[static_cast(palette_format)]); + return true; +} + +void VertexManager::ResetBuffer(u32 vertex_stride) +{ + m_base_buffer_pointer = m_cpu_vertex_buffer.data(); m_cur_buffer_pointer = m_base_buffer_pointer; - IndexGenerator::Start(m_staging_index_buffer.data()); + m_end_buffer_pointer = m_base_buffer_pointer + m_cpu_vertex_buffer.size(); + IndexGenerator::Start(m_cpu_index_buffer.data()); } void VertexManager::CommitBuffer(u32 num_vertices, u32 vertex_stride, u32 num_indices, @@ -143,10 +234,10 @@ void VertexManager::CommitBuffer(u32 num_vertices, u32 vertex_stride, u32 num_in } D3D11_MAP MapType = D3D11_MAP_WRITE_NO_OVERWRITE; - if (cursor + totalBufferSize >= MAX_BUFFER_SIZE) + if (cursor + totalBufferSize >= BUFFER_SIZE) { // Wrap around - m_current_buffer = (m_current_buffer + 1) % MAX_BUFFER_COUNT; + m_current_buffer = (m_current_buffer + 1) % BUFFER_COUNT; cursor = 0; MapType = D3D11_MAP_WRITE_DISCARD; } @@ -159,8 +250,7 @@ void VertexManager::CommitBuffer(u32 num_vertices, u32 vertex_stride, u32 num_in if (vertexBufferSize > 0) std::memcpy(mappedData + cursor, m_base_buffer_pointer, vertexBufferSize); if (indexBufferSize > 0) - std::memcpy(mappedData + cursor + vertexBufferSize, m_staging_index_buffer.data(), - indexBufferSize); + std::memcpy(mappedData + cursor + vertexBufferSize, m_cpu_index_buffer.data(), indexBufferSize); D3D::context->Unmap(m_buffers[m_current_buffer], 0); m_buffer_cursor = cursor + totalBufferSize; @@ -172,7 +262,7 @@ void VertexManager::CommitBuffer(u32 num_vertices, u32 vertex_stride, u32 num_in D3D::stateman->SetIndexBuffer(m_buffers[m_current_buffer]); } -void 
VertexManager::UploadConstants() +void VertexManager::UploadUniforms() { if (VertexShaderManager::dirty) { @@ -199,20 +289,4 @@ void VertexManager::UploadConstants() D3D::stateman->SetVertexConstants(m_vertex_constant_buffer); D3D::stateman->SetGeometryConstants(m_geometry_constant_buffer); } - -void VertexManager::DrawCurrentBatch(u32 base_index, u32 num_indices, u32 base_vertex) -{ - FramebufferManager::SetIntegerEFBRenderTarget( - m_current_pipeline_config.blending_state.logicopenable); - - if (g_ActiveConfig.backend_info.bSupportsBBox && BoundingBox::active) - { - D3D::context->OMSetRenderTargetsAndUnorderedAccessViews( - D3D11_KEEP_RENDER_TARGETS_AND_DEPTH_STENCIL, nullptr, nullptr, 2, 1, &BBox::GetUAV(), - nullptr); - } - - D3D::stateman->Apply(); - D3D::context->DrawIndexed(num_indices, base_index, base_vertex); -} } // namespace DX11 diff --git a/Source/Core/VideoBackends/D3D/VertexManager.h b/Source/Core/VideoBackends/D3D/VertexManager.h index a7eab78ef5..d9cb0a8755 100644 --- a/Source/Core/VideoBackends/D3D/VertexManager.h +++ b/Source/Core/VideoBackends/D3D/VertexManager.h @@ -18,13 +18,12 @@ struct ID3D11Buffer; namespace DX11 { -class D3DBlob; class D3DVertexFormat : public NativeVertexFormat { public: D3DVertexFormat(const PortableVertexDeclaration& vtx_decl); ~D3DVertexFormat(); - ID3D11InputLayout* GetInputLayout(D3DBlob* vs_bytecode); + ID3D11InputLayout* GetInputLayout(const void* vs_bytecode, size_t vs_bytecode_size); private: std::array m_elems{}; @@ -39,35 +38,39 @@ public: VertexManager(); ~VertexManager(); - std::unique_ptr - CreateNativeVertexFormat(const PortableVertexDeclaration& vtx_decl) override; + bool Initialize(); void UploadUtilityUniforms(const void* uniforms, u32 uniforms_size) override; + bool UploadTexelBuffer(const void* data, u32 data_size, TexelBufferFormat format, + u32* out_offset) override; + bool UploadTexelBuffer(const void* data, u32 data_size, TexelBufferFormat format, u32* out_offset, + const void* palette_data, u32 
palette_size, + TexelBufferFormat palette_format, u32* out_palette_offset) override; protected: - void CreateDeviceObjects() override; - void DestroyDeviceObjects() override; - void ResetBuffer(u32 vertex_stride, bool cull_all) override; + void ResetBuffer(u32 vertex_stride) override; void CommitBuffer(u32 num_vertices, u32 vertex_stride, u32 num_indices, u32* out_base_vertex, u32* out_base_index) override; - void UploadConstants() override; - void DrawCurrentBatch(u32 base_index, u32 num_indices, u32 base_vertex) override; + void UploadUniforms() override; private: - enum - { - MAX_BUFFER_COUNT = 2 - }; - ID3D11Buffer* m_buffers[MAX_BUFFER_COUNT] = {}; + static constexpr u32 BUFFER_COUNT = 2; + static constexpr u32 BUFFER_SIZE = + (VERTEX_STREAM_BUFFER_SIZE + INDEX_STREAM_BUFFER_SIZE) / BUFFER_COUNT; + + bool MapTexelBuffer(u32 required_size, D3D11_MAPPED_SUBRESOURCE& sr); + + ID3D11Buffer* m_buffers[BUFFER_COUNT] = {}; u32 m_current_buffer = 0; u32 m_buffer_cursor = 0; - std::vector m_staging_vertex_buffer; - std::vector m_staging_index_buffer; - ID3D11Buffer* m_vertex_constant_buffer = nullptr; ID3D11Buffer* m_geometry_constant_buffer = nullptr; ID3D11Buffer* m_pixel_constant_buffer = nullptr; + + ID3D11Buffer* m_texel_buffer = nullptr; + std::array m_texel_buffer_views; + u32 m_texel_buffer_offset = 0; }; } // namespace DX11 diff --git a/Source/Core/VideoBackends/D3D/VertexShaderCache.cpp b/Source/Core/VideoBackends/D3D/VertexShaderCache.cpp deleted file mode 100644 index b7fc3a2582..0000000000 --- a/Source/Core/VideoBackends/D3D/VertexShaderCache.cpp +++ /dev/null @@ -1,136 +0,0 @@ -// Copyright 2010 Dolphin Emulator Project -// Licensed under GPLv2+ -// Refer to the license.txt file included. 
- -#include - -#include "Common/CommonTypes.h" -#include "Common/FileUtil.h" -#include "Common/MsgHandler.h" -#include "Common/StringUtil.h" - -#include "Core/ConfigManager.h" -#include "Core/Host.h" - -#include "VideoBackends/D3D/D3DShader.h" -#include "VideoBackends/D3D/D3DState.h" -#include "VideoBackends/D3D/VertexManager.h" -#include "VideoBackends/D3D/VertexShaderCache.h" - -#include "VideoCommon/Debugger.h" -#include "VideoCommon/Statistics.h" -#include "VideoCommon/UberShaderVertex.h" -#include "VideoCommon/VertexLoaderManager.h" -#include "VideoCommon/VertexShaderGen.h" - -namespace DX11 -{ -static ID3D11VertexShader* SimpleVertexShader = nullptr; -static ID3D11VertexShader* ClearVertexShader = nullptr; -static ID3D11InputLayout* SimpleLayout = nullptr; -static ID3D11InputLayout* ClearLayout = nullptr; - -ID3D11VertexShader* VertexShaderCache::GetSimpleVertexShader() -{ - return SimpleVertexShader; -} -ID3D11VertexShader* VertexShaderCache::GetClearVertexShader() -{ - return ClearVertexShader; -} -ID3D11InputLayout* VertexShaderCache::GetSimpleInputLayout() -{ - return SimpleLayout; -} -ID3D11InputLayout* VertexShaderCache::GetClearInputLayout() -{ - return ClearLayout; -} - -// this class will load the precompiled shaders into our cache -template -class VertexShaderCacheInserter : public LinearDiskCacheReader -{ -public: - void Read(const UidType& key, const u8* value, u32 value_size) - { - D3DBlob* blob = new D3DBlob(value_size, value); - VertexShaderCache::InsertByteCode(key, blob); - blob->Release(); - } -}; - -const char simple_shader_code[] = { - "struct VSOUTPUT\n" - "{\n" - "float4 vPosition : POSITION;\n" - "float3 vTexCoord : TEXCOORD0;\n" - "};\n" - "VSOUTPUT main(float4 inPosition : POSITION,float3 inTEX0 : TEXCOORD0)\n" - "{\n" - "VSOUTPUT OUT;\n" - "OUT.vPosition = inPosition;\n" - "OUT.vTexCoord = inTEX0;\n" - "return OUT;\n" - "}\n"}; - -const char clear_shader_code[] = { - "struct VSOUTPUT\n" - "{\n" - "float4 vPosition : POSITION;\n" - 
"float4 vColor0 : COLOR0;\n" - "};\n" - "VSOUTPUT main(float4 inPosition : POSITION,float4 inColor0: COLOR0)\n" - "{\n" - "VSOUTPUT OUT;\n" - "OUT.vPosition = inPosition;\n" - "OUT.vColor0 = inColor0;\n" - "return OUT;\n" - "}\n"}; - -void VertexShaderCache::Init() -{ - const D3D11_INPUT_ELEMENT_DESC simpleelems[2] = { - {"POSITION", 0, DXGI_FORMAT_R32G32B32_FLOAT, 0, 0, D3D11_INPUT_PER_VERTEX_DATA, 0}, - {"TEXCOORD", 0, DXGI_FORMAT_R32G32B32_FLOAT, 0, 12, D3D11_INPUT_PER_VERTEX_DATA, 0}, - - }; - const D3D11_INPUT_ELEMENT_DESC clearelems[2] = { - {"POSITION", 0, DXGI_FORMAT_R32G32B32_FLOAT, 0, 0, D3D11_INPUT_PER_VERTEX_DATA, 0}, - {"COLOR", 0, DXGI_FORMAT_R8G8B8A8_UNORM, 0, 12, D3D11_INPUT_PER_VERTEX_DATA, 0}, - }; - - D3DBlob* blob; - D3D::CompileVertexShader(simple_shader_code, &blob); - D3D::device->CreateInputLayout(simpleelems, 2, blob->Data(), blob->Size(), &SimpleLayout); - SimpleVertexShader = D3D::CreateVertexShaderFromByteCode(blob); - if (SimpleLayout == nullptr || SimpleVertexShader == nullptr) - PanicAlert("Failed to create simple vertex shader or input layout at %s %d\n", __FILE__, - __LINE__); - blob->Release(); - D3D::SetDebugObjectName(SimpleVertexShader, "simple vertex shader"); - D3D::SetDebugObjectName(SimpleLayout, "simple input layout"); - - D3D::CompileVertexShader(clear_shader_code, &blob); - D3D::device->CreateInputLayout(clearelems, 2, blob->Data(), blob->Size(), &ClearLayout); - ClearVertexShader = D3D::CreateVertexShaderFromByteCode(blob); - if (ClearLayout == nullptr || ClearVertexShader == nullptr) - PanicAlert("Failed to create clear vertex shader or input layout at %s %d\n", __FILE__, - __LINE__); - blob->Release(); - D3D::SetDebugObjectName(ClearVertexShader, "clear vertex shader"); - D3D::SetDebugObjectName(ClearLayout, "clear input layout"); - - SETSTAT(stats.numVertexShadersCreated, 0); - SETSTAT(stats.numVertexShadersAlive, 0); -} - -void VertexShaderCache::Shutdown() -{ - SAFE_RELEASE(SimpleVertexShader); - 
SAFE_RELEASE(ClearVertexShader); - - SAFE_RELEASE(SimpleLayout); - SAFE_RELEASE(ClearLayout); -} -} // namespace DX11 diff --git a/Source/Core/VideoBackends/D3D/VertexShaderCache.h b/Source/Core/VideoBackends/D3D/VertexShaderCache.h deleted file mode 100644 index d7f1958806..0000000000 --- a/Source/Core/VideoBackends/D3D/VertexShaderCache.h +++ /dev/null @@ -1,32 +0,0 @@ -// Copyright 2008 Dolphin Emulator Project -// Licensed under GPLv2+ -// Refer to the license.txt file included. - -#pragma once - -#include - -#include "VideoBackends/D3D/D3DBase.h" -#include "VideoBackends/D3D/D3DBlob.h" - -#include "VideoCommon/AsyncShaderCompiler.h" -#include "VideoCommon/UberShaderVertex.h" -#include "VideoCommon/VertexShaderGen.h" - -namespace DX11 -{ -class D3DVertexFormat; - -class VertexShaderCache -{ -public: - static void Init(); - static void Shutdown(); - - static ID3D11VertexShader* GetSimpleVertexShader(); - static ID3D11VertexShader* GetClearVertexShader(); - static ID3D11InputLayout* GetSimpleInputLayout(); - static ID3D11InputLayout* GetClearInputLayout(); -}; - -} // namespace DX11 diff --git a/Source/Core/VideoBackends/D3D/main.cpp b/Source/Core/VideoBackends/D3D/main.cpp index 5795fce3a2..70db72206e 100644 --- a/Source/Core/VideoBackends/D3D/main.cpp +++ b/Source/Core/VideoBackends/D3D/main.cpp @@ -12,17 +12,14 @@ #include "VideoBackends/D3D/BoundingBox.h" #include "VideoBackends/D3D/D3DBase.h" -#include "VideoBackends/D3D/D3DUtil.h" -#include "VideoBackends/D3D/GeometryShaderCache.h" #include "VideoBackends/D3D/PerfQuery.h" -#include "VideoBackends/D3D/PixelShaderCache.h" #include "VideoBackends/D3D/Render.h" -#include "VideoBackends/D3D/TextureCache.h" #include "VideoBackends/D3D/VertexManager.h" -#include "VideoBackends/D3D/VertexShaderCache.h" #include "VideoBackends/D3D/VideoBackend.h" +#include "VideoCommon/FramebufferManager.h" #include "VideoCommon/ShaderCache.h" +#include "VideoCommon/TextureCacheBase.h" #include "VideoCommon/VideoCommon.h" #include 
"VideoCommon/VideoConfig.h" @@ -51,6 +48,7 @@ void VideoBackend::InitBackendInfo() g_Config.backend_info.api_type = APIType::D3D; g_Config.backend_info.MaxTextureSize = D3D11_REQ_TEXTURE2D_U_OR_V_DIMENSION; + g_Config.backend_info.bUsesLowerLeftOrigin = false; g_Config.backend_info.bSupportsExclusiveFullscreen = true; g_Config.backend_info.bSupportsDualSourceBlend = true; g_Config.backend_info.bSupportsPrimitiveRestart = true; @@ -58,16 +56,17 @@ void VideoBackend::InitBackendInfo() g_Config.backend_info.bSupportsGeometryShaders = true; g_Config.backend_info.bSupportsComputeShaders = false; g_Config.backend_info.bSupports3DVision = true; - g_Config.backend_info.bSupportsPostProcessing = false; + g_Config.backend_info.bSupportsPostProcessing = true; g_Config.backend_info.bSupportsPaletteConversion = true; g_Config.backend_info.bSupportsClipControl = true; g_Config.backend_info.bSupportsDepthClamp = true; g_Config.backend_info.bSupportsReversedDepthRange = false; g_Config.backend_info.bSupportsLogicOp = true; g_Config.backend_info.bSupportsMultithreading = false; - g_Config.backend_info.bSupportsGPUTextureDecoding = false; + g_Config.backend_info.bSupportsGPUTextureDecoding = true; g_Config.backend_info.bSupportsST3CTextures = false; g_Config.backend_info.bSupportsCopyToVram = true; + g_Config.backend_info.bSupportsLargePoints = false; g_Config.backend_info.bSupportsBitfield = false; g_Config.backend_info.bSupportsDynamicSamplerIndexing = false; g_Config.backend_info.bSupportsBPTCTextures = false; @@ -149,21 +148,20 @@ bool VideoBackend::Initialize(const WindowSystemInfo& wsi) // internal interfaces g_renderer = std::make_unique(backbuffer_width, backbuffer_height, wsi.render_surface_scale); - g_shader_cache = std::make_unique(); - g_texture_cache = std::make_unique(); g_vertex_manager = std::make_unique(); + g_shader_cache = std::make_unique(); + g_framebuffer_manager = std::make_unique(); + g_texture_cache = std::make_unique(); g_perf_query = std::make_unique(); - 
- VertexShaderCache::Init(); - PixelShaderCache::Init(); - GeometryShaderCache::Init(); - - if (!g_renderer->Initialize() || !g_shader_cache->Initialize()) + if (!g_renderer->Initialize() || !g_vertex_manager->Initialize() || + !g_shader_cache->Initialize() || !g_framebuffer_manager->Initialize() || + !g_texture_cache->Initialize()) + { return false; + } - D3D::InitUtils(); BBox::Init(); - + g_shader_cache->InitializeShaderCache(); return true; } @@ -172,16 +170,13 @@ void VideoBackend::Shutdown() g_shader_cache->Shutdown(); g_renderer->Shutdown(); - D3D::ShutdownUtils(); - PixelShaderCache::Shutdown(); - VertexShaderCache::Shutdown(); - GeometryShaderCache::Shutdown(); BBox::Shutdown(); g_perf_query.reset(); - g_vertex_manager.reset(); g_texture_cache.reset(); + g_framebuffer_manager.reset(); g_shader_cache.reset(); + g_vertex_manager.reset(); g_renderer.reset(); ShutdownShared(); diff --git a/Source/Core/VideoBackends/Null/NullBackend.cpp b/Source/Core/VideoBackends/Null/NullBackend.cpp index d790331e36..1e5c4e1193 100644 --- a/Source/Core/VideoBackends/Null/NullBackend.cpp +++ b/Source/Core/VideoBackends/Null/NullBackend.cpp @@ -13,7 +13,9 @@ #include "VideoBackends/Null/VertexManager.h" #include "VideoBackends/Null/VideoBackend.h" -#include "VideoCommon/FramebufferManagerBase.h" +#include "Common/MsgHandler.h" + +#include "VideoCommon/FramebufferManager.h" #include "VideoCommon/VideoBackendBase.h" #include "VideoCommon/VideoCommon.h" #include "VideoCommon/VideoConfig.h" @@ -61,10 +63,21 @@ bool VideoBackend::Initialize(const WindowSystemInfo& wsi) g_renderer = std::make_unique(); g_vertex_manager = std::make_unique(); g_perf_query = std::make_unique(); - g_framebuffer_manager = std::make_unique(); + g_framebuffer_manager = std::make_unique(); g_texture_cache = std::make_unique(); g_shader_cache = std::make_unique(); - return g_renderer->Initialize() && g_shader_cache->Initialize(); + + if (!g_vertex_manager->Initialize() || !g_shader_cache->Initialize() || + 
!g_renderer->Initialize() || !g_framebuffer_manager->Initialize() || + !g_texture_cache->Initialize()) + { + PanicAlert("Failed to initialize renderer classes"); + Shutdown(); + return false; + } + + g_shader_cache->InitializeShaderCache(); + return true; } void VideoBackend::Shutdown() diff --git a/Source/Core/VideoBackends/Null/NullTexture.cpp b/Source/Core/VideoBackends/Null/NullTexture.cpp index 2a21afb4b7..4b1bcba18e 100644 --- a/Source/Core/VideoBackends/Null/NullTexture.cpp +++ b/Source/Core/VideoBackends/Null/NullTexture.cpp @@ -16,11 +16,6 @@ void NullTexture::CopyRectangleFromTexture(const AbstractTexture* src, u32 dst_layer, u32 dst_level) { } -void NullTexture::ScaleRectangleFromTexture(const AbstractTexture* source, - const MathUtil::Rectangle& srcrect, - const MathUtil::Rectangle& dstrect) -{ -} void NullTexture::ResolveFromTexture(const AbstractTexture* src, const MathUtil::Rectangle& rect, u32 layer, u32 level) { @@ -70,15 +65,18 @@ void NullStagingTexture::Flush() m_needs_flush = false; } -NullFramebuffer::NullFramebuffer(AbstractTextureFormat color_format, +NullFramebuffer::NullFramebuffer(AbstractTexture* color_attachment, + AbstractTexture* depth_attachment, + AbstractTextureFormat color_format, AbstractTextureFormat depth_format, u32 width, u32 height, u32 layers, u32 samples) - : AbstractFramebuffer(color_format, depth_format, width, height, layers, samples) + : AbstractFramebuffer(color_attachment, depth_attachment, color_format, depth_format, width, + height, layers, samples) { } -std::unique_ptr NullFramebuffer::Create(const NullTexture* color_attachment, - const NullTexture* depth_attachment) +std::unique_ptr NullFramebuffer::Create(NullTexture* color_attachment, + NullTexture* depth_attachment) { if (!ValidateConfig(color_attachment, depth_attachment)) return nullptr; @@ -93,8 +91,8 @@ std::unique_ptr NullFramebuffer::Create(const NullTexture* colo const u32 layers = either_attachment->GetLayers(); const u32 samples = 
either_attachment->GetSamples(); - return std::make_unique(color_format, depth_format, width, height, layers, - samples); + return std::make_unique(color_attachment, depth_attachment, color_format, + depth_format, width, height, layers, samples); } } // namespace Null diff --git a/Source/Core/VideoBackends/Null/NullTexture.h b/Source/Core/VideoBackends/Null/NullTexture.h index 5a48ff652c..42cbc7e542 100644 --- a/Source/Core/VideoBackends/Null/NullTexture.h +++ b/Source/Core/VideoBackends/Null/NullTexture.h @@ -25,9 +25,6 @@ public: const MathUtil::Rectangle& src_rect, u32 src_layer, u32 src_level, const MathUtil::Rectangle& dst_rect, u32 dst_layer, u32 dst_level) override; - void ScaleRectangleFromTexture(const AbstractTexture* source, - const MathUtil::Rectangle& srcrect, - const MathUtil::Rectangle& dstrect) override; void ResolveFromTexture(const AbstractTexture* src, const MathUtil::Rectangle& rect, u32 layer, u32 level) override; void Load(u32 level, u32 width, u32 height, u32 row_length, const u8* buffer, @@ -58,12 +55,13 @@ private: class NullFramebuffer final : public AbstractFramebuffer { public: - explicit NullFramebuffer(AbstractTextureFormat color_format, AbstractTextureFormat depth_format, + explicit NullFramebuffer(AbstractTexture* color_attachment, AbstractTexture* depth_attachment, + AbstractTextureFormat color_format, AbstractTextureFormat depth_format, u32 width, u32 height, u32 layers, u32 samples); ~NullFramebuffer() override = default; - static std::unique_ptr Create(const NullTexture* color_attachment, - const NullTexture* depth_attachment); + static std::unique_ptr Create(NullTexture* color_attachment, + NullTexture* depth_attachment); }; } // namespace Null diff --git a/Source/Core/VideoBackends/Null/Render.cpp b/Source/Core/VideoBackends/Null/Render.cpp index d076e0f091..ca57a0bdeb 100644 --- a/Source/Core/VideoBackends/Null/Render.cpp +++ b/Source/Core/VideoBackends/Null/Render.cpp @@ -9,6 +9,7 @@ #include "VideoCommon/AbstractPipeline.h" 
#include "VideoCommon/AbstractShader.h" +#include "VideoCommon/NativeVertexFormat.h" #include "VideoCommon/VideoConfig.h" namespace Null @@ -74,22 +75,16 @@ std::unique_ptr Renderer::CreatePipeline(const AbstractPipelin return std::make_unique(); } -std::unique_ptr -Renderer::CreateFramebuffer(const AbstractTexture* color_attachment, - const AbstractTexture* depth_attachment) +std::unique_ptr Renderer::CreateFramebuffer(AbstractTexture* color_attachment, + AbstractTexture* depth_attachment) { - return NullFramebuffer::Create(static_cast(color_attachment), - static_cast(depth_attachment)); + return NullFramebuffer::Create(static_cast(color_attachment), + static_cast(depth_attachment)); } -TargetRectangle Renderer::ConvertEFBRectangle(const EFBRectangle& rc) +std::unique_ptr +Renderer::CreateNativeVertexFormat(const PortableVertexDeclaration& vtx_decl) { - TargetRectangle result; - result.left = rc.left; - result.top = rc.top; - result.right = rc.right; - result.bottom = rc.bottom; - return result; + return std::make_unique(vtx_decl); } - } // namespace Null diff --git a/Source/Core/VideoBackends/Null/Render.h b/Source/Core/VideoBackends/Null/Render.h index 75c4adfffc..5ad4d8028e 100644 --- a/Source/Core/VideoBackends/Null/Render.h +++ b/Source/Core/VideoBackends/Null/Render.h @@ -20,26 +20,26 @@ public: std::unique_ptr CreateStagingTexture(StagingTextureType type, const TextureConfig& config) override; std::unique_ptr - CreateFramebuffer(const AbstractTexture* color_attachment, - const AbstractTexture* depth_attachment) override; + CreateFramebuffer(AbstractTexture* color_attachment, AbstractTexture* depth_attachment) override; std::unique_ptr CreateShaderFromSource(ShaderStage stage, const char* source, size_t length) override; std::unique_ptr CreateShaderFromBinary(ShaderStage stage, const void* data, size_t length) override; + std::unique_ptr + CreateNativeVertexFormat(const PortableVertexDeclaration& vtx_decl) override; std::unique_ptr CreatePipeline(const 
AbstractPipelineConfig& config) override; u32 AccessEFB(EFBAccessType type, u32 x, u32 y, u32 poke_data) override { return 0; } void PokeEFB(EFBAccessType type, const EfbPokeData* points, size_t num_points) override {} u16 BBoxRead(int index) override { return 0; } void BBoxWrite(int index, u16 value) override {} - TargetRectangle ConvertEFBRectangle(const EFBRectangle& rc) override; void ClearScreen(const EFBRectangle& rc, bool colorEnable, bool alphaEnable, bool zEnable, u32 color, u32 z) override { } - void ReinterpretPixelData(unsigned int convtype) override {} + void ReinterpretPixelData(EFBReinterpretType convtype) override {} }; -} +} // namespace Null diff --git a/Source/Core/VideoBackends/Null/TextureCache.h b/Source/Core/VideoBackends/Null/TextureCache.h index 648871f626..678f7e8451 100644 --- a/Source/Core/VideoBackends/Null/TextureCache.h +++ b/Source/Core/VideoBackends/Null/TextureCache.h @@ -18,26 +18,21 @@ class TextureCache : public TextureCacheBase public: TextureCache() {} ~TextureCache() {} - bool CompileShaders() override { return true; } - void DeleteShaders() override {} - void ConvertTexture(TCacheEntry* entry, TCacheEntry* unconverted, const void* palette, - TLUTFormat format) override - { - } +protected: void CopyEFB(AbstractStagingTexture* dst, const EFBCopyParams& params, u32 native_width, u32 bytes_per_row, u32 num_blocks_y, u32 memory_stride, const EFBRectangle& src_rect, bool scale_by_half, float y_scale, float gamma, bool clamp_top, bool clamp_bottom, - const CopyFilterCoefficientArray& filter_coefficients) override + const EFBCopyFilterCoefficients& filter_coefficients) override { } void CopyEFBToCacheEntry(TCacheEntry* entry, bool is_depth_copy, const EFBRectangle& src_rect, bool scale_by_half, EFBCopyFormat dst_format, bool is_intensity, float gamma, bool clamp_top, bool clamp_bottom, - const CopyFilterCoefficientArray& filter_coefficients) override + const EFBCopyFilterCoefficients& filter_coefficients) override { } }; -} // Null 
name space +} // namespace Null diff --git a/Source/Core/VideoBackends/Null/VertexManager.cpp b/Source/Core/VideoBackends/Null/VertexManager.cpp index 872e9c3657..bf70743262 100644 --- a/Source/Core/VideoBackends/Null/VertexManager.cpp +++ b/Source/Core/VideoBackends/Null/VertexManager.cpp @@ -3,52 +3,16 @@ // Refer to the license.txt file included. #include "VideoBackends/Null/VertexManager.h" +#include "VideoBackends/Null/Render.h" #include "VideoCommon/IndexGenerator.h" -#include "VideoCommon/NativeVertexFormat.h" #include "VideoCommon/VertexLoaderManager.h" namespace Null { -class NullNativeVertexFormat : public NativeVertexFormat -{ -public: - NullNativeVertexFormat(const PortableVertexDeclaration& vtx_decl_) { vtx_decl = vtx_decl_; } -}; +VertexManager::VertexManager() = default; -std::unique_ptr -VertexManager::CreateNativeVertexFormat(const PortableVertexDeclaration& vtx_decl) -{ - return std::make_unique(vtx_decl); -} - -void VertexManager::UploadUtilityUniforms(const void* uniforms, u32 uniforms_size) -{ -} - -VertexManager::VertexManager() : m_local_v_buffer(MAXVBUFFERSIZE), m_local_i_buffer(MAXIBUFFERSIZE) -{ -} - -VertexManager::~VertexManager() -{ -} - -void VertexManager::ResetBuffer(u32 vertex_stride, bool cull_all) -{ - m_cur_buffer_pointer = m_base_buffer_pointer = m_local_v_buffer.data(); - m_end_buffer_pointer = m_cur_buffer_pointer + m_local_v_buffer.size(); - IndexGenerator::Start(&m_local_i_buffer[0]); -} - -void VertexManager::CommitBuffer(u32 num_vertices, u32 vertex_stride, u32 num_indices, - u32* out_base_vertex, u32* out_base_index) -{ -} - -void VertexManager::UploadConstants() -{ -} +VertexManager::~VertexManager() = default; void VertexManager::DrawCurrentBatch(u32 base_index, u32 num_indices, u32 base_vertex) { diff --git a/Source/Core/VideoBackends/Null/VertexManager.h b/Source/Core/VideoBackends/Null/VertexManager.h index 1d6a706ec3..7ab7557db6 100644 --- a/Source/Core/VideoBackends/Null/VertexManager.h +++ 
b/Source/Core/VideoBackends/Null/VertexManager.h @@ -17,20 +17,7 @@ public: VertexManager(); ~VertexManager(); - std::unique_ptr - CreateNativeVertexFormat(const PortableVertexDeclaration& vtx_decl) override; - - void UploadUtilityUniforms(const void* uniforms, u32 uniforms_size) override; - protected: - void ResetBuffer(u32 vertex_stride, bool cull_all) override; - void CommitBuffer(u32 num_vertices, u32 vertex_stride, u32 num_indices, u32* out_base_vertex, - u32* out_base_index) override; - void UploadConstants() override; void DrawCurrentBatch(u32 base_index, u32 num_indices, u32 base_vertex) override; - -private: - std::vector m_local_v_buffer; - std::vector m_local_i_buffer; }; -} +} // namespace Null diff --git a/Source/Core/VideoBackends/OGL/BoundingBox.cpp b/Source/Core/VideoBackends/OGL/BoundingBox.cpp index c50f831524..18ef7802cc 100644 --- a/Source/Core/VideoBackends/OGL/BoundingBox.cpp +++ b/Source/Core/VideoBackends/OGL/BoundingBox.cpp @@ -2,175 +2,77 @@ // Licensed under GPLv2+ // Refer to the license.txt file included. 
-#include -#include #include #include "Common/GL/GLUtil.h" #include "VideoBackends/OGL/BoundingBox.h" -#include "VideoBackends/OGL/FramebufferManager.h" +#include "VideoBackends/OGL/Render.h" #include "VideoCommon/DriverDetails.h" #include "VideoCommon/VideoConfig.h" static GLuint s_bbox_buffer_id; -static GLuint s_pbo; - -static std::array s_stencil_bounds; -static bool s_stencil_updated; -static bool s_stencil_cleared; - -static int s_target_width; -static int s_target_height; namespace OGL { -void BoundingBox::SetTargetSizeChanged(int target_width, int target_height) +void BoundingBox::Init() { - if (g_ActiveConfig.BBoxUseFragmentShaderImplementation()) + if (!g_ActiveConfig.backend_info.bSupportsBBox) return; - s_target_width = target_width; - s_target_height = target_height; - s_stencil_updated = false; - - glBindBuffer(GL_PIXEL_PACK_BUFFER, s_pbo); - glBufferData(GL_PIXEL_PACK_BUFFER, s_target_width * s_target_height, nullptr, GL_STREAM_READ); - glBindBuffer(GL_PIXEL_PACK_BUFFER, 0); -} - -void BoundingBox::Init(int target_width, int target_height) -{ - if (g_ActiveConfig.BBoxUseFragmentShaderImplementation()) - { - int initial_values[4] = {0, 0, 0, 0}; - glGenBuffers(1, &s_bbox_buffer_id); - glBindBuffer(GL_SHADER_STORAGE_BUFFER, s_bbox_buffer_id); - glBufferData(GL_SHADER_STORAGE_BUFFER, 4 * sizeof(s32), initial_values, GL_DYNAMIC_DRAW); - glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, s_bbox_buffer_id); - } - else - { - s_stencil_bounds = {{0, 0, 0, 0}}; - glGenBuffers(1, &s_pbo); - SetTargetSizeChanged(target_width, target_height); - } + int initial_values[4] = {0, 0, 0, 0}; + glGenBuffers(1, &s_bbox_buffer_id); + glBindBuffer(GL_SHADER_STORAGE_BUFFER, s_bbox_buffer_id); + glBufferData(GL_SHADER_STORAGE_BUFFER, 4 * sizeof(s32), initial_values, GL_DYNAMIC_DRAW); + glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, s_bbox_buffer_id); } void BoundingBox::Shutdown() { - if (g_ActiveConfig.BBoxUseFragmentShaderImplementation()) - { - glDeleteBuffers(1, 
&s_bbox_buffer_id); - } - else - { - glDeleteBuffers(1, &s_pbo); - } + if (!g_ActiveConfig.backend_info.bSupportsBBox) + return; + + glDeleteBuffers(1, &s_bbox_buffer_id); } void BoundingBox::Set(int index, int value) { - if (g_ActiveConfig.BBoxUseFragmentShaderImplementation()) - { - glBindBuffer(GL_SHADER_STORAGE_BUFFER, s_bbox_buffer_id); - glBufferSubData(GL_SHADER_STORAGE_BUFFER, index * sizeof(int), sizeof(int), &value); - glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0); - } - else - { - s_stencil_bounds[index] = value; + if (!g_ActiveConfig.backend_info.bSupportsBBox) + return; - if (!s_stencil_cleared) - { - // Assumes that the EFB framebuffer is currently bound - glClearStencil(0); - glClear(GL_STENCIL_BUFFER_BIT); - s_stencil_updated = false; - s_stencil_cleared = true; - } - } + glBindBuffer(GL_SHADER_STORAGE_BUFFER, s_bbox_buffer_id); + glBufferSubData(GL_SHADER_STORAGE_BUFFER, index * sizeof(int), sizeof(int), &value); + glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0); } int BoundingBox::Get(int index) { - if (g_ActiveConfig.BBoxUseFragmentShaderImplementation()) + if (!g_ActiveConfig.backend_info.bSupportsBBox) + return 0; + + int data = 0; + glBindBuffer(GL_SHADER_STORAGE_BUFFER, s_bbox_buffer_id); + if (!DriverDetails::HasBug(DriverDetails::BUG_SLOW_GETBUFFERSUBDATA) && + !static_cast(g_renderer.get())->IsGLES()) { - int data = 0; - glBindBuffer(GL_SHADER_STORAGE_BUFFER, s_bbox_buffer_id); - if (!DriverDetails::HasBug(DriverDetails::BUG_SLOW_GETBUFFERSUBDATA) && - !static_cast(g_renderer.get())->IsGLES()) - { - // Using glMapBufferRange to read back the contents of the SSBO is extremely slow - // on nVidia drivers. This is more noticeable at higher internal resolutions. - // Using glGetBufferSubData instead does not seem to exhibit this slowdown. - glGetBufferSubData(GL_SHADER_STORAGE_BUFFER, index * sizeof(int), sizeof(int), &data); - } - else - { - // Using glMapBufferRange is faster on AMD cards by a measurable margin. 
- void* ptr = glMapBufferRange(GL_SHADER_STORAGE_BUFFER, index * sizeof(int), sizeof(int), - GL_MAP_READ_BIT); - if (ptr) - { - memcpy(&data, ptr, sizeof(int)); - glUnmapBuffer(GL_SHADER_STORAGE_BUFFER); - } - } - glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0); - return data; + // Using glMapBufferRange to read back the contents of the SSBO is extremely slow + // on nVidia drivers. This is more noticeable at higher internal resolutions. + // Using glGetBufferSubData instead does not seem to exhibit this slowdown. + glGetBufferSubData(GL_SHADER_STORAGE_BUFFER, index * sizeof(int), sizeof(int), &data); } else { - if (s_stencil_updated) + // Using glMapBufferRange is faster on AMD cards by a measurable margin. + void* ptr = glMapBufferRange(GL_SHADER_STORAGE_BUFFER, index * sizeof(int), sizeof(int), + GL_MAP_READ_BIT); + if (ptr) { - s_stencil_updated = false; - - FramebufferManager::ResolveEFBStencilTexture(); - glBindFramebuffer(GL_READ_FRAMEBUFFER, FramebufferManager::GetResolvedFramebuffer()); - glBindBuffer(GL_PIXEL_PACK_BUFFER, s_pbo); - glPixelStorei(GL_PACK_ALIGNMENT, 1); - glReadPixels(0, 0, s_target_width, s_target_height, GL_STENCIL_INDEX, GL_UNSIGNED_BYTE, 0); - glBindFramebuffer(GL_READ_FRAMEBUFFER, FramebufferManager::GetEFBFramebuffer()); - - // Eke every bit of performance out of the compiler that we can - std::array bounds = s_stencil_bounds; - - u8* data = static_cast(glMapBufferRange( - GL_PIXEL_PACK_BUFFER, 0, s_target_height * s_target_width, GL_MAP_READ_BIT)); - - for (int row = 0; row < s_target_height; row++) - { - for (int col = 0; col < s_target_width; col++) - { - if (data[row * s_target_width + col] == 0) - continue; - bounds[0] = std::min(bounds[0], col); - bounds[1] = std::max(bounds[1], col); - bounds[2] = std::min(bounds[2], row); - bounds[3] = std::max(bounds[3], row); - } - } - - s_stencil_bounds = bounds; - - glUnmapBuffer(GL_PIXEL_PACK_BUFFER); - glBindBuffer(GL_PIXEL_PACK_BUFFER, 0); + memcpy(&data, ptr, sizeof(int)); + 
glUnmapBuffer(GL_SHADER_STORAGE_BUFFER); } - - return s_stencil_bounds[index]; } + glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0); + return data; } - -void BoundingBox::StencilWasUpdated() -{ - s_stencil_updated = true; - s_stencil_cleared = false; -} - -bool BoundingBox::NeedsStencilBuffer() -{ - return g_ActiveConfig.bBBoxEnable && !g_ActiveConfig.BBoxUseFragmentShaderImplementation(); -} -}; +}; // namespace OGL diff --git a/Source/Core/VideoBackends/OGL/BoundingBox.h b/Source/Core/VideoBackends/OGL/BoundingBox.h index 44365c9fbc..cbf54074ab 100644 --- a/Source/Core/VideoBackends/OGL/BoundingBox.h +++ b/Source/Core/VideoBackends/OGL/BoundingBox.h @@ -9,19 +9,10 @@ namespace OGL class BoundingBox { public: - static void Init(int target_width, int target_height); + static void Init(); static void Shutdown(); - static void SetTargetSizeChanged(int target_width, int target_height); - - // When SSBO isn't available, the bounding box is calculated directly from the - // stencil buffer. - static bool NeedsStencilBuffer(); - // When the stencil buffer is changed, this function needs to be called to - // invalidate the cached bounding box data. 
- static void StencilWasUpdated(); - static void Set(int index, int value); static int Get(int index); }; -}; +}; // namespace OGL diff --git a/Source/Core/VideoBackends/OGL/CMakeLists.txt b/Source/Core/VideoBackends/OGL/CMakeLists.txt index c786ba671f..da091c8e9b 100644 --- a/Source/Core/VideoBackends/OGL/CMakeLists.txt +++ b/Source/Core/VideoBackends/OGL/CMakeLists.txt @@ -1,19 +1,15 @@ add_library(videoogl BoundingBox.cpp - FramebufferManager.cpp main.cpp NativeVertexFormat.cpp OGLPipeline.cpp OGLShader.cpp OGLTexture.cpp PerfQuery.cpp - PostProcessing.cpp ProgramShaderCache.cpp Render.cpp SamplerCache.cpp StreamBuffer.cpp - TextureCache.cpp - TextureConverter.cpp VertexManager.cpp ) diff --git a/Source/Core/VideoBackends/OGL/FramebufferManager.cpp b/Source/Core/VideoBackends/OGL/FramebufferManager.cpp deleted file mode 100644 index 12de898ff5..0000000000 --- a/Source/Core/VideoBackends/OGL/FramebufferManager.cpp +++ /dev/null @@ -1,634 +0,0 @@ -// Copyright 2009 Dolphin Emulator Project -// Licensed under GPLv2+ -// Refer to the license.txt file included. 
- -#include "VideoBackends/OGL/FramebufferManager.h" - -#include -#include - -#include "Common/Common.h" -#include "Common/CommonTypes.h" -#include "Common/Logging/Log.h" -#include "Common/MsgHandler.h" - -#include "Core/HW/Memmap.h" - -#include "VideoBackends/OGL/Render.h" -#include "VideoBackends/OGL/SamplerCache.h" -#include "VideoBackends/OGL/TextureConverter.h" -#include "VideoBackends/OGL/VertexManager.h" - -#include "VideoCommon/OnScreenDisplay.h" -#include "VideoCommon/VertexShaderGen.h" -#include "VideoCommon/VideoBackendBase.h" - -constexpr const char* GLSL_REINTERPRET_PIXELFMT_VS = R"GLSL( -flat out int layer; -void main(void) { - layer = 0; - vec2 rawpos = vec2(gl_VertexID & 1, gl_VertexID & 2); - gl_Position = vec4(rawpos* 2.0 - 1.0, 0.0, 1.0); -})GLSL"; - -constexpr const char* GLSL_SHADER_FS = R"GLSL( -#define MULTILAYER %d -#define MSAA %d - -#if MSAA - -#if MULTILAYER -SAMPLER_BINDING(9) uniform sampler2DMSArray samp9; -#else -SAMPLER_BINDING(9) uniform sampler2DMS samp9; -#endif - -#else -SAMPLER_BINDING(9) uniform sampler2DArray samp9; -#endif - -vec4 sampleEFB(ivec3 pos) { -#if MSAA - -#if MULTILAYER - return texelFetch(samp9, pos, gl_SampleID); -#else - return texelFetch(samp9, pos.xy, gl_SampleID); -#endif - -#else - return texelFetch(samp9, pos, 0); -#endif -})GLSL"; - -constexpr const char* GLSL_SAMPLE_EFB_FS = R"GLSL( -#define MULTILAYER %d - -#if MULTILAYER -SAMPLER_BINDING(9) uniform sampler2DMSArray samp9; -#else -SAMPLER_BINDING(9) uniform sampler2DMS samp9; -#endif -vec4 sampleEFB(ivec3 pos) { - vec4 color = vec4(0.0, 0.0, 0.0, 0.0); - for (int i = 0; i < %d; i++) -#if MULTILAYER - color += texelFetch(samp9, pos, i); -#else - color += texelFetch(samp9, pos.xy, i); -#endif - - return color / %d; -})GLSL"; - -constexpr const char* GLSL_RGBA6_TO_RGB8_FS = R"GLSL( -flat in int layer; -out vec4 ocol0; -void main() { - ivec4 src6 = ivec4(round(sampleEFB(ivec3(gl_FragCoord.xy, layer)) * 63.f)); - ivec4 dst8; - - dst8.r = (src6.r << 2) | 
(src6.g >> 4); - dst8.g = ((src6.g & 0xF) << 4) | (src6.b >> 2); - dst8.b = ((src6.b & 0x3) << 6) | src6.a; - dst8.a = 255; - - ocol0 = float4(dst8) / 255.f; -})GLSL"; - -constexpr const char* GLSL_RGB8_TO_RGBA6_FS = R"GLSL( -flat in int layer; -out vec4 ocol0; -void main() { - ivec4 src8 = ivec4(round(sampleEFB(ivec3(gl_FragCoord.xy, layer)) * 255.f)); - ivec4 dst6; - - dst6.r = src8.r >> 2; - dst6.g = ((src8.r & 0x3) << 4) | (src8.g >> 4); - dst6.b = ((src8.g & 0xF) << 2) | (src8.b >> 6); - dst6.a = src8.b & 0x3F; - ocol0 = float4(dst6) / 63.f; -})GLSL"; - -constexpr const char* GLSL_GS = R"GLSL( -layout(triangles) in; -layout(triangle_strip, max_vertices = %d) out; -flat out int layer; -void main() { - for (int j = 0; j < %d; ++j) { - for (int i = 0; i < 3; ++i) { - layer = j; - gl_Layer = j; - gl_Position = gl_in[i].gl_Position; - EmitVertex(); - } - EndPrimitive(); - } -})GLSL"; - -constexpr const char* GLSL_EFB_POKE_VERTEX_VS = R"GLSL( -in vec2 rawpos; -in vec4 rawcolor0; // color -in int rawcolor1; // depth -out vec4 v_c; -out float v_z; -void main(void) { - gl_Position = vec4(((rawpos + 0.5) / vec2(640.0, 528.0) * 2.0 - 1.0) * vec2(1.0, -1.0), 0.0, 1.0); - gl_PointSize = %d.0 / 640.0; - - v_c = rawcolor0.bgra; - v_z = float(rawcolor1 & 0xFFFFFF) / 16777216.0; -})GLSL"; - -constexpr const char* GLSL_EFB_POKE_PIXEL_FS = R"GLSL( -in vec4 %s_c; -in float %s_z; -out vec4 ocol0; -void main(void) { - ocol0 = %s_c; - gl_FragDepth = %s_z; -})GLSL"; - -constexpr const char* GLSL_EFB_POKE_GEOMETRY_GS = R"GLSL( -layout(points) in; -layout(points, max_vertices = %d) out; -in vec4 v_c[1]; -in float v_z[1]; -out vec4 g_c; -out float g_z; -void main() { - for (int j = 0; j < %d; ++j) { - gl_Layer = j; - gl_Position = gl_in[0].gl_Position; - gl_PointSize = %d.0 / 640.0; - g_c = v_c[0]; - g_z = v_z[0]; - - EmitVertex(); - EndPrimitive(); - } -})GLSL"; - -namespace OGL -{ -int FramebufferManager::m_targetWidth; -int FramebufferManager::m_targetHeight; -int 
FramebufferManager::m_msaaSamples; -bool FramebufferManager::m_enable_stencil_buffer; - -GLenum FramebufferManager::m_textureType; -std::vector FramebufferManager::m_efbFramebuffer; -GLuint FramebufferManager::m_efbColor; -GLuint FramebufferManager::m_efbDepth; -GLuint FramebufferManager::m_efbColorSwap; // for hot swap when reinterpreting EFB pixel formats - -// Only used in MSAA mode. -std::vector FramebufferManager::m_resolvedFramebuffer; -GLuint FramebufferManager::m_resolvedColorTexture; -GLuint FramebufferManager::m_resolvedDepthTexture; - -// reinterpret pixel format -SHADER FramebufferManager::m_pixel_format_shaders[2]; - -// EFB pokes -GLuint FramebufferManager::m_EfbPokes_VBO; -GLuint FramebufferManager::m_EfbPokes_VAO; -SHADER FramebufferManager::m_EfbPokes; - -GLuint FramebufferManager::CreateTexture(GLenum texture_type, GLenum internal_format, - GLenum pixel_format, GLenum data_type) -{ - GLuint texture; - glActiveTexture(GL_TEXTURE9); - glGenTextures(1, &texture); - glBindTexture(texture_type, texture); - if (texture_type == GL_TEXTURE_2D_ARRAY) - { - glTexParameteri(texture_type, GL_TEXTURE_MAX_LEVEL, 0); - glTexImage3D(texture_type, 0, internal_format, m_targetWidth, m_targetHeight, m_EFBLayers, 0, - pixel_format, data_type, nullptr); - } - else if (texture_type == GL_TEXTURE_2D_MULTISAMPLE_ARRAY) - { - if (g_ogl_config.bSupports3DTextureStorageMultisample) - glTexStorage3DMultisample(texture_type, m_msaaSamples, internal_format, m_targetWidth, - m_targetHeight, m_EFBLayers, false); - else - glTexImage3DMultisample(texture_type, m_msaaSamples, internal_format, m_targetWidth, - m_targetHeight, m_EFBLayers, false); - } - else if (texture_type == GL_TEXTURE_2D_MULTISAMPLE) - { - if (g_ogl_config.bSupports2DTextureStorageMultisample) - glTexStorage2DMultisample(texture_type, m_msaaSamples, internal_format, m_targetWidth, - m_targetHeight, false); - else - glTexImage2DMultisample(texture_type, m_msaaSamples, internal_format, m_targetWidth, - 
m_targetHeight, false); - } - else - { - PanicAlert("Unhandled texture type %d", texture_type); - } - glBindTexture(texture_type, 0); - return texture; -} - -void FramebufferManager::BindLayeredTexture(GLuint texture, const std::vector& framebuffers, - GLenum attachment, GLenum texture_type) -{ - glBindFramebuffer(GL_FRAMEBUFFER, framebuffers[0]); - FramebufferTexture(GL_FRAMEBUFFER, attachment, texture_type, texture, 0); - // Bind all the other layers as separate FBOs for blitting. - for (unsigned int i = 1; i < m_EFBLayers; i++) - { - glBindFramebuffer(GL_FRAMEBUFFER, framebuffers[i]); - glFramebufferTextureLayer(GL_FRAMEBUFFER, attachment, texture, 0, i); - } -} - -bool FramebufferManager::HasStencilBuffer() -{ - return m_enable_stencil_buffer; -} - -FramebufferManager::FramebufferManager(int targetWidth, int targetHeight, int msaaSamples, - bool enable_stencil_buffer) -{ - m_efbColor = 0; - m_efbDepth = 0; - m_efbColorSwap = 0; - m_resolvedColorTexture = 0; - m_resolvedDepthTexture = 0; - - m_targetWidth = targetWidth; - m_targetHeight = targetHeight; - m_msaaSamples = msaaSamples; - m_enable_stencil_buffer = enable_stencil_buffer; - - // The EFB can be set to different pixel formats by the game through the - // BPMEM_ZCOMPARE register (which should probably have a different name). - // They are: - // - 24-bit RGB (8-bit components) with 24-bit Z - // - 24-bit RGBA (6-bit components) with 24-bit Z - // - Multisampled 16-bit RGB (5-6-5 format) with 16-bit Z - // We only use one EFB format here: 32-bit ARGB with 24-bit Z. - // Multisampling depends on user settings. - // The distinction becomes important for certain operations, i.e. the - // alpha channel should be ignored if the EFB does not have one. - - glActiveTexture(GL_TEXTURE9); - - m_EFBLayers = (g_ActiveConfig.stereo_mode != StereoMode::Off) ? 
2 : 1; - m_efbFramebuffer.resize(m_EFBLayers); - m_resolvedFramebuffer.resize(m_EFBLayers); - - GLenum depth_internal_format = GL_DEPTH_COMPONENT32F; - GLenum depth_pixel_format = GL_DEPTH_COMPONENT; - GLenum depth_data_type = GL_FLOAT; - if (m_enable_stencil_buffer) - { - depth_internal_format = GL_DEPTH32F_STENCIL8; - depth_pixel_format = GL_DEPTH_STENCIL; - depth_data_type = GL_FLOAT_32_UNSIGNED_INT_24_8_REV; - } - - const bool multilayer = m_EFBLayers > 1; - - if (m_msaaSamples <= 1) - { - m_textureType = GL_TEXTURE_2D_ARRAY; - } - else - { - // Only use a layered multisample texture if needed. Some drivers - // slow down significantly with single-layered multisample textures. - m_textureType = multilayer ? GL_TEXTURE_2D_MULTISAMPLE_ARRAY : GL_TEXTURE_2D_MULTISAMPLE; - - // Although we are able to access the multisampled texture directly, we don't do it - // everywhere. The old way is to "resolve" this multisampled texture by copying it into a - // non-sampled texture. This would lead to an unneeded copy of the EFB, so we are going to - // avoid it. But as this job isn't done right now, we do need that texture for resolving: - GLenum resolvedType = GL_TEXTURE_2D_ARRAY; - - m_resolvedColorTexture = CreateTexture(resolvedType, GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE); - m_resolvedDepthTexture = - CreateTexture(resolvedType, depth_internal_format, depth_pixel_format, depth_data_type); - - // Bind resolved textures to resolved framebuffer. 
- glGenFramebuffers(m_EFBLayers, m_resolvedFramebuffer.data()); - BindLayeredTexture(m_resolvedColorTexture, m_resolvedFramebuffer, GL_COLOR_ATTACHMENT0, - resolvedType); - BindLayeredTexture(m_resolvedDepthTexture, m_resolvedFramebuffer, GL_DEPTH_ATTACHMENT, - resolvedType); - if (m_enable_stencil_buffer) - BindLayeredTexture(m_resolvedDepthTexture, m_resolvedFramebuffer, GL_STENCIL_ATTACHMENT, - resolvedType); - } - - m_efbColor = CreateTexture(m_textureType, GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE); - m_efbDepth = - CreateTexture(m_textureType, depth_internal_format, depth_pixel_format, depth_data_type); - m_efbColorSwap = CreateTexture(m_textureType, GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE); - - // Bind target textures to EFB framebuffer. - glGenFramebuffers(m_EFBLayers, m_efbFramebuffer.data()); - BindLayeredTexture(m_efbColor, m_efbFramebuffer, GL_COLOR_ATTACHMENT0, m_textureType); - BindLayeredTexture(m_efbDepth, m_efbFramebuffer, GL_DEPTH_ATTACHMENT, m_textureType); - if (m_enable_stencil_buffer) - BindLayeredTexture(m_efbDepth, m_efbFramebuffer, GL_STENCIL_ATTACHMENT, m_textureType); - - // EFB framebuffer is currently bound, make sure to clear it before use. - glViewport(0, 0, m_targetWidth, m_targetHeight); - glScissor(0, 0, m_targetWidth, m_targetHeight); - glClearColor(0.f, 0.f, 0.f, 0.f); - glClearDepthf(1.0f); - glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT); - if (m_enable_stencil_buffer) - { - glClearStencil(0); - glClear(GL_STENCIL_BUFFER_BIT); - } - - // reinterpret pixel format - std::string vs = GLSL_REINTERPRET_PIXELFMT_VS; - - // The way to sample the EFB is based on the on the current configuration. 
- // As we use the same sampling way for both interpreting shaders, the sampling - // shader are generated first: - std::string sampler; - - if (m_msaaSamples <= 1) - { - // non-msaa, so just fetch the pixel - sampler = StringFromFormat(GLSL_SHADER_FS, multilayer, false); - } - else if (g_ActiveConfig.backend_info.bSupportsSSAA) - { - // msaa + sample shading available, so just fetch the sample - // This will lead to sample shading, but it's the only way to not loose - // the values of each sample. - sampler = StringFromFormat(GLSL_SHADER_FS, multilayer, true); - } - else - { - // msaa without sample shading: calculate the mean value of the pixel - sampler = StringFromFormat(GLSL_SAMPLE_EFB_FS, multilayer, m_msaaSamples, m_msaaSamples); - } - - std::string ps_rgba6_to_rgb8 = sampler + GLSL_RGBA6_TO_RGB8_FS; - - std::string ps_rgb8_to_rgba6 = sampler + GLSL_RGB8_TO_RGBA6_FS; - - std::string gs = StringFromFormat(GLSL_GS, m_EFBLayers * 3, m_EFBLayers); - - ProgramShaderCache::CompileShader(m_pixel_format_shaders[0], vs, ps_rgb8_to_rgba6, - multilayer ? gs : ""); - ProgramShaderCache::CompileShader(m_pixel_format_shaders[1], vs, ps_rgba6_to_rgb8, - multilayer ? gs : ""); - - const auto prefix = multilayer ? "g" : "v"; - - ProgramShaderCache::CompileShader( - m_EfbPokes, StringFromFormat(GLSL_EFB_POKE_VERTEX_VS, m_targetWidth), - - StringFromFormat(GLSL_EFB_POKE_PIXEL_FS, prefix, prefix, prefix, prefix), - - multilayer ? 
- StringFromFormat(GLSL_EFB_POKE_GEOMETRY_GS, m_EFBLayers, m_EFBLayers, m_targetWidth) : - ""); - glGenBuffers(1, &m_EfbPokes_VBO); - glGenVertexArrays(1, &m_EfbPokes_VAO); - glBindBuffer(GL_ARRAY_BUFFER, m_EfbPokes_VBO); - glBindVertexArray(m_EfbPokes_VAO); - glEnableVertexAttribArray(SHADER_POSITION_ATTRIB); - glVertexAttribPointer(SHADER_POSITION_ATTRIB, 2, GL_UNSIGNED_SHORT, 0, sizeof(EfbPokeData), - (void*)offsetof(EfbPokeData, x)); - glEnableVertexAttribArray(SHADER_COLOR0_ATTRIB); - glVertexAttribPointer(SHADER_COLOR0_ATTRIB, 4, GL_UNSIGNED_BYTE, 1, sizeof(EfbPokeData), - (void*)offsetof(EfbPokeData, data)); - glEnableVertexAttribArray(SHADER_COLOR1_ATTRIB); - glVertexAttribIPointer(SHADER_COLOR1_ATTRIB, 1, GL_INT, sizeof(EfbPokeData), - (void*)offsetof(EfbPokeData, data)); - glBindBuffer(GL_ARRAY_BUFFER, - static_cast(g_vertex_manager.get())->GetVertexBufferHandle()); - - if (!static_cast(g_renderer.get())->IsGLES()) - glEnable(GL_PROGRAM_POINT_SIZE); -} - -FramebufferManager::~FramebufferManager() -{ - glBindFramebuffer(GL_FRAMEBUFFER, 0); - - GLuint glObj[3]; - - // Note: OpenGL deletion functions silently ignore parameters of "0". 
- - glDeleteFramebuffers(m_EFBLayers, m_efbFramebuffer.data()); - glDeleteFramebuffers(m_EFBLayers, m_resolvedFramebuffer.data()); - - // Required, as these are static class members - m_efbFramebuffer.clear(); - m_resolvedFramebuffer.clear(); - - glObj[0] = m_resolvedColorTexture; - glObj[1] = m_resolvedDepthTexture; - glDeleteTextures(2, glObj); - m_resolvedColorTexture = 0; - m_resolvedDepthTexture = 0; - - glObj[0] = m_efbColor; - glObj[1] = m_efbDepth; - glObj[2] = m_efbColorSwap; - glDeleteTextures(3, glObj); - m_efbColor = 0; - m_efbDepth = 0; - m_efbColorSwap = 0; - - // reinterpret pixel format - m_pixel_format_shaders[0].Destroy(); - m_pixel_format_shaders[1].Destroy(); - - // EFB pokes - glDeleteBuffers(1, &m_EfbPokes_VBO); - glDeleteVertexArrays(1, &m_EfbPokes_VAO); - m_EfbPokes_VBO = 0; - m_EfbPokes_VAO = 0; - m_EfbPokes.Destroy(); -} - -GLuint FramebufferManager::GetEFBColorTexture(const EFBRectangle& sourceRc) -{ - if (m_msaaSamples <= 1) - { - return m_efbColor; - } - else - { - // Transfer the EFB to a resolved texture. EXT_framebuffer_blit is - // required. - - TargetRectangle targetRc = g_renderer->ConvertEFBRectangle(sourceRc); - targetRc.ClampUL(0, 0, m_targetWidth, m_targetHeight); - - // Resolve. - for (unsigned int i = 0; i < m_EFBLayers; i++) - { - glBindFramebuffer(GL_READ_FRAMEBUFFER, m_efbFramebuffer[i]); - glBindFramebuffer(GL_DRAW_FRAMEBUFFER, m_resolvedFramebuffer[i]); - glBlitFramebuffer(targetRc.left, targetRc.top, targetRc.right, targetRc.bottom, targetRc.left, - targetRc.top, targetRc.right, targetRc.bottom, GL_COLOR_BUFFER_BIT, - GL_NEAREST); - } - - // Return to EFB. - glBindFramebuffer(GL_FRAMEBUFFER, m_efbFramebuffer[0]); - - return m_resolvedColorTexture; - } -} - -GLuint FramebufferManager::GetEFBDepthTexture(const EFBRectangle& sourceRc) -{ - if (m_msaaSamples <= 1) - { - return m_efbDepth; - } - else - { - // Transfer the EFB to a resolved texture. 
- - TargetRectangle targetRc = g_renderer->ConvertEFBRectangle(sourceRc); - targetRc.ClampUL(0, 0, m_targetWidth, m_targetHeight); - - // Resolve. - for (unsigned int i = 0; i < m_EFBLayers; i++) - { - glBindFramebuffer(GL_READ_FRAMEBUFFER, m_efbFramebuffer[i]); - glBindFramebuffer(GL_DRAW_FRAMEBUFFER, m_resolvedFramebuffer[i]); - glBlitFramebuffer(targetRc.left, targetRc.top, targetRc.right, targetRc.bottom, targetRc.left, - targetRc.top, targetRc.right, targetRc.bottom, GL_DEPTH_BUFFER_BIT, - GL_NEAREST); - } - - // Return to EFB. - glBindFramebuffer(GL_FRAMEBUFFER, m_efbFramebuffer[0]); - - return m_resolvedDepthTexture; - } -} - -void FramebufferManager::ResolveEFBStencilTexture() -{ - if (m_msaaSamples <= 1) - return; - - // Resolve. - for (unsigned int i = 0; i < m_EFBLayers; i++) - { - glBindFramebuffer(GL_READ_FRAMEBUFFER, m_efbFramebuffer[i]); - glBindFramebuffer(GL_DRAW_FRAMEBUFFER, m_resolvedFramebuffer[i]); - glBlitFramebuffer(0, 0, m_targetWidth, m_targetHeight, 0, 0, m_targetWidth, m_targetHeight, - GL_STENCIL_BUFFER_BIT, GL_NEAREST); - } - - // Return to EFB. - glBindFramebuffer(GL_FRAMEBUFFER, m_efbFramebuffer[0]); -} - -GLuint FramebufferManager::GetResolvedFramebuffer() -{ - if (m_msaaSamples <= 1) - return m_efbFramebuffer[0]; - return m_resolvedFramebuffer[0]; -} - -void FramebufferManager::SetFramebuffer(GLuint fb) -{ - glBindFramebuffer(GL_FRAMEBUFFER, fb != 0 ? 
fb : GetEFBFramebuffer()); -} - -void FramebufferManager::FramebufferTexture(GLenum target, GLenum attachment, GLenum textarget, - GLuint texture, GLint level) -{ - if (textarget == GL_TEXTURE_2D_ARRAY || textarget == GL_TEXTURE_2D_MULTISAMPLE_ARRAY) - { - if (m_EFBLayers > 1) - glFramebufferTexture(target, attachment, texture, level); - else - glFramebufferTextureLayer(target, attachment, texture, level, 0); - } - else - { - glFramebufferTexture2D(target, attachment, textarget, texture, level); - } -} - -// Apply AA if enabled -GLuint FramebufferManager::ResolveAndGetRenderTarget(const EFBRectangle& source_rect) -{ - return GetEFBColorTexture(source_rect); -} - -GLuint FramebufferManager::ResolveAndGetDepthTarget(const EFBRectangle& source_rect) -{ - return GetEFBDepthTexture(source_rect); -} - -void FramebufferManager::ReinterpretPixelData(unsigned int convtype) -{ - g_renderer->ResetAPIState(); - - GLuint src_texture = 0; - - // We aren't allowed to render and sample the same texture in one draw call, - // so we have to create a new texture and overwrite it completely. - // To not allocate one big texture every time, we've allocated two on - // initialization and just swap them here: - src_texture = m_efbColor; - m_efbColor = m_efbColorSwap; - m_efbColorSwap = src_texture; - FramebufferTexture(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, m_textureType, m_efbColor, 0); - - glViewport(0, 0, m_targetWidth, m_targetHeight); - glActiveTexture(GL_TEXTURE9); - glBindTexture(m_textureType, src_texture); - g_sampler_cache->BindNearestSampler(9); - - m_pixel_format_shaders[convtype ? 
1 : 0].Bind(); - ProgramShaderCache::BindVertexFormat(nullptr); - glDrawArrays(GL_TRIANGLE_STRIP, 0, 4); - glBindTexture(m_textureType, 0); - - g_renderer->RestoreAPIState(); -} - -void FramebufferManager::PokeEFB(EFBAccessType type, const EfbPokeData* points, size_t num_points) -{ - g_renderer->ResetAPIState(); - - if (type == EFBAccessType::PokeZ) - { - glDepthMask(GL_TRUE); - glColorMask(GL_FALSE, GL_FALSE, GL_FALSE, GL_FALSE); - glEnable(GL_DEPTH_TEST); - glDepthFunc(GL_ALWAYS); - } - - glBindVertexArray(m_EfbPokes_VAO); - glBindBuffer(GL_ARRAY_BUFFER, m_EfbPokes_VBO); - glBufferData(GL_ARRAY_BUFFER, sizeof(EfbPokeData) * num_points, points, GL_STREAM_DRAW); - m_EfbPokes.Bind(); - glViewport(0, 0, m_targetWidth, m_targetHeight); - glDrawArrays(GL_POINTS, 0, (GLsizei)num_points); - - glBindBuffer(GL_ARRAY_BUFFER, - static_cast(g_vertex_manager.get())->GetVertexBufferHandle()); - g_renderer->RestoreAPIState(); - - // TODO: Could just update the EFB cache with the new value - ClearEFBCache(); -} - -} // namespace OGL diff --git a/Source/Core/VideoBackends/OGL/FramebufferManager.h b/Source/Core/VideoBackends/OGL/FramebufferManager.h deleted file mode 100644 index f68556bf90..0000000000 --- a/Source/Core/VideoBackends/OGL/FramebufferManager.h +++ /dev/null @@ -1,127 +0,0 @@ -// Copyright 2009 Dolphin Emulator Project -// Licensed under GPLv2+ -// Refer to the license.txt file included. - -#pragma once - -#include -#include -#include - -#include "Common/CommonTypes.h" -#include "Common/GL/GLUtil.h" -#include "VideoBackends/OGL/ProgramShaderCache.h" -#include "VideoBackends/OGL/Render.h" -#include "VideoCommon/FramebufferManagerBase.h" - -// On the GameCube, the game sends a request for the graphics processor to -// transfer its internal EFB (Embedded Framebuffer) to an area in GameCube RAM -// called the XFB (External Framebuffer). The size and location of the XFB is -// decided at the time of the copy, and the format is always YUYV. 
The video -// interface is given a pointer to the XFB, which will be decoded and -// displayed on the TV. -// -// There are two ways for Dolphin to emulate this: -// -// Real XFB mode: -// -// Dolphin will behave like the GameCube and encode the EFB to -// a portion of GameCube RAM. The emulated video interface will decode the data -// for output to the screen. -// -// Advantages: Behaves exactly like the GameCube. -// Disadvantages: Resolution will be limited. -// -// Virtual XFB mode: -// -// When a request is made to copy the EFB to an XFB, Dolphin -// will remember the RAM location and size of the XFB in a Virtual XFB list. -// The video interface will look up the XFB in the list and use the enhanced -// data stored there, if available. -// -// Advantages: Enables high resolution graphics, better than real hardware. -// Disadvantages: If the GameCube CPU writes directly to the XFB (which is -// possible but uncommon), the Virtual XFB will not capture this information. - -// There may be multiple XFBs in GameCube RAM. This is the maximum number to -// virtualize. - -namespace OGL -{ -class FramebufferManager : public FramebufferManagerBase -{ -public: - FramebufferManager(int targetWidth, int targetHeight, int msaaSamples, - bool enable_stencil_buffer); - ~FramebufferManager(); - - // To get the EFB in texture form, these functions may have to transfer - // the EFB to a resolved texture first. - static GLuint GetEFBColorTexture(const EFBRectangle& sourceRc); - static GLuint GetEFBDepthTexture(const EFBRectangle& sourceRc); - static void ResolveEFBStencilTexture(); - - static GLuint GetEFBFramebuffer(unsigned int layer = 0) - { - return (layer < m_EFBLayers) ? m_efbFramebuffer[layer] : m_efbFramebuffer.back(); - } - // Resolved framebuffer is only used in MSAA mode. 
- static GLuint GetResolvedFramebuffer(); - static void SetFramebuffer(GLuint fb); - static void FramebufferTexture(GLenum target, GLenum attachment, GLenum textarget, GLuint texture, - GLint level); - - // If in MSAA mode, this will perform a resolve of the specified rectangle, and return the resolve - // target as a texture ID. - // Thus, this call may be expensive. Don't repeat it unnecessarily. - // If not in MSAA mode, will just return the render target texture ID. - // After calling this, before you render anything else, you MUST bind the framebuffer you want to - // draw to. - static GLuint ResolveAndGetRenderTarget(const EFBRectangle& rect); - - // Same as above but for the depth Target. - // After calling this, before you render anything else, you MUST bind the framebuffer you want to - // draw to. - static GLuint ResolveAndGetDepthTarget(const EFBRectangle& rect); - - // Convert EFB content on pixel format change. - // convtype=0 -> rgb8->rgba6, convtype=2 -> rgba6->rgb8 - static void ReinterpretPixelData(unsigned int convtype); - - static void PokeEFB(EFBAccessType type, const EfbPokeData* points, size_t num_points); - static bool HasStencilBuffer(); - -private: - GLuint CreateTexture(GLenum texture_type, GLenum internal_format, GLenum pixel_format, - GLenum data_type); - void BindLayeredTexture(GLuint texture, const std::vector& framebuffers, - GLenum attachment, GLenum texture_type); - - static int m_targetWidth; - static int m_targetHeight; - static int m_msaaSamples; - - static GLenum m_textureType; - static std::vector m_efbFramebuffer; - static GLuint m_efbColor; - static GLuint m_efbDepth; - static GLuint - m_efbColorSwap; // will be hot swapped with m_efbColor when reinterpreting EFB pixel formats - - static bool m_enable_stencil_buffer; - - // Only used in MSAA mode, TODO: try to avoid them - static std::vector m_resolvedFramebuffer; - static GLuint m_resolvedColorTexture; - static GLuint m_resolvedDepthTexture; - - // For pixel format draw - 
static SHADER m_pixel_format_shaders[2]; - - // For EFB pokes - static GLuint m_EfbPokes_VBO; - static GLuint m_EfbPokes_VAO; - static SHADER m_EfbPokes; -}; - -} // namespace OGL diff --git a/Source/Core/VideoBackends/OGL/NativeVertexFormat.cpp b/Source/Core/VideoBackends/OGL/NativeVertexFormat.cpp index df24809ae0..50a6276c72 100644 --- a/Source/Core/VideoBackends/OGL/NativeVertexFormat.cpp +++ b/Source/Core/VideoBackends/OGL/NativeVertexFormat.cpp @@ -7,6 +7,7 @@ #include "Common/MsgHandler.h" #include "VideoBackends/OGL/ProgramShaderCache.h" +#include "VideoBackends/OGL/Render.h" #include "VideoBackends/OGL/VertexManager.h" #include "VideoCommon/NativeVertexFormat.h" @@ -18,7 +19,7 @@ namespace OGL { std::unique_ptr -VertexManager::CreateNativeVertexFormat(const PortableVertexDeclaration& vtx_decl) +Renderer::CreateNativeVertexFormat(const PortableVertexDeclaration& vtx_decl) { return std::make_unique(vtx_decl); } @@ -44,10 +45,10 @@ static void SetPointer(u32 attrib, u32 stride, const AttributeFormat& format) (u8*)nullptr + format.offset); } -GLVertexFormat::GLVertexFormat(const PortableVertexDeclaration& _vtx_decl) +GLVertexFormat::GLVertexFormat(const PortableVertexDeclaration& vtx_decl) + : NativeVertexFormat(vtx_decl) { - this->vtx_decl = _vtx_decl; - u32 vertex_stride = _vtx_decl.stride; + u32 vertex_stride = vtx_decl.stride; // We will not allow vertex components causing uneven strides. 
if (vertex_stride & 3) @@ -63,22 +64,22 @@ GLVertexFormat::GLVertexFormat(const PortableVertexDeclaration& _vtx_decl) glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, vm->GetIndexBufferHandle()); glBindBuffer(GL_ARRAY_BUFFER, vm->GetVertexBufferHandle()); - SetPointer(SHADER_POSITION_ATTRIB, vertex_stride, _vtx_decl.position); + SetPointer(SHADER_POSITION_ATTRIB, vertex_stride, vtx_decl.position); for (int i = 0; i < 3; i++) - SetPointer(SHADER_NORM0_ATTRIB + i, vertex_stride, _vtx_decl.normals[i]); + SetPointer(SHADER_NORM0_ATTRIB + i, vertex_stride, vtx_decl.normals[i]); for (int i = 0; i < 2; i++) - SetPointer(SHADER_COLOR0_ATTRIB + i, vertex_stride, _vtx_decl.colors[i]); + SetPointer(SHADER_COLOR0_ATTRIB + i, vertex_stride, vtx_decl.colors[i]); for (int i = 0; i < 8; i++) - SetPointer(SHADER_TEXTURE0_ATTRIB + i, vertex_stride, _vtx_decl.texcoords[i]); + SetPointer(SHADER_TEXTURE0_ATTRIB + i, vertex_stride, vtx_decl.texcoords[i]); - SetPointer(SHADER_POSMTX_ATTRIB, vertex_stride, _vtx_decl.posmtx); + SetPointer(SHADER_POSMTX_ATTRIB, vertex_stride, vtx_decl.posmtx); } GLVertexFormat::~GLVertexFormat() { glDeleteVertexArrays(1, &VAO); } -} +} // namespace OGL diff --git a/Source/Core/VideoBackends/OGL/OGL.vcxproj b/Source/Core/VideoBackends/OGL/OGL.vcxproj index 38ea16b42e..7395b6ef2b 100644 --- a/Source/Core/VideoBackends/OGL/OGL.vcxproj +++ b/Source/Core/VideoBackends/OGL/OGL.vcxproj @@ -40,17 +40,13 @@ - - - - @@ -58,16 +54,12 @@ - - - - diff --git a/Source/Core/VideoBackends/OGL/OGL.vcxproj.filters b/Source/Core/VideoBackends/OGL/OGL.vcxproj.filters index fbede5caf6..4077f05a37 100644 --- a/Source/Core/VideoBackends/OGL/OGL.vcxproj.filters +++ b/Source/Core/VideoBackends/OGL/OGL.vcxproj.filters @@ -18,21 +18,12 @@ Decoder - - GLUtil - Render - - Render - Render - - Render - Render @@ -42,9 +33,6 @@ Render - - Render - @@ -61,21 +49,12 @@ Decoder - - GLUtil - Render - - Render - Render - - Render - Render @@ -85,9 +64,6 @@ Render - - Render - diff --git 
a/Source/Core/VideoBackends/OGL/OGLShader.cpp b/Source/Core/VideoBackends/OGL/OGLShader.cpp index e881269524..340d79f244 100644 --- a/Source/Core/VideoBackends/OGL/OGLShader.cpp +++ b/Source/Core/VideoBackends/OGL/OGLShader.cpp @@ -24,23 +24,24 @@ static GLenum GetGLShaderTypeForStage(ShaderStage stage) } } -OGLShader::OGLShader(ShaderStage stage, GLenum gl_type, GLuint shader_id) - : AbstractShader(stage), m_type(gl_type), m_id(shader_id) +OGLShader::OGLShader(ShaderStage stage, GLenum gl_type, GLuint gl_id) + : AbstractShader(stage), m_id(ProgramShaderCache::GenerateShaderID()), m_type(gl_type), + m_gl_id(gl_id) { } -OGLShader::OGLShader(GLuint compute_program_id) - : AbstractShader(ShaderStage::Compute), m_type(GL_COMPUTE_SHADER), - m_compute_program_id(compute_program_id) +OGLShader::OGLShader(GLuint gl_compute_program_id) + : AbstractShader(ShaderStage::Compute), m_id(ProgramShaderCache::GenerateShaderID()), + m_type(GL_COMPUTE_SHADER), m_gl_compute_program_id(gl_compute_program_id) { } OGLShader::~OGLShader() { if (m_stage != ShaderStage::Compute) - glDeleteShader(m_id); + glDeleteShader(m_gl_id); else - glDeleteProgram(m_compute_program_id); + glDeleteProgram(m_gl_compute_program_id); } bool OGLShader::HasBinary() const diff --git a/Source/Core/VideoBackends/OGL/OGLShader.h b/Source/Core/VideoBackends/OGL/OGLShader.h index e3036044f6..a703d60696 100644 --- a/Source/Core/VideoBackends/OGL/OGLShader.h +++ b/Source/Core/VideoBackends/OGL/OGLShader.h @@ -16,13 +16,14 @@ namespace OGL class OGLShader final : public AbstractShader { public: - explicit OGLShader(ShaderStage stage, GLenum gl_type, GLuint shader_id); - explicit OGLShader(GLuint compute_program_id); + explicit OGLShader(ShaderStage stage, GLenum gl_type, GLuint gl_id); + explicit OGLShader(GLuint gl_compute_program_id); ~OGLShader() override; + u64 GetID() const { return m_id; } GLenum GetGLShaderType() const { return m_type; } - GLuint GetGLShaderID() const { return m_id; } - GLuint 
GetGLComputeProgramID() const { return m_compute_program_id; } + GLuint GetGLShaderID() const { return m_gl_id; } + GLuint GetGLComputeProgramID() const { return m_gl_compute_program_id; } bool HasBinary() const override; BinaryData GetBinary() const override; @@ -30,9 +31,10 @@ public: size_t length); private: + u64 m_id; GLenum m_type; - GLuint m_id = 0; - GLuint m_compute_program_id = 0; + GLuint m_gl_id = 0; + GLuint m_gl_compute_program_id = 0; }; } // namespace OGL diff --git a/Source/Core/VideoBackends/OGL/OGLTexture.cpp b/Source/Core/VideoBackends/OGL/OGLTexture.cpp index fbe7f576c7..3cdbaca301 100644 --- a/Source/Core/VideoBackends/OGL/OGLTexture.cpp +++ b/Source/Core/VideoBackends/OGL/OGLTexture.cpp @@ -6,13 +6,8 @@ #include "Common/CommonTypes.h" #include "Common/MsgHandler.h" -#include "VideoBackends/OGL/FramebufferManager.h" #include "VideoBackends/OGL/OGLTexture.h" #include "VideoBackends/OGL/SamplerCache.h" -#include "VideoBackends/OGL/TextureCache.h" - -#include "VideoCommon/ImageWrite.h" -#include "VideoCommon/TextureConfig.h" namespace OGL { @@ -115,10 +110,9 @@ OGLTexture::OGLTexture(const TextureConfig& tex_config) : AbstractTexture(tex_co DEBUG_ASSERT_MSG(VIDEO, !tex_config.IsMultisampled() || tex_config.levels == 1, "OpenGL does not support multisampled textures with mip levels"); - GLenum target = - tex_config.IsMultisampled() ? GL_TEXTURE_2D_MULTISAMPLE_ARRAY : GL_TEXTURE_2D_ARRAY; + const GLenum target = GetGLTarget(); glGenTextures(1, &m_texId); - glActiveTexture(GL_TEXTURE9); + glActiveTexture(GL_MUTABLE_TEXTURE_INDEX); glBindTexture(target, m_texId); glTexParameteri(target, GL_TEXTURE_MAX_LEVEL, m_config.levels - 1); @@ -139,7 +133,7 @@ OGLTexture::OGLTexture(const TextureConfig& tex_config) : AbstractTexture(tex_co m_config.layers); } - if (m_config.rendertarget) + if (m_config.IsRenderTarget()) { // We can't render to compressed formats. 
ASSERT(!IsCompressedFormat(m_config.format)); @@ -147,40 +141,19 @@ OGLTexture::OGLTexture(const TextureConfig& tex_config) : AbstractTexture(tex_co { for (u32 level = 0; level < m_config.levels; level++) { - glTexImage3D(target, level, GL_RGBA, std::max(m_config.width >> level, 1u), - std::max(m_config.height >> level, 1u), m_config.layers, 0, GL_RGBA, - GL_UNSIGNED_BYTE, nullptr); + glTexImage3D(target, level, gl_internal_format, std::max(m_config.width >> level, 1u), + std::max(m_config.height >> level, 1u), m_config.layers, 0, + GetGLFormatForTextureFormat(m_config.format), + GetGLTypeForTextureFormat(m_config.format), nullptr); } } - glGenFramebuffers(1, &m_framebuffer); - FramebufferManager::SetFramebuffer(m_framebuffer); - FramebufferManager::FramebufferTexture(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, target, m_texId, - 0); - - // We broke the framebuffer binding here, and need to restore it, as the CreateTexture - // method is in the base renderer class and can be called by VideoCommon. 
- FramebufferManager::SetFramebuffer(0); } } OGLTexture::~OGLTexture() { - g_renderer->UnbindTexture(this); - if (m_texId) - glDeleteTextures(1, &m_texId); - - if (m_framebuffer) - glDeleteFramebuffers(1, &m_framebuffer); -} - -GLuint OGLTexture::GetRawTexIdentifier() const -{ - return m_texId; -} - -GLuint OGLTexture::GetFramebuffer() const -{ - return m_framebuffer; + Renderer::GetInstance()->UnbindTexture(this); + glDeleteTextures(1, &m_texId); } void OGLTexture::CopyRectangleFromTexture(const AbstractTexture* src, @@ -188,19 +161,18 @@ void OGLTexture::CopyRectangleFromTexture(const AbstractTexture* src, u32 src_level, const MathUtil::Rectangle& dst_rect, u32 dst_layer, u32 dst_level) { - const OGLTexture* srcentry = static_cast(src); + const OGLTexture* src_gltex = static_cast(src); ASSERT(src_rect.GetWidth() == dst_rect.GetWidth() && src_rect.GetHeight() == dst_rect.GetHeight()); if (g_ogl_config.bSupportsCopySubImage) { - glCopyImageSubData(srcentry->m_texId, GL_TEXTURE_2D_ARRAY, src_level, src_rect.left, - src_rect.top, src_layer, m_texId, GL_TEXTURE_2D_ARRAY, dst_level, - dst_rect.left, dst_rect.top, dst_layer, dst_rect.GetWidth(), - dst_rect.GetHeight(), 1); + glCopyImageSubData(src_gltex->m_texId, src_gltex->GetGLTarget(), src_level, src_rect.left, + src_rect.top, src_layer, m_texId, GetGLTarget(), dst_level, dst_rect.left, + dst_rect.top, dst_layer, dst_rect.GetWidth(), dst_rect.GetHeight(), 1); } else { - BlitFramebuffer(const_cast(srcentry), src_rect, src_layer, src_level, dst_rect, + BlitFramebuffer(const_cast(src_gltex), src_rect, src_layer, src_level, dst_rect, dst_layer, dst_level); } } @@ -210,28 +182,12 @@ void OGLTexture::BlitFramebuffer(OGLTexture* srcentry, const MathUtil::Rectangle const MathUtil::Rectangle& dst_rect, u32 dst_layer, u32 dst_level) { - // If it isn't a single leveled/layered texture, we need to update the framebuffer. 
- bool update_src_framebuffer = - srcentry->m_framebuffer == 0 || srcentry->m_config.layers != 0 || src_level != 0; - bool update_dst_framebuffer = m_framebuffer == 0 || m_config.layers != 0 || dst_level != 0; - if (!m_framebuffer) - glGenFramebuffers(1, &m_framebuffer); - if (!srcentry->m_framebuffer) - glGenFramebuffers(1, &const_cast(srcentry)->m_framebuffer); - - glBindFramebuffer(GL_READ_FRAMEBUFFER, srcentry->m_framebuffer); - if (update_src_framebuffer) - { - glFramebufferTextureLayer(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, srcentry->m_texId, - src_level, src_layer); - } - - glBindFramebuffer(GL_DRAW_FRAMEBUFFER, m_framebuffer); - if (update_dst_framebuffer) - { - glFramebufferTextureLayer(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, m_texId, dst_level, - dst_layer); - } + Renderer::GetInstance()->BindSharedReadFramebuffer(); + glFramebufferTextureLayer(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, srcentry->m_texId, src_level, + src_layer); + Renderer::GetInstance()->BindSharedDrawFramebuffer(); + glFramebufferTextureLayer(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, m_texId, dst_level, + dst_layer); // glBlitFramebuffer is still affected by the scissor test, which is enabled by default. glDisable(GL_SCISSOR_TEST); @@ -239,50 +195,10 @@ void OGLTexture::BlitFramebuffer(OGLTexture* srcentry, const MathUtil::Rectangle glBlitFramebuffer(src_rect.left, src_rect.top, src_rect.right, src_rect.bottom, dst_rect.left, dst_rect.top, dst_rect.right, dst_rect.bottom, GL_COLOR_BUFFER_BIT, GL_NEAREST); - if (update_src_framebuffer) - { - FramebufferManager::FramebufferTexture( - GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, - srcentry->m_config.IsMultisampled() ? GL_TEXTURE_2D_MULTISAMPLE_ARRAY : GL_TEXTURE_2D_ARRAY, - srcentry->m_texId, 0); - } - if (update_dst_framebuffer) - { - FramebufferManager::FramebufferTexture( - GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, - m_config.IsMultisampled() ? 
GL_TEXTURE_2D_MULTISAMPLE_ARRAY : GL_TEXTURE_2D_ARRAY, m_texId, - 0); - } - // The default state for the scissor test is enabled. We don't need to do a full state // restore, as the framebuffer and scissor test are the only things we changed. glEnable(GL_SCISSOR_TEST); - FramebufferManager::SetFramebuffer(0); -} - -void OGLTexture::ScaleRectangleFromTexture(const AbstractTexture* source, - const MathUtil::Rectangle& srcrect, - const MathUtil::Rectangle& dstrect) -{ - const OGLTexture* srcentry = static_cast(source); - if (!m_framebuffer) - { - glGenFramebuffers(1, &m_framebuffer); - FramebufferManager::SetFramebuffer(m_framebuffer); - FramebufferManager::FramebufferTexture(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, - GL_TEXTURE_2D_ARRAY, m_texId, 0); - } - g_renderer->ResetAPIState(); - FramebufferManager::SetFramebuffer(m_framebuffer); - glActiveTexture(GL_TEXTURE9); - glBindTexture(GL_TEXTURE_2D_ARRAY, srcentry->m_texId); - g_sampler_cache->BindLinearSampler(9); - glViewport(dstrect.left, dstrect.top, dstrect.GetWidth(), dstrect.GetHeight()); - TextureCache::GetInstance()->GetColorCopyProgram().Bind(); - glUniform4f(TextureCache::GetInstance()->GetColorCopyPositionUniform(), float(srcrect.left), - float(srcrect.top), float(srcrect.GetWidth()), float(srcrect.GetHeight())); - glDrawArrays(GL_TRIANGLE_STRIP, 0, 4); - g_renderer->RestoreAPIState(); + Renderer::GetInstance()->RestoreFramebufferBinding(); } void OGLTexture::ResolveFromTexture(const AbstractTexture* src, @@ -307,8 +223,9 @@ void OGLTexture::Load(u32 level, u32 width, u32 height, u32 row_length, const u8 std::max(1u, m_config.width >> level), std::max(1u, m_config.height >> level), width, height); - glActiveTexture(GL_TEXTURE9); - glBindTexture(GL_TEXTURE_2D_ARRAY, m_texId); + const GLenum target = GetGLTarget(); + glActiveTexture(GL_MUTABLE_TEXTURE_INDEX); + glBindTexture(target, m_texId); if (row_length != width) glPixelStorei(GL_UNPACK_ROW_LENGTH, row_length); @@ -318,12 +235,12 @@ void 
OGLTexture::Load(u32 level, u32 width, u32 height, u32 row_length, const u8 { if (g_ogl_config.bSupportsTextureStorage) { - glCompressedTexSubImage3D(GL_TEXTURE_2D_ARRAY, level, 0, 0, 0, width, height, 1, - gl_internal_format, static_cast(buffer_size), buffer); + glCompressedTexSubImage3D(target, level, 0, 0, 0, width, height, 1, gl_internal_format, + static_cast(buffer_size), buffer); } else { - glCompressedTexImage3D(GL_TEXTURE_2D_ARRAY, level, gl_internal_format, width, height, 1, 0, + glCompressedTexImage3D(target, level, gl_internal_format, width, height, 1, 0, static_cast(buffer_size), buffer); } } @@ -333,13 +250,12 @@ void OGLTexture::Load(u32 level, u32 width, u32 height, u32 row_length, const u8 GLenum gl_type = GetGLTypeForTextureFormat(m_config.format); if (g_ogl_config.bSupportsTextureStorage) { - glTexSubImage3D(GL_TEXTURE_2D_ARRAY, level, 0, 0, 0, width, height, 1, gl_format, gl_type, - buffer); + glTexSubImage3D(target, level, 0, 0, 0, width, height, 1, gl_format, gl_type, buffer); } else { - glTexImage3D(GL_TEXTURE_2D_ARRAY, level, gl_internal_format, width, height, 1, 0, gl_format, - gl_type, buffer); + glTexImage3D(target, level, gl_internal_format, width, height, 1, 0, gl_format, gl_type, + buffer); } } @@ -347,6 +263,11 @@ void OGLTexture::Load(u32 level, u32 width, u32 height, u32 row_length, const u8 glPixelStorei(GL_UNPACK_ROW_LENGTH, 0); } +GLenum OGLTexture::GetGLFormatForImageTexture() const +{ + return GetGLInternalFormatForTextureFormat(m_config.format, true); +} + OGLStagingTexture::OGLStagingTexture(StagingTextureType type, const TextureConfig& config, GLenum target, GLuint buffer_name, size_t buffer_size, char* map_ptr, size_t map_stride) @@ -405,8 +326,7 @@ std::unique_ptr OGLStagingTexture::Create(StagingTextureType } glBufferStorage(target, buffer_size, nullptr, buffer_flags); - buffer_ptr = - reinterpret_cast(glMapBufferRange(GL_PIXEL_PACK_BUFFER, 0, buffer_size, map_flags)); + buffer_ptr = 
reinterpret_cast(glMapBufferRange(target, 0, buffer_size, map_flags)); ASSERT(buffer_ptr != nullptr); } else @@ -426,7 +346,7 @@ void OGLStagingTexture::CopyFromTexture(const AbstractTexture* src, const MathUtil::Rectangle& src_rect, u32 src_layer, u32 src_level, const MathUtil::Rectangle& dst_rect) { - ASSERT(m_type == StagingTextureType::Readback); + ASSERT(m_type == StagingTextureType::Readback || m_type == StagingTextureType::Mutable); ASSERT(src_rect.GetWidth() == dst_rect.GetWidth() && src_rect.GetHeight() == dst_rect.GetHeight()); ASSERT(src_rect.left >= 0 && static_cast(src_rect.right) <= src->GetConfig().width && @@ -443,40 +363,37 @@ void OGLStagingTexture::CopyFromTexture(const AbstractTexture* src, glPixelStorei(GL_PACK_ROW_LENGTH, m_config.width); const OGLTexture* gltex = static_cast(src); - size_t dst_offset = dst_rect.top * m_config.GetStride() + dst_rect.left * m_texel_size; + const size_t dst_offset = dst_rect.top * m_config.GetStride() + dst_rect.left * m_texel_size; - // If we don't have a FBO associated with this texture, we need to use a slow path. - if (gltex->GetFramebuffer() != 0 && src_layer == 0 && src_level == 0) + // Prefer glGetTextureSubImage(), when available. + if (g_ogl_config.bSupportsTextureSubImage) { - // This texture has a framebuffer, so we can use glReadPixels(). - glBindFramebuffer(GL_READ_FRAMEBUFFER, gltex->GetFramebuffer()); - glReadPixels(src_rect.left, src_rect.top, src_rect.GetWidth(), src_rect.GetHeight(), - GetGLFormatForTextureFormat(m_config.format), - GetGLTypeForTextureFormat(m_config.format), reinterpret_cast(dst_offset)); - - // Reset both read/draw framebuffers. 
- glBindFramebuffer(GL_FRAMEBUFFER, FramebufferManager::GetEFBFramebuffer()); + glGetTextureSubImage( + gltex->GetGLTextureId(), src_level, src_rect.left, src_rect.top, src_layer, + src_rect.GetWidth(), src_rect.GetHeight(), 1, GetGLFormatForTextureFormat(src->GetFormat()), + GetGLTypeForTextureFormat(src->GetFormat()), + static_cast(m_buffer_size - dst_offset), reinterpret_cast(dst_offset)); } else { - if (g_ogl_config.bSupportsTextureSubImage) + // Mutate the shared framebuffer. + Renderer::GetInstance()->BindSharedReadFramebuffer(); + if (AbstractTexture::IsDepthFormat(gltex->GetFormat())) { - glGetTextureSubImage( - gltex->GetRawTexIdentifier(), src_level, src_rect.left, src_rect.top, src_layer, - src_rect.GetWidth(), src_rect.GetHeight(), 1, - GetGLFormatForTextureFormat(m_config.format), GetGLTypeForTextureFormat(m_config.format), - static_cast(m_buffer_size - dst_offset), reinterpret_cast(dst_offset)); + glFramebufferTextureLayer(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, 0, 0, 0); + glFramebufferTextureLayer(GL_READ_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, gltex->GetGLTextureId(), + src_level, src_layer); } else { - // TODO: Investigate whether it's faster to use glReadPixels() with a framebuffer, since we're - // copying the whole texture, which may waste bandwidth. So we're trading CPU work in creating - // the framebuffer for GPU work in copying potentially redundant texels. 
- glActiveTexture(GL_TEXTURE9); - glBindTexture(GL_TEXTURE_2D_ARRAY, gltex->GetRawTexIdentifier()); - glGetTexImage(GL_TEXTURE_2D_ARRAY, src_level, GetGLFormatForTextureFormat(m_config.format), - GetGLTypeForTextureFormat(m_config.format), nullptr); + glFramebufferTextureLayer(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, gltex->GetGLTextureId(), + src_level, src_layer); + glFramebufferTextureLayer(GL_READ_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, 0, 0, 0); } + glReadPixels(src_rect.left, src_rect.top, src_rect.GetWidth(), src_rect.GetHeight(), + GetGLFormatForTextureFormat(src->GetFormat()), + GetGLTypeForTextureFormat(src->GetFormat()), reinterpret_cast(dst_offset)); + Renderer::GetInstance()->RestoreFramebufferBinding(); } glPixelStorei(GL_PACK_ROW_LENGTH, 0); @@ -501,7 +418,7 @@ void OGLStagingTexture::CopyToTexture(const MathUtil::Rectangle& src_rect, const MathUtil::Rectangle& dst_rect, u32 dst_layer, u32 dst_level) { - ASSERT(m_type == StagingTextureType::Upload); + ASSERT(m_type == StagingTextureType::Upload || m_type == StagingTextureType::Mutable); ASSERT(src_rect.GetWidth() == dst_rect.GetWidth() && src_rect.GetHeight() == dst_rect.GetHeight()); ASSERT(src_rect.left >= 0 && static_cast(src_rect.right) <= m_config.width && @@ -509,8 +426,9 @@ void OGLStagingTexture::CopyToTexture(const MathUtil::Rectangle& src_rect, ASSERT(dst_rect.left >= 0 && static_cast(dst_rect.right) <= dst->GetConfig().width && dst_rect.top >= 0 && static_cast(dst_rect.bottom) <= dst->GetConfig().height); - size_t src_offset = src_rect.top * m_config.GetStride() + src_rect.left * m_texel_size; - size_t copy_size = src_rect.GetHeight() * m_config.GetStride(); + const OGLTexture* gltex = static_cast(dst); + const size_t src_offset = src_rect.top * m_config.GetStride() + src_rect.left * m_texel_size; + const size_t copy_size = src_rect.GetHeight() * m_config.GetStride(); glBindBuffer(GL_PIXEL_UNPACK_BUFFER, m_buffer_name); glPixelStorei(GL_UNPACK_ROW_LENGTH, m_config.width); @@ -533,12 +451,12 @@ 
void OGLStagingTexture::CopyToTexture(const MathUtil::Rectangle& src_rect, } // Copy from the staging buffer to the texture object. - glActiveTexture(GL_TEXTURE9); - glBindTexture(GL_TEXTURE_2D_ARRAY, static_cast(dst)->GetRawTexIdentifier()); - glTexSubImage3D(GL_TEXTURE_2D_ARRAY, 0, dst_rect.left, dst_rect.top, dst_layer, - dst_rect.GetWidth(), dst_rect.GetHeight(), 1, - GetGLFormatForTextureFormat(m_config.format), - GetGLTypeForTextureFormat(m_config.format), reinterpret_cast(src_offset)); + const GLenum target = gltex->GetGLTarget(); + glActiveTexture(GL_MUTABLE_TEXTURE_INDEX); + glBindTexture(target, gltex->GetGLTextureId()); + glTexSubImage3D(target, 0, dst_rect.left, dst_rect.top, dst_layer, dst_rect.GetWidth(), + dst_rect.GetHeight(), 1, GetGLFormatForTextureFormat(dst->GetFormat()), + GetGLTypeForTextureFormat(dst->GetFormat()), reinterpret_cast(src_offset)); glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0); @@ -602,10 +520,13 @@ void OGLStagingTexture::Unmap() m_map_pointer = nullptr; } -OGLFramebuffer::OGLFramebuffer(AbstractTextureFormat color_format, +OGLFramebuffer::OGLFramebuffer(AbstractTexture* color_attachment, AbstractTexture* depth_attachment, + AbstractTextureFormat color_format, AbstractTextureFormat depth_format, u32 width, u32 height, u32 layers, u32 samples, GLuint fbo) - : AbstractFramebuffer(color_format, depth_format, width, height, layers, samples), m_fbo(fbo) + : AbstractFramebuffer(color_attachment, depth_attachment, color_format, depth_format, width, + height, layers, samples), + m_fbo(fbo) { } @@ -614,8 +535,8 @@ OGLFramebuffer::~OGLFramebuffer() glDeleteFramebuffers(1, &m_fbo); } -std::unique_ptr OGLFramebuffer::Create(const OGLTexture* color_attachment, - const OGLTexture* depth_attachment) +std::unique_ptr OGLFramebuffer::Create(OGLTexture* color_attachment, + OGLTexture* depth_attachment) { if (!ValidateConfig(color_attachment, depth_attachment)) return nullptr; @@ -638,13 +559,13 @@ std::unique_ptr OGLFramebuffer::Create(const 
OGLTexture* color_a { if (color_attachment->GetConfig().layers > 1) { - glFramebufferTexture(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, - color_attachment->GetRawTexIdentifier(), 0); + glFramebufferTexture(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, color_attachment->GetGLTextureId(), + 0); } else { glFramebufferTextureLayer(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, - color_attachment->GetRawTexIdentifier(), 0, 0); + color_attachment->GetGLTextureId(), 0, 0); } } @@ -655,19 +576,26 @@ std::unique_ptr OGLFramebuffer::Create(const OGLTexture* color_a GL_DEPTH_ATTACHMENT; if (depth_attachment->GetConfig().layers > 1) { - glFramebufferTexture(GL_FRAMEBUFFER, attachment, depth_attachment->GetRawTexIdentifier(), 0); + glFramebufferTexture(GL_FRAMEBUFFER, attachment, depth_attachment->GetGLTextureId(), 0); } else { - glFramebufferTextureLayer(GL_FRAMEBUFFER, attachment, depth_attachment->GetRawTexIdentifier(), - 0, 0); + glFramebufferTextureLayer(GL_FRAMEBUFFER, attachment, depth_attachment->GetGLTextureId(), 0, + 0); } } DEBUG_ASSERT(glCheckFramebufferStatus(GL_FRAMEBUFFER) == GL_FRAMEBUFFER_COMPLETE); - FramebufferManager::SetFramebuffer(0); - return std::make_unique(color_format, depth_format, width, height, layers, - samples, fbo); + Renderer::GetInstance()->RestoreFramebufferBinding(); + + return std::make_unique(color_attachment, depth_attachment, color_format, + depth_format, width, height, layers, samples, fbo); +} + +void OGLFramebuffer::UpdateDimensions(u32 width, u32 height) +{ + m_width = width; + m_height = height; } } // namespace OGL diff --git a/Source/Core/VideoBackends/OGL/OGLTexture.h b/Source/Core/VideoBackends/OGL/OGLTexture.h index 60c5c932b3..bd6a6918e2 100644 --- a/Source/Core/VideoBackends/OGL/OGLTexture.h +++ b/Source/Core/VideoBackends/OGL/OGLTexture.h @@ -25,16 +25,17 @@ public: const MathUtil::Rectangle& src_rect, u32 src_layer, u32 src_level, const MathUtil::Rectangle& dst_rect, u32 dst_layer, u32 dst_level) override; - void ScaleRectangleFromTexture(const 
AbstractTexture* source, - const MathUtil::Rectangle& srcrect, - const MathUtil::Rectangle& dstrect) override; void ResolveFromTexture(const AbstractTexture* src, const MathUtil::Rectangle& rect, u32 layer, u32 level) override; void Load(u32 level, u32 width, u32 height, u32 row_length, const u8* buffer, size_t buffer_size) override; - GLuint GetRawTexIdentifier() const; - GLuint GetFramebuffer() const; + GLuint GetGLTextureId() const { return m_texId; } + GLenum GetGLTarget() const + { + return IsMultisampled() ? GL_TEXTURE_2D_MULTISAMPLE_ARRAY : GL_TEXTURE_2D_ARRAY; + } + GLenum GetGLFormatForImageTexture() const; private: void BlitFramebuffer(OGLTexture* srcentry, const MathUtil::Rectangle& src_rect, @@ -42,7 +43,6 @@ private: u32 dst_layer, u32 dst_level); GLuint m_texId; - GLuint m_framebuffer = 0; }; class OGLStagingTexture final : public AbstractStagingTexture @@ -79,13 +79,18 @@ private: class OGLFramebuffer final : public AbstractFramebuffer { public: - OGLFramebuffer(AbstractTextureFormat color_format, AbstractTextureFormat depth_format, u32 width, + OGLFramebuffer(AbstractTexture* color_attachment, AbstractTexture* depth_attachment, + AbstractTextureFormat color_format, AbstractTextureFormat depth_format, u32 width, u32 height, u32 layers, u32 samples, GLuint fbo); ~OGLFramebuffer() override; + static std::unique_ptr Create(OGLTexture* color_attachment, + OGLTexture* depth_attachment); + GLuint GetFBO() const { return m_fbo; } - static std::unique_ptr Create(const OGLTexture* color_attachment, - const OGLTexture* depth_attachment); + + // Used for updating the dimensions of the system/window framebuffer. 
+ void UpdateDimensions(u32 width, u32 height); protected: GLuint m_fbo; diff --git a/Source/Core/VideoBackends/OGL/PostProcessing.cpp b/Source/Core/VideoBackends/OGL/PostProcessing.cpp deleted file mode 100644 index 57d8f7c8f8..0000000000 --- a/Source/Core/VideoBackends/OGL/PostProcessing.cpp +++ /dev/null @@ -1,273 +0,0 @@ -// Copyright 2009 Dolphin Emulator Project -// Licensed under GPLv2+ -// Refer to the license.txt file included. - -#include "VideoBackends/OGL/PostProcessing.h" - -#include "Common/CommonTypes.h" -#include "Common/Logging/Log.h" -#include "Common/StringUtil.h" - -#include "Core/Config/GraphicsSettings.h" - -#include "VideoBackends/OGL/FramebufferManager.h" -#include "VideoBackends/OGL/OGLTexture.h" -#include "VideoBackends/OGL/ProgramShaderCache.h" -#include "VideoBackends/OGL/SamplerCache.h" - -#include "VideoCommon/VideoCommon.h" -#include "VideoCommon/VideoConfig.h" - -namespace OGL -{ -static const char s_vertex_shader[] = "out vec2 uv0;\n" - "uniform vec4 src_rect;\n" - "void main(void) {\n" - " vec2 rawpos = vec2(gl_VertexID&1, gl_VertexID&2);\n" - " gl_Position = vec4(rawpos*2.0-1.0, 0.0, 1.0);\n" - " uv0 = vec2(mix(src_rect.xy, src_rect.zw, rawpos));\n" - "}\n"; - -OpenGLPostProcessing::OpenGLPostProcessing() : m_initialized(false) -{ - CreateHeader(); -} - -OpenGLPostProcessing::~OpenGLPostProcessing() -{ - m_shader.Destroy(); -} - -void OpenGLPostProcessing::BlitFromTexture(TargetRectangle src, TargetRectangle dst, - int src_texture, int src_width, int src_height, - int layer) -{ - ApplyShader(); - - glViewport(dst.left, dst.bottom, dst.GetWidth(), dst.GetHeight()); - - ProgramShaderCache::BindVertexFormat(nullptr); - - m_shader.Bind(); - - glUniform4f(m_uniform_resolution, (float)src_width, (float)src_height, 1.0f / (float)src_width, - 1.0f / (float)src_height); - glUniform4f(m_uniform_src_rect, src.left / (float)src_width, src.top / (float)src_height, - src.right / (float)src_width, src.bottom / (float)src_height); - 
glUniform1ui(m_uniform_time, (GLuint)m_timer.GetTimeElapsed()); - glUniform1i(m_uniform_layer, layer); - - if (m_config.IsDirty()) - { - for (auto& it : m_config.GetOptions()) - { - if (it.second.m_dirty) - { - switch (it.second.m_type) - { - case PostProcessingShaderConfiguration::ConfigurationOption::OptionType::OPTION_BOOL: - glUniform1i(m_uniform_bindings[it.first], it.second.m_bool_value); - break; - case PostProcessingShaderConfiguration::ConfigurationOption::OptionType::OPTION_INTEGER: - switch (it.second.m_integer_values.size()) - { - case 1: - glUniform1i(m_uniform_bindings[it.first], it.second.m_integer_values[0]); - break; - case 2: - glUniform2i(m_uniform_bindings[it.first], it.second.m_integer_values[0], - it.second.m_integer_values[1]); - break; - case 3: - glUniform3i(m_uniform_bindings[it.first], it.second.m_integer_values[0], - it.second.m_integer_values[1], it.second.m_integer_values[2]); - break; - case 4: - glUniform4i(m_uniform_bindings[it.first], it.second.m_integer_values[0], - it.second.m_integer_values[1], it.second.m_integer_values[2], - it.second.m_integer_values[3]); - break; - } - break; - case PostProcessingShaderConfiguration::ConfigurationOption::OptionType::OPTION_FLOAT: - switch (it.second.m_float_values.size()) - { - case 1: - glUniform1f(m_uniform_bindings[it.first], it.second.m_float_values[0]); - break; - case 2: - glUniform2f(m_uniform_bindings[it.first], it.second.m_float_values[0], - it.second.m_float_values[1]); - break; - case 3: - glUniform3f(m_uniform_bindings[it.first], it.second.m_float_values[0], - it.second.m_float_values[1], it.second.m_float_values[2]); - break; - case 4: - glUniform4f(m_uniform_bindings[it.first], it.second.m_float_values[0], - it.second.m_float_values[1], it.second.m_float_values[2], - it.second.m_float_values[3]); - break; - } - break; - } - it.second.m_dirty = false; - } - } - m_config.SetDirty(false); - } - - glActiveTexture(GL_TEXTURE9); - glBindTexture(GL_TEXTURE_2D_ARRAY, src_texture); - 
g_sampler_cache->BindLinearSampler(9); - glDrawArrays(GL_TRIANGLE_STRIP, 0, 4); -} - -void OpenGLPostProcessing::ApplyShader() -{ - // shader didn't changed - if (m_initialized && m_config.GetShader() == g_ActiveConfig.sPostProcessingShader) - return; - - m_shader.Destroy(); - m_uniform_bindings.clear(); - - // load shader code - std::string main_code = m_config.LoadShader(); - std::string options_code = LoadShaderOptions(); - std::string code = m_glsl_header + options_code + main_code; - - // and compile it - if (!ProgramShaderCache::CompileShader(m_shader, s_vertex_shader, code)) - { - ERROR_LOG(VIDEO, "Failed to compile post-processing shader %s", m_config.GetShader().c_str()); - Config::SetCurrent(Config::GFX_ENHANCE_POST_SHADER, ""); - code = m_config.LoadShader(); - ProgramShaderCache::CompileShader(m_shader, s_vertex_shader, code); - } - - // read uniform locations - m_uniform_resolution = glGetUniformLocation(m_shader.glprogid, "resolution"); - m_uniform_time = glGetUniformLocation(m_shader.glprogid, "time"); - m_uniform_src_rect = glGetUniformLocation(m_shader.glprogid, "src_rect"); - m_uniform_layer = glGetUniformLocation(m_shader.glprogid, "layer"); - - for (const auto& it : m_config.GetOptions()) - { - std::string glsl_name = "options." 
+ it.first; - m_uniform_bindings[it.first] = glGetUniformLocation(m_shader.glprogid, glsl_name.c_str()); - } - m_initialized = true; -} - -void OpenGLPostProcessing::CreateHeader() -{ - m_glsl_header = - // Required variables - // Shouldn't be accessed directly by the PP shader - // Texture sampler - "SAMPLER_BINDING(8) uniform sampler2D samp8;\n" - "SAMPLER_BINDING(9) uniform sampler2DArray samp9;\n" - - // Output variable - "out float4 ocol0;\n" - // Input coordinates - "in float2 uv0;\n" - // Resolution - "uniform float4 resolution;\n" - // Time - "uniform uint time;\n" - // Layer - "uniform int layer;\n" - - // Interfacing functions - "float4 Sample()\n" - "{\n" - "\treturn texture(samp9, float3(uv0, layer));\n" - "}\n" - - "float4 SampleLocation(float2 location)\n" - "{\n" - "\treturn texture(samp9, float3(location, layer));\n" - "}\n" - - "float4 SampleLayer(int layer)\n" - "{\n" - "\treturn texture(samp9, float3(uv0, layer));\n" - "}\n" - - "#define SampleOffset(offset) textureOffset(samp9, float3(uv0, layer), offset)\n" - - "float2 GetResolution()\n" - "{\n" - "\treturn resolution.xy;\n" - "}\n" - - "float2 GetInvResolution()\n" - "{\n" - "\treturn resolution.zw;\n" - "}\n" - - "float2 GetCoordinates()\n" - "{\n" - "\treturn uv0;\n" - "}\n" - - "uint GetTime()\n" - "{\n" - "\treturn time;\n" - "}\n" - - "void SetOutput(float4 color)\n" - "{\n" - "\tocol0 = color;\n" - "}\n" - - "#define GetOption(x) (options.x)\n" - "#define OptionEnabled(x) (options.x != 0)\n"; -} - -std::string OpenGLPostProcessing::LoadShaderOptions() -{ - m_uniform_bindings.clear(); - if (m_config.GetOptions().empty()) - return ""; - - std::string glsl_options = "struct Options\n{\n"; - - for (const auto& it : m_config.GetOptions()) - { - if (it.second.m_type == - PostProcessingShaderConfiguration::ConfigurationOption::OptionType::OPTION_BOOL) - { - glsl_options += StringFromFormat("int %s;\n", it.first.c_str()); - } - else if (it.second.m_type == - 
PostProcessingShaderConfiguration::ConfigurationOption::OptionType::OPTION_INTEGER) - { - u32 count = static_cast(it.second.m_integer_values.size()); - if (count == 1) - glsl_options += StringFromFormat("int %s;\n", it.first.c_str()); - else - glsl_options += StringFromFormat("int%d %s;\n", count, it.first.c_str()); - } - else if (it.second.m_type == - PostProcessingShaderConfiguration::ConfigurationOption::OptionType::OPTION_FLOAT) - { - u32 count = static_cast(it.second.m_float_values.size()); - if (count == 1) - glsl_options += StringFromFormat("float %s;\n", it.first.c_str()); - else - glsl_options += StringFromFormat("float%d %s;\n", count, it.first.c_str()); - } - - m_uniform_bindings[it.first] = 0; - } - - glsl_options += "};\n"; - glsl_options += "uniform Options options;\n"; - - return glsl_options; -} - -} // namespace OGL diff --git a/Source/Core/VideoBackends/OGL/PostProcessing.h b/Source/Core/VideoBackends/OGL/PostProcessing.h deleted file mode 100644 index 1c8c8ecf52..0000000000 --- a/Source/Core/VideoBackends/OGL/PostProcessing.h +++ /dev/null @@ -1,44 +0,0 @@ -// Copyright 2008 Dolphin Emulator Project -// Licensed under GPLv2+ -// Refer to the license.txt file included. 
- -#pragma once - -#include -#include - -#include "Common/GL/GLUtil.h" - -#include "VideoBackends/OGL/ProgramShaderCache.h" - -#include "VideoCommon/PostProcessing.h" -#include "VideoCommon/VideoCommon.h" - -namespace OGL -{ -class OpenGLPostProcessing : public PostProcessingShaderImplementation -{ -public: - OpenGLPostProcessing(); - ~OpenGLPostProcessing(); - - void BlitFromTexture(TargetRectangle src, TargetRectangle dst, int src_texture, int src_width, - int src_height, int layer); - void ApplyShader(); - -private: - bool m_initialized; - SHADER m_shader; - GLuint m_uniform_resolution; - GLuint m_uniform_src_rect; - GLuint m_uniform_time; - GLuint m_uniform_layer; - std::string m_glsl_header; - - std::unordered_map m_uniform_bindings; - - void CreateHeader(); - std::string LoadShaderOptions(); -}; - -} // namespace diff --git a/Source/Core/VideoBackends/OGL/ProgramShaderCache.cpp b/Source/Core/VideoBackends/OGL/ProgramShaderCache.cpp index c8d54acd5a..e06491b986 100644 --- a/Source/Core/VideoBackends/OGL/ProgramShaderCache.cpp +++ b/Source/Core/VideoBackends/OGL/ProgramShaderCache.cpp @@ -4,6 +4,7 @@ #include "VideoBackends/OGL/ProgramShaderCache.h" +#include #include #include #include @@ -27,7 +28,6 @@ #include "VideoBackends/OGL/VertexManager.h" #include "VideoCommon/AsyncShaderCompiler.h" -#include "VideoCommon/Debugger.h" #include "VideoCommon/DriverDetails.h" #include "VideoCommon/GeometryShaderManager.h" #include "VideoCommon/ImageWrite.h" @@ -54,6 +54,7 @@ static GLuint CurrentProgram = 0; ProgramShaderCache::PipelineProgramMap ProgramShaderCache::s_pipeline_programs; std::mutex ProgramShaderCache::s_pipeline_program_lock; static std::string s_glsl_header = ""; +static std::atomic s_shader_counter{0}; static thread_local bool s_is_shared_context = false; static std::string GetGLSLVersionString() @@ -109,13 +110,13 @@ void SHADER::SetProgramVariables() glUniformBlockBinding(glprogid, UBERBlock_id, 4); // Bind Texture Samplers - for (int a = 0; a < 10; 
++a) + for (int a = 0; a < 8; ++a) { - std::string name = StringFromFormat(a < 8 ? "samp[%d]" : "samp%d", a); - // Still need to get sampler locations since we aren't binding them statically in the shaders - int loc = glGetUniformLocation(glprogid, name.c_str()); - if (loc != -1) + int loc = glGetUniformLocation(glprogid, StringFromFormat("samp[%d]", a).c_str()); + if (loc < 0) + loc = glGetUniformLocation(glprogid, StringFromFormat("samp%d", a).c_str()); + if (loc >= 0) glUniform1i(loc, a); } @@ -191,21 +192,22 @@ bool PipelineProgramKey::operator!=(const PipelineProgramKey& rhs) const bool PipelineProgramKey::operator==(const PipelineProgramKey& rhs) const { - return std::tie(vertex_shader, geometry_shader, pixel_shader) == - std::tie(rhs.vertex_shader, rhs.geometry_shader, rhs.pixel_shader); + return std::tie(vertex_shader_id, geometry_shader_id, pixel_shader_id) == + std::tie(rhs.vertex_shader_id, rhs.geometry_shader_id, rhs.pixel_shader_id); } bool PipelineProgramKey::operator<(const PipelineProgramKey& rhs) const { - return std::tie(vertex_shader, geometry_shader, pixel_shader) < - std::tie(rhs.vertex_shader, rhs.geometry_shader, rhs.pixel_shader); + return std::tie(vertex_shader_id, geometry_shader_id, pixel_shader_id) < + std::tie(rhs.vertex_shader_id, rhs.geometry_shader_id, rhs.pixel_shader_id); } std::size_t PipelineProgramKeyHash::operator()(const PipelineProgramKey& key) const { // We would really want std::hash_combine for this.. 
- std::hash hasher; - return hasher(key.vertex_shader) + hasher(key.geometry_shader) + hasher(key.pixel_shader); + std::hash hasher; + return hasher(key.vertex_shader_id) + hasher(key.geometry_shader_id) + + hasher(key.pixel_shader_id); } StreamBuffer* ProgramShaderCache::GetUniformBuffer() @@ -218,13 +220,6 @@ u32 ProgramShaderCache::GetUniformBufferAlignment() return s_ubo_align; } -void ProgramShaderCache::InvalidateConstants() -{ - VertexShaderManager::dirty = true; - GeometryShaderManager::dirty = true; - PixelShaderManager::dirty = true; -} - void ProgramShaderCache::UploadConstants() { if (PixelShaderManager::dirty || VertexShaderManager::dirty || GeometryShaderManager::dirty) @@ -574,7 +569,9 @@ const PipelineProgram* ProgramShaderCache::GetPipelineProgram(const GLVertexForm const OGLShader* geometry_shader, const OGLShader* pixel_shader) { - PipelineProgramKey key = {vertex_shader, geometry_shader, pixel_shader}; + PipelineProgramKey key = {vertex_shader ? vertex_shader->GetID() : 0, + geometry_shader ? geometry_shader->GetID() : 0, + pixel_shader ? 
pixel_shader->GetID() : 0}; { std::lock_guard guard(s_pipeline_program_lock); auto iter = s_pipeline_programs.find(key); @@ -750,6 +747,7 @@ void ProgramShaderCache::CreateHeader() "%s\n" // Silly differences + "#define API_OPENGL 1\n" "#define float2 vec2\n" "#define float3 vec3\n" "#define float4 vec4\n" @@ -759,8 +757,6 @@ void ProgramShaderCache::CreateHeader() "#define int2 ivec2\n" "#define int3 ivec3\n" "#define int4 ivec4\n" - - // hlsl to glsl function translation "#define frac fract\n" "#define lerp mix\n" @@ -782,12 +778,17 @@ void ProgramShaderCache::CreateHeader() "#define FRAGMENT_OUTPUT_LOCATION_INDEXED(x, y)\n" "#define UBO_BINDING(packing, x) layout(packing, binding = x)\n" "#define SAMPLER_BINDING(x) layout(binding = x)\n" - "#define SSBO_BINDING(x) layout(binding = x)\n" : + "#define TEXEL_BUFFER_BINDING(x) layout(binding = x)\n" + "#define SSBO_BINDING(x) layout(binding = x)\n" + "#define IMAGE_BINDING(format, x) layout(format, binding = x)\n" : "#define ATTRIBUTE_LOCATION(x)\n" "#define FRAGMENT_OUTPUT_LOCATION(x)\n" "#define FRAGMENT_OUTPUT_LOCATION_INDEXED(x, y)\n" "#define UBO_BINDING(packing, x) layout(packing)\n" - "#define SAMPLER_BINDING(x)\n", + "#define SAMPLER_BINDING(x)\n" + "#define TEXEL_BUFFER_BINDING(x)\n" + "#define SSBO_BINDING(x)\n" + "#define IMAGE_BINDING(format, x) layout(format)\n", // Input/output blocks are matched by name during program linking "#define VARYING_LOCATION(x)\n", !is_glsles && g_ActiveConfig.backend_info.bSupportsFragmentStoresAndAtomics ? @@ -823,6 +824,11 @@ void ProgramShaderCache::CreateHeader() v >= GlslEs310 ? 
"precision highp image2DArray;" : ""); } +u64 ProgramShaderCache::GenerateShaderID() +{ + return s_shader_counter++; +} + bool SharedContextAsyncShaderCompiler::WorkerThreadInitMainThread(void** param) { std::unique_ptr context = diff --git a/Source/Core/VideoBackends/OGL/ProgramShaderCache.h b/Source/Core/VideoBackends/OGL/ProgramShaderCache.h index b94e733167..7fc267dc5d 100644 --- a/Source/Core/VideoBackends/OGL/ProgramShaderCache.h +++ b/Source/Core/VideoBackends/OGL/ProgramShaderCache.h @@ -44,9 +44,9 @@ struct SHADER struct PipelineProgramKey { - const OGLShader* vertex_shader; - const OGLShader* geometry_shader; - const OGLShader* pixel_shader; + u64 vertex_shader_id; + u64 geometry_shader_id; + u64 pixel_shader_id; bool operator==(const PipelineProgramKey& rhs) const; bool operator!=(const PipelineProgramKey& rhs) const; @@ -82,7 +82,6 @@ public: const std::string& gcode); static StreamBuffer* GetUniformBuffer(); static u32 GetUniformBufferAlignment(); - static void InvalidateConstants(); static void UploadConstants(); static void UploadConstants(const void* data, u32 data_size); @@ -90,6 +89,14 @@ public: static void Shutdown(); static void CreateHeader(); + // This counter increments with each shader object allocated, in order to give it a unique ID. + // Since the shaders can be destroyed after a pipeline is created, we can't use the shader pointer + // as a key for GL programs. For the same reason, we can't use the GL objects either. This ID is + // guaranteed to be unique for the emulation session, even if the memory allocator or GL driver + // re-uses pointers, therefore we won't have any collisions where the shaders attached to a + // pipeline do not match the pipeline configuration. 
+ static u64 GenerateShaderID(); + static const PipelineProgram* GetPipelineProgram(const GLVertexFormat* vertex_format, const OGLShader* vertex_shader, const OGLShader* geometry_shader, diff --git a/Source/Core/VideoBackends/OGL/Render.cpp b/Source/Core/VideoBackends/OGL/Render.cpp index b72941583e..baeb9f877c 100644 --- a/Source/Core/VideoBackends/OGL/Render.cpp +++ b/Source/Core/VideoBackends/OGL/Render.cpp @@ -27,22 +27,21 @@ #include "Core/Core.h" #include "VideoBackends/OGL/BoundingBox.h" -#include "VideoBackends/OGL/FramebufferManager.h" #include "VideoBackends/OGL/OGLPipeline.h" #include "VideoBackends/OGL/OGLShader.h" #include "VideoBackends/OGL/OGLTexture.h" -#include "VideoBackends/OGL/PostProcessing.h" #include "VideoBackends/OGL/ProgramShaderCache.h" #include "VideoBackends/OGL/SamplerCache.h" #include "VideoBackends/OGL/StreamBuffer.h" -#include "VideoBackends/OGL/TextureCache.h" #include "VideoBackends/OGL/VertexManager.h" #include "VideoCommon/BPFunctions.h" #include "VideoCommon/DriverDetails.h" +#include "VideoCommon/FramebufferManager.h" #include "VideoCommon/IndexGenerator.h" #include "VideoCommon/OnScreenDisplay.h" #include "VideoCommon/PixelEngine.h" +#include "VideoCommon/PostProcessing.h" #include "VideoCommon/RenderState.h" #include "VideoCommon/ShaderGenCommon.h" #include "VideoCommon/VertexShaderManager.h" @@ -54,22 +53,6 @@ namespace OGL { VideoConfig g_ogl_config; -// Declarations and definitions -// ---------------------------- - -// 1 for no MSAA. Use s_MSAASamples > 1 to check for MSAA. -static int s_MSAASamples = 1; - -// EFB cache related -static const u32 EFB_CACHE_RECT_SIZE = 64; // Cache 64x64 blocks. 
-static const u32 EFB_CACHE_WIDTH = - (EFB_WIDTH + EFB_CACHE_RECT_SIZE - 1) / EFB_CACHE_RECT_SIZE; // round up -static const u32 EFB_CACHE_HEIGHT = (EFB_HEIGHT + EFB_CACHE_RECT_SIZE - 1) / EFB_CACHE_RECT_SIZE; -static bool s_efbCacheValid[2][EFB_CACHE_WIDTH * EFB_CACHE_HEIGHT]; -static bool s_efbCacheIsCleared = false; -static std::vector - s_efbCache[2][EFB_CACHE_WIDTH * EFB_CACHE_HEIGHT]; // 2 for PeekZ and PeekColor - static void APIENTRY ErrorCallback(GLenum source, GLenum type, GLuint id, GLenum severity, GLsizei length, const char* message, const void* userParam) { @@ -356,6 +339,16 @@ Renderer::Renderer(std::unique_ptr main_gl_context, float backbuffer_ m_current_depth_state(RenderState::GetInvalidDepthState()), m_current_blend_state(RenderState::GetInvalidBlendingState()) { + // Create the window framebuffer. + if (!m_main_gl_context->IsHeadless()) + { + m_system_framebuffer = std::make_unique( + nullptr, nullptr, AbstractTextureFormat::RGBA8, AbstractTextureFormat::Undefined, + std::max(m_main_gl_context->GetBackBufferWidth(), 1u), + std::max(m_main_gl_context->GetBackBufferHeight(), 1u), 1, 1, 0); + m_current_framebuffer = m_system_framebuffer.get(); + } + bool bSuccess = true; g_ogl_config.gl_vendor = (const char*)glGetString(GL_VENDOR); @@ -437,9 +430,9 @@ Renderer::Renderer(std::unique_ptr main_gl_context, float backbuffer_ g_Config.backend_info.bSupportsPrimitiveRestart = !DriverDetails::HasBug(DriverDetails::BUG_PRIMITIVE_RESTART) && ((GLExtensions::Version() >= 310) || GLExtensions::Supports("GL_NV_primitive_restart")); - g_Config.backend_info.bSupportsBBox = true; g_Config.backend_info.bSupportsFragmentStoresAndAtomics = GLExtensions::Supports("GL_ARB_shader_storage_buffer_object"); + g_Config.backend_info.bSupportsBBox = g_Config.backend_info.bSupportsFragmentStoresAndAtomics; g_Config.backend_info.bSupportsGSInstancing = GLExtensions::Supports("GL_ARB_gpu_shader5"); g_Config.backend_info.bSupportsSSAA = 
GLExtensions::Supports("GL_ARB_gpu_shader5") && GLExtensions::Supports("GL_ARB_sample_shading"); @@ -692,9 +685,13 @@ Renderer::Renderer(std::unique_ptr main_gl_context, float backbuffer_ glDebugMessageCallbackARB(ErrorCallback, nullptr); } if (LogManager::GetInstance()->IsEnabled(LogTypes::HOST_GPU, LogTypes::LERROR)) + { glEnable(GL_DEBUG_OUTPUT); + } else + { glDisable(GL_DEBUG_OUTPUT); + } } int samples; @@ -753,23 +750,9 @@ Renderer::Renderer(std::unique_ptr main_gl_context, float backbuffer_ if (!DriverDetails::HasBug(DriverDetails::BUG_BROKEN_VSYNC)) m_main_gl_context->SwapInterval(g_ActiveConfig.bVSyncActive); - // Because of the fixed framebuffer size we need to disable the resolution - // options while running - - // The stencil is used for bounding box emulation when SSBOs are not available - glDisable(GL_STENCIL_TEST); - glStencilFunc(GL_ALWAYS, 1, 0xFF); - glStencilOp(GL_KEEP, GL_KEEP, GL_REPLACE); - - // Reset The Current Viewport - glViewport(0, 0, GetTargetWidth(), GetTargetHeight()); if (g_ActiveConfig.backend_info.bSupportsClipControl) glClipControl(GL_LOWER_LEFT, GL_ZERO_TO_ONE); - glClearColor(0.0f, 0.0f, 0.0f, 1.0f); - glClearDepthf(1.0f); - glEnable(GL_DEPTH_TEST); - glDepthFunc(GL_LEQUAL); if (g_ActiveConfig.backend_info.bSupportsDepthClamp) { glEnable(GL_CLIP_DISTANCE0); @@ -779,18 +762,14 @@ Renderer::Renderer(std::unique_ptr main_gl_context, float backbuffer_ glPixelStorei(GL_UNPACK_ALIGNMENT, 4); // 4-byte pixel alignment - glEnable(GL_SCISSOR_TEST); - glScissor(0, 0, GetTargetWidth(), GetTargetHeight()); - glBlendFunc(GL_ONE, GL_ONE); - glBlendColor(0, 0, 0, 0.5f); - glClearDepthf(1.0f); + glGenFramebuffers(1, &m_shared_read_framebuffer); + glGenFramebuffers(1, &m_shared_draw_framebuffer); if (g_ActiveConfig.backend_info.bSupportsPrimitiveRestart) GLUtil::EnablePrimitiveRestart(m_main_gl_context.get()); IndexGenerator::Init(); UpdateActiveConfig(); - ClearEFBCache(); } Renderer::~Renderer() = default; @@ -805,24 +784,15 @@ bool 
Renderer::Initialize() if (!::Renderer::Initialize()) return false; - // Initialize the FramebufferManager - g_framebuffer_manager = std::make_unique( - m_target_width, m_target_height, s_MSAASamples, BoundingBox::NeedsStencilBuffer()); - m_current_framebuffer_width = m_target_width; - m_current_framebuffer_height = m_target_height; - - m_post_processor = std::make_unique(); return true; } void Renderer::Shutdown() { ::Renderer::Shutdown(); - g_framebuffer_manager.reset(); - UpdateActiveConfig(); - - m_post_processor.reset(); + glDeleteFramebuffers(1, &m_shared_draw_framebuffer); + glDeleteFramebuffers(1, &m_shared_read_framebuffer); } std::unique_ptr Renderer::CreateTexture(const TextureConfig& config) @@ -836,12 +806,11 @@ std::unique_ptr Renderer::CreateStagingTexture(StagingTe return OGLStagingTexture::Create(type, config); } -std::unique_ptr -Renderer::CreateFramebuffer(const AbstractTexture* color_attachment, - const AbstractTexture* depth_attachment) +std::unique_ptr Renderer::CreateFramebuffer(AbstractTexture* color_attachment, + AbstractTexture* depth_attachment) { - return OGLFramebuffer::Create(static_cast(color_attachment), - static_cast(depth_attachment)); + return OGLFramebuffer::Create(static_cast(color_attachment), + static_cast(depth_attachment)); } std::unique_ptr Renderer::CreateShaderFromSource(ShaderStage stage, @@ -861,231 +830,9 @@ std::unique_ptr Renderer::CreatePipeline(const AbstractPipelin return OGLPipeline::Create(config); } -TargetRectangle Renderer::ConvertEFBRectangle(const EFBRectangle& rc) -{ - TargetRectangle result; - result.left = EFBToScaledX(rc.left); - result.top = EFBToScaledY(EFB_HEIGHT - rc.top); - result.right = EFBToScaledX(rc.right); - result.bottom = EFBToScaledY(EFB_HEIGHT - rc.bottom); - return result; -} - void Renderer::SetScissorRect(const MathUtil::Rectangle& rc) { - glScissor(rc.left, rc.bottom, rc.GetWidth(), rc.GetHeight()); -} - -void ClearEFBCache() -{ - if (!s_efbCacheIsCleared) - { - s_efbCacheIsCleared = 
true; - memset(s_efbCacheValid, 0, sizeof(s_efbCacheValid)); - } -} - -void Renderer::UpdateEFBCache(EFBAccessType type, u32 cacheRectIdx, const EFBRectangle& efbPixelRc, - const TargetRectangle& targetPixelRc, const void* data) -{ - const u32 cacheType = (type == EFBAccessType::PeekZ ? 0 : 1); - - if (s_efbCache[cacheType][cacheRectIdx].empty()) - s_efbCache[cacheType][cacheRectIdx].resize(EFB_CACHE_RECT_SIZE * EFB_CACHE_RECT_SIZE); - - u32 targetPixelRcWidth = targetPixelRc.right - targetPixelRc.left; - u32 efbPixelRcHeight = efbPixelRc.bottom - efbPixelRc.top; - u32 efbPixelRcWidth = efbPixelRc.right - efbPixelRc.left; - - for (u32 yCache = 0; yCache < efbPixelRcHeight; ++yCache) - { - u32 yEFB = efbPixelRc.top + yCache; - u32 yPixel = (EFBToScaledY(EFB_HEIGHT - yEFB) + EFBToScaledY(EFB_HEIGHT - yEFB - 1)) / 2; - u32 yData = yPixel - targetPixelRc.bottom; - - for (u32 xCache = 0; xCache < efbPixelRcWidth; ++xCache) - { - u32 xEFB = efbPixelRc.left + xCache; - u32 xPixel = (EFBToScaledX(xEFB) + EFBToScaledX(xEFB + 1)) / 2; - u32 xData = xPixel - targetPixelRc.left; - u32 value; - if (type == EFBAccessType::PeekZ) - { - float* ptr = (float*)data; - value = MathUtil::Clamp((u32)(ptr[yData * targetPixelRcWidth + xData] * 16777216.0f), - 0, 0xFFFFFF); - } - else - { - u32* ptr = (u32*)data; - value = ptr[yData * targetPixelRcWidth + xData]; - } - s_efbCache[cacheType][cacheRectIdx][yCache * EFB_CACHE_RECT_SIZE + xCache] = value; - } - } - - s_efbCacheValid[cacheType][cacheRectIdx] = true; - s_efbCacheIsCleared = false; -} - -// This function allows the CPU to directly access the EFB. -// There are EFB peeks (which will read the color or depth of a pixel) -// and EFB pokes (which will change the color or depth of a pixel). 
-// -// The behavior of EFB peeks can only be modified by: -// - GX_PokeAlphaRead -// The behavior of EFB pokes can be modified by: -// - GX_PokeAlphaMode (TODO) -// - GX_PokeAlphaUpdate (TODO) -// - GX_PokeBlendMode (TODO) -// - GX_PokeColorUpdate (TODO) -// - GX_PokeDither (TODO) -// - GX_PokeDstAlpha (TODO) -// - GX_PokeZMode (TODO) -u32 Renderer::AccessEFB(EFBAccessType type, u32 x, u32 y, u32 poke_data) -{ - u32 cacheRectIdx = (y / EFB_CACHE_RECT_SIZE) * EFB_CACHE_WIDTH + (x / EFB_CACHE_RECT_SIZE); - - EFBRectangle efbPixelRc; - - if (type == EFBAccessType::PeekColor || type == EFBAccessType::PeekZ) - { - // Get the rectangular target region containing the EFB pixel - efbPixelRc.left = (x / EFB_CACHE_RECT_SIZE) * EFB_CACHE_RECT_SIZE; - efbPixelRc.top = (y / EFB_CACHE_RECT_SIZE) * EFB_CACHE_RECT_SIZE; - efbPixelRc.right = std::min(efbPixelRc.left + EFB_CACHE_RECT_SIZE, (u32)EFB_WIDTH); - efbPixelRc.bottom = std::min(efbPixelRc.top + EFB_CACHE_RECT_SIZE, (u32)EFB_HEIGHT); - } - else - { - efbPixelRc.left = x; - efbPixelRc.top = y; - efbPixelRc.right = x + 1; - efbPixelRc.bottom = y + 1; - } - - TargetRectangle targetPixelRc = ConvertEFBRectangle(efbPixelRc); - u32 targetPixelRcWidth = targetPixelRc.right - targetPixelRc.left; - u32 targetPixelRcHeight = targetPixelRc.top - targetPixelRc.bottom; - - // TODO (FIX) : currently, AA path is broken/offset and doesn't return the correct pixel - switch (type) - { - case EFBAccessType::PeekZ: - { - if (!s_efbCacheValid[0][cacheRectIdx]) - { - if (s_MSAASamples > 1) - { - ResetAPIState(); - - // Resolve our rectangle. 
- FramebufferManager::GetEFBDepthTexture(efbPixelRc); - glBindFramebuffer(GL_READ_FRAMEBUFFER, FramebufferManager::GetResolvedFramebuffer()); - } - - std::unique_ptr depthMap(new float[targetPixelRcWidth * targetPixelRcHeight]); - - glReadPixels(targetPixelRc.left, targetPixelRc.bottom, targetPixelRcWidth, - targetPixelRcHeight, GL_DEPTH_COMPONENT, GL_FLOAT, depthMap.get()); - - UpdateEFBCache(type, cacheRectIdx, efbPixelRc, targetPixelRc, depthMap.get()); - - if (s_MSAASamples > 1) - RestoreAPIState(); - } - - u32 xRect = x % EFB_CACHE_RECT_SIZE; - u32 yRect = y % EFB_CACHE_RECT_SIZE; - u32 z = s_efbCache[0][cacheRectIdx][yRect * EFB_CACHE_RECT_SIZE + xRect]; - - // if Z is in 16 bit format you must return a 16 bit integer - if (bpmem.zcontrol.pixel_format == PEControl::RGB565_Z16) - z = z >> 8; - - return z; - } - - case EFBAccessType::PeekColor: // GXPeekARGB - { - // Although it may sound strange, this really is A8R8G8B8 and not RGBA or 24-bit... - - // Tested in Killer 7, the first 8bits represent the alpha value which is used to - // determine if we're aiming at an enemy (0x80 / 0x88) or not (0x70) - // Wind Waker is also using it for the pictograph to determine the color of each pixel - if (!s_efbCacheValid[1][cacheRectIdx]) - { - if (s_MSAASamples > 1) - { - ResetAPIState(); - - // Resolve our rectangle. 
- FramebufferManager::GetEFBColorTexture(efbPixelRc); - glBindFramebuffer(GL_READ_FRAMEBUFFER, FramebufferManager::GetResolvedFramebuffer()); - } - - std::unique_ptr colorMap(new u32[targetPixelRcWidth * targetPixelRcHeight]); - - if (IsGLES()) - // XXX: Swap colours - glReadPixels(targetPixelRc.left, targetPixelRc.bottom, targetPixelRcWidth, - targetPixelRcHeight, GL_RGBA, GL_UNSIGNED_BYTE, colorMap.get()); - else - glReadPixels(targetPixelRc.left, targetPixelRc.bottom, targetPixelRcWidth, - targetPixelRcHeight, GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV, colorMap.get()); - - UpdateEFBCache(type, cacheRectIdx, efbPixelRc, targetPixelRc, colorMap.get()); - - if (s_MSAASamples > 1) - RestoreAPIState(); - } - - u32 xRect = x % EFB_CACHE_RECT_SIZE; - u32 yRect = y % EFB_CACHE_RECT_SIZE; - u32 color = s_efbCache[1][cacheRectIdx][yRect * EFB_CACHE_RECT_SIZE + xRect]; - - // check what to do with the alpha channel (GX_PokeAlphaRead) - PixelEngine::UPEAlphaReadReg alpha_read_mode = PixelEngine::GetAlphaReadMode(); - - if (bpmem.zcontrol.pixel_format == PEControl::RGBA6_Z24) - { - color = RGBA8ToRGBA6ToRGBA8(color); - } - else if (bpmem.zcontrol.pixel_format == PEControl::RGB565_Z16) - { - color = RGBA8ToRGB565ToRGBA8(color); - } - if (bpmem.zcontrol.pixel_format != PEControl::RGBA6_Z24) - { - color |= 0xFF000000; - } - if (alpha_read_mode.ReadMode == 2) - { - // GX_READ_NONE - return color; - } - else if (alpha_read_mode.ReadMode == 1) - { - // GX_READ_FF - return (color | 0xFF000000); - } - else /*if(alpha_read_mode.ReadMode == 0)*/ - { - // GX_READ_00 - return (color & 0x00FFFFFF); - } - } - - default: - break; - } - - return 0; -} - -void Renderer::PokeEFB(EFBAccessType type, const EfbPokeData* points, size_t num_points) -{ - FramebufferManager::PokeEFB(type, points, num_points); + glScissor(rc.left, rc.top, rc.GetWidth(), rc.GetHeight()); } u16 Renderer::BBoxRead(int index) @@ -1138,9 +885,6 @@ void Renderer::BBoxWrite(int index, u16 _value) void Renderer::SetViewport(float 
x, float y, float width, float height, float near_depth, float far_depth) { - // The x/y parameters here assume a upper-left origin. glViewport takes an offset from the - // lower-left of the framebuffer, so we must set y to the distance from the lower-left. - y = static_cast(m_current_framebuffer_height) - y - height; if (g_ogl_config.bSupportViewportFloat) { glViewportIndexedf(0, x, y, width, height); @@ -1156,7 +900,7 @@ void Renderer::SetViewport(float x, float y, float width, float height, float ne void Renderer::Draw(u32 base_vertex, u32 num_vertices) { - glDrawArrays(static_cast(m_graphics_pipeline)->GetGLPrimitive(), base_vertex, + glDrawArrays(static_cast(m_current_pipeline)->GetGLPrimitive(), base_vertex, num_vertices); } @@ -1164,135 +908,112 @@ void Renderer::DrawIndexed(u32 base_index, u32 num_indices, u32 base_vertex) { if (g_ogl_config.bSupportsGLBaseVertex) { - glDrawElementsBaseVertex(static_cast(m_graphics_pipeline)->GetGLPrimitive(), + glDrawElementsBaseVertex(static_cast(m_current_pipeline)->GetGLPrimitive(), num_indices, GL_UNSIGNED_SHORT, static_cast(nullptr) + base_index, base_vertex); } else { - glDrawElements(static_cast(m_graphics_pipeline)->GetGLPrimitive(), + glDrawElements(static_cast(m_current_pipeline)->GetGLPrimitive(), num_indices, GL_UNSIGNED_SHORT, static_cast(nullptr) + base_index); } } +void Renderer::DispatchComputeShader(const AbstractShader* shader, u32 groups_x, u32 groups_y, + u32 groups_z) +{ + glUseProgram(static_cast(shader)->GetGLComputeProgramID()); + glDispatchCompute(groups_x, groups_y, groups_z); + + // We messed up the program binding, so restore it. + ProgramShaderCache::InvalidateLastProgram(); + if (m_current_pipeline) + static_cast(m_current_pipeline)->GetProgram()->shader.Bind(); + + // Barrier to texture can be used for reads. 
+ if (m_bound_image_texture) + glMemoryBarrier(GL_TEXTURE_UPDATE_BARRIER_BIT); +} + void Renderer::ClearScreen(const EFBRectangle& rc, bool colorEnable, bool alphaEnable, bool zEnable, u32 color, u32 z) { - ResetAPIState(); + g_framebuffer_manager->FlushEFBPokes(); + g_framebuffer_manager->InvalidatePeekCache(); - // color - GLboolean const color_mask = colorEnable ? GL_TRUE : GL_FALSE, - alpha_mask = alphaEnable ? GL_TRUE : GL_FALSE; - glColorMask(color_mask, color_mask, color_mask, alpha_mask); - - glClearColor(float((color >> 16) & 0xFF) / 255.0f, float((color >> 8) & 0xFF) / 255.0f, - float((color >> 0) & 0xFF) / 255.0f, float((color >> 24) & 0xFF) / 255.0f); - - // depth - glDepthMask(zEnable ? GL_TRUE : GL_FALSE); - - glClearDepthf(float(z & 0xFFFFFF) / 16777216.0f); + u32 clear_mask = 0; + if (colorEnable || alphaEnable) + { + glColorMask(colorEnable, colorEnable, colorEnable, alphaEnable); + glClearColor(float((color >> 16) & 0xFF) / 255.0f, float((color >> 8) & 0xFF) / 255.0f, + float((color >> 0) & 0xFF) / 255.0f, float((color >> 24) & 0xFF) / 255.0f); + clear_mask = GL_COLOR_BUFFER_BIT; + } + if (zEnable) + { + glDepthMask(zEnable ? GL_TRUE : GL_FALSE); + glClearDepthf(float(z & 0xFFFFFF) / 16777216.0f); + clear_mask |= GL_DEPTH_BUFFER_BIT; + } // Update rect for clearing the picture - glEnable(GL_SCISSOR_TEST); - - TargetRectangle const targetRc = ConvertEFBRectangle(rc); - glScissor(targetRc.left, targetRc.bottom, targetRc.GetWidth(), targetRc.GetHeight()); - // glColorMask/glDepthMask/glScissor affect glClear (glViewport does not) - glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT); + const auto converted_target_rc = + ConvertFramebufferRectangle(ConvertEFBRectangle(rc), m_current_framebuffer); + SetScissorRect(converted_target_rc); - RestoreAPIState(); + glClear(clear_mask); - ClearEFBCache(); + // Restore color/depth mask. 
+ if (colorEnable || alphaEnable) + { + glColorMask(m_current_blend_state.colorupdate, m_current_blend_state.colorupdate, + m_current_blend_state.colorupdate, m_current_blend_state.alphaupdate); + } + if (zEnable) + glDepthMask(m_current_depth_state.updateenable); + + // Scissor rect must be restored. + BPFunctions::SetScissor(); } void Renderer::RenderXFBToScreen(const AbstractTexture* texture, const EFBRectangle& rc) { - TargetRectangle source_rc = rc; - source_rc.top = rc.GetHeight(); - source_rc.bottom = 0; + // Quad-buffered stereo is annoying on GL. + if (g_ActiveConfig.stereo_mode != StereoMode::QuadBuffer) + return ::Renderer::RenderXFBToScreen(texture, rc); - // Check if we need to render to a new surface. - TargetRectangle flipped_trc = GetTargetRectangle(); - std::swap(flipped_trc.top, flipped_trc.bottom); + const auto target_rc = GetTargetRectangle(); - // Copy the framebuffer to screen. - OpenGLPostProcessing* post_processor = static_cast(m_post_processor.get()); - if (g_ActiveConfig.stereo_mode == StereoMode::SBS || - g_ActiveConfig.stereo_mode == StereoMode::TAB) - { - TargetRectangle left_rc, right_rc; + glDrawBuffer(GL_BACK_LEFT); + m_post_processor->BlitFromTexture(target_rc, rc, texture, 0); - // Top-and-Bottom mode needs to compensate for inverted vertical screen coordinates. 
- if (g_ActiveConfig.stereo_mode == StereoMode::TAB) - std::tie(right_rc, left_rc) = ConvertStereoRectangle(flipped_trc); - else - std::tie(left_rc, right_rc) = ConvertStereoRectangle(flipped_trc); + glDrawBuffer(GL_BACK_RIGHT); + m_post_processor->BlitFromTexture(target_rc, rc, texture, 1); - post_processor->BlitFromTexture(source_rc, left_rc, - static_cast(texture)->GetRawTexIdentifier(), - texture->GetWidth(), texture->GetHeight(), 0); - post_processor->BlitFromTexture(source_rc, right_rc, - static_cast(texture)->GetRawTexIdentifier(), - texture->GetWidth(), texture->GetHeight(), 1); - } - else if (g_ActiveConfig.stereo_mode == StereoMode::QuadBuffer) - { - glDrawBuffer(GL_BACK_LEFT); - post_processor->BlitFromTexture(source_rc, flipped_trc, - static_cast(texture)->GetRawTexIdentifier(), - texture->GetWidth(), texture->GetHeight(), 0); - - glDrawBuffer(GL_BACK_RIGHT); - post_processor->BlitFromTexture(source_rc, flipped_trc, - static_cast(texture)->GetRawTexIdentifier(), - texture->GetWidth(), texture->GetHeight(), 1); - - glDrawBuffer(GL_BACK); - } - else - { - post_processor->BlitFromTexture(source_rc, flipped_trc, - static_cast(texture)->GetRawTexIdentifier(), - texture->GetWidth(), texture->GetHeight(), 0); - } + glDrawBuffer(GL_BACK); } -void Renderer::ReinterpretPixelData(unsigned int convtype) +void Renderer::SetFramebuffer(AbstractFramebuffer* framebuffer) { - if (convtype == 0 || convtype == 2) - { - FramebufferManager::ReinterpretPixelData(convtype); - } - else - { - ERROR_LOG(VIDEO, "Trying to reinterpret pixel data with unsupported conversion type %d", - convtype); - } -} + if (m_current_framebuffer == framebuffer) + return; -void Renderer::SetFramebuffer(const AbstractFramebuffer* framebuffer) -{ - glBindFramebuffer(GL_FRAMEBUFFER, static_cast(framebuffer)->GetFBO()); + glBindFramebuffer(GL_FRAMEBUFFER, static_cast(framebuffer)->GetFBO()); m_current_framebuffer = framebuffer; - m_current_framebuffer_width = framebuffer->GetWidth(); - 
m_current_framebuffer_height = framebuffer->GetHeight(); } -void Renderer::SetAndDiscardFramebuffer(const AbstractFramebuffer* framebuffer) +void Renderer::SetAndDiscardFramebuffer(AbstractFramebuffer* framebuffer) { // EXT_discard_framebuffer could be used here to save bandwidth on tilers. SetFramebuffer(framebuffer); } -void Renderer::SetAndClearFramebuffer(const AbstractFramebuffer* framebuffer, +void Renderer::SetAndClearFramebuffer(AbstractFramebuffer* framebuffer, const ClearColor& color_value, float depth_value) { SetFramebuffer(framebuffer); - // NOTE: This disturbs the current scissor/mask setting. - // This won't be an issue when we implement proper state tracking. glDisable(GL_SCISSOR_TEST); GLbitfield clear_mask = 0; if (framebuffer->HasColorBuffer()) @@ -1304,15 +1025,162 @@ void Renderer::SetAndClearFramebuffer(const AbstractFramebuffer* framebuffer, if (framebuffer->HasDepthBuffer()) { glDepthMask(GL_TRUE); - glClearDepth(depth_value); + glClearDepthf(depth_value); clear_mask |= GL_DEPTH_BUFFER_BIT; } glClear(clear_mask); + glEnable(GL_SCISSOR_TEST); + + // Restore color/depth mask. 
+ if (framebuffer->HasColorBuffer()) + { + glColorMask(m_current_blend_state.colorupdate, m_current_blend_state.colorupdate, + m_current_blend_state.colorupdate, m_current_blend_state.alphaupdate); + } + if (framebuffer->HasDepthBuffer()) + glDepthMask(m_current_depth_state.updateenable); } -void Renderer::ApplyBlendingState(const BlendingState state, bool force) +void Renderer::BindBackbuffer(const ClearColor& clear_color) { - if (!force && m_current_blend_state == state) + CheckForSurfaceChange(); + CheckForSurfaceResize(); + SetAndClearFramebuffer(m_system_framebuffer.get(), clear_color); +} + +void Renderer::PresentBackbuffer() +{ + if (g_ogl_config.bSupportsDebug) + { + if (LogManager::GetInstance()->IsEnabled(LogTypes::HOST_GPU, LogTypes::LERROR)) + glEnable(GL_DEBUG_OUTPUT); + else + glDisable(GL_DEBUG_OUTPUT); + } + + // Swap the back and front buffers, presenting the image. + m_main_gl_context->Swap(); +} + +void Renderer::OnConfigChanged(u32 bits) +{ + if (bits & CONFIG_CHANGE_BIT_VSYNC && !DriverDetails::HasBug(DriverDetails::BUG_BROKEN_VSYNC)) + m_main_gl_context->SwapInterval(g_ActiveConfig.bVSyncActive); + + if (bits & CONFIG_CHANGE_BIT_ANISOTROPY) + g_sampler_cache->Clear(); +} + +void Renderer::Flush() +{ + // ensure all commands are sent to the GPU. + // Otherwise the driver could batch several frames together. + glFlush(); +} + +void Renderer::WaitForGPUIdle() +{ + glFinish(); +} + +void Renderer::CheckForSurfaceChange() +{ + if (!m_surface_changed.TestAndClear()) + return; + + m_main_gl_context->UpdateSurface(m_new_surface_handle); + m_new_surface_handle = nullptr; + + // With a surface change, the window likely has new dimensions. 
+ m_backbuffer_width = m_main_gl_context->GetBackBufferWidth(); + m_backbuffer_height = m_main_gl_context->GetBackBufferHeight(); + m_system_framebuffer->UpdateDimensions(m_backbuffer_width, m_backbuffer_height); +} + +void Renderer::CheckForSurfaceResize() +{ + if (!m_surface_resized.TestAndClear()) + return; + + m_main_gl_context->Update(); + m_backbuffer_width = m_main_gl_context->GetBackBufferWidth(); + m_backbuffer_height = m_main_gl_context->GetBackBufferHeight(); + m_system_framebuffer->UpdateDimensions(m_backbuffer_width, m_backbuffer_height); +} + +void Renderer::BeginUtilityDrawing() +{ + ::Renderer::BeginUtilityDrawing(); + + glEnable(GL_PROGRAM_POINT_SIZE); + if (g_ActiveConfig.backend_info.bSupportsDepthClamp) + { + glDisable(GL_CLIP_DISTANCE0); + glDisable(GL_CLIP_DISTANCE1); + } +} + +void Renderer::EndUtilityDrawing() +{ + ::Renderer::EndUtilityDrawing(); + + glDisable(GL_PROGRAM_POINT_SIZE); + if (g_ActiveConfig.backend_info.bSupportsDepthClamp) + { + glEnable(GL_CLIP_DISTANCE0); + glEnable(GL_CLIP_DISTANCE1); + } +} + +void Renderer::ApplyRasterizationState(const RasterizationState state) +{ + if (m_current_rasterization_state == state) + return; + + // none, ccw, cw, ccw + if (state.cullmode != GenMode::CULL_NONE) + { + // TODO: GX_CULL_ALL not supported, yet! + glEnable(GL_CULL_FACE); + glFrontFace(state.cullmode == GenMode::CULL_FRONT ? GL_CCW : GL_CW); + } + else + { + glDisable(GL_CULL_FACE); + } + + m_current_rasterization_state = state; +} + +void Renderer::ApplyDepthState(const DepthState state) +{ + if (m_current_depth_state == state) + return; + + const GLenum glCmpFuncs[8] = {GL_NEVER, GL_LESS, GL_EQUAL, GL_LEQUAL, + GL_GREATER, GL_NOTEQUAL, GL_GEQUAL, GL_ALWAYS}; + + if (state.testenable) + { + glEnable(GL_DEPTH_TEST); + glDepthMask(state.updateenable ? 
GL_TRUE : GL_FALSE); + glDepthFunc(glCmpFuncs[state.func]); + } + else + { + // if the test is disabled write is disabled too + // TODO: When PE performance metrics are being emulated via occlusion queries, we should + // (probably?) enable depth test with depth function ALWAYS here + glDisable(GL_DEPTH_TEST); + glDepthMask(GL_FALSE); + } + + m_current_depth_state = state; +} + +void Renderer::ApplyBlendingState(const BlendingState state) +{ + if (m_current_blend_state == state) return; bool useDualSource = @@ -1348,13 +1216,9 @@ void Renderer::ApplyBlendingState(const BlendingState state, bool force) GL_ONE_MINUS_DST_ALPHA}; if (state.blendenable) - { glEnable(GL_BLEND); - } else - { glDisable(GL_BLEND); - } // Always call glBlendEquationSeparate and glBlendFuncSeparate, even when // GL_BLEND is disabled, as a workaround for some bugs (possibly graphics @@ -1372,220 +1236,58 @@ void Renderer::ApplyBlendingState(const BlendingState state, bool force) GL_XOR, GL_OR, GL_NOR, GL_EQUIV, GL_INVERT, GL_OR_REVERSE, GL_COPY_INVERTED, GL_OR_INVERTED, GL_NAND, GL_SET}; - if (IsGLES()) + // Logic ops aren't available in GLES3 + if (!IsGLES()) { - // Logic ops aren't available in GLES3 - } - else if (state.logicopenable) - { - glEnable(GL_COLOR_LOGIC_OP); - glLogicOp(logic_op_codes[state.logicmode]); - } - else - { - glDisable(GL_COLOR_LOGIC_OP); + if (state.logicopenable) + { + glEnable(GL_COLOR_LOGIC_OP); + glLogicOp(logic_op_codes[state.logicmode]); + } + else + { + glDisable(GL_COLOR_LOGIC_OP); + } } glColorMask(state.colorupdate, state.colorupdate, state.colorupdate, state.alphaupdate); m_current_blend_state = state; } -void Renderer::BindBackbuffer(const ClearColor& clear_color) -{ - CheckForSurfaceChange(); - CheckForSurfaceResize(); - - glBindFramebuffer(GL_FRAMEBUFFER, 0); - glClearColor(0, 0, 0, 0); - glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT); - m_current_framebuffer = nullptr; - m_current_framebuffer_width = m_backbuffer_width; - 
m_current_framebuffer_height = m_backbuffer_height; -} - -void Renderer::PresentBackbuffer() -{ - if (g_ogl_config.bSupportsDebug) - { - if (LogManager::GetInstance()->IsEnabled(LogTypes::HOST_GPU, LogTypes::LERROR)) - glEnable(GL_DEBUG_OUTPUT); - else - glDisable(GL_DEBUG_OUTPUT); - } - - // Swap the back and front buffers, presenting the image. - m_main_gl_context->Swap(); -} - -void Renderer::OnConfigChanged(u32 bits) -{ - if (bits & (CONFIG_CHANGE_BIT_TARGET_SIZE | CONFIG_CHANGE_BIT_MULTISAMPLES | - CONFIG_CHANGE_BIT_STEREO_MODE | CONFIG_CHANGE_BIT_BBOX)) - { - s_MSAASamples = g_ActiveConfig.iMultisamples; - if (s_MSAASamples > 1 && s_MSAASamples > g_ogl_config.max_samples) - { - s_MSAASamples = g_ogl_config.max_samples; - OSD::AddMessage( - StringFromFormat("%d Anti Aliasing samples selected, but only %d supported by your GPU.", - s_MSAASamples, g_ogl_config.max_samples), - 10000); - } - - g_framebuffer_manager.reset(); - g_framebuffer_manager = std::make_unique( - m_target_width, m_target_height, s_MSAASamples, BoundingBox::NeedsStencilBuffer()); - BoundingBox::SetTargetSizeChanged(m_target_width, m_target_height); - } - - if (bits & CONFIG_CHANGE_BIT_VSYNC && !DriverDetails::HasBug(DriverDetails::BUG_BROKEN_VSYNC)) - m_main_gl_context->SwapInterval(g_ActiveConfig.bVSyncActive); - - if (bits & CONFIG_CHANGE_BIT_ANISOTROPY) - g_sampler_cache->Clear(); -} - -void Renderer::Flush() -{ - // ensure all commands are sent to the GPU. - // Otherwise the driver could batch several frames togehter. - glFlush(); -} - -void Renderer::CheckForSurfaceChange() -{ - if (!m_surface_changed.TestAndClear()) - return; - - m_main_gl_context->UpdateSurface(m_new_surface_handle); - m_new_surface_handle = nullptr; - - // With a surface change, the window likely has new dimensions. 
- m_backbuffer_width = m_main_gl_context->GetBackBufferWidth(); - m_backbuffer_height = m_main_gl_context->GetBackBufferHeight(); -} - -void Renderer::CheckForSurfaceResize() -{ - if (!m_surface_resized.TestAndClear()) - return; - - m_main_gl_context->Update(); - m_backbuffer_width = m_main_gl_context->GetBackBufferWidth(); - m_backbuffer_height = m_main_gl_context->GetBackBufferHeight(); -} - -// ALWAYS call RestoreAPIState for each ResetAPIState call you're doing -void Renderer::ResetAPIState() -{ - // Gets us to a reasonably sane state where it's possible to do things like - // image copies with textured quads, etc. - glDisable(GL_SCISSOR_TEST); - glDisable(GL_DEPTH_TEST); - glDisable(GL_CULL_FACE); - glDisable(GL_BLEND); - if (!IsGLES()) - glDisable(GL_COLOR_LOGIC_OP); - if (g_ActiveConfig.backend_info.bSupportsDepthClamp) - { - glDisable(GL_CLIP_DISTANCE0); - glDisable(GL_CLIP_DISTANCE1); - } - glDepthMask(GL_FALSE); - glColorMask(GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE); - m_current_rasterization_state = RenderState::GetInvalidRasterizationState(); - m_current_depth_state = RenderState::GetInvalidDepthState(); - m_current_blend_state = RenderState::GetInvalidBlendingState(); -} - -void Renderer::RestoreAPIState() -{ - m_current_framebuffer = nullptr; - m_current_framebuffer_width = m_target_width; - m_current_framebuffer_height = m_target_height; - FramebufferManager::SetFramebuffer(0); - - // Gets us back into a more game-like state. - glEnable(GL_SCISSOR_TEST); - if (g_ActiveConfig.backend_info.bSupportsDepthClamp) - { - glEnable(GL_CLIP_DISTANCE0); - glEnable(GL_CLIP_DISTANCE1); - } - BPFunctions::SetScissor(); - BPFunctions::SetViewport(); -} - -void Renderer::ApplyRasterizationState(const RasterizationState state, bool force) -{ - if (!force && m_current_rasterization_state == state) - return; - - // none, ccw, cw, ccw - if (state.cullmode != GenMode::CULL_NONE) - { - // TODO: GX_CULL_ALL not supported, yet! 
- glEnable(GL_CULL_FACE); - glFrontFace(state.cullmode == GenMode::CULL_FRONT ? GL_CCW : GL_CW); - } - else - { - glDisable(GL_CULL_FACE); - } - - m_current_rasterization_state = state; -} - -void Renderer::ApplyDepthState(const DepthState state, bool force) -{ - if (!force && m_current_depth_state == state) - return; - - const GLenum glCmpFuncs[8] = {GL_NEVER, GL_LESS, GL_EQUAL, GL_LEQUAL, - GL_GREATER, GL_NOTEQUAL, GL_GEQUAL, GL_ALWAYS}; - - if (state.testenable) - { - glEnable(GL_DEPTH_TEST); - glDepthMask(state.updateenable ? GL_TRUE : GL_FALSE); - glDepthFunc(glCmpFuncs[state.func]); - } - else - { - // if the test is disabled write is disabled too - // TODO: When PE performance metrics are being emulated via occlusion queries, we should - // (probably?) enable depth test with depth function ALWAYS here - glDisable(GL_DEPTH_TEST); - glDepthMask(GL_FALSE); - } - - m_current_depth_state = state; -} - void Renderer::SetPipeline(const AbstractPipeline* pipeline) { - // Not all shader changes currently go through SetPipeline, so we can't - // test if the pipeline hasn't changed and skip these applications. Yet. 
- m_graphics_pipeline = static_cast(pipeline); - if (!m_graphics_pipeline) + if (m_current_pipeline == pipeline) return; - ApplyRasterizationState(m_graphics_pipeline->GetRasterizationState()); - ApplyDepthState(m_graphics_pipeline->GetDepthState()); - ApplyBlendingState(m_graphics_pipeline->GetBlendingState()); - ProgramShaderCache::BindVertexFormat(m_graphics_pipeline->GetVertexFormat()); - m_graphics_pipeline->GetProgram()->shader.Bind(); + if (pipeline) + { + ApplyRasterizationState(static_cast(pipeline)->GetRasterizationState()); + ApplyDepthState(static_cast(pipeline)->GetDepthState()); + ApplyBlendingState(static_cast(pipeline)->GetBlendingState()); + ProgramShaderCache::BindVertexFormat( + static_cast(pipeline)->GetVertexFormat()); + static_cast(pipeline)->GetProgram()->shader.Bind(); + } + else + { + ProgramShaderCache::InvalidateLastProgram(); + glUseProgram(0); + } + m_current_pipeline = pipeline; } void Renderer::SetTexture(u32 index, const AbstractTexture* texture) { - if (m_bound_textures[index] == texture) + const OGLTexture* gl_texture = static_cast(texture); + if (m_bound_textures[index] == gl_texture) return; glActiveTexture(GL_TEXTURE0 + index); - glBindTexture(GL_TEXTURE_2D_ARRAY, - texture ? static_cast(texture)->GetRawTexIdentifier() : 0); - m_bound_textures[index] = texture; + if (gl_texture) + glBindTexture(gl_texture->GetGLTarget(), gl_texture->GetGLTextureId()); + else + glBindTexture(GL_TEXTURE_2D_ARRAY, 0); + m_bound_textures[index] = gl_texture; } void Renderer::SetSamplerState(u32 index, const SamplerState& state) @@ -1593,6 +1295,25 @@ void Renderer::SetSamplerState(u32 index, const SamplerState& state) g_sampler_cache->SetSamplerState(index, state); } +void Renderer::SetComputeImageTexture(AbstractTexture* texture, bool read, bool write) +{ + if (m_bound_image_texture == texture) + return; + + if (texture) + { + const GLenum access = read ? (write ? 
GL_READ_WRITE : GL_READ_ONLY) : GL_WRITE_ONLY; + glBindImageTexture(0, static_cast(texture)->GetGLTextureId(), 0, GL_TRUE, 0, + access, static_cast(texture)->GetGLFormatForImageTexture()); + } + else + { + glBindImageTexture(0, 0, 0, GL_FALSE, 0, GL_READ_ONLY, GL_RGBA8); + } + + m_bound_image_texture = texture; +} + void Renderer::UnbindTexture(const AbstractTexture* texture) { for (size_t i = 0; i < m_bound_textures.size(); i++) @@ -1604,15 +1325,34 @@ void Renderer::UnbindTexture(const AbstractTexture* texture) glBindTexture(GL_TEXTURE_2D_ARRAY, 0); m_bound_textures[i] = nullptr; } -} -void Renderer::SetInterlacingMode() -{ - // TODO + if (m_bound_image_texture == texture) + { + glBindImageTexture(0, 0, 0, GL_FALSE, 0, GL_READ_ONLY, GL_RGBA8); + m_bound_image_texture = nullptr; + } } std::unique_ptr Renderer::CreateAsyncShaderCompiler() { return std::make_unique(); } + +void Renderer::BindSharedReadFramebuffer() +{ + glBindFramebuffer(GL_READ_FRAMEBUFFER, m_shared_read_framebuffer); +} + +void Renderer::BindSharedDrawFramebuffer() +{ + glBindFramebuffer(GL_DRAW_FRAMEBUFFER, m_shared_draw_framebuffer); +} + +void Renderer::RestoreFramebufferBinding() +{ + glBindFramebuffer( + GL_FRAMEBUFFER, + m_current_framebuffer ? 
static_cast(m_current_framebuffer)->GetFBO() : 0); +} + } // namespace OGL diff --git a/Source/Core/VideoBackends/OGL/Render.h b/Source/Core/VideoBackends/OGL/Render.h index 398a49377d..442a31d5c0 100644 --- a/Source/Core/VideoBackends/OGL/Render.h +++ b/Source/Core/VideoBackends/OGL/Render.h @@ -11,12 +11,11 @@ #include "Common/GL/GLExtensions/GLExtensions.h" #include "VideoCommon/RenderBase.h" -struct XFBSourceBase; - namespace OGL { +class OGLFramebuffer; class OGLPipeline; -void ClearEFBCache(); +class OGLTexture; enum GlslVersion { @@ -86,6 +85,8 @@ public: Renderer(std::unique_ptr main_gl_context, float backbuffer_scale); ~Renderer() override; + static Renderer* GetInstance() { return static_cast(g_renderer.get()); } + bool IsHeadless() const override; bool Initialize() override; @@ -98,73 +99,80 @@ public: size_t length) override; std::unique_ptr CreateShaderFromBinary(ShaderStage stage, const void* data, size_t length) override; + std::unique_ptr + CreateNativeVertexFormat(const PortableVertexDeclaration& vtx_decl) override; std::unique_ptr CreatePipeline(const AbstractPipelineConfig& config) override; std::unique_ptr - CreateFramebuffer(const AbstractTexture* color_attachment, - const AbstractTexture* depth_attachment) override; + CreateFramebuffer(AbstractTexture* color_attachment, AbstractTexture* depth_attachment) override; void SetPipeline(const AbstractPipeline* pipeline) override; - void SetFramebuffer(const AbstractFramebuffer* framebuffer) override; - void SetAndDiscardFramebuffer(const AbstractFramebuffer* framebuffer) override; - void SetAndClearFramebuffer(const AbstractFramebuffer* framebuffer, - const ClearColor& color_value = {}, + void SetFramebuffer(AbstractFramebuffer* framebuffer) override; + void SetAndDiscardFramebuffer(AbstractFramebuffer* framebuffer) override; + void SetAndClearFramebuffer(AbstractFramebuffer* framebuffer, const ClearColor& color_value = {}, float depth_value = 0.0f) override; void SetScissorRect(const 
MathUtil::Rectangle& rc) override; void SetTexture(u32 index, const AbstractTexture* texture) override; void SetSamplerState(u32 index, const SamplerState& state) override; + void SetComputeImageTexture(AbstractTexture* texture, bool read, bool write) override; void UnbindTexture(const AbstractTexture* texture) override; - void SetInterlacingMode() override; void SetViewport(float x, float y, float width, float height, float near_depth, float far_depth) override; void Draw(u32 base_vertex, u32 num_vertices) override; void DrawIndexed(u32 base_index, u32 num_indices, u32 base_vertex) override; + void DispatchComputeShader(const AbstractShader* shader, u32 groups_x, u32 groups_y, + u32 groups_z) override; void BindBackbuffer(const ClearColor& clear_color = {}) override; void PresentBackbuffer() override; - u32 AccessEFB(EFBAccessType type, u32 x, u32 y, u32 poke_data) override; - void PokeEFB(EFBAccessType type, const EfbPokeData* points, size_t num_points) override; - u16 BBoxRead(int index) override; void BBoxWrite(int index, u16 value) override; - void ResetAPIState() override; - void RestoreAPIState() override; - - TargetRectangle ConvertEFBRectangle(const EFBRectangle& rc) override; + void BeginUtilityDrawing() override; + void EndUtilityDrawing() override; void Flush() override; + void WaitForGPUIdle() override; void RenderXFBToScreen(const AbstractTexture* texture, const EFBRectangle& rc) override; void OnConfigChanged(u32 bits) override; void ClearScreen(const EFBRectangle& rc, bool colorEnable, bool alphaEnable, bool zEnable, u32 color, u32 z) override; - void ReinterpretPixelData(unsigned int convtype) override; - std::unique_ptr CreateAsyncShaderCompiler() override; // Only call methods from this on the GPU thread. 
GLContext* GetMainGLContext() const { return m_main_gl_context.get(); } bool IsGLES() const { return m_main_gl_context->IsGLES(); } - const OGLPipeline* GetCurrentGraphicsPipeline() const { return m_graphics_pipeline; } + // Invalidates a cached texture binding. Required for texel buffers when they borrow the units. + void InvalidateTextureBinding(u32 index) { m_bound_textures[index] = nullptr; } + + // The shared framebuffer exists for copying textures when extensions are not available. It is + // slower, but the only way to do these things otherwise. + GLuint GetSharedReadFramebuffer() const { return m_shared_read_framebuffer; } + GLuint GetSharedDrawFramebuffer() const { return m_shared_draw_framebuffer; } + void BindSharedReadFramebuffer(); + void BindSharedDrawFramebuffer(); + + // Restores FBO binding after it's been changed. + void RestoreFramebufferBinding(); private: - void UpdateEFBCache(EFBAccessType type, u32 cacheRectIdx, const EFBRectangle& efbPixelRc, - const TargetRectangle& targetPixelRc, const void* data); - void CheckForSurfaceChange(); void CheckForSurfaceResize(); - void ApplyBlendingState(const BlendingState state, bool force = false); - void ApplyRasterizationState(const RasterizationState state, bool force = false); - void ApplyDepthState(const DepthState state, bool force = false); + void ApplyRasterizationState(const RasterizationState state); + void ApplyDepthState(const DepthState state); + void ApplyBlendingState(const BlendingState state); std::unique_ptr m_main_gl_context; - std::array m_bound_textures{}; - const OGLPipeline* m_graphics_pipeline = nullptr; + std::unique_ptr m_system_framebuffer; + std::array m_bound_textures{}; + AbstractTexture* m_bound_image_texture = nullptr; RasterizationState m_current_rasterization_state; DepthState m_current_depth_state; BlendingState m_current_blend_state; + GLuint m_shared_read_framebuffer = 0; + GLuint m_shared_draw_framebuffer = 0; }; } // namespace OGL diff --git 
a/Source/Core/VideoBackends/OGL/StreamBuffer.h b/Source/Core/VideoBackends/OGL/StreamBuffer.h index 2a8efba1af..9946f3d33e 100644 --- a/Source/Core/VideoBackends/OGL/StreamBuffer.h +++ b/Source/Core/VideoBackends/OGL/StreamBuffer.h @@ -19,6 +19,8 @@ public: static std::unique_ptr Create(u32 type, u32 size); virtual ~StreamBuffer(); + u32 GetGLBufferId() const { return m_buffer; } + u32 GetSize() const { return m_size; } u32 GetCurrentOffset() const { return m_iterator; } /* This mapping function will return a pair of: @@ -64,4 +66,4 @@ private: std::array m_fences{}; }; -} +} // namespace OGL diff --git a/Source/Core/VideoBackends/OGL/TextureCache.cpp b/Source/Core/VideoBackends/OGL/TextureCache.cpp deleted file mode 100644 index 9b0699587b..0000000000 --- a/Source/Core/VideoBackends/OGL/TextureCache.cpp +++ /dev/null @@ -1,574 +0,0 @@ -// Copyright 2008 Dolphin Emulator Project -// Licensed under GPLv2+ -// Refer to the license.txt file included. - -#include -#include -#include -#include -#include -#include - -#include "Common/Assert.h" -#include "Common/MsgHandler.h" -#include "Common/StringUtil.h" - -#include "VideoBackends/OGL/FramebufferManager.h" -#include "VideoBackends/OGL/GPUTimer.h" -#include "VideoBackends/OGL/OGLTexture.h" -#include "VideoBackends/OGL/ProgramShaderCache.h" -#include "VideoBackends/OGL/Render.h" -#include "VideoBackends/OGL/SamplerCache.h" -#include "VideoBackends/OGL/StreamBuffer.h" -#include "VideoBackends/OGL/TextureCache.h" -#include "VideoBackends/OGL/TextureConverter.h" - -#include "VideoCommon/ImageWrite.h" -#include "VideoCommon/TextureConversionShader.h" -#include "VideoCommon/TextureConverterShaderGen.h" -#include "VideoCommon/TextureDecoder.h" -#include "VideoCommon/VideoCommon.h" -#include "VideoCommon/VideoConfig.h" - -namespace OGL -{ -constexpr const char GLSL_PROGRAM_VS[] = R"GLSL( -out vec3 %c_uv0; -SAMPLER_BINDING(9) uniform sampler2DArray samp9; -uniform vec4 copy_position; // left, top, right, bottom - -void main() -{ 
- vec2 rawpos = vec2(gl_VertexID&1, gl_VertexID&2); - %c_uv0 = vec3(mix(copy_position.xy, copy_position.zw, rawpos) / vec2(textureSize(samp9, 0).xy), 0.0); - gl_Position = vec4(rawpos*2.0-1.0, 0.0, 1.0); -} -)GLSL"; - -constexpr const char GLSL_PROGRAM_GS[] = R"GLSL( -layout(triangles) in; -layout(triangle_strip, max_vertices = 6) out; -in vec3 v_uv0[3]; -out vec3 f_uv0; -SAMPLER_BINDING(9) uniform sampler2DArray samp9; - -void main() -{ - int layers = textureSize(samp9, 0).z; - for (int layer = 0; layer < layers; ++layer) { - for (int i = 0; i < 3; ++i) { - f_uv0 = vec3(v_uv0[i].xy, layer); - gl_Position = gl_in[i].gl_Position; - gl_Layer = layer; - EmitVertex(); - } - } - EndPrimitive(); -} -)GLSL"; - -constexpr const char GLSL_COLOR_COPY_FS[] = R"GLSL( -SAMPLER_BINDING(9) uniform sampler2DArray samp9; -in vec3 f_uv0; -out vec4 ocol0; - -void main() -{ - vec4 texcol = texture(samp9, f_uv0); - ocol0 = texcol; -} -)GLSL"; - -constexpr const char GLSL_PALETTE_FS[] = R"GLSL( -uniform int texture_buffer_offset; -uniform float multiplier; -SAMPLER_BINDING(9) uniform sampler2DArray samp9; -SAMPLER_BINDING(10) uniform usamplerBuffer samp10; - -in vec3 f_uv0; -out vec4 ocol0; - -int Convert3To8(int v) -{ - // Swizzle bits: 00000123 -> 12312312 - return (v << 5) | (v << 2) | (v >> 1); -} - -int Convert4To8(int v) -{ - // Swizzle bits: 00001234 -> 12341234 - return (v << 4) | v; -} - -int Convert5To8(int v) -{ - // Swizzle bits: 00012345 -> 12345123 - return (v << 3) | (v >> 2); -} - -int Convert6To8(int v) -{ - // Swizzle bits: 00123456 -> 12345612 - return (v << 2) | (v >> 4); -} - -float4 DecodePixel_RGB5A3(int val) -{ - int r,g,b,a; - if ((val&0x8000) > 0) - { - r=Convert5To8((val>>10) & 0x1f); - g=Convert5To8((val>>5 ) & 0x1f); - b=Convert5To8((val ) & 0x1f); - a=0xFF; - } - else - { - a=Convert3To8((val>>12) & 0x7); - r=Convert4To8((val>>8 ) & 0xf); - g=Convert4To8((val>>4 ) & 0xf); - b=Convert4To8((val ) & 0xf); - } - return float4(r, g, b, a) / 255.0; -} - -float4 
DecodePixel_RGB565(int val) -{ - int r, g, b, a; - r = Convert5To8((val >> 11) & 0x1f); - g = Convert6To8((val >> 5) & 0x3f); - b = Convert5To8((val) & 0x1f); - a = 0xFF; - return float4(r, g, b, a) / 255.0; -} - -float4 DecodePixel_IA8(int val) -{ - int i = val & 0xFF; - int a = val >> 8; - return float4(i, i, i, a) / 255.0; -} - -void main() -{ - int src = int(round(texture(samp9, f_uv0).r * multiplier)); - src = int(texelFetch(samp10, src + texture_buffer_offset).r); - src = ((src << 8) & 0xFF00) | (src >> 8); - ocol0 = DecodePixel_%s(src); -} -)GLSL"; - -//#define TIME_TEXTURE_DECODING 1 - -void TextureCache::CopyEFB(AbstractStagingTexture* dst, const EFBCopyParams& params, - u32 native_width, u32 bytes_per_row, u32 num_blocks_y, u32 memory_stride, - const EFBRectangle& src_rect, bool scale_by_half, float y_scale, - float gamma, bool clamp_top, bool clamp_bottom, - const CopyFilterCoefficientArray& filter_coefficients) -{ - // Flip top/bottom due to lower-left coordinate system. - float clamp_top_val = - clamp_bottom ? (1.0f - src_rect.bottom / static_cast(EFB_HEIGHT)) : 0.0f; - float clamp_bottom_val = - clamp_top ? (1.0f - src_rect.top / static_cast(EFB_HEIGHT)) : 1.0f; - TextureConverter::EncodeToRamFromTexture(dst, params, native_width, bytes_per_row, num_blocks_y, - memory_stride, src_rect, scale_by_half, y_scale, gamma, - clamp_top_val, clamp_bottom_val, filter_coefficients); -} - -TextureCache::TextureCache() -{ - CompileShaders(); - - if (g_ActiveConfig.backend_info.bSupportsPaletteConversion) - { - s32 buffer_size_mb = (g_ActiveConfig.backend_info.bSupportsGPUTextureDecoding ? 32 : 1); - s32 buffer_size = buffer_size_mb * 1024 * 1024; - s32 max_buffer_size = 0; - - // The minimum MAX_TEXTURE_BUFFER_SIZE that the spec mandates is 65KB, we are asking for a 1MB - // buffer here. This buffer is also used as storage for undecoded textures when compute shader - // texture decoding is enabled, in which case the requested size is 32MB. 
- glGetIntegerv(GL_MAX_TEXTURE_BUFFER_SIZE, &max_buffer_size); - - // Clamp the buffer size to the maximum size that the driver supports. - buffer_size = std::min(buffer_size, max_buffer_size); - - m_palette_stream_buffer = StreamBuffer::Create(GL_TEXTURE_BUFFER, buffer_size); - glGenTextures(1, &m_palette_resolv_texture); - glBindTexture(GL_TEXTURE_BUFFER, m_palette_resolv_texture); - glTexBuffer(GL_TEXTURE_BUFFER, GL_R16UI, m_palette_stream_buffer->m_buffer); - - if (g_ActiveConfig.backend_info.bSupportsGPUTextureDecoding) - CreateTextureDecodingResources(); - } -} - -TextureCache::~TextureCache() -{ - DeleteShaders(); - if (g_ActiveConfig.backend_info.bSupportsGPUTextureDecoding) - DestroyTextureDecodingResources(); - - if (g_ActiveConfig.backend_info.bSupportsPaletteConversion) - { - glDeleteTextures(1, &m_palette_resolv_texture); - } -} - -TextureCache* TextureCache::GetInstance() -{ - return static_cast(g_texture_cache.get()); -} - -const SHADER& TextureCache::GetColorCopyProgram() const -{ - return m_colorCopyProgram; -} - -GLuint TextureCache::GetColorCopyPositionUniform() const -{ - return m_colorCopyPositionUniform; -} - -bool TextureCache::CompilePaletteShader(TLUTFormat tlutfmt, const std::string& vcode, - const std::string& pcode, const std::string& gcode) -{ - ASSERT(IsValidTLUTFormat(tlutfmt)); - PaletteShader& shader = m_palette_shaders[static_cast(tlutfmt)]; - - if (!ProgramShaderCache::CompileShader(shader.shader, vcode, pcode, gcode)) - return false; - - shader.buffer_offset_uniform = - glGetUniformLocation(shader.shader.glprogid, "texture_buffer_offset"); - shader.multiplier_uniform = glGetUniformLocation(shader.shader.glprogid, "multiplier"); - shader.copy_position_uniform = glGetUniformLocation(shader.shader.glprogid, "copy_position"); - - return true; -} - -bool TextureCache::CompileShaders() -{ - std::string geo_program = ""; - char prefix = 'f'; - if (g_ActiveConfig.stereo_mode != StereoMode::Off) - { - geo_program = GLSL_PROGRAM_GS; - 
prefix = 'v'; - } - - if (!ProgramShaderCache::CompileShader(m_colorCopyProgram, - StringFromFormat(GLSL_PROGRAM_VS, prefix, prefix), - GLSL_COLOR_COPY_FS, geo_program)) - { - return false; - } - - m_colorCopyPositionUniform = glGetUniformLocation(m_colorCopyProgram.glprogid, "copy_position"); - - if (g_ActiveConfig.backend_info.bSupportsPaletteConversion) - { - if (!CompilePaletteShader(TLUTFormat::IA8, StringFromFormat(GLSL_PROGRAM_VS, prefix, prefix), - StringFromFormat(GLSL_PALETTE_FS, "IA8"), geo_program)) - return false; - - if (!CompilePaletteShader(TLUTFormat::RGB565, StringFromFormat(GLSL_PROGRAM_VS, prefix, prefix), - StringFromFormat(GLSL_PALETTE_FS, "RGB565"), geo_program)) - return false; - - if (!CompilePaletteShader(TLUTFormat::RGB5A3, StringFromFormat(GLSL_PROGRAM_VS, prefix, prefix), - StringFromFormat(GLSL_PALETTE_FS, "RGB5A3"), geo_program)) - return false; - } - - return true; -} - -void TextureCache::DeleteShaders() -{ - for (auto& it : m_efb_copy_programs) - it.second.shader.Destroy(); - m_efb_copy_programs.clear(); - - if (g_ActiveConfig.backend_info.bSupportsPaletteConversion) - for (auto& shader : m_palette_shaders) - shader.shader.Destroy(); -} - -void TextureCache::ConvertTexture(TCacheEntry* destination, TCacheEntry* source, - const void* palette, TLUTFormat tlutfmt) -{ - if (!g_ActiveConfig.backend_info.bSupportsPaletteConversion) - return; - - ASSERT(IsValidTLUTFormat(tlutfmt)); - const PaletteShader& palette_shader = m_palette_shaders[static_cast(tlutfmt)]; - - g_renderer->ResetAPIState(); - - OGLTexture* source_texture = static_cast(source->texture.get()); - OGLTexture* destination_texture = static_cast(destination->texture.get()); - - glActiveTexture(GL_TEXTURE9); - glBindTexture(GL_TEXTURE_2D_ARRAY, source_texture->GetRawTexIdentifier()); - g_sampler_cache->BindNearestSampler(9); - - FramebufferManager::SetFramebuffer(destination_texture->GetFramebuffer()); - glViewport(0, 0, destination->GetWidth(), destination->GetHeight()); - 
palette_shader.shader.Bind(); - - // C14 textures are currently unsupported - int size = source->format == TextureFormat::I4 ? 32 : 512; - auto buffer = m_palette_stream_buffer->Map(size); - memcpy(buffer.first, palette, size); - m_palette_stream_buffer->Unmap(size); - glUniform1i(palette_shader.buffer_offset_uniform, buffer.second / 2); - glUniform1f(palette_shader.multiplier_uniform, - source->format == TextureFormat::I4 ? 15.0f : 255.0f); - glUniform4f(palette_shader.copy_position_uniform, 0.0f, 0.0f, - static_cast(source->GetWidth()), static_cast(source->GetHeight())); - - glActiveTexture(GL_TEXTURE10); - glBindTexture(GL_TEXTURE_BUFFER, m_palette_resolv_texture); - g_sampler_cache->BindNearestSampler(10); - - ProgramShaderCache::BindVertexFormat(nullptr); - glDrawArrays(GL_TRIANGLE_STRIP, 0, 4); - - g_renderer->RestoreAPIState(); -} - -static const std::string decoding_vertex_shader = R"( -void main() -{ - vec2 rawpos = vec2(gl_VertexID&1, gl_VertexID&2); - gl_Position = vec4(rawpos*2.0-1.0, 0.0, 1.0); -} -)"; - -void TextureCache::CreateTextureDecodingResources() -{ - static const GLenum gl_view_types[TextureConversionShaderTiled::BUFFER_FORMAT_COUNT] = { - GL_R8UI, // BUFFER_FORMAT_R8_UINT - GL_R16UI, // BUFFER_FORMAT_R16_UINT - GL_RG32UI, // BUFFER_FORMAT_R32G32_UINT - GL_RGBA8UI, // BUFFER_FORMAT_RGBA8_UINT - }; - - glGenTextures(TextureConversionShaderTiled::BUFFER_FORMAT_COUNT, - m_texture_decoding_buffer_views.data()); - for (size_t i = 0; i < TextureConversionShaderTiled::BUFFER_FORMAT_COUNT; i++) - { - glBindTexture(GL_TEXTURE_BUFFER, m_texture_decoding_buffer_views[i]); - glTexBuffer(GL_TEXTURE_BUFFER, gl_view_types[i], m_palette_stream_buffer->m_buffer); - } -} - -void TextureCache::DestroyTextureDecodingResources() -{ - glDeleteTextures(TextureConversionShaderTiled::BUFFER_FORMAT_COUNT, - m_texture_decoding_buffer_views.data()); - m_texture_decoding_buffer_views.fill(0); - m_texture_decoding_program_info.clear(); -} - -bool 
TextureCache::SupportsGPUTextureDecode(TextureFormat format, TLUTFormat palette_format) -{ - auto key = std::make_pair(static_cast(format), static_cast(palette_format)); - auto iter = m_texture_decoding_program_info.find(key); - if (iter != m_texture_decoding_program_info.end()) - return iter->second.valid; - - TextureDecodingProgramInfo info; - info.base_info = TextureConversionShaderTiled::GetDecodingShaderInfo(format); - if (!info.base_info) - { - m_texture_decoding_program_info.emplace(key, info); - return false; - } - - std::string shader_source = - TextureConversionShaderTiled::GenerateDecodingShader(format, palette_format, APIType::OpenGL); - if (shader_source.empty()) - { - m_texture_decoding_program_info.emplace(key, info); - return false; - } - - if (!ProgramShaderCache::CompileComputeShader(info.program, shader_source)) - { - m_texture_decoding_program_info.emplace(key, info); - return false; - } - - info.uniform_dst_size = glGetUniformLocation(info.program.glprogid, "u_dst_size"); - info.uniform_src_size = glGetUniformLocation(info.program.glprogid, "u_src_size"); - info.uniform_src_offset = glGetUniformLocation(info.program.glprogid, "u_src_offset"); - info.uniform_src_row_stride = glGetUniformLocation(info.program.glprogid, "u_src_row_stride"); - info.uniform_palette_offset = glGetUniformLocation(info.program.glprogid, "u_palette_offset"); - info.valid = true; - m_texture_decoding_program_info.emplace(key, info); - return true; -} - -void TextureCache::DecodeTextureOnGPU(TCacheEntry* entry, u32 dst_level, const u8* data, - size_t data_size, TextureFormat format, u32 width, u32 height, - u32 aligned_width, u32 aligned_height, u32 row_stride, - const u8* palette, TLUTFormat palette_format) -{ - auto key = std::make_pair(static_cast(format), static_cast(palette_format)); - auto iter = m_texture_decoding_program_info.find(key); - if (iter == m_texture_decoding_program_info.end()) - return; - -#ifdef TIME_TEXTURE_DECODING - GPUTimer timer; -#endif - - // 
Copy to GPU-visible buffer, aligned to the data type. - auto info = iter->second; - u32 bytes_per_buffer_elem = - TextureConversionShaderTiled::GetBytesPerBufferElement(info.base_info->buffer_format); - - // Only copy palette if it is required. - bool has_palette = info.base_info->palette_size > 0; - u32 total_upload_size = static_cast(data_size); - u32 palette_offset = total_upload_size; - if (has_palette) - { - // Align to u16. - if ((total_upload_size % sizeof(u16)) != 0) - { - total_upload_size++; - palette_offset++; - } - - total_upload_size += info.base_info->palette_size; - } - - // Allocate space in stream buffer, and copy texture + palette across. - auto buffer = m_palette_stream_buffer->Map(total_upload_size, bytes_per_buffer_elem); - memcpy(buffer.first, data, data_size); - if (has_palette) - memcpy(buffer.first + palette_offset, palette, info.base_info->palette_size); - m_palette_stream_buffer->Unmap(total_upload_size); - - info.program.Bind(); - - // Calculate stride in buffer elements - u32 row_stride_in_elements = row_stride / bytes_per_buffer_elem; - u32 offset_in_elements = buffer.second / bytes_per_buffer_elem; - u32 palette_offset_in_elements = (buffer.second + palette_offset) / sizeof(u16); - if (info.uniform_dst_size >= 0) - glUniform2ui(info.uniform_dst_size, width, height); - if (info.uniform_src_size >= 0) - glUniform2ui(info.uniform_src_size, aligned_width, aligned_height); - if (info.uniform_src_offset >= 0) - glUniform1ui(info.uniform_src_offset, offset_in_elements); - if (info.uniform_src_row_stride >= 0) - glUniform1ui(info.uniform_src_row_stride, row_stride_in_elements); - if (info.uniform_palette_offset >= 0) - glUniform1ui(info.uniform_palette_offset, palette_offset_in_elements); - - glActiveTexture(GL_TEXTURE9); - glBindTexture(GL_TEXTURE_BUFFER, m_texture_decoding_buffer_views[info.base_info->buffer_format]); - - if (has_palette) - { - // Use an R16UI view for the palette. 
- glActiveTexture(GL_TEXTURE10); - glBindTexture(GL_TEXTURE_BUFFER, m_palette_resolv_texture); - } - - auto dispatch_groups = - TextureConversionShaderTiled::GetDispatchCount(info.base_info, aligned_width, aligned_height); - glBindImageTexture(0, static_cast(entry->texture.get())->GetRawTexIdentifier(), - dst_level, GL_TRUE, 0, GL_WRITE_ONLY, GL_RGBA8); - glDispatchCompute(dispatch_groups.first, dispatch_groups.second, 1); - glMemoryBarrier(GL_TEXTURE_UPDATE_BARRIER_BIT); - -#ifdef TIME_TEXTURE_DECODING - WARN_LOG(VIDEO, "Decode texture format %u size %ux%u took %.4fms", static_cast(format), - width, height, timer.GetTimeMilliseconds()); -#endif -} - -void TextureCache::CopyEFBToCacheEntry(TCacheEntry* entry, bool is_depth_copy, - const EFBRectangle& src_rect, bool scale_by_half, - EFBCopyFormat dst_format, bool is_intensity, float gamma, - bool clamp_top, bool clamp_bottom, - const CopyFilterCoefficientArray& filter_coefficients) -{ - auto* destination_texture = static_cast(entry->texture.get()); - g_renderer->ResetAPIState(); // reset any game specific settings - - // Make sure to resolve anything we need to read from. - const GLuint read_texture = is_depth_copy ? 
- FramebufferManager::ResolveAndGetDepthTarget(src_rect) : - FramebufferManager::ResolveAndGetRenderTarget(src_rect); - - FramebufferManager::SetFramebuffer(destination_texture->GetFramebuffer()); - - glActiveTexture(GL_TEXTURE9); - glBindTexture(GL_TEXTURE_2D_ARRAY, read_texture); - if (scale_by_half) - g_sampler_cache->BindLinearSampler(9); - else - g_sampler_cache->BindNearestSampler(9); - - glViewport(0, 0, destination_texture->GetConfig().width, destination_texture->GetConfig().height); - - auto uid = TextureConversionShaderGen::GetShaderUid(dst_format, is_depth_copy, is_intensity, - scale_by_half, - NeedsCopyFilterInShader(filter_coefficients)); - - auto it = m_efb_copy_programs.emplace(uid, EFBCopyShader()); - EFBCopyShader& shader = it.first->second; - bool created = it.second; - - if (created) - { - ShaderCode code = TextureConversionShaderGen::GenerateShader(APIType::OpenGL, uid.GetUidData()); - - std::string geo_program = ""; - char prefix = 'f'; - if (g_ActiveConfig.stereo_mode != StereoMode::Off) - { - geo_program = GLSL_PROGRAM_GS; - prefix = 'v'; - } - - ProgramShaderCache::CompileShader(shader.shader, - StringFromFormat(GLSL_PROGRAM_VS, prefix, prefix), - code.GetBuffer(), geo_program); - - shader.position_uniform = glGetUniformLocation(shader.shader.glprogid, "copy_position"); - shader.pixel_height_uniform = glGetUniformLocation(shader.shader.glprogid, "pixel_height"); - shader.gamma_rcp_uniform = glGetUniformLocation(shader.shader.glprogid, "gamma_rcp"); - shader.clamp_tb_uniform = glGetUniformLocation(shader.shader.glprogid, "clamp_tb"); - shader.filter_coefficients_uniform = - glGetUniformLocation(shader.shader.glprogid, "filter_coefficients"); - } - - shader.shader.Bind(); - - TargetRectangle R = g_renderer->ConvertEFBRectangle(src_rect); - glUniform4f(shader.position_uniform, static_cast(R.left), static_cast(R.top), - static_cast(R.right), static_cast(R.bottom)); - glUniform1f(shader.pixel_height_uniform, g_ActiveConfig.bCopyEFBScaled ? 
- 1.0f / g_renderer->GetTargetHeight() : - 1.0f / EFB_HEIGHT); - glUniform1f(shader.gamma_rcp_uniform, 1.0f / gamma); - glUniform2f(shader.clamp_tb_uniform, - clamp_bottom ? (1.0f - src_rect.bottom / static_cast(EFB_HEIGHT)) : 0.0f, - clamp_top ? (1.0f - src_rect.top / static_cast(EFB_HEIGHT)) : 1.0f); - glUniform3f(shader.filter_coefficients_uniform, filter_coefficients[0], filter_coefficients[1], - filter_coefficients[2]); - - ProgramShaderCache::BindVertexFormat(nullptr); - glDrawArrays(GL_TRIANGLE_STRIP, 0, 4); - - g_renderer->RestoreAPIState(); -} -} // namespace OGL diff --git a/Source/Core/VideoBackends/OGL/TextureCache.h b/Source/Core/VideoBackends/OGL/TextureCache.h deleted file mode 100644 index 89fc82ee9d..0000000000 --- a/Source/Core/VideoBackends/OGL/TextureCache.h +++ /dev/null @@ -1,108 +0,0 @@ -// Copyright 2008 Dolphin Emulator Project -// Licensed under GPLv2+ -// Refer to the license.txt file included. - -#pragma once - -#include -#include - -#include "Common/CommonTypes.h" -#include "Common/GL/GLUtil.h" -#include "VideoBackends/OGL/ProgramShaderCache.h" - -#include "VideoCommon/TextureCacheBase.h" -#include "VideoCommon/TextureConversionShader.h" -#include "VideoCommon/TextureConverterShaderGen.h" -#include "VideoCommon/VideoCommon.h" - -class AbstractTexture; -class StreamBuffer; -struct TextureConfig; - -namespace OGL -{ -class TextureCache : public TextureCacheBase -{ -public: - TextureCache(); - ~TextureCache(); - - static TextureCache* GetInstance(); - - bool SupportsGPUTextureDecode(TextureFormat format, TLUTFormat palette_format) override; - void DecodeTextureOnGPU(TCacheEntry* entry, u32 dst_level, const u8* data, size_t data_size, - TextureFormat format, u32 width, u32 height, u32 aligned_width, - u32 aligned_height, u32 row_stride, const u8* palette, - TLUTFormat palette_format) override; - - const SHADER& GetColorCopyProgram() const; - GLuint GetColorCopyPositionUniform() const; - -private: - struct PaletteShader - { - SHADER shader; 
- GLuint buffer_offset_uniform; - GLuint multiplier_uniform; - GLuint copy_position_uniform; - }; - - struct TextureDecodingProgramInfo - { - const TextureConversionShaderTiled::DecodingShaderInfo* base_info = nullptr; - SHADER program; - GLint uniform_dst_size = -1; - GLint uniform_src_size = -1; - GLint uniform_src_row_stride = -1; - GLint uniform_src_offset = -1; - GLint uniform_palette_offset = -1; - bool valid = false; - }; - - void ConvertTexture(TCacheEntry* destination, TCacheEntry* source, const void* palette, - TLUTFormat format) override; - - void CopyEFB(AbstractStagingTexture* dst, const EFBCopyParams& params, u32 native_width, - u32 bytes_per_row, u32 num_blocks_y, u32 memory_stride, const EFBRectangle& src_rect, - bool scale_by_half, float y_scale, float gamma, bool clamp_top, bool clamp_bottom, - const CopyFilterCoefficientArray& filter_coefficients) override; - - void CopyEFBToCacheEntry(TCacheEntry* entry, bool is_depth_copy, const EFBRectangle& src_rect, - bool scale_by_half, EFBCopyFormat dst_format, bool is_intensity, - float gamma, bool clamp_top, bool clamp_bottom, - const CopyFilterCoefficientArray& filter_coefficients) override; - - bool CompileShaders() override; - void DeleteShaders() override; - - bool CompilePaletteShader(TLUTFormat tlutfmt, const std::string& vcode, const std::string& pcode, - const std::string& gcode); - - void CreateTextureDecodingResources(); - void DestroyTextureDecodingResources(); - - struct EFBCopyShader - { - SHADER shader; - GLuint position_uniform; - GLuint pixel_height_uniform; - GLuint gamma_rcp_uniform; - GLuint clamp_tb_uniform; - GLuint filter_coefficients_uniform; - }; - - std::map m_efb_copy_programs; - - SHADER m_colorCopyProgram; - GLuint m_colorCopyPositionUniform; - - std::array m_palette_shaders; - std::unique_ptr m_palette_stream_buffer; - GLuint m_palette_resolv_texture = 0; - - std::map, TextureDecodingProgramInfo> m_texture_decoding_program_info; - std::array - m_texture_decoding_buffer_views; 
-}; -} diff --git a/Source/Core/VideoBackends/OGL/TextureConverter.cpp b/Source/Core/VideoBackends/OGL/TextureConverter.cpp deleted file mode 100644 index 095365140f..0000000000 --- a/Source/Core/VideoBackends/OGL/TextureConverter.cpp +++ /dev/null @@ -1,170 +0,0 @@ -// Copyright 2008 Dolphin Emulator Project -// Licensed under GPLv2+ -// Refer to the license.txt file included. - -// Fast image conversion using OpenGL shaders. - -#include "VideoBackends/OGL/TextureConverter.h" - -#include - -#include "Common/CommonTypes.h" -#include "Common/FileUtil.h" -#include "Common/Logging/Log.h" -#include "Common/MsgHandler.h" -#include "Common/StringUtil.h" - -#include "Core/HW/Memmap.h" - -#include "VideoBackends/OGL/FramebufferManager.h" -#include "VideoBackends/OGL/OGLTexture.h" -#include "VideoBackends/OGL/ProgramShaderCache.h" -#include "VideoBackends/OGL/Render.h" -#include "VideoBackends/OGL/SamplerCache.h" -#include "VideoBackends/OGL/TextureCache.h" - -#include "VideoCommon/ImageWrite.h" -#include "VideoCommon/TextureConversionShader.h" -#include "VideoCommon/VideoCommon.h" -#include "VideoCommon/VideoConfig.h" - -namespace OGL -{ -namespace TextureConverter -{ -namespace -{ -struct EncodingProgram -{ - SHADER program; - GLint copy_position_uniform; - GLint y_scale_uniform; - GLint gamma_rcp_uniform; - GLint clamp_tb_uniform; - GLint filter_coefficients_uniform; -}; - -std::map s_encoding_programs; -std::unique_ptr s_encoding_render_texture; - -const int renderBufferWidth = EFB_WIDTH * 4; -const int renderBufferHeight = 1024; -} // namespace - -static EncodingProgram& GetOrCreateEncodingShader(const EFBCopyParams& params) -{ - auto iter = s_encoding_programs.find(params); - if (iter != s_encoding_programs.end()) - return iter->second; - - const char* shader = - TextureConversionShaderTiled::GenerateEncodingShader(params, APIType::OpenGL); - -#if defined(_DEBUG) || defined(DEBUGFAST) - if (g_ActiveConfig.iLog & CONF_SAVESHADERS && shader) - { - static int counter = 
0; - std::string filename = - StringFromFormat("%senc_%04i.txt", File::GetUserPath(D_DUMP_IDX).c_str(), counter++); - - SaveData(filename, shader); - } -#endif - - const char* VProgram = "void main()\n" - "{\n" - " vec2 rawpos = vec2(gl_VertexID&1, gl_VertexID&2);\n" - " gl_Position = vec4(rawpos*2.0-1.0, 0.0, 1.0);\n" - "}\n"; - - EncodingProgram program; - if (!ProgramShaderCache::CompileShader(program.program, VProgram, shader)) - PanicAlert("Failed to compile texture encoding shader."); - - program.copy_position_uniform = glGetUniformLocation(program.program.glprogid, "position"); - program.y_scale_uniform = glGetUniformLocation(program.program.glprogid, "y_scale"); - program.gamma_rcp_uniform = glGetUniformLocation(program.program.glprogid, "gamma_rcp"); - program.clamp_tb_uniform = glGetUniformLocation(program.program.glprogid, "clamp_tb"); - program.filter_coefficients_uniform = - glGetUniformLocation(program.program.glprogid, "filter_coefficients"); - return s_encoding_programs.emplace(params, program).first->second; -} - -void Init() -{ - s_encoding_render_texture = g_renderer->CreateTexture(TextureCache::GetEncodingTextureConfig()); -} - -void Shutdown() -{ - s_encoding_render_texture.reset(); - - for (auto& program : s_encoding_programs) - program.second.program.Destroy(); - s_encoding_programs.clear(); -} - -// dst_line_size, writeStride in bytes - -static void EncodeToRamUsingShader(GLuint srcTexture, AbstractStagingTexture* destAddr, - u32 dst_line_size, u32 dstHeight, u32 writeStride, - bool linearFilter, float y_scale) -{ - FramebufferManager::SetFramebuffer( - static_cast(s_encoding_render_texture.get())->GetFramebuffer()); - - // set source texture - glActiveTexture(GL_TEXTURE9); - glBindTexture(GL_TEXTURE_2D_ARRAY, srcTexture); - - // We also linear filtering for both box filtering and downsampling higher resolutions to 1x - // TODO: This only produces perfect downsampling for 2x IR, other resolutions will need more - // complex down filtering to 
average all pixels and produce the correct result. - // Also, box filtering won't be correct for anything other than 1x IR - if (linearFilter || g_renderer->GetEFBScale() != 1 || y_scale > 1.0f) - g_sampler_cache->BindLinearSampler(9); - else - g_sampler_cache->BindNearestSampler(9); - - glViewport(0, 0, (GLsizei)(dst_line_size / 4), (GLsizei)dstHeight); - - ProgramShaderCache::BindVertexFormat(nullptr); - glDrawArrays(GL_TRIANGLE_STRIP, 0, 4); - - MathUtil::Rectangle copy_rect(0, 0, dst_line_size / 4, dstHeight); - - destAddr->CopyFromTexture(s_encoding_render_texture.get(), copy_rect, 0, 0, copy_rect); -} - -void EncodeToRamFromTexture(AbstractStagingTexture* dest, const EFBCopyParams& params, - u32 native_width, u32 bytes_per_row, u32 num_blocks_y, - u32 memory_stride, const EFBRectangle& src_rect, bool scale_by_half, - float y_scale, float gamma, float clamp_top, float clamp_bottom, - const TextureCacheBase::CopyFilterCoefficientArray& filter_coefficients) -{ - g_renderer->ResetAPIState(); - - EncodingProgram& texconv_shader = GetOrCreateEncodingShader(params); - - texconv_shader.program.Bind(); - glUniform4i(texconv_shader.copy_position_uniform, src_rect.left, src_rect.top, native_width, - scale_by_half ? 2 : 1); - glUniform1f(texconv_shader.y_scale_uniform, y_scale); - glUniform1f(texconv_shader.gamma_rcp_uniform, 1.0f / gamma); - glUniform2f(texconv_shader.clamp_tb_uniform, clamp_top, clamp_bottom); - glUniform3f(texconv_shader.filter_coefficients_uniform, filter_coefficients[0], - filter_coefficients[1], filter_coefficients[2]); - - const GLuint read_texture = params.depth ? 
- FramebufferManager::ResolveAndGetDepthTarget(src_rect) : - FramebufferManager::ResolveAndGetRenderTarget(src_rect); - - EncodeToRamUsingShader(read_texture, dest, bytes_per_row, num_blocks_y, memory_stride, - scale_by_half && !params.depth, y_scale); - - g_renderer->RestoreAPIState(); -} - -} // namespace TextureConverter - -} // namespace OGL diff --git a/Source/Core/VideoBackends/OGL/TextureConverter.h b/Source/Core/VideoBackends/OGL/TextureConverter.h deleted file mode 100644 index 575fbf5bd0..0000000000 --- a/Source/Core/VideoBackends/OGL/TextureConverter.h +++ /dev/null @@ -1,33 +0,0 @@ -// Copyright 2008 Dolphin Emulator Project -// Licensed under GPLv2+ -// Refer to the license.txt file included. - -#pragma once - -#include "Common/CommonTypes.h" -#include "Common/GL/GLUtil.h" - -#include "VideoCommon/TextureCacheBase.h" -#include "VideoCommon/VideoCommon.h" - -struct EFBCopyParams; -class AbstractStagingTexture; - -namespace OGL -{ -// Converts textures between formats using shaders -// TODO: support multiple texture formats -namespace TextureConverter -{ -void Init(); -void Shutdown(); - -// returns size of the encoded data (in bytes) -void EncodeToRamFromTexture( - AbstractStagingTexture* dest, const EFBCopyParams& params, u32 native_width, u32 bytes_per_row, - u32 num_blocks_y, u32 memory_stride, const EFBRectangle& src_rect, bool scale_by_half, - float y_scale, float gamma, float clamp_top, float clamp_bottom, - const TextureCacheBase::CopyFilterCoefficientArray& filter_coefficients); -} - -} // namespace OGL diff --git a/Source/Core/VideoBackends/OGL/VertexManager.cpp b/Source/Core/VideoBackends/OGL/VertexManager.cpp index 3883a13595..543a85dac8 100644 --- a/Source/Core/VideoBackends/OGL/VertexManager.cpp +++ b/Source/Core/VideoBackends/OGL/VertexManager.cpp @@ -9,17 +9,14 @@ #include #include +#include "Common/Align.h" #include "Common/CommonTypes.h" -#include "Common/FileUtil.h" #include "Common/GL/GLExtensions/GLExtensions.h" -#include 
"Common/StringUtil.h" -#include "VideoBackends/OGL/BoundingBox.h" #include "VideoBackends/OGL/OGLPipeline.h" #include "VideoBackends/OGL/ProgramShaderCache.h" #include "VideoBackends/OGL/Render.h" #include "VideoBackends/OGL/StreamBuffer.h" -#include "VideoCommon/BoundingBox.h" #include "VideoCommon/IndexGenerator.h" #include "VideoCommon/Statistics.h" @@ -28,38 +25,127 @@ namespace OGL { -// This are the initially requested size for the buffers expressed in bytes -const u32 MAX_IBUFFER_SIZE = 2 * 1024 * 1024; -const u32 MAX_VBUFFER_SIZE = 32 * 1024 * 1024; - -VertexManager::VertexManager() : m_cpu_v_buffer(MAX_VBUFFER_SIZE), m_cpu_i_buffer(MAX_IBUFFER_SIZE) +static void CheckBufferBinding() { - CreateDeviceObjects(); + // The index buffer is part of the VAO state, therefore we need to bind it first. + if (!ProgramShaderCache::IsValidVertexFormatBound()) + { + ProgramShaderCache::BindVertexFormat( + static_cast(VertexLoaderManager::GetCurrentVertexFormat())); + } } +VertexManager::VertexManager() = default; + VertexManager::~VertexManager() { - DestroyDeviceObjects(); -} + if (g_ActiveConfig.backend_info.bSupportsPaletteConversion) + { + glDeleteTextures(static_cast(m_texel_buffer_views.size()), + m_texel_buffer_views.data()); + } -void VertexManager::CreateDeviceObjects() -{ - m_vertex_buffer = StreamBuffer::Create(GL_ARRAY_BUFFER, MAX_VBUFFER_SIZE); - m_index_buffer = StreamBuffer::Create(GL_ELEMENT_ARRAY_BUFFER, MAX_IBUFFER_SIZE); -} - -void VertexManager::DestroyDeviceObjects() -{ - m_vertex_buffer.reset(); + // VAO must be found when destroying the index buffer. 
+ CheckBufferBinding(); + m_texel_buffer.reset(); m_index_buffer.reset(); + m_vertex_buffer.reset(); +} + +bool VertexManager::Initialize() +{ + if (!VertexManagerBase::Initialize()) + return false; + + m_vertex_buffer = StreamBuffer::Create(GL_ARRAY_BUFFER, VERTEX_STREAM_BUFFER_SIZE); + m_index_buffer = StreamBuffer::Create(GL_ELEMENT_ARRAY_BUFFER, INDEX_STREAM_BUFFER_SIZE); + + if (g_ActiveConfig.backend_info.bSupportsPaletteConversion) + { + // The minimum MAX_TEXTURE_BUFFER_SIZE that the spec mandates is 65KB, we are asking for a 1MB + // buffer here. This buffer is also used as storage for undecoded textures when compute shader + // texture decoding is enabled, in which case the requested size is 32MB. + GLint max_buffer_size; + glGetIntegerv(GL_MAX_TEXTURE_BUFFER_SIZE, &max_buffer_size); + m_texel_buffer = StreamBuffer::Create( + GL_TEXTURE_BUFFER, std::min(max_buffer_size, static_cast(TEXEL_STREAM_BUFFER_SIZE))); + + // Allocate texture views backed by buffer. + static constexpr std::array, NUM_TEXEL_BUFFER_FORMATS> + format_mapping = {{ + {TEXEL_BUFFER_FORMAT_R8_UINT, GL_R8UI}, + {TEXEL_BUFFER_FORMAT_R16_UINT, GL_R16UI}, + {TEXEL_BUFFER_FORMAT_RGBA8_UINT, GL_RGBA8}, + {TEXEL_BUFFER_FORMAT_R32G32_UINT, GL_RG32UI}, + }}; + glGenTextures(static_cast(m_texel_buffer_views.size()), m_texel_buffer_views.data()); + glActiveTexture(GL_MUTABLE_TEXTURE_INDEX); + for (const auto& it : format_mapping) + { + glBindTexture(GL_TEXTURE_BUFFER, m_texel_buffer_views[it.first]); + glTexBuffer(GL_TEXTURE_BUFFER, it.second, m_texel_buffer->GetGLBufferId()); + } + } + + return true; } void VertexManager::UploadUtilityUniforms(const void* uniforms, u32 uniforms_size) { - ProgramShaderCache::InvalidateConstants(); + InvalidateConstants(); ProgramShaderCache::UploadConstants(uniforms, uniforms_size); } +bool VertexManager::UploadTexelBuffer(const void* data, u32 data_size, TexelBufferFormat format, + u32* out_offset) +{ + if (data_size > m_texel_buffer->GetSize()) + return false; + 
+ const u32 elem_size = GetTexelBufferElementSize(format); + const auto dst = m_texel_buffer->Map(data_size, elem_size); + std::memcpy(dst.first, data, data_size); + ADDSTAT(stats.thisFrame.bytesUniformStreamed, data_size); + *out_offset = dst.second / elem_size; + m_texel_buffer->Unmap(data_size); + + // Bind the correct view to the texel buffer slot. + glActiveTexture(GL_TEXTURE0); + glBindTexture(GL_TEXTURE_BUFFER, m_texel_buffer_views[static_cast(format)]); + Renderer::GetInstance()->InvalidateTextureBinding(0); + return true; +} + +bool VertexManager::UploadTexelBuffer(const void* data, u32 data_size, TexelBufferFormat format, + u32* out_offset, const void* palette_data, u32 palette_size, + TexelBufferFormat palette_format, u32* out_palette_offset) +{ + const u32 elem_size = GetTexelBufferElementSize(format); + const u32 palette_elem_size = GetTexelBufferElementSize(palette_format); + const u32 reserve_size = data_size + palette_size + palette_elem_size; + if (reserve_size > m_texel_buffer->GetSize()) + return false; + + const auto dst = m_texel_buffer->Map(reserve_size, elem_size); + const u32 palette_byte_offset = Common::AlignUp(data_size, palette_elem_size); + std::memcpy(dst.first, data, data_size); + std::memcpy(dst.first + palette_byte_offset, palette_data, palette_size); + ADDSTAT(stats.thisFrame.bytesUniformStreamed, palette_byte_offset + palette_size); + *out_offset = dst.second / elem_size; + *out_palette_offset = (dst.second + palette_byte_offset) / palette_elem_size; + m_texel_buffer->Unmap(palette_byte_offset + palette_size); + + glActiveTexture(GL_TEXTURE0); + glBindTexture(GL_TEXTURE_BUFFER, m_texel_buffer_views[static_cast(format)]); + Renderer::GetInstance()->InvalidateTextureBinding(0); + + glActiveTexture(GL_TEXTURE1); + glBindTexture(GL_TEXTURE_BUFFER, m_texel_buffer_views[static_cast(palette_format)]); + Renderer::GetInstance()->InvalidateTextureBinding(1); + + return true; +} + GLuint VertexManager::GetVertexBufferHandle() const { return 
m_vertex_buffer->m_buffer; @@ -70,37 +156,16 @@ GLuint VertexManager::GetIndexBufferHandle() const return m_index_buffer->m_buffer; } -static void CheckBufferBinding() +void VertexManager::ResetBuffer(u32 vertex_stride) { - // The index buffer is part of the VAO state, therefore we need to bind it first. - if (!ProgramShaderCache::IsValidVertexFormatBound()) - { - ProgramShaderCache::BindVertexFormat( - static_cast(VertexLoaderManager::GetCurrentVertexFormat())); - } -} + CheckBufferBinding(); -void VertexManager::ResetBuffer(u32 vertex_stride, bool cull_all) -{ - if (cull_all) - { - // This buffer isn't getting sent to the GPU. Just allocate it on the cpu. - m_cur_buffer_pointer = m_base_buffer_pointer = m_cpu_v_buffer.data(); - m_end_buffer_pointer = m_base_buffer_pointer + m_cpu_v_buffer.size(); + auto buffer = m_vertex_buffer->Map(MAXVBUFFERSIZE, vertex_stride); + m_cur_buffer_pointer = m_base_buffer_pointer = buffer.first; + m_end_buffer_pointer = buffer.first + MAXVBUFFERSIZE; - IndexGenerator::Start((u16*)m_cpu_i_buffer.data()); - } - else - { - CheckBufferBinding(); - - auto buffer = m_vertex_buffer->Map(MAXVBUFFERSIZE, vertex_stride); - m_cur_buffer_pointer = m_base_buffer_pointer = buffer.first; - m_end_buffer_pointer = buffer.first + MAXVBUFFERSIZE; - - buffer = m_index_buffer->Map(MAXIBUFFERSIZE * sizeof(u16)); - IndexGenerator::Start((u16*)buffer.first); - } + buffer = m_index_buffer->Map(MAXIBUFFERSIZE * sizeof(u16)); + IndexGenerator::Start((u16*)buffer.first); } void VertexManager::CommitBuffer(u32 num_vertices, u32 vertex_stride, u32 num_indices, @@ -120,31 +185,8 @@ void VertexManager::CommitBuffer(u32 num_vertices, u32 vertex_stride, u32 num_in ADDSTAT(stats.thisFrame.bytesIndexStreamed, index_data_size); } -void VertexManager::UploadConstants() +void VertexManager::UploadUniforms() { ProgramShaderCache::UploadConstants(); } - -void VertexManager::DrawCurrentBatch(u32 base_index, u32 num_indices, u32 base_vertex) -{ - if (::BoundingBox::active && 
!g_Config.BBoxUseFragmentShaderImplementation()) - { - glEnable(GL_STENCIL_TEST); - } - - if (m_current_pipeline_object) - { - static_cast(g_renderer.get())->SetPipeline(m_current_pipeline_object); - static_cast(g_renderer.get())->DrawIndexed(base_index, num_indices, base_vertex); - } - - if (::BoundingBox::active && !g_Config.BBoxUseFragmentShaderImplementation()) - { - OGL::BoundingBox::StencilWasUpdated(); - glDisable(GL_STENCIL_TEST); - } - - g_Config.iSaveTargetId++; - ClearEFBCache(); -} } // namespace OGL diff --git a/Source/Core/VideoBackends/OGL/VertexManager.h b/Source/Core/VideoBackends/OGL/VertexManager.h index 388c559716..b9b410855c 100644 --- a/Source/Core/VideoBackends/OGL/VertexManager.h +++ b/Source/Core/VideoBackends/OGL/VertexManager.h @@ -4,8 +4,8 @@ #pragma once +#include #include -#include #include "Common/CommonTypes.h" #include "Common/GL/GLUtil.h" @@ -26,35 +26,34 @@ public: // Handles the OpenGL details of drawing lots of vertices quickly. // Other functionality is moving out. 
-class VertexManager : public VertexManagerBase +class VertexManager final : public VertexManagerBase { public: VertexManager(); - ~VertexManager(); + ~VertexManager() override; - std::unique_ptr - CreateNativeVertexFormat(const PortableVertexDeclaration& vtx_decl) override; + bool Initialize() override; void UploadUtilityUniforms(const void* uniforms, u32 uniforms_size) override; + bool UploadTexelBuffer(const void* data, u32 data_size, TexelBufferFormat format, + u32* out_offset) override; + bool UploadTexelBuffer(const void* data, u32 data_size, TexelBufferFormat format, u32* out_offset, + const void* palette_data, u32 palette_size, + TexelBufferFormat palette_format, u32* out_palette_offset) override; GLuint GetVertexBufferHandle() const; GLuint GetIndexBufferHandle() const; protected: - void CreateDeviceObjects() override; - void DestroyDeviceObjects() override; - void ResetBuffer(u32 vertex_stride, bool cull_all) override; + void ResetBuffer(u32 vertex_stride) override; void CommitBuffer(u32 num_vertices, u32 vertex_stride, u32 num_indices, u32* out_base_vertex, u32* out_base_index) override; - void UploadConstants() override; - void DrawCurrentBatch(u32 base_index, u32 num_indices, u32 base_vertex) override; + void UploadUniforms() override; private: std::unique_ptr m_vertex_buffer; std::unique_ptr m_index_buffer; - - // Alternative buffers in CPU memory for primatives we are going to discard. 
- std::vector m_cpu_v_buffer; - std::vector m_cpu_i_buffer; + std::unique_ptr m_texel_buffer; + std::array m_texel_buffer_views{}; }; } // namespace OGL diff --git a/Source/Core/VideoBackends/OGL/main.cpp b/Source/Core/VideoBackends/OGL/main.cpp index 3624a7fd7a..76cd7b2ca6 100644 --- a/Source/Core/VideoBackends/OGL/main.cpp +++ b/Source/Core/VideoBackends/OGL/main.cpp @@ -50,13 +50,11 @@ Make AA apply instantly during gameplay if possible #include "VideoBackends/OGL/ProgramShaderCache.h" #include "VideoBackends/OGL/Render.h" #include "VideoBackends/OGL/SamplerCache.h" -#include "VideoBackends/OGL/TextureCache.h" -#include "VideoBackends/OGL/TextureConverter.h" #include "VideoBackends/OGL/VertexManager.h" #include "VideoBackends/OGL/VideoBackend.h" -#include "VideoCommon/OnScreenDisplay.h" -#include "VideoCommon/VideoCommon.h" +#include "VideoCommon/FramebufferManager.h" +#include "VideoCommon/TextureCacheBase.h" #include "VideoCommon/VideoConfig.h" namespace OGL @@ -78,6 +76,7 @@ void VideoBackend::InitBackendInfo() { g_Config.backend_info.api_type = APIType::OpenGL; g_Config.backend_info.MaxTextureSize = 16384; + g_Config.backend_info.bUsesLowerLeftOrigin = true; g_Config.backend_info.bSupportsExclusiveFullscreen = false; g_Config.backend_info.bSupportsOversizedViewports = true; g_Config.backend_info.bSupportsGeometryShaders = true; @@ -89,6 +88,7 @@ void VideoBackend::InitBackendInfo() g_Config.backend_info.bSupportsLogicOp = true; g_Config.backend_info.bSupportsMultithreading = false; g_Config.backend_info.bSupportsCopyToVram = true; + g_Config.backend_info.bSupportsLargePoints = true; // TODO: There is a bug here, if texel buffers are not supported the graphics options // will show the option when it is not supported. 
The only way around this would be @@ -173,17 +173,26 @@ bool VideoBackend::Initialize(const WindowSystemInfo& wsi) return false; g_renderer = std::make_unique(std::move(main_gl_context), wsi.render_surface_scale); - g_vertex_manager = std::make_unique(); - g_perf_query = GetPerfQuery(); ProgramShaderCache::Init(); - g_texture_cache = std::make_unique(); - g_sampler_cache = std::make_unique(); + g_vertex_manager = std::make_unique(); g_shader_cache = std::make_unique(); - if (!g_renderer->Initialize()) + g_framebuffer_manager = std::make_unique(); + g_perf_query = GetPerfQuery(); + g_texture_cache = std::make_unique(); + g_sampler_cache = std::make_unique(); + BoundingBox::Init(); + + if (!g_vertex_manager->Initialize() || !g_shader_cache->Initialize() || + !g_renderer->Initialize() || !g_framebuffer_manager->Initialize() || + !g_texture_cache->Initialize()) + { + PanicAlert("Failed to initialize renderer classes"); + Shutdown(); return false; - TextureConverter::Init(); - BoundingBox::Init(g_renderer->GetTargetWidth(), g_renderer->GetTargetHeight()); - return g_shader_cache->Initialize(); + } + + g_shader_cache->InitializeShaderCache(); + return true; } void VideoBackend::Shutdown() @@ -191,13 +200,13 @@ void VideoBackend::Shutdown() g_shader_cache->Shutdown(); g_renderer->Shutdown(); BoundingBox::Shutdown(); - TextureConverter::Shutdown(); - g_shader_cache.reset(); g_sampler_cache.reset(); g_texture_cache.reset(); - ProgramShaderCache::Shutdown(); g_perf_query.reset(); g_vertex_manager.reset(); + g_framebuffer_manager.reset(); + g_shader_cache.reset(); + ProgramShaderCache::Shutdown(); g_renderer.reset(); ShutdownShared(); } diff --git a/Source/Core/VideoBackends/Software/SWRenderer.cpp b/Source/Core/VideoBackends/Software/SWRenderer.cpp index 6369b35b20..4b57b9b551 100644 --- a/Source/Core/VideoBackends/Software/SWRenderer.cpp +++ b/Source/Core/VideoBackends/Software/SWRenderer.cpp @@ -19,11 +19,15 @@ #include "VideoCommon/AbstractPipeline.h" #include 
"VideoCommon/AbstractShader.h" +#include "VideoCommon/AbstractTexture.h" #include "VideoCommon/BoundingBox.h" +#include "VideoCommon/NativeVertexFormat.h" #include "VideoCommon/OnScreenDisplay.h" #include "VideoCommon/VideoBackendBase.h" #include "VideoCommon/VideoConfig.h" +namespace SW +{ SWRenderer::SWRenderer(std::unique_ptr window) : ::Renderer(static_cast(MAX_XFB_WIDTH), static_cast(MAX_XFB_HEIGHT), 1.0f, AbstractTextureFormat::RGBA8), @@ -38,21 +42,20 @@ bool SWRenderer::IsHeadless() const std::unique_ptr SWRenderer::CreateTexture(const TextureConfig& config) { - return std::make_unique(config); + return std::make_unique(config); } std::unique_ptr SWRenderer::CreateStagingTexture(StagingTextureType type, const TextureConfig& config) { - return std::make_unique(type, config); + return std::make_unique(type, config); } std::unique_ptr -SWRenderer::CreateFramebuffer(const AbstractTexture* color_attachment, - const AbstractTexture* depth_attachment) +SWRenderer::CreateFramebuffer(AbstractTexture* color_attachment, AbstractTexture* depth_attachment) { - return SW::SWFramebuffer::Create(static_cast(color_attachment), - static_cast(depth_attachment)); + return SWFramebuffer::Create(static_cast(color_attachment), + static_cast(depth_attachment)); } class SWShader final : public AbstractShader @@ -132,18 +135,15 @@ void SWRenderer::BBoxWrite(int index, u16 value) BoundingBox::coords[index] = value; } -TargetRectangle SWRenderer::ConvertEFBRectangle(const EFBRectangle& rc) -{ - TargetRectangle result; - result.left = rc.left; - result.top = rc.top; - result.right = rc.right; - result.bottom = rc.bottom; - return result; -} - void SWRenderer::ClearScreen(const EFBRectangle& rc, bool colorEnable, bool alphaEnable, bool zEnable, u32 color, u32 z) { EfbCopy::ClearEfb(); } + +std::unique_ptr +SWRenderer::CreateNativeVertexFormat(const PortableVertexDeclaration& vtx_decl) +{ + return std::make_unique(vtx_decl); +} +} // namespace SW diff --git 
a/Source/Core/VideoBackends/Software/SWRenderer.h b/Source/Core/VideoBackends/Software/SWRenderer.h index 88c5ece360..a06ccaecd2 100644 --- a/Source/Core/VideoBackends/Software/SWRenderer.h +++ b/Source/Core/VideoBackends/Software/SWRenderer.h @@ -12,7 +12,9 @@ class SWOGLWindow; -class SWRenderer : public Renderer +namespace SW +{ +class SWRenderer final : public Renderer { public: SWRenderer(std::unique_ptr window); @@ -23,13 +25,14 @@ public: std::unique_ptr CreateStagingTexture(StagingTextureType type, const TextureConfig& config) override; std::unique_ptr - CreateFramebuffer(const AbstractTexture* color_attachment, - const AbstractTexture* depth_attachment) override; + CreateFramebuffer(AbstractTexture* color_attachment, AbstractTexture* depth_attachment) override; std::unique_ptr CreateShaderFromSource(ShaderStage stage, const char* source, size_t length) override; std::unique_ptr CreateShaderFromBinary(ShaderStage stage, const void* data, size_t length) override; + std::unique_ptr + CreateNativeVertexFormat(const PortableVertexDeclaration& vtx_decl) override; std::unique_ptr CreatePipeline(const AbstractPipelineConfig& config) override; u32 AccessEFB(EFBAccessType type, u32 x, u32 y, u32 poke_data) override; @@ -37,15 +40,18 @@ public: u16 BBoxRead(int index) override; void BBoxWrite(int index, u16 value) override; - TargetRectangle ConvertEFBRectangle(const EFBRectangle& rc) override; - void RenderXFBToScreen(const AbstractTexture* texture, const EFBRectangle& rc) override; void ClearScreen(const EFBRectangle& rc, bool colorEnable, bool alphaEnable, bool zEnable, u32 color, u32 z) override; - void ReinterpretPixelData(unsigned int convtype) override {} + void ReinterpretPixelData(EFBReinterpretType convtype) override {} + + void ScaleTexture(AbstractFramebuffer* dst_framebuffer, const MathUtil::Rectangle& dst_rect, + const AbstractTexture* src_texture, + const MathUtil::Rectangle& src_rect) override; private: std::unique_ptr m_window; }; +} // namespace SW 
diff --git a/Source/Core/VideoBackends/Software/SWTexture.cpp b/Source/Core/VideoBackends/Software/SWTexture.cpp index eb450d642f..466da1efd5 100644 --- a/Source/Core/VideoBackends/Software/SWTexture.cpp +++ b/Source/Core/VideoBackends/Software/SWTexture.cpp @@ -3,6 +3,7 @@ // Refer to the license.txt file included. #include "VideoBackends/Software/SWTexture.h" +#include "VideoBackends/Software/SWRenderer.h" #include #include "Common/Assert.h" @@ -45,6 +46,25 @@ void CopyTextureData(const TextureConfig& src_config, const u8* src_ptr, u32 src dst_ptr += dst_stride; } } +} // namespace + +void SWRenderer::ScaleTexture(AbstractFramebuffer* dst_framebuffer, + const MathUtil::Rectangle& dst_rect, + const AbstractTexture* src_texture, + const MathUtil::Rectangle& src_rect) +{ + const SWTexture* software_source_texture = static_cast(src_texture); + SWTexture* software_dest_texture = static_cast(dst_framebuffer->GetColorAttachment()); + + std::vector source_pixels; + source_pixels.resize(src_rect.GetHeight() * src_rect.GetWidth() * 4); + memcpy(source_pixels.data(), software_source_texture->GetData(), source_pixels.size()); + + std::vector destination_pixels; + destination_pixels.resize(dst_rect.GetHeight() * dst_rect.GetWidth() * 4); + + CopyRegion(source_pixels.data(), src_rect, destination_pixels.data(), dst_rect); + memcpy(software_dest_texture->GetData(), destination_pixels.data(), destination_pixels.size()); } SWTexture::SWTexture(const TextureConfig& tex_config) : AbstractTexture(tex_config) @@ -62,30 +82,6 @@ void SWTexture::CopyRectangleFromTexture(const AbstractTexture* src, src_rect.left, src_rect.top, src_rect.GetWidth(), src_rect.GetHeight(), m_config, m_data.data(), dst_rect.left, dst_rect.top); } -void SWTexture::ScaleRectangleFromTexture(const AbstractTexture* source, - const MathUtil::Rectangle& srcrect, - const MathUtil::Rectangle& dstrect) -{ - const SWTexture* software_source_texture = static_cast(source); - - if (srcrect.GetWidth() == 
dstrect.GetWidth() && srcrect.GetHeight() == dstrect.GetHeight()) - { - m_data.assign(software_source_texture->GetData(), - software_source_texture->GetData() + m_data.size()); - } - else - { - std::vector source_pixels; - source_pixels.resize(srcrect.GetHeight() * srcrect.GetWidth() * 4); - memcpy(source_pixels.data(), software_source_texture->GetData(), source_pixels.size()); - - std::vector destination_pixels; - destination_pixels.resize(dstrect.GetHeight() * dstrect.GetWidth() * 4); - - CopyRegion(source_pixels.data(), srcrect, destination_pixels.data(), dstrect); - memcpy(GetData(), destination_pixels.data(), destination_pixels.size()); - } -} void SWTexture::ResolveFromTexture(const AbstractTexture* src, const MathUtil::Rectangle& rect, u32 layer, u32 level) { @@ -153,14 +149,16 @@ void SWStagingTexture::Flush() m_needs_flush = false; } -SWFramebuffer::SWFramebuffer(AbstractTextureFormat color_format, AbstractTextureFormat depth_format, +SWFramebuffer::SWFramebuffer(AbstractTexture* color_attachment, AbstractTexture* depth_attachment, + AbstractTextureFormat color_format, AbstractTextureFormat depth_format, u32 width, u32 height, u32 layers, u32 samples) - : AbstractFramebuffer(color_format, depth_format, width, height, layers, samples) + : AbstractFramebuffer(color_attachment, depth_attachment, color_format, depth_format, width, + height, layers, samples) { } -std::unique_ptr SWFramebuffer::Create(const SWTexture* color_attachment, - const SWTexture* depth_attachment) +std::unique_ptr SWFramebuffer::Create(SWTexture* color_attachment, + SWTexture* depth_attachment) { if (!ValidateConfig(color_attachment, depth_attachment)) return nullptr; @@ -175,8 +173,8 @@ std::unique_ptr SWFramebuffer::Create(const SWTexture* color_atta const u32 layers = either_attachment->GetLayers(); const u32 samples = either_attachment->GetSamples(); - return std::make_unique(color_format, depth_format, width, height, layers, - samples); + return std::make_unique(color_attachment, 
depth_attachment, color_format, + depth_format, width, height, layers, samples); } } // namespace SW diff --git a/Source/Core/VideoBackends/Software/SWTexture.h b/Source/Core/VideoBackends/Software/SWTexture.h index f9d5c50485..7990600e81 100644 --- a/Source/Core/VideoBackends/Software/SWTexture.h +++ b/Source/Core/VideoBackends/Software/SWTexture.h @@ -25,9 +25,6 @@ public: const MathUtil::Rectangle& src_rect, u32 src_layer, u32 src_level, const MathUtil::Rectangle& dst_rect, u32 dst_layer, u32 dst_level) override; - void ScaleRectangleFromTexture(const AbstractTexture* source, - const MathUtil::Rectangle& srcrect, - const MathUtil::Rectangle& dstrect) override; void ResolveFromTexture(const AbstractTexture* src, const MathUtil::Rectangle& rect, u32 layer, u32 level) override; void Load(u32 level, u32 width, u32 height, u32 row_length, const u8* buffer, @@ -66,12 +63,13 @@ private: class SWFramebuffer final : public AbstractFramebuffer { public: - explicit SWFramebuffer(AbstractTextureFormat color_format, AbstractTextureFormat depth_format, + explicit SWFramebuffer(AbstractTexture* color_attachment, AbstractTexture* depth_attachment, + AbstractTextureFormat color_format, AbstractTextureFormat depth_format, u32 width, u32 height, u32 layers, u32 samples); ~SWFramebuffer() override = default; - static std::unique_ptr Create(const SWTexture* color_attachment, - const SWTexture* depth_attachment); + static std::unique_ptr Create(SWTexture* color_attachment, + SWTexture* depth_attachment); }; } // namespace SW diff --git a/Source/Core/VideoBackends/Software/SWVertexLoader.cpp b/Source/Core/VideoBackends/Software/SWVertexLoader.cpp index 4a53b4041c..20c6a1f700 100644 --- a/Source/Core/VideoBackends/Software/SWVertexLoader.cpp +++ b/Source/Core/VideoBackends/Software/SWVertexLoader.cpp @@ -14,6 +14,7 @@ #include "VideoBackends/Software/DebugUtil.h" #include "VideoBackends/Software/NativeVertexFormat.h" #include "VideoBackends/Software/Rasterizer.h" +#include 
"VideoBackends/Software/SWRenderer.h" #include "VideoBackends/Software/Tev.h" #include "VideoBackends/Software/TransformUnit.h" @@ -27,48 +28,9 @@ #include "VideoCommon/VideoConfig.h" #include "VideoCommon/XFMemory.h" -class NullNativeVertexFormat : public NativeVertexFormat -{ -public: - NullNativeVertexFormat(const PortableVertexDeclaration& _vtx_decl) { vtx_decl = _vtx_decl; } -}; +SWVertexLoader::SWVertexLoader() = default; -std::unique_ptr -SWVertexLoader::CreateNativeVertexFormat(const PortableVertexDeclaration& vtx_decl) -{ - return std::make_unique(vtx_decl); -} - -SWVertexLoader::SWVertexLoader() - : m_local_vertex_buffer(MAXVBUFFERSIZE), m_local_index_buffer(MAXIBUFFERSIZE) -{ -} - -SWVertexLoader::~SWVertexLoader() -{ -} - -void SWVertexLoader::UploadUtilityUniforms(const void* uniforms, u32 uniforms_size) -{ -} - -void SWVertexLoader::ResetBuffer(u32 vertex_stride, bool cull_all) -{ - m_cur_buffer_pointer = m_base_buffer_pointer = m_local_vertex_buffer.data(); - m_end_buffer_pointer = m_cur_buffer_pointer + m_local_vertex_buffer.size(); - IndexGenerator::Start(m_local_index_buffer.data()); -} - -void SWVertexLoader::CommitBuffer(u32 num_vertices, u32 vertex_stride, u32 num_indices, - u32* out_base_vertex, u32* out_base_index) -{ - *out_base_vertex = 0; - *out_base_index = 0; -} - -void SWVertexLoader::UploadConstants() -{ -} +SWVertexLoader::~SWVertexLoader() = default; void SWVertexLoader::DrawCurrentBatch(u32 base_index, u32 num_indices, u32 base_vertex) { @@ -104,7 +66,7 @@ void SWVertexLoader::DrawCurrentBatch(u32 base_index, u32 num_indices, u32 base_ for (u32 i = 0; i < IndexGenerator::GetIndexLen(); i++) { - const u16 index = m_local_index_buffer[i]; + const u16 index = m_cpu_index_buffer[i]; memset(static_cast(&m_vertex), 0, sizeof(m_vertex)); // Super Mario Sunshine requires those to be zero for those debug boxes. 
@@ -224,8 +186,8 @@ static void ReadVertexAttribute(T* dst, DataReader src, const AttributeFormat& f void SWVertexLoader::ParseVertex(const PortableVertexDeclaration& vdec, int index) { - DataReader src(m_local_vertex_buffer.data(), - m_local_vertex_buffer.data() + m_local_vertex_buffer.size()); + DataReader src(m_cpu_vertex_buffer.data(), + m_cpu_vertex_buffer.data() + m_cpu_vertex_buffer.size()); src.Skip(index * vdec.stride); ReadVertexAttribute(&m_vertex.position[0], src, vdec.position, 0, 3, false); diff --git a/Source/Core/VideoBackends/Software/SWVertexLoader.h b/Source/Core/VideoBackends/Software/SWVertexLoader.h index dabbdef168..55316e5134 100644 --- a/Source/Core/VideoBackends/Software/SWVertexLoader.h +++ b/Source/Core/VideoBackends/Software/SWVertexLoader.h @@ -20,24 +20,12 @@ public: SWVertexLoader(); ~SWVertexLoader(); - std::unique_ptr - CreateNativeVertexFormat(const PortableVertexDeclaration& vdec) override; - - void UploadUtilityUniforms(const void* uniforms, u32 uniforms_size) override; - protected: - void ResetBuffer(u32 vertex_stride, bool cull_all) override; - void CommitBuffer(u32 num_vertices, u32 vertex_stride, u32 num_indices, u32* out_base_vertex, - u32* out_base_index) override; - void UploadConstants() override; void DrawCurrentBatch(u32 base_index, u32 num_indices, u32 base_vertex) override; void SetFormat(u8 attributeIndex, u8 primitiveType); void ParseVertex(const PortableVertexDeclaration& vdec, int index); - std::vector m_local_vertex_buffer; - std::vector m_local_index_buffer; - InputVertexData m_vertex; SetupUnit m_setup_unit; diff --git a/Source/Core/VideoBackends/Software/SWmain.cpp b/Source/Core/VideoBackends/Software/SWmain.cpp index 0ab16bdfa8..c47072d4af 100644 --- a/Source/Core/VideoBackends/Software/SWmain.cpp +++ b/Source/Core/VideoBackends/Software/SWmain.cpp @@ -10,6 +10,7 @@ #include "Common/Common.h" #include "Common/CommonTypes.h" #include "Common/GL/GLContext.h" +#include "Common/MsgHandler.h" #include 
"VideoBackends/Software/Clipper.h" #include "VideoBackends/Software/DebugUtil.h" @@ -22,14 +23,11 @@ #include "VideoBackends/Software/TextureCache.h" #include "VideoBackends/Software/VideoBackend.h" -#include "VideoCommon/FramebufferManagerBase.h" -#include "VideoCommon/OnScreenDisplay.h" +#include "VideoCommon/FramebufferManager.h" #include "VideoCommon/TextureCacheBase.h" #include "VideoCommon/VideoCommon.h" #include "VideoCommon/VideoConfig.h" -#define VSYNC_ENABLED 0 - namespace SW { class PerfQuery : public PerfQueryBase @@ -59,6 +57,7 @@ void VideoSoftware::InitBackendInfo() { g_Config.backend_info.api_type = APIType::Nothing; g_Config.backend_info.MaxTextureSize = 16384; + g_Config.backend_info.bUsesLowerLeftOrigin = false; g_Config.backend_info.bSupports3DVision = false; g_Config.backend_info.bSupportsDualSourceBlend = true; g_Config.backend_info.bSupportsEarlyZ = true; @@ -70,6 +69,7 @@ void VideoSoftware::InitBackendInfo() g_Config.backend_info.bSupportsST3CTextures = false; g_Config.backend_info.bSupportsBPTCTextures = false; g_Config.backend_info.bSupportsCopyToVram = false; + g_Config.backend_info.bSupportsLargePoints = false; g_Config.backend_info.bSupportsFramebufferFetch = false; g_Config.backend_info.bSupportsBackgroundCompiling = false; g_Config.backend_info.bSupportsLogicOp = true; @@ -92,10 +92,22 @@ bool VideoSoftware::Initialize(const WindowSystemInfo& wsi) g_renderer = std::make_unique(std::move(window)); g_vertex_manager = std::make_unique(); + g_shader_cache = std::make_unique(); + g_framebuffer_manager = std::make_unique(); g_perf_query = std::make_unique(); g_texture_cache = std::make_unique(); - g_shader_cache = std::make_unique(); - return g_renderer->Initialize() && g_shader_cache->Initialize(); + + if (!g_vertex_manager->Initialize() || !g_shader_cache->Initialize() || + !g_renderer->Initialize() || !g_framebuffer_manager->Initialize() || + !g_texture_cache->Initialize()) + { + PanicAlert("Failed to initialize renderer classes"); + 
Shutdown(); + return false; + } + + g_shader_cache->InitializeShaderCache(); + return true; } void VideoSoftware::Shutdown() @@ -107,9 +119,10 @@ void VideoSoftware::Shutdown() g_renderer->Shutdown(); DebugUtil::Shutdown(); - g_framebuffer_manager.reset(); g_texture_cache.reset(); g_perf_query.reset(); + g_framebuffer_manager.reset(); + g_shader_cache.reset(); g_vertex_manager.reset(); g_renderer.reset(); ShutdownShared(); diff --git a/Source/Core/VideoBackends/Software/TextureCache.h b/Source/Core/VideoBackends/Software/TextureCache.h index 06bfbf73fa..8d2e26d508 100644 --- a/Source/Core/VideoBackends/Software/TextureCache.h +++ b/Source/Core/VideoBackends/Software/TextureCache.h @@ -9,27 +9,19 @@ namespace SW { class TextureCache : public TextureCacheBase { -public: - bool CompileShaders() override { return true; } - void DeleteShaders() override {} - void ConvertTexture(TCacheEntry* entry, TCacheEntry* unconverted, const void* palette, - TLUTFormat format) override - { - } +protected: void CopyEFB(AbstractStagingTexture* dst, const EFBCopyParams& params, u32 native_width, u32 bytes_per_row, u32 num_blocks_y, u32 memory_stride, const EFBRectangle& src_rect, bool scale_by_half, float y_scale, float gamma, bool clamp_top, bool clamp_bottom, - const CopyFilterCoefficientArray& filter_coefficients) override + const EFBCopyFilterCoefficients& filter_coefficients) override { TextureEncoder::Encode(dst, params, native_width, bytes_per_row, num_blocks_y, memory_stride, src_rect, scale_by_half, y_scale, gamma); } - -private: void CopyEFBToCacheEntry(TCacheEntry* entry, bool is_depth_copy, const EFBRectangle& src_rect, bool scale_by_half, EFBCopyFormat dst_format, bool is_intensity, float gamma, bool clamp_top, bool clamp_bottom, - const CopyFilterCoefficientArray& filter_coefficients) override + const EFBCopyFilterCoefficients& filter_coefficients) override { // TODO: If we ever want to "fake" vram textures, we would need to implement this } diff --git 
a/Source/Core/VideoBackends/Vulkan/BoundingBox.cpp b/Source/Core/VideoBackends/Vulkan/BoundingBox.cpp index 169404d7ca..9e7da60249 100644 --- a/Source/Core/VideoBackends/Vulkan/BoundingBox.cpp +++ b/Source/Core/VideoBackends/Vulkan/BoundingBox.cpp @@ -13,7 +13,6 @@ #include "VideoBackends/Vulkan/Renderer.h" #include "VideoBackends/Vulkan/StagingBuffer.h" #include "VideoBackends/Vulkan/StateTracker.h" -#include "VideoBackends/Vulkan/Util.h" #include "VideoBackends/Vulkan/VulkanContext.h" namespace Vulkan @@ -33,7 +32,7 @@ BoundingBox::~BoundingBox() bool BoundingBox::Initialize() { - if (!g_vulkan_context->SupportsBoundingBox()) + if (!g_ActiveConfig.backend_info.bSupportsBBox) { WARN_LOG(VIDEO, "Vulkan: Bounding box is unsupported by your device."); return true; @@ -45,6 +44,8 @@ bool BoundingBox::Initialize() if (!CreateReadbackBuffer()) return false; + // Bind bounding box to state tracker + StateTracker::GetInstance()->SetSSBO(m_gpu_buffer, 0, BUFFER_SIZE); return true; } @@ -79,7 +80,7 @@ void BoundingBox::Flush() StateTracker::GetInstance()->EndRenderPass(); // Ensure GPU buffer is in a state where it can be transferred to. - Util::BufferMemoryBarrier( + StagingBuffer::BufferMemoryBarrier( g_command_buffer_mgr->GetCurrentCommandBuffer(), m_gpu_buffer, VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_TRANSFER_WRITE_BIT, 0, BUFFER_SIZE, VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT); @@ -95,7 +96,7 @@ void BoundingBox::Flush() // Restore fragment shader access to the buffer. 
if (updated_buffer) { - Util::BufferMemoryBarrier( + StagingBuffer::BufferMemoryBarrier( g_command_buffer_mgr->GetCurrentCommandBuffer(), m_gpu_buffer, VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT, 0, BUFFER_SIZE, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT); @@ -219,7 +220,7 @@ void BoundingBox::Readback() StateTracker::GetInstance()->EndRenderPass(); // Ensure all writes are completed to the GPU buffer prior to the transfer. - Util::BufferMemoryBarrier( + StagingBuffer::BufferMemoryBarrier( g_command_buffer_mgr->GetCurrentCommandBuffer(), m_gpu_buffer, VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_TRANSFER_READ_BIT, 0, BUFFER_SIZE, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT); @@ -233,15 +234,15 @@ void BoundingBox::Readback() m_readback_buffer->GetBuffer(), 1, ®ion); // Restore GPU buffer access. - Util::BufferMemoryBarrier(g_command_buffer_mgr->GetCurrentCommandBuffer(), m_gpu_buffer, - VK_ACCESS_TRANSFER_READ_BIT, - VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT, 0, BUFFER_SIZE, - VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT); + StagingBuffer::BufferMemoryBarrier( + g_command_buffer_mgr->GetCurrentCommandBuffer(), m_gpu_buffer, VK_ACCESS_TRANSFER_READ_BIT, + VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT, 0, BUFFER_SIZE, + VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT); m_readback_buffer->FlushGPUCache(g_command_buffer_mgr->GetCurrentCommandBuffer(), VK_ACCESS_TRANSFER_WRITE_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT); // Wait until these commands complete. - Util::ExecuteCurrentCommandsAndRestoreState(false, true); + Renderer::GetInstance()->ExecuteCommandBuffer(false, true); // Cache is now valid. 
m_readback_buffer->InvalidateCPUCache(); diff --git a/Source/Core/VideoBackends/Vulkan/BoundingBox.h b/Source/Core/VideoBackends/Vulkan/BoundingBox.h index 872f49aa31..e5fda1066a 100644 --- a/Source/Core/VideoBackends/Vulkan/BoundingBox.h +++ b/Source/Core/VideoBackends/Vulkan/BoundingBox.h @@ -24,9 +24,6 @@ public: bool Initialize(); - VkBuffer GetGPUBuffer() const { return m_gpu_buffer; } - VkDeviceSize GetGPUBufferOffset() const { return 0; } - VkDeviceSize GetGPUBufferSize() const { return BUFFER_SIZE; } s32 Get(size_t index); void Set(size_t index, s32 value); diff --git a/Source/Core/VideoBackends/Vulkan/CMakeLists.txt b/Source/Core/VideoBackends/Vulkan/CMakeLists.txt index 5746cd1aaa..014a3378c8 100644 --- a/Source/Core/VideoBackends/Vulkan/CMakeLists.txt +++ b/Source/Core/VideoBackends/Vulkan/CMakeLists.txt @@ -1,21 +1,14 @@ add_library(videovulkan BoundingBox.cpp CommandBufferManager.cpp - FramebufferManager.cpp ObjectCache.cpp PerfQuery.cpp - PostProcessing.cpp Renderer.cpp - ShaderCache.cpp ShaderCompiler.cpp StateTracker.cpp StagingBuffer.cpp StreamBuffer.cpp SwapChain.cpp - Texture2D.cpp - TextureCache.cpp - TextureConverter.cpp - Util.cpp VertexFormat.cpp VertexManager.cpp VKPipeline.cpp diff --git a/Source/Core/VideoBackends/Vulkan/CommandBufferManager.cpp b/Source/Core/VideoBackends/Vulkan/CommandBufferManager.cpp index ea07d47a53..1f2fcd01c7 100644 --- a/Source/Core/VideoBackends/Vulkan/CommandBufferManager.cpp +++ b/Source/Core/VideoBackends/Vulkan/CommandBufferManager.cpp @@ -44,12 +44,16 @@ bool CommandBufferManager::Initialize() bool CommandBufferManager::CreateCommandBuffers() { + static constexpr VkSemaphoreCreateInfo semaphore_create_info = { + VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO, nullptr, 0}; + VkDevice device = g_vulkan_context->GetDevice(); VkResult res; for (FrameResources& resources : m_frame_resources) { resources.init_command_buffer_used = false; + resources.semaphore_used = false; resources.needs_fence_wait = false; 
VkCommandPoolCreateInfo pool_info = {VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO, nullptr, 0, @@ -83,6 +87,13 @@ bool CommandBufferManager::CreateCommandBuffers() return false; } + res = vkCreateSemaphore(device, &semaphore_create_info, nullptr, &resources.semaphore); + if (res != VK_SUCCESS) + { + LOG_VULKAN_ERROR(res, "vkCreateSemaphore failed: "); + return false; + } + // TODO: A better way to choose the number of descriptors. VkDescriptorPoolSize pool_sizes[] = {{VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC, 500000}, {VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, 500000}, @@ -105,9 +116,16 @@ bool CommandBufferManager::CreateCommandBuffers() } } + res = vkCreateSemaphore(device, &semaphore_create_info, nullptr, &m_present_semaphore); + if (res != VK_SUCCESS) + { + LOG_VULKAN_ERROR(res, "vkCreateSemaphore failed: "); + return false; + } + // Activate the first command buffer. ActivateCommandBuffer moves forward, so start with the last - m_current_frame = m_frame_resources.size() - 1; - ActivateCommandBuffer(); + m_current_frame = static_cast(m_frame_resources.size()) - 1; + BeginCommandBuffer(); return true; } @@ -122,28 +140,23 @@ void CommandBufferManager::DestroyCommandBuffers() // We destroy the command pool first, to avoid any warnings from the validation layers about // objects which are pending destruction being in-use. if (resources.command_pool != VK_NULL_HANDLE) - { vkDestroyCommandPool(device, resources.command_pool, nullptr); - resources.command_pool = VK_NULL_HANDLE; - } // Destroy any pending objects. 
for (auto& it : resources.cleanup_resources) it(); - resources.cleanup_resources.clear(); + + if (resources.semaphore != VK_NULL_HANDLE) + vkDestroySemaphore(device, resources.semaphore, nullptr); if (resources.fence != VK_NULL_HANDLE) - { vkDestroyFence(device, resources.fence, nullptr); - resources.fence = VK_NULL_HANDLE; - } if (resources.descriptor_pool != VK_NULL_HANDLE) - { vkDestroyDescriptorPool(device, resources.descriptor_pool, nullptr); - resources.descriptor_pool = VK_NULL_HANDLE; - } } + + vkDestroySemaphore(device, m_present_semaphore, nullptr); } VkDescriptorSet CommandBufferManager::AllocateDescriptorSet(VkDescriptorSetLayout set_layout) @@ -183,22 +196,14 @@ bool CommandBufferManager::CreateSubmitThread() m_pending_submits.pop_front(); } - SubmitCommandBuffer(submit.index, submit.wait_semaphore, submit.signal_semaphore, - submit.present_swap_chain, submit.present_image_index); + SubmitCommandBuffer(submit.command_buffer_index, submit.present_swap_chain, + submit.present_image_index); }); }); return true; } -void CommandBufferManager::PrepareToSubmitCommandBuffer() -{ - // Grab the semaphore before submitting command buffer either on-thread or off-thread. - // This prevents a race from occurring where a second command buffer is executed - // before the worker thread has woken and executed the first one yet. - m_submit_semaphore.Wait(); -} - void CommandBufferManager::WaitForWorkerThreadIdle() { // Drain the semaphore, then allow another request in the future. @@ -215,8 +220,8 @@ void CommandBufferManager::WaitForGPUIdle() void CommandBufferManager::WaitForFence(VkFence fence) { // Find the command buffer that this fence corresponds to. 
- size_t command_buffer_index = 0; - for (; command_buffer_index < m_frame_resources.size(); command_buffer_index++) + u32 command_buffer_index = 0; + for (; command_buffer_index < static_cast(m_frame_resources.size()); command_buffer_index++) { if (m_frame_resources[command_buffer_index].fence == fence) break; @@ -227,6 +232,9 @@ void CommandBufferManager::WaitForFence(VkFence fence) if (!m_frame_resources[command_buffer_index].needs_fence_wait) return; + // Ensure this command buffer has been submitted. + WaitForWorkerThreadIdle(); + // Wait for this command buffer to be completed. VkResult res = vkWaitForFences(g_vulkan_context->GetDevice(), 1, @@ -240,19 +248,11 @@ void CommandBufferManager::WaitForFence(VkFence fence) } void CommandBufferManager::SubmitCommandBuffer(bool submit_on_worker_thread, - VkSemaphore wait_semaphore, - VkSemaphore signal_semaphore, VkSwapchainKHR present_swap_chain, uint32_t present_image_index) { - FrameResources& resources = m_frame_resources[m_current_frame]; - - // Fire fence tracking callbacks. This can't happen on the worker thread. - // We invoke these before submitting so that any last-minute commands can be added. - for (const auto& iter : m_fence_point_callbacks) - iter.second.first(resources.command_buffers[1], resources.fence); - // End the current command buffer. + FrameResources& resources = m_frame_resources[m_current_frame]; for (VkCommandBuffer command_buffer : resources.command_buffers) { VkResult res = vkEndCommandBuffer(command_buffer); @@ -266,14 +266,18 @@ void CommandBufferManager::SubmitCommandBuffer(bool submit_on_worker_thread, // This command buffer now has commands, so can't be re-used without waiting. resources.needs_fence_wait = true; + // Grab the semaphore before submitting command buffer either on-thread or off-thread. + // This prevents a race from occurring where a second command buffer is executed + // before the worker thread has woken and executed the first one yet. 
+ m_submit_semaphore.Wait(); + // Submitting off-thread? if (m_use_threaded_submission && submit_on_worker_thread) { // Push to the pending submit queue. { std::lock_guard guard(m_pending_submit_lock); - m_pending_submits.push_back({m_current_frame, wait_semaphore, signal_semaphore, - present_swap_chain, present_image_index}); + m_pending_submits.push_back({present_swap_chain, present_image_index, m_current_frame}); } // Wake up the worker thread for a single iteration. @@ -282,17 +286,18 @@ void CommandBufferManager::SubmitCommandBuffer(bool submit_on_worker_thread, else { // Pass through to normal submission path. - SubmitCommandBuffer(m_current_frame, wait_semaphore, signal_semaphore, present_swap_chain, - present_image_index); + SubmitCommandBuffer(m_current_frame, present_swap_chain, present_image_index); } + + // Switch to next cmdbuffer. + BeginCommandBuffer(); } -void CommandBufferManager::SubmitCommandBuffer(size_t index, VkSemaphore wait_semaphore, - VkSemaphore signal_semaphore, +void CommandBufferManager::SubmitCommandBuffer(u32 command_buffer_index, VkSwapchainKHR present_swap_chain, - uint32_t present_image_index) + u32 present_image_index) { - FrameResources& resources = m_frame_resources[index]; + FrameResources& resources = m_frame_resources[command_buffer_index]; // This may be executed on the worker thread, so don't modify any state of the manager class. uint32_t wait_bits = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; @@ -307,22 +312,22 @@ void CommandBufferManager::SubmitCommandBuffer(size_t index, VkSemaphore wait_se nullptr}; // If the init command buffer did not have any commands recorded, don't submit it. 
- if (!m_frame_resources[index].init_command_buffer_used) + if (!resources.init_command_buffer_used) { submit_info.commandBufferCount = 1; - submit_info.pCommandBuffers = &m_frame_resources[index].command_buffers[1]; + submit_info.pCommandBuffers = &resources.command_buffers[1]; } - if (wait_semaphore != VK_NULL_HANDLE) + if (resources.semaphore_used != VK_NULL_HANDLE) { - submit_info.pWaitSemaphores = &wait_semaphore; + submit_info.pWaitSemaphores = &resources.semaphore; submit_info.waitSemaphoreCount = 1; } - if (signal_semaphore != VK_NULL_HANDLE) + if (present_swap_chain != VK_NULL_HANDLE) { submit_info.signalSemaphoreCount = 1; - submit_info.pSignalSemaphores = &signal_semaphore; + submit_info.pSignalSemaphores = &m_present_semaphore; } VkResult res = @@ -337,11 +342,10 @@ void CommandBufferManager::SubmitCommandBuffer(size_t index, VkSemaphore wait_se if (present_swap_chain != VK_NULL_HANDLE) { // Should have a signal semaphore. - ASSERT(signal_semaphore != VK_NULL_HANDLE); VkPresentInfoKHR present_info = {VK_STRUCTURE_TYPE_PRESENT_INFO_KHR, nullptr, 1, - &signal_semaphore, + &m_present_semaphore, 1, &present_swap_chain, &present_image_index, @@ -361,15 +365,15 @@ void CommandBufferManager::SubmitCommandBuffer(size_t index, VkSemaphore wait_se m_submit_semaphore.Post(); } -void CommandBufferManager::OnCommandBufferExecuted(size_t index) +void CommandBufferManager::OnCommandBufferExecuted(u32 index) { FrameResources& resources = m_frame_resources[index]; // Fire fence tracking callbacks. 
- for (auto iter = m_fence_point_callbacks.begin(); iter != m_fence_point_callbacks.end();) + for (auto iter = m_fence_callbacks.begin(); iter != m_fence_callbacks.end();) { auto backup_iter = iter++; - backup_iter->second.second(resources.fence); + backup_iter->second(resources.fence); } // Clean up all objects pending destruction on this command buffer @@ -378,7 +382,7 @@ void CommandBufferManager::OnCommandBufferExecuted(size_t index) resources.cleanup_resources.clear(); } -void CommandBufferManager::ActivateCommandBuffer() +void CommandBufferManager::BeginCommandBuffer() { // Move to the next command buffer. m_current_frame = (m_current_frame + 1) % NUM_COMMAND_BUFFERS; @@ -422,19 +426,7 @@ void CommandBufferManager::ActivateCommandBuffer() // Reset upload command buffer state resources.init_command_buffer_used = false; -} - -void CommandBufferManager::ExecuteCommandBuffer(bool submit_off_thread, bool wait_for_completion) -{ - VkFence pending_fence = GetCurrentCommandBufferFence(); - - // If we're waiting for completion, don't bother waking the worker thread. - PrepareToSubmitCommandBuffer(); - SubmitCommandBuffer((submit_off_thread && wait_for_completion)); - ActivateCommandBuffer(); - - if (wait_for_completion) - WaitForFence(pending_fence); + resources.semaphore_used = false; } void CommandBufferManager::DeferBufferDestruction(VkBuffer object) @@ -479,20 +471,18 @@ void CommandBufferManager::DeferImageViewDestruction(VkImageView object) [object]() { vkDestroyImageView(g_vulkan_context->GetDevice(), object, nullptr); }); } -void CommandBufferManager::AddFencePointCallback( - const void* key, const CommandBufferQueuedCallback& queued_callback, - const CommandBufferExecutedCallback& executed_callback) +void CommandBufferManager::AddFenceSignaledCallback(const void* key, FenceSignaledCallback callback) { // Shouldn't be adding twice. 
- ASSERT(m_fence_point_callbacks.find(key) == m_fence_point_callbacks.end()); - m_fence_point_callbacks.emplace(key, std::make_pair(queued_callback, executed_callback)); + ASSERT(m_fence_callbacks.find(key) == m_fence_callbacks.end()); + m_fence_callbacks.emplace(key, std::move(callback)); } -void CommandBufferManager::RemoveFencePointCallback(const void* key) +void CommandBufferManager::RemoveFenceSignaledCallback(const void* key) { - auto iter = m_fence_point_callbacks.find(key); - ASSERT(iter != m_fence_point_callbacks.end()); - m_fence_point_callbacks.erase(iter); + auto iter = m_fence_callbacks.find(key); + ASSERT(iter != m_fence_callbacks.end()); + m_fence_callbacks.erase(iter); } std::unique_ptr g_command_buffer_mgr; diff --git a/Source/Core/VideoBackends/Vulkan/CommandBufferManager.h b/Source/Core/VideoBackends/Vulkan/CommandBufferManager.h index 9cefe100d7..9cfc50e287 100644 --- a/Source/Core/VideoBackends/Vulkan/CommandBufferManager.h +++ b/Source/Core/VideoBackends/Vulkan/CommandBufferManager.h @@ -22,7 +22,6 @@ #include "VideoCommon/VideoCommon.h" #include "VideoBackends/Vulkan/Constants.h" -#include "VideoBackends/Vulkan/Util.h" namespace Vulkan { @@ -55,8 +54,14 @@ public: // Gets the fence that will be signaled when the currently executing command buffer is // queued and executed. Do not wait for this fence before the buffer is executed. VkFence GetCurrentCommandBufferFence() const { return m_frame_resources[m_current_frame].fence; } - // Ensure the worker thread has submitted the previous frame's command buffer. - void PrepareToSubmitCommandBuffer(); + + // Returns the semaphore for the current command buffer, which can be used to ensure the + // swap chain image is ready before the command buffer executes. 
+ VkSemaphore GetCurrentCommandBufferSemaphore() + { + m_frame_resources[m_current_frame].semaphore_used = true; + return m_frame_resources[m_current_frame].semaphore; + } // Ensure that the worker thread has submitted any previous command buffers and is idle. void WaitForWorkerThreadIdle(); @@ -70,17 +75,12 @@ public: void WaitForFence(VkFence fence); void SubmitCommandBuffer(bool submit_on_worker_thread, - VkSemaphore wait_semaphore = VK_NULL_HANDLE, - VkSemaphore signal_semaphore = VK_NULL_HANDLE, VkSwapchainKHR present_swap_chain = VK_NULL_HANDLE, uint32_t present_image_index = 0xFFFFFFFF); - void ActivateCommandBuffer(); - - void ExecuteCommandBuffer(bool submit_off_thread, bool wait_for_completion); - // Was the last present submitted to the queue a failure? If so, we must recreate our swapchain. bool CheckLastPresentFail() { return m_present_failed_flag.TestAndClear(); } + // Schedule a vulkan resource for destruction later on. This will occur when the command buffer // is next re-used, and the GPU has finished working with the specified resource. void DeferBufferDestruction(VkBuffer object); @@ -93,13 +93,9 @@ public: // Instruct the manager to fire the specified callback when a fence is flagged to be signaled. // This happens when command buffers are executed, and can be tested if signaled, which means // that all commands up to the point when the callback was fired have completed. 
- using CommandBufferQueuedCallback = std::function; - using CommandBufferExecutedCallback = std::function; - - void AddFencePointCallback(const void* key, const CommandBufferQueuedCallback& queued_callback, - const CommandBufferExecutedCallback& executed_callback); - - void RemoveFencePointCallback(const void* key); + using FenceSignaledCallback = std::function; + void AddFenceSignaledCallback(const void* key, FenceSignaledCallback callback); + void RemoveFenceSignaledCallback(const void* key); private: bool CreateCommandBuffers(); @@ -107,30 +103,32 @@ private: bool CreateSubmitThread(); - void SubmitCommandBuffer(size_t index, VkSemaphore wait_semaphore, VkSemaphore signal_semaphore, - VkSwapchainKHR present_swap_chain, uint32_t present_image_index); + void SubmitCommandBuffer(u32 command_buffer_index, VkSwapchainKHR present_swap_chain, + u32 present_image_index); + void BeginCommandBuffer(); - void OnCommandBufferExecuted(size_t index); + void OnCommandBufferExecuted(u32 index); struct FrameResources { // [0] - Init (upload) command buffer, [1] - draw command buffer - VkCommandPool command_pool; - std::array command_buffers; - VkDescriptorPool descriptor_pool; - VkFence fence; - bool init_command_buffer_used; - bool needs_fence_wait; + VkCommandPool command_pool = VK_NULL_HANDLE; + std::array command_buffers = {}; + VkDescriptorPool descriptor_pool = VK_NULL_HANDLE; + VkFence fence = VK_NULL_HANDLE; + VkSemaphore semaphore = VK_NULL_HANDLE; + bool init_command_buffer_used = false; + bool semaphore_used = false; + bool needs_fence_wait = false; std::vector> cleanup_resources; }; - std::array m_frame_resources = {}; - size_t m_current_frame; + std::array m_frame_resources; + u32 m_current_frame; // callbacks when a fence point is set - std::map> - m_fence_point_callbacks; + std::map m_fence_callbacks; // Threaded command buffer execution // Semaphore determines when a command buffer can be queued @@ -139,12 +137,11 @@ private: std::unique_ptr m_submit_loop; 
struct PendingCommandBufferSubmit { - size_t index; - VkSemaphore wait_semaphore; - VkSemaphore signal_semaphore; VkSwapchainKHR present_swap_chain; - uint32_t present_image_index; + u32 present_image_index; + u32 command_buffer_index; }; + VkSemaphore m_present_semaphore = VK_NULL_HANDLE; std::deque m_pending_submits; std::mutex m_pending_submit_lock; Common::Flag m_present_failed_flag; diff --git a/Source/Core/VideoBackends/Vulkan/Constants.h b/Source/Core/VideoBackends/Vulkan/Constants.h index f2af115b2a..816af86bc5 100644 --- a/Source/Core/VideoBackends/Vulkan/Constants.h +++ b/Source/Core/VideoBackends/Vulkan/Constants.h @@ -26,39 +26,29 @@ enum STAGING_BUFFER_TYPE // Descriptor set layouts enum DESCRIPTOR_SET_LAYOUT { - DESCRIPTOR_SET_LAYOUT_SINGLE_UNIFORM_BUFFER, - DESCRIPTOR_SET_LAYOUT_PER_STAGE_UNIFORM_BUFFERS, - DESCRIPTOR_SET_LAYOUT_PIXEL_SHADER_SAMPLERS, - DESCRIPTOR_SET_LAYOUT_SHADER_STORAGE_BUFFERS, - DESCRIPTOR_SET_LAYOUT_TEXEL_BUFFERS, + DESCRIPTOR_SET_LAYOUT_STANDARD_UNIFORM_BUFFERS, + DESCRIPTOR_SET_LAYOUT_STANDARD_SAMPLERS, + DESCRIPTOR_SET_LAYOUT_STANDARD_SHADER_STORAGE_BUFFERS, + DESCRIPTOR_SET_LAYOUT_UTILITY_UNIFORM_BUFFER, + DESCRIPTOR_SET_LAYOUT_UTILITY_SAMPLERS, DESCRIPTOR_SET_LAYOUT_COMPUTE, NUM_DESCRIPTOR_SET_LAYOUTS }; -// Descriptor set bind points -enum DESCRIPTOR_SET_BIND_POINT -{ - DESCRIPTOR_SET_BIND_POINT_UNIFORM_BUFFERS, - DESCRIPTOR_SET_BIND_POINT_PIXEL_SHADER_SAMPLERS, - DESCRIPTOR_SET_BIND_POINT_STORAGE_OR_TEXEL_BUFFER, - NUM_DESCRIPTOR_SET_BIND_POINTS -}; - // We use four pipeline layouts: // - Standard -// - Per-stage UBO (VS/GS/PS, VS constants accessible from PS) -// - 8 combined image samplers (accessible from PS) -// - 1 SSBO accessible from PS if supported -// - Push Constant -// - Same as standard, plus 128 bytes of push constants, accessible from all stages. -// - Texture Decoding -// - Same as push constant, plus a single texel buffer accessible from PS. 
+// - Per-stage UBO (VS/GS/PS, VS constants accessible from PS) [set=0, binding=0-2] +// - 8 combined image samplers (accessible from PS) [set=1, binding=0-7] +// - 1 SSBO accessible from PS if supported [set=2, binding=0] +// - Utility +// - 1 combined UBO, accessible from VS/GS/PS [set=0, binding=0] +// - 8 combined image samplers (accessible from PS) [set=1, binding=0-7] +// - 1 texel buffer (accessible from PS) [set=1, binding=8] // - Compute // - 1 uniform buffer [set=0, binding=0] -// - 4 combined image samplers [set=0, binding=1-4] -// - 1 texel buffer [set=0, binding=5] -// - 1 storage image [set=0, binding=6] -// - 128 bytes of push constants +// - 2 combined image samplers [set=0, binding=1-2] +// - 2 texel buffers [set=0, binding=3-4] +// - 1 storage image [set=0, binding=5] // // All four pipeline layout share the first two descriptor sets (uniform buffers, PS samplers). // The third descriptor set (see bind points above) is used for storage or texel buffers. @@ -66,8 +56,6 @@ enum DESCRIPTOR_SET_BIND_POINT enum PIPELINE_LAYOUT { PIPELINE_LAYOUT_STANDARD, - PIPELINE_LAYOUT_PUSH_CONSTANT, - PIPELINE_LAYOUT_TEXTURE_CONVERSION, PIPELINE_LAYOUT_UTILITY, PIPELINE_LAYOUT_COMPUTE, NUM_PIPELINE_LAYOUTS @@ -83,53 +71,22 @@ enum UNIFORM_BUFFER_DESCRIPTOR_SET_BINDING }; // Maximum number of attributes per vertex (we don't have any more than this?) 
-constexpr size_t MAX_VERTEX_ATTRIBUTES = 16; +constexpr u32 MAX_VERTEX_ATTRIBUTES = 16; // Number of pixel shader texture slots -constexpr size_t NUM_PIXEL_SHADER_SAMPLERS = 8; +constexpr u32 NUM_PIXEL_SHADER_SAMPLERS = 8; +constexpr u32 NUM_COMPUTE_SHADER_SAMPLERS = 2; -// Total number of binding points in the pipeline layout -constexpr size_t TOTAL_PIPELINE_BINDING_POINTS = - NUM_UBO_DESCRIPTOR_SET_BINDINGS + NUM_PIXEL_SHADER_SAMPLERS + 1; - -// Format of EFB textures -constexpr VkFormat EFB_COLOR_TEXTURE_FORMAT = VK_FORMAT_R8G8B8A8_UNORM; -constexpr VkFormat EFB_DEPTH_TEXTURE_FORMAT = VK_FORMAT_D32_SFLOAT; -constexpr VkFormat EFB_DEPTH_AS_COLOR_TEXTURE_FORMAT = VK_FORMAT_R32_SFLOAT; - -// Format of texturecache textures -constexpr VkFormat TEXTURECACHE_TEXTURE_FORMAT = VK_FORMAT_R8G8B8A8_UNORM; +// Number of texel buffer binding points. +constexpr u32 NUM_COMPUTE_TEXEL_BUFFERS = 2; // Textures that don't fit into this buffer will be uploaded with a separate buffer (see below). -constexpr size_t INITIAL_TEXTURE_UPLOAD_BUFFER_SIZE = 16 * 1024 * 1024; -constexpr size_t MAXIMUM_TEXTURE_UPLOAD_BUFFER_SIZE = 64 * 1024 * 1024; +constexpr u32 TEXTURE_UPLOAD_BUFFER_SIZE = 32 * 1024 * 1024; // Textures greater than 1024*1024 will be put in staging textures that are released after // execution instead. A 2048x2048 texture is 16MB, and we'd only fit four of these in our // streaming buffer and be blocking frequently. Games are unlikely to have textures this // large anyway, so it's only really an issue for HD texture packs, and memory is not // a limiting factor in these scenarios anyway. -constexpr size_t STAGING_TEXTURE_UPLOAD_THRESHOLD = 1024 * 1024 * 8; - -// Streaming uniform buffer size -constexpr size_t INITIAL_UNIFORM_STREAM_BUFFER_SIZE = 16 * 1024 * 1024; -constexpr size_t MAXIMUM_UNIFORM_STREAM_BUFFER_SIZE = 32 * 1024 * 1024; - -// Texel buffer size for palette and texture decoding. 
-constexpr size_t TEXTURE_CONVERSION_TEXEL_BUFFER_SIZE = 8 * 1024 * 1024; - -// Push constant buffer size for utility shaders -constexpr u32 PUSH_CONSTANT_BUFFER_SIZE = 128; - -// Minimum number of draw calls per command buffer when attempting to preempt a readback operation. -constexpr u32 MINIMUM_DRAW_CALLS_PER_COMMAND_BUFFER_FOR_READBACK = 10; - -// Multisampling state info that we don't expose in VideoCommon. -union MultisamplingState -{ - BitField<0, 5, u32> samples; // 1-16 - BitField<5, 1, u32> per_sample_shading; // SSAA - u32 hex; -}; - +constexpr u32 STAGING_TEXTURE_UPLOAD_THRESHOLD = 1024 * 1024 * 4; } // namespace Vulkan diff --git a/Source/Core/VideoBackends/Vulkan/FramebufferManager.cpp b/Source/Core/VideoBackends/Vulkan/FramebufferManager.cpp deleted file mode 100644 index cd1fbd9c82..0000000000 --- a/Source/Core/VideoBackends/Vulkan/FramebufferManager.cpp +++ /dev/null @@ -1,1198 +0,0 @@ -// Copyright 2016 Dolphin Emulator Project -// Licensed under GPLv2+ -// Refer to the license.txt file included. 
- -#include "VideoBackends/Vulkan/FramebufferManager.h" - -#include -#include - -#include "Common/Assert.h" -#include "Common/CommonFuncs.h" -#include "Common/Logging/Log.h" -#include "Common/MsgHandler.h" - -#include "Core/HW/Memmap.h" - -#include "VideoBackends/Vulkan/CommandBufferManager.h" -#include "VideoBackends/Vulkan/ObjectCache.h" -#include "VideoBackends/Vulkan/StateTracker.h" -#include "VideoBackends/Vulkan/StreamBuffer.h" -#include "VideoBackends/Vulkan/Texture2D.h" -#include "VideoBackends/Vulkan/Util.h" -#include "VideoBackends/Vulkan/VKTexture.h" -#include "VideoBackends/Vulkan/VertexFormat.h" -#include "VideoBackends/Vulkan/VulkanContext.h" - -#include "VideoCommon/RenderBase.h" -#include "VideoCommon/VideoConfig.h" - -namespace Vulkan -{ -// Maximum number of pixels poked in one batch * 6 -constexpr size_t MAX_POKE_VERTICES = 8192; -constexpr size_t POKE_VERTEX_BUFFER_SIZE = 8 * 1024 * 1024; - -FramebufferManager::FramebufferManager() -{ -} - -FramebufferManager::~FramebufferManager() -{ - DestroyEFBFramebuffer(); - - DestroyConversionShaders(); - - DestroyReadbackFramebuffer(); - DestroyReadbackTextures(); - DestroyReadbackShaders(); - - DestroyPokeVertexBuffer(); - DestroyPokeShaders(); -} - -FramebufferManager* FramebufferManager::GetInstance() -{ - return static_cast(g_framebuffer_manager.get()); -} - -u32 FramebufferManager::GetEFBWidth() const -{ - return m_efb_color_texture->GetWidth(); -} - -u32 FramebufferManager::GetEFBHeight() const -{ - return m_efb_color_texture->GetHeight(); -} - -u32 FramebufferManager::GetEFBLayers() const -{ - return m_efb_color_texture->GetLayers(); -} - -VkSampleCountFlagBits FramebufferManager::GetEFBSamples() const -{ - return m_efb_color_texture->GetSamples(); -} - -MultisamplingState FramebufferManager::GetEFBMultisamplingState() const -{ - MultisamplingState ms = {}; - ms.per_sample_shading = g_ActiveConfig.MultisamplingEnabled() && g_ActiveConfig.bSSAA; - ms.samples = static_cast(GetEFBSamples()); - return 
ms; -} - -bool FramebufferManager::Initialize() -{ - if (!CreateEFBRenderPasses()) - { - PanicAlert("Failed to create EFB render pass"); - return false; - } - if (!CreateEFBFramebuffer()) - { - PanicAlert("Failed to create EFB textures"); - return false; - } - - if (!CompileConversionShaders()) - { - PanicAlert("Failed to compile EFB shaders"); - return false; - } - - if (!CreateReadbackRenderPasses()) - { - PanicAlert("Failed to create readback render passes"); - return false; - } - if (!CompileReadbackShaders()) - { - PanicAlert("Failed to compile readback shaders"); - return false; - } - if (!CreateReadbackTextures()) - { - PanicAlert("Failed to create readback textures"); - return false; - } - if (!CreateReadbackFramebuffer()) - { - PanicAlert("Failed to create readback framebuffer"); - return false; - } - - CreatePokeVertexFormat(); - if (!CreatePokeVertexBuffer()) - { - PanicAlert("Failed to create poke vertex buffer"); - return false; - } - - if (!CompilePokeShaders()) - { - PanicAlert("Failed to compile poke shaders"); - return false; - } - - return true; -} - -bool FramebufferManager::CreateEFBRenderPasses() -{ - m_efb_load_render_pass = g_object_cache->GetRenderPass( - EFB_COLOR_TEXTURE_FORMAT, Util::GetVkFormatForHostTextureFormat(GetEFBDepthFormat()), - g_ActiveConfig.iMultisamples, VK_ATTACHMENT_LOAD_OP_LOAD); - m_efb_clear_render_pass = g_object_cache->GetRenderPass( - EFB_COLOR_TEXTURE_FORMAT, Util::GetVkFormatForHostTextureFormat(GetEFBDepthFormat()), - g_ActiveConfig.iMultisamples, VK_ATTACHMENT_LOAD_OP_CLEAR); - m_depth_resolve_render_pass = g_object_cache->GetRenderPass( - EFB_DEPTH_AS_COLOR_TEXTURE_FORMAT, VK_FORMAT_UNDEFINED, 1, VK_ATTACHMENT_LOAD_OP_DONT_CARE); - return m_efb_load_render_pass != VK_NULL_HANDLE && m_efb_clear_render_pass != VK_NULL_HANDLE && - m_depth_resolve_render_pass != VK_NULL_HANDLE; -} - -bool FramebufferManager::CreateEFBFramebuffer() -{ - u32 efb_width = static_cast(std::max(g_renderer->GetTargetWidth(), 1)); - u32 
efb_height = static_cast(std::max(g_renderer->GetTargetHeight(), 1)); - u32 efb_layers = (g_ActiveConfig.stereo_mode != StereoMode::Off) ? 2 : 1; - VkSampleCountFlagBits efb_samples = - static_cast(g_ActiveConfig.iMultisamples); - INFO_LOG(VIDEO, "EFB size: %ux%ux%u", efb_width, efb_height, efb_layers); - - // Update the static variable in the base class. Why does this even exist? - FramebufferManagerBase::m_EFBLayers = efb_layers; - - // Allocate EFB render targets - m_efb_color_texture = - Texture2D::Create(efb_width, efb_height, 1, efb_layers, EFB_COLOR_TEXTURE_FORMAT, efb_samples, - VK_IMAGE_VIEW_TYPE_2D_ARRAY, VK_IMAGE_TILING_OPTIMAL, - VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT | - VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_SAMPLED_BIT); - - // We need a second texture to swap with for changing pixel formats - m_efb_convert_color_texture = - Texture2D::Create(efb_width, efb_height, 1, efb_layers, EFB_COLOR_TEXTURE_FORMAT, efb_samples, - VK_IMAGE_VIEW_TYPE_2D_ARRAY, VK_IMAGE_TILING_OPTIMAL, - VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT | - VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_SAMPLED_BIT); - - m_efb_depth_texture = Texture2D::Create( - efb_width, efb_height, 1, efb_layers, - Util::GetVkFormatForHostTextureFormat(GetEFBDepthFormat()), efb_samples, - VK_IMAGE_VIEW_TYPE_2D_ARRAY, VK_IMAGE_TILING_OPTIMAL, - VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT | - VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT | VK_IMAGE_USAGE_SAMPLED_BIT); - - if (!m_efb_color_texture || !m_efb_convert_color_texture || !m_efb_depth_texture) - return false; - - // Create resolved textures if MSAA is on - if (g_ActiveConfig.MultisamplingEnabled()) - { - m_efb_resolve_color_texture = Texture2D::Create( - efb_width, efb_height, 1, efb_layers, EFB_COLOR_TEXTURE_FORMAT, VK_SAMPLE_COUNT_1_BIT, - VK_IMAGE_VIEW_TYPE_2D_ARRAY, VK_IMAGE_TILING_OPTIMAL, - VK_IMAGE_USAGE_TRANSFER_SRC_BIT | 
VK_IMAGE_USAGE_TRANSFER_DST_BIT | - VK_IMAGE_USAGE_SAMPLED_BIT); - - m_efb_resolve_depth_texture = Texture2D::Create( - efb_width, efb_height, 1, efb_layers, EFB_DEPTH_AS_COLOR_TEXTURE_FORMAT, - VK_SAMPLE_COUNT_1_BIT, VK_IMAGE_VIEW_TYPE_2D_ARRAY, VK_IMAGE_TILING_OPTIMAL, - VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | - VK_IMAGE_USAGE_SAMPLED_BIT); - - if (!m_efb_resolve_color_texture || !m_efb_resolve_depth_texture) - return false; - - VkImageView attachment = m_efb_resolve_depth_texture->GetView(); - VkFramebufferCreateInfo framebuffer_info = {VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO, - nullptr, - 0, - m_depth_resolve_render_pass, - 1, - &attachment, - efb_width, - efb_height, - efb_layers}; - - VkResult res = vkCreateFramebuffer(g_vulkan_context->GetDevice(), &framebuffer_info, nullptr, - &m_depth_resolve_framebuffer); - if (res != VK_SUCCESS) - { - LOG_VULKAN_ERROR(res, "vkCreateFramebuffer failed: "); - return false; - } - } - - VkImageView framebuffer_attachments[] = { - m_efb_color_texture->GetView(), - m_efb_depth_texture->GetView(), - }; - - VkFramebufferCreateInfo framebuffer_info = {VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO, - nullptr, - 0, - m_efb_load_render_pass, - static_cast(ArraySize(framebuffer_attachments)), - framebuffer_attachments, - efb_width, - efb_height, - efb_layers}; - - VkResult res = vkCreateFramebuffer(g_vulkan_context->GetDevice(), &framebuffer_info, nullptr, - &m_efb_framebuffer); - if (res != VK_SUCCESS) - { - LOG_VULKAN_ERROR(res, "vkCreateFramebuffer failed: "); - return false; - } - - // Create second framebuffer for format conversions - framebuffer_attachments[0] = m_efb_convert_color_texture->GetView(); - res = vkCreateFramebuffer(g_vulkan_context->GetDevice(), &framebuffer_info, nullptr, - &m_efb_convert_framebuffer); - if (res != VK_SUCCESS) - { - LOG_VULKAN_ERROR(res, "vkCreateFramebuffer failed: "); - return false; - } - - // Transition to state that can be used to clear - 
m_efb_color_texture->TransitionToLayout(g_command_buffer_mgr->GetCurrentCommandBuffer(), - VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); - m_efb_convert_color_texture->TransitionToLayout(g_command_buffer_mgr->GetCurrentCommandBuffer(), - VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); - m_efb_depth_texture->TransitionToLayout(g_command_buffer_mgr->GetCurrentCommandBuffer(), - VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); - - // Clear the contents of the buffers. - static const VkClearColorValue clear_color = {{0.0f, 0.0f, 0.0f, 0.0f}}; - static const VkClearDepthStencilValue clear_depth = {0.0f, 0}; - VkImageSubresourceRange clear_color_range = {VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, efb_layers}; - VkImageSubresourceRange clear_depth_range = {VK_IMAGE_ASPECT_DEPTH_BIT, 0, 1, 0, efb_layers}; - vkCmdClearColorImage(g_command_buffer_mgr->GetCurrentCommandBuffer(), - m_efb_color_texture->GetImage(), VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, - &clear_color, 1, &clear_color_range); - vkCmdClearColorImage(g_command_buffer_mgr->GetCurrentCommandBuffer(), - m_efb_convert_color_texture->GetImage(), - VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, &clear_color, 1, &clear_color_range); - vkCmdClearDepthStencilImage(g_command_buffer_mgr->GetCurrentCommandBuffer(), - m_efb_depth_texture->GetImage(), VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, - &clear_depth, 1, &clear_depth_range); - - // Transition to color attachment state ready for rendering. 
- m_efb_color_texture->TransitionToLayout(g_command_buffer_mgr->GetCurrentCommandBuffer(), - VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL); - m_efb_depth_texture->TransitionToLayout(g_command_buffer_mgr->GetCurrentCommandBuffer(), - VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL); - - return true; -} - -void FramebufferManager::DestroyEFBFramebuffer() -{ - if (m_efb_framebuffer != VK_NULL_HANDLE) - { - vkDestroyFramebuffer(g_vulkan_context->GetDevice(), m_efb_framebuffer, nullptr); - m_efb_framebuffer = VK_NULL_HANDLE; - } - - if (m_efb_convert_framebuffer != VK_NULL_HANDLE) - { - vkDestroyFramebuffer(g_vulkan_context->GetDevice(), m_efb_convert_framebuffer, nullptr); - m_efb_convert_framebuffer = VK_NULL_HANDLE; - } - - if (m_depth_resolve_framebuffer != VK_NULL_HANDLE) - { - vkDestroyFramebuffer(g_vulkan_context->GetDevice(), m_depth_resolve_framebuffer, nullptr); - m_depth_resolve_framebuffer = VK_NULL_HANDLE; - } - - m_efb_color_texture.reset(); - m_efb_convert_color_texture.reset(); - m_efb_depth_texture.reset(); - m_efb_resolve_color_texture.reset(); - m_efb_resolve_depth_texture.reset(); -} - -void FramebufferManager::RecreateEFBFramebuffer() -{ - DestroyEFBFramebuffer(); - - if (!CreateEFBRenderPasses()) - PanicAlert("Failed to create EFB render pass"); - - if (!CreateEFBFramebuffer()) - PanicAlert("Failed to create EFB textures"); -} - -void FramebufferManager::RecompileShaders() -{ - DestroyConversionShaders(); - - if (!CompileConversionShaders()) - PanicAlert("Failed to compile EFB shaders"); - - DestroyReadbackShaders(); - if (!CompileReadbackShaders()) - PanicAlert("Failed to compile readback shaders"); -} - -void FramebufferManager::ReinterpretPixelData(int convtype) -{ - VkShaderModule pixel_shader = VK_NULL_HANDLE; - if (convtype == 0) - { - pixel_shader = m_ps_rgb8_to_rgba6; - } - else if (convtype == 2) - { - pixel_shader = m_ps_rgba6_to_rgb8; - } - else - { - ERROR_LOG(VIDEO, "Unhandled reinterpret pixel data %d", convtype); - return; - } - - // 
Transition EFB color buffer to shader resource, and the convert buffer to color attachment. - m_efb_color_texture->TransitionToLayout(g_command_buffer_mgr->GetCurrentCommandBuffer(), - VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); - m_efb_convert_color_texture->TransitionToLayout(g_command_buffer_mgr->GetCurrentCommandBuffer(), - VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL); - - UtilityShaderDraw draw(g_command_buffer_mgr->GetCurrentCommandBuffer(), - g_object_cache->GetPipelineLayout(PIPELINE_LAYOUT_STANDARD), - m_efb_load_render_pass, g_shader_cache->GetScreenQuadVertexShader(), - g_shader_cache->GetScreenQuadGeometryShader(), pixel_shader); - - VkRect2D region = {{0, 0}, {GetEFBWidth(), GetEFBHeight()}}; - draw.SetMultisamplingState(GetEFBMultisamplingState()); - draw.BeginRenderPass(m_efb_convert_framebuffer, region); - draw.SetPSSampler(0, m_efb_color_texture->GetView(), g_object_cache->GetPointSampler()); - draw.SetViewportAndScissor(0, 0, GetEFBWidth(), GetEFBHeight()); - draw.DrawWithoutVertexBuffer(4); - draw.EndRenderPass(); - - // Swap EFB texture pointers - std::swap(m_efb_color_texture, m_efb_convert_color_texture); - std::swap(m_efb_framebuffer, m_efb_convert_framebuffer); -} - -Texture2D* FramebufferManager::ResolveEFBColorTexture(const VkRect2D& region) -{ - // Return the normal EFB texture if multisampling is off. - if (GetEFBSamples() == VK_SAMPLE_COUNT_1_BIT) - return m_efb_color_texture.get(); - - // Can't resolve within a render pass. - StateTracker::GetInstance()->EndRenderPass(); - - // It's not valid to resolve out-of-bounds coordinates. - // Ensuring the region is within the image is the caller's responsibility. - ASSERT(region.offset.x >= 0 && region.offset.y >= 0 && - (static_cast(region.offset.x) + region.extent.width) <= GetEFBWidth() && - (static_cast(region.offset.y) + region.extent.height) <= GetEFBHeight()); - - // Resolving is considered to be a transfer operation. 
- m_efb_color_texture->TransitionToLayout(g_command_buffer_mgr->GetCurrentCommandBuffer(), - VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL); - m_efb_resolve_color_texture->TransitionToLayout(g_command_buffer_mgr->GetCurrentCommandBuffer(), - VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); - - // Resolve to our already-created texture. - VkImageResolve resolve = { - {VK_IMAGE_ASPECT_COLOR_BIT, 0, 0, GetEFBLayers()}, // VkImageSubresourceLayers srcSubresource - {region.offset.x, region.offset.y, 0}, // VkOffset3D srcOffset - {VK_IMAGE_ASPECT_COLOR_BIT, 0, 0, GetEFBLayers()}, // VkImageSubresourceLayers dstSubresource - {region.offset.x, region.offset.y, 0}, // VkOffset3D dstOffset - {region.extent.width, region.extent.height, GetEFBLayers()} // VkExtent3D extent - }; - vkCmdResolveImage(g_command_buffer_mgr->GetCurrentCommandBuffer(), - m_efb_color_texture->GetImage(), VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, - m_efb_resolve_color_texture->GetImage(), VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, - 1, &resolve); - - // Restore MSAA texture ready for rendering again - m_efb_color_texture->TransitionToLayout(g_command_buffer_mgr->GetCurrentCommandBuffer(), - VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL); - return m_efb_resolve_color_texture.get(); -} - -Texture2D* FramebufferManager::ResolveEFBDepthTexture(const VkRect2D& region) -{ - // Return the normal EFB texture if multisampling is off. - if (GetEFBSamples() == VK_SAMPLE_COUNT_1_BIT) - return m_efb_depth_texture.get(); - - // Can't resolve within a render pass. - StateTracker::GetInstance()->EndRenderPass(); - - m_efb_depth_texture->TransitionToLayout(g_command_buffer_mgr->GetCurrentCommandBuffer(), - VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); - m_efb_resolve_depth_texture->TransitionToLayout(g_command_buffer_mgr->GetCurrentCommandBuffer(), - VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL); - - // Draw using resolve shader to write the minimum depth of all samples to the resolve texture. 
- UtilityShaderDraw draw(g_command_buffer_mgr->GetCurrentCommandBuffer(), - g_object_cache->GetPipelineLayout(PIPELINE_LAYOUT_STANDARD), - m_depth_resolve_render_pass, g_shader_cache->GetScreenQuadVertexShader(), - g_shader_cache->GetScreenQuadGeometryShader(), m_ps_depth_resolve); - draw.BeginRenderPass(m_depth_resolve_framebuffer, region); - draw.SetPSSampler(0, m_efb_depth_texture->GetView(), g_object_cache->GetPointSampler()); - draw.SetViewportAndScissor(region.offset.x, region.offset.y, region.extent.width, - region.extent.height); - draw.DrawWithoutVertexBuffer(4); - draw.EndRenderPass(); - - // Restore MSAA texture ready for rendering again - m_efb_depth_texture->TransitionToLayout(g_command_buffer_mgr->GetCurrentCommandBuffer(), - VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL); - - return m_efb_resolve_depth_texture.get(); -} - -bool FramebufferManager::CompileConversionShaders() -{ - static const char RGB8_TO_RGBA6_SHADER_SOURCE[] = R"( - #if MSAA_ENABLED - SAMPLER_BINDING(0) uniform sampler2DMSArray samp0; - #else - SAMPLER_BINDING(0) uniform sampler2DArray samp0; - #endif - layout(location = 0) in vec3 uv0; - layout(location = 0) out vec4 ocol0; - - void main() - { - int layer = 0; - #if EFB_LAYERS > 1 - layer = int(uv0.z); - #endif - - ivec3 coords = ivec3(gl_FragCoord.xy, layer); - - vec4 val; - #if !MSAA_ENABLED - // No MSAA - just load the first (and only) sample - val = texelFetch(samp0, coords, 0); - #elif SSAA_ENABLED - // Sample shading, shader runs once per sample - val = texelFetch(samp0, coords, gl_SampleID); - #else - // MSAA without sample shading, average out all samples. 
- val = vec4(0, 0, 0, 0); - for (int i = 0; i < MSAA_SAMPLES; i++) - val += texelFetch(samp0, coords, i); - val /= float(MSAA_SAMPLES); - #endif - - ivec4 src8 = ivec4(round(val * 255.f)); - ivec4 dst6; - dst6.r = src8.r >> 2; - dst6.g = ((src8.r & 0x3) << 4) | (src8.g >> 4); - dst6.b = ((src8.g & 0xF) << 2) | (src8.b >> 6); - dst6.a = src8.b & 0x3F; - - ocol0 = float4(dst6) / 63.f; - } - )"; - - static const char RGBA6_TO_RGB8_SHADER_SOURCE[] = R"( - #if MSAA_ENABLED - SAMPLER_BINDING(0) uniform sampler2DMSArray samp0; - #else - SAMPLER_BINDING(0) uniform sampler2DArray samp0; - #endif - layout(location = 0) in vec3 uv0; - layout(location = 0) out vec4 ocol0; - - void main() - { - int layer = 0; - #if EFB_LAYERS > 1 - layer = int(uv0.z); - #endif - - ivec3 coords = ivec3(gl_FragCoord.xy, layer); - - vec4 val; - #if !MSAA_ENABLED - // No MSAA - just load the first (and only) sample - val = texelFetch(samp0, coords, 0); - #elif SSAA_ENABLED - // Sample shading, shader runs once per sample - val = texelFetch(samp0, coords, gl_SampleID); - #else - // MSAA without sample shading, average out all samples. - val = vec4(0, 0, 0, 0); - for (int i = 0; i < MSAA_SAMPLES; i++) - val += texelFetch(samp0, coords, i); - val /= float(MSAA_SAMPLES); - #endif - - ivec4 src6 = ivec4(round(val * 63.f)); - ivec4 dst8; - dst8.r = (src6.r << 2) | (src6.g >> 4); - dst8.g = ((src6.g & 0xF) << 4) | (src6.b >> 2); - dst8.b = ((src6.b & 0x3) << 6) | src6.a; - dst8.a = 255; - - ocol0 = float4(dst8) / 255.f; - } - )"; - - static const char DEPTH_RESOLVE_SHADER_SOURCE[] = R"( - SAMPLER_BINDING(0) uniform sampler2DMSArray samp0; - layout(location = 0) in vec3 uv0; - layout(location = 0) out float ocol0; - - void main() - { - int layer = 0; - #if EFB_LAYERS > 1 - layer = int(uv0.z); - #endif - - // gl_FragCoord is in window coordinates, and we're rendering to - // the same rectangle in the resolve texture. 
- ivec3 coords = ivec3(gl_FragCoord.xy, layer); - - // Take the minimum of all depth samples. - ocol0 = texelFetch(samp0, coords, 0).r; - for (int i = 1; i < MSAA_SAMPLES; i++) - ocol0 = min(ocol0, texelFetch(samp0, coords, i).r); - } - )"; - - std::string header = g_shader_cache->GetUtilityShaderHeader(); - DestroyConversionShaders(); - - m_ps_rgb8_to_rgba6 = Util::CompileAndCreateFragmentShader(header + RGB8_TO_RGBA6_SHADER_SOURCE); - m_ps_rgba6_to_rgb8 = Util::CompileAndCreateFragmentShader(header + RGBA6_TO_RGB8_SHADER_SOURCE); - if (GetEFBSamples() != VK_SAMPLE_COUNT_1_BIT) - m_ps_depth_resolve = Util::CompileAndCreateFragmentShader(header + DEPTH_RESOLVE_SHADER_SOURCE); - - return (m_ps_rgba6_to_rgb8 != VK_NULL_HANDLE && m_ps_rgb8_to_rgba6 != VK_NULL_HANDLE && - (GetEFBSamples() == VK_SAMPLE_COUNT_1_BIT || m_ps_depth_resolve != VK_NULL_HANDLE)); -} - -void FramebufferManager::DestroyConversionShaders() -{ - auto DestroyShader = [this](VkShaderModule& shader) { - if (shader != VK_NULL_HANDLE) - { - vkDestroyShaderModule(g_vulkan_context->GetDevice(), shader, nullptr); - shader = VK_NULL_HANDLE; - } - }; - - DestroyShader(m_ps_rgb8_to_rgba6); - DestroyShader(m_ps_rgba6_to_rgb8); - DestroyShader(m_ps_depth_resolve); -} - -u32 FramebufferManager::PeekEFBColor(u32 x, u32 y) -{ - if (!m_color_readback_texture_valid && !PopulateColorReadbackTexture()) - return 0; - - u32 value; - m_color_readback_texture->ReadTexel(x, y, &value); - return value; -} - -bool FramebufferManager::PopulateColorReadbackTexture() -{ - // Can't be in our normal render pass. - StateTracker::GetInstance()->EndRenderPass(); - StateTracker::GetInstance()->OnCPUEFBAccess(); - - // Issue a copy from framebuffer -> copy texture if we have >1xIR or MSAA on. 
- VkRect2D src_region = {{0, 0}, {GetEFBWidth(), GetEFBHeight()}}; - Texture2D* src_texture = m_efb_color_texture.get(); - if (GetEFBSamples() > 1) - src_texture = ResolveEFBColorTexture(src_region); - - if (GetEFBWidth() != EFB_WIDTH || GetEFBHeight() != EFB_HEIGHT) - { - // Transition EFB to shader read before drawing. - src_texture->TransitionToLayout(g_command_buffer_mgr->GetCurrentCommandBuffer(), - VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); - m_color_copy_texture->TransitionToLayout(g_command_buffer_mgr->GetCurrentCommandBuffer(), - VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL); - - UtilityShaderDraw draw(g_command_buffer_mgr->GetCurrentCommandBuffer(), - g_object_cache->GetPipelineLayout(PIPELINE_LAYOUT_STANDARD), - m_copy_color_render_pass, g_shader_cache->GetScreenQuadVertexShader(), - VK_NULL_HANDLE, m_copy_color_shader); - - VkRect2D rect = {{0, 0}, {EFB_WIDTH, EFB_HEIGHT}}; - draw.BeginRenderPass(m_color_copy_framebuffer, rect); - draw.SetPSSampler(0, src_texture->GetView(), g_object_cache->GetPointSampler()); - draw.SetViewportAndScissor(0, 0, EFB_WIDTH, EFB_HEIGHT); - draw.DrawWithoutVertexBuffer(4); - draw.EndRenderPass(); - - // Restore EFB to color attachment, since we're done with it. - if (src_texture == m_efb_color_texture.get()) - { - src_texture->TransitionToLayout(g_command_buffer_mgr->GetCurrentCommandBuffer(), - VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL); - } - - // Use this as a source texture now. - src_texture = m_color_copy_texture.get(); - } - - // Copy from EFB or copy texture to staging texture. - src_texture->TransitionToLayout(g_command_buffer_mgr->GetCurrentCommandBuffer(), - VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL); - static_cast(m_color_readback_texture.get()) - ->CopyFromTexture(src_texture, m_color_readback_texture->GetConfig().GetRect(), 0, 0, - m_color_readback_texture->GetConfig().GetRect()); - - // Restore original layout if we used the EFB as a source. 
- if (src_texture == m_efb_color_texture.get()) - { - src_texture->TransitionToLayout(g_command_buffer_mgr->GetCurrentCommandBuffer(), - VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL); - } - - // Wait until the copy is complete. - m_color_readback_texture->Flush(); - m_color_readback_texture_valid = true; - return true; -} - -float FramebufferManager::PeekEFBDepth(u32 x, u32 y) -{ - if (!m_depth_readback_texture_valid && !PopulateDepthReadbackTexture()) - return 0.0f; - - float value; - m_depth_readback_texture->ReadTexel(x, y, &value); - return value; -} - -bool FramebufferManager::PopulateDepthReadbackTexture() -{ - // Can't be in our normal render pass. - StateTracker::GetInstance()->EndRenderPass(); - StateTracker::GetInstance()->OnCPUEFBAccess(); - - // Issue a copy from framebuffer -> copy texture if we have >1xIR or MSAA on. - VkRect2D src_region = {{0, 0}, {GetEFBWidth(), GetEFBHeight()}}; - Texture2D* src_texture = m_efb_depth_texture.get(); - if (GetEFBSamples() > 1) - { - // EFB depth resolves are written out as color textures - src_texture = ResolveEFBDepthTexture(src_region); - } - if (GetEFBWidth() != EFB_WIDTH || GetEFBHeight() != EFB_HEIGHT) - { - // Transition EFB to shader read before drawing. 
- src_texture->TransitionToLayout(g_command_buffer_mgr->GetCurrentCommandBuffer(), - VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); - m_depth_copy_texture->TransitionToLayout(g_command_buffer_mgr->GetCurrentCommandBuffer(), - VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL); - - UtilityShaderDraw draw(g_command_buffer_mgr->GetCurrentCommandBuffer(), - g_object_cache->GetPipelineLayout(PIPELINE_LAYOUT_STANDARD), - m_copy_depth_render_pass, g_shader_cache->GetScreenQuadVertexShader(), - VK_NULL_HANDLE, m_copy_depth_shader); - - VkRect2D rect = {{0, 0}, {EFB_WIDTH, EFB_HEIGHT}}; - draw.BeginRenderPass(m_depth_copy_framebuffer, rect); - draw.SetPSSampler(0, src_texture->GetView(), g_object_cache->GetPointSampler()); - draw.SetViewportAndScissor(0, 0, EFB_WIDTH, EFB_HEIGHT); - draw.DrawWithoutVertexBuffer(4); - draw.EndRenderPass(); - - // Restore EFB to depth attachment, since we're done with it. - if (src_texture == m_efb_depth_texture.get()) - { - src_texture->TransitionToLayout(g_command_buffer_mgr->GetCurrentCommandBuffer(), - VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL); - } - - // Use this as a source texture now. - src_texture = m_depth_copy_texture.get(); - } - - // Copy from EFB or copy texture to staging texture. - src_texture->TransitionToLayout(g_command_buffer_mgr->GetCurrentCommandBuffer(), - VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL); - static_cast(m_depth_readback_texture.get()) - ->CopyFromTexture(src_texture, m_depth_readback_texture->GetConfig().GetRect(), 0, 0, - m_depth_readback_texture->GetConfig().GetRect()); - - // Restore original layout if we used the EFB as a source. - if (src_texture == m_efb_depth_texture.get()) - { - src_texture->TransitionToLayout(g_command_buffer_mgr->GetCurrentCommandBuffer(), - VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL); - } - - // Wait until the copy is complete. 
- m_depth_readback_texture->Flush(); - m_depth_readback_texture_valid = true; - return true; -} - -void FramebufferManager::InvalidatePeekCache() -{ - m_color_readback_texture_valid = false; - m_depth_readback_texture_valid = false; -} - -bool FramebufferManager::CreateReadbackRenderPasses() -{ - m_copy_color_render_pass = g_object_cache->GetRenderPass( - EFB_COLOR_TEXTURE_FORMAT, VK_FORMAT_UNDEFINED, 1, VK_ATTACHMENT_LOAD_OP_DONT_CARE); - m_copy_depth_render_pass = g_object_cache->GetRenderPass( - EFB_DEPTH_AS_COLOR_TEXTURE_FORMAT, VK_FORMAT_UNDEFINED, 1, VK_ATTACHMENT_LOAD_OP_DONT_CARE); - if (m_copy_color_render_pass == VK_NULL_HANDLE || m_copy_depth_render_pass == VK_NULL_HANDLE) - return false; - - // Some devices don't support point sizes >1 (e.g. Adreno). - // If we can't use a point size above our maximum IR, use triangles instead. - // This means a 6x increase in the size of the vertices, though. - if (!g_vulkan_context->GetDeviceFeatures().largePoints || - g_vulkan_context->GetDeviceLimits().pointSizeGranularity > 1 || - g_vulkan_context->GetDeviceLimits().pointSizeRange[0] > 1 || - g_vulkan_context->GetDeviceLimits().pointSizeRange[1] < 16) - { - m_poke_primitive = PrimitiveType::TriangleStrip; - } - else - { - // Points should be okay. - m_poke_primitive = PrimitiveType::Points; - } - - return true; -} - -bool FramebufferManager::CompileReadbackShaders() -{ - std::string source; - - // TODO: Use input attachment here instead? - // TODO: MSAA resolve in shader. 
- static const char COPY_COLOR_SHADER_SOURCE[] = R"( - SAMPLER_BINDING(0) uniform sampler2DArray samp0; - layout(location = 0) in vec3 uv0; - layout(location = 0) out vec4 ocol0; - void main() - { - ocol0 = texture(samp0, uv0); - } - )"; - - static const char COPY_DEPTH_SHADER_SOURCE[] = R"( - SAMPLER_BINDING(0) uniform sampler2DArray samp0; - layout(location = 0) in vec3 uv0; - layout(location = 0) out float ocol0; - void main() - { - ocol0 = texture(samp0, uv0).r; - } - )"; - - source = g_shader_cache->GetUtilityShaderHeader() + COPY_COLOR_SHADER_SOURCE; - m_copy_color_shader = Util::CompileAndCreateFragmentShader(source); - - source = g_shader_cache->GetUtilityShaderHeader() + COPY_DEPTH_SHADER_SOURCE; - m_copy_depth_shader = Util::CompileAndCreateFragmentShader(source); - - return m_copy_color_shader != VK_NULL_HANDLE && m_copy_depth_shader != VK_NULL_HANDLE; -} - -void FramebufferManager::DestroyReadbackShaders() -{ - if (m_copy_color_shader != VK_NULL_HANDLE) - { - vkDestroyShaderModule(g_vulkan_context->GetDevice(), m_copy_color_shader, nullptr); - m_copy_color_shader = VK_NULL_HANDLE; - } - if (m_copy_depth_shader != VK_NULL_HANDLE) - { - vkDestroyShaderModule(g_vulkan_context->GetDevice(), m_copy_depth_shader, nullptr); - m_copy_depth_shader = VK_NULL_HANDLE; - } -} - -bool FramebufferManager::CreateReadbackTextures() -{ - m_color_copy_texture = - Texture2D::Create(EFB_WIDTH, EFB_HEIGHT, 1, 1, EFB_COLOR_TEXTURE_FORMAT, - VK_SAMPLE_COUNT_1_BIT, VK_IMAGE_VIEW_TYPE_2D, VK_IMAGE_TILING_OPTIMAL, - VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT | - VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT); - - m_depth_copy_texture = - Texture2D::Create(EFB_WIDTH, EFB_HEIGHT, 1, 1, EFB_DEPTH_AS_COLOR_TEXTURE_FORMAT, - VK_SAMPLE_COUNT_1_BIT, VK_IMAGE_VIEW_TYPE_2D, VK_IMAGE_TILING_OPTIMAL, - VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT | - VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT); - - if (!m_color_copy_texture || !m_depth_copy_texture) - { - 
ERROR_LOG(VIDEO, "Failed to create EFB copy textures"); - return false; - } - - TextureConfig readback_texture_config(EFB_WIDTH, EFB_HEIGHT, 1, 1, 1, - AbstractTextureFormat::RGBA8, false); - m_color_readback_texture = - g_renderer->CreateStagingTexture(StagingTextureType::Mutable, readback_texture_config); - m_depth_readback_texture = - g_renderer->CreateStagingTexture(StagingTextureType::Mutable, readback_texture_config); - if (!m_color_readback_texture || !m_depth_readback_texture) - { - ERROR_LOG(VIDEO, "Failed to create EFB readback textures"); - return false; - } - - return true; -} - -void FramebufferManager::DestroyReadbackTextures() -{ - m_color_copy_texture.reset(); - m_color_readback_texture.reset(); - m_color_readback_texture_valid = false; - m_depth_copy_texture.reset(); - m_depth_readback_texture.reset(); - m_depth_readback_texture_valid = false; -} - -bool FramebufferManager::CreateReadbackFramebuffer() -{ - VkImageView framebuffer_attachment = m_color_copy_texture->GetView(); - VkFramebufferCreateInfo framebuffer_info = { - VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO, // VkStructureType sType - nullptr, // const void* pNext - 0, // VkFramebufferCreateFlags flags - m_copy_color_render_pass, // VkRenderPass renderPass - 1, // uint32_t attachmentCount - &framebuffer_attachment, // const VkImageView* pAttachments - EFB_WIDTH, // uint32_t width - EFB_HEIGHT, // uint32_t height - 1 // uint32_t layers - }; - VkResult res = vkCreateFramebuffer(g_vulkan_context->GetDevice(), &framebuffer_info, nullptr, - &m_color_copy_framebuffer); - if (res != VK_SUCCESS) - { - LOG_VULKAN_ERROR(res, "vkCreateFramebuffer failed: "); - return false; - } - - // Swap for depth - framebuffer_info.renderPass = m_copy_depth_render_pass; - framebuffer_attachment = m_depth_copy_texture->GetView(); - res = vkCreateFramebuffer(g_vulkan_context->GetDevice(), &framebuffer_info, nullptr, - &m_depth_copy_framebuffer); - if (res != VK_SUCCESS) - { - LOG_VULKAN_ERROR(res, "vkCreateFramebuffer 
failed: "); - return false; - } - - return true; -} - -void FramebufferManager::DestroyReadbackFramebuffer() -{ - if (m_color_copy_framebuffer != VK_NULL_HANDLE) - { - vkDestroyFramebuffer(g_vulkan_context->GetDevice(), m_color_copy_framebuffer, nullptr); - m_color_copy_framebuffer = VK_NULL_HANDLE; - } - if (m_depth_copy_framebuffer != VK_NULL_HANDLE) - { - vkDestroyFramebuffer(g_vulkan_context->GetDevice(), m_depth_copy_framebuffer, nullptr); - m_depth_copy_framebuffer = VK_NULL_HANDLE; - } -} - -void FramebufferManager::PokeEFBColor(u32 x, u32 y, u32 color) -{ - // Flush if we exceeded the number of vertices per batch. - if ((m_color_poke_vertices.size() + 6) > MAX_POKE_VERTICES) - FlushEFBPokes(); - - CreatePokeVertices(&m_color_poke_vertices, x, y, 0.0f, color); - - // Update the peek cache if it's valid, since we know the color of the pixel now. - if (m_color_readback_texture_valid) - m_color_readback_texture->WriteTexel(x, y, &color); -} - -void FramebufferManager::PokeEFBDepth(u32 x, u32 y, float depth) -{ - // Flush if we exceeded the number of vertices per batch. - if ((m_color_poke_vertices.size() + 6) > MAX_POKE_VERTICES) - FlushEFBPokes(); - - CreatePokeVertices(&m_depth_poke_vertices, x, y, depth, 0); - - // Update the peek cache if it's valid, since we know the color of the pixel now. - if (m_depth_readback_texture_valid) - m_depth_readback_texture->WriteTexel(x, y, &depth); -} - -void FramebufferManager::CreatePokeVertices(std::vector* destination_list, u32 x, - u32 y, float z, u32 color) -{ - if (m_poke_primitive == PrimitiveType::Points) - { - // GPU will expand the point to a quad. - float cs_x = float(x) * 2.0f / EFB_WIDTH - 1.0f; - float cs_y = float(y) * 2.0f / EFB_HEIGHT - 1.0f; - float point_size = GetEFBWidth() / static_cast(EFB_WIDTH); - destination_list->push_back({{cs_x, cs_y, z, point_size}, color}); - return; - } - - // Some devices don't support point sizes >1 (e.g. Adreno). 
- // Generate quad from the single point (clip-space coordinates). - float x1 = float(x) * 2.0f / EFB_WIDTH - 1.0f; - float y1 = float(y) * 2.0f / EFB_HEIGHT - 1.0f; - float x2 = float(x + 1) * 2.0f / EFB_WIDTH - 1.0f; - float y2 = float(y + 1) * 2.0f / EFB_HEIGHT - 1.0f; - destination_list->push_back({{x1, y1, z, 1.0f}, color}); - destination_list->push_back({{x2, y1, z, 1.0f}, color}); - destination_list->push_back({{x1, y2, z, 1.0f}, color}); - destination_list->push_back({{x1, y2, z, 1.0f}, color}); - destination_list->push_back({{x2, y1, z, 1.0f}, color}); - destination_list->push_back({{x2, y2, z, 1.0f}, color}); -} - -void FramebufferManager::FlushEFBPokes() -{ - if (!m_color_poke_vertices.empty()) - { - DrawPokeVertices(m_color_poke_vertices.data(), m_color_poke_vertices.size(), true, false); - m_color_poke_vertices.clear(); - } - - if (!m_depth_poke_vertices.empty()) - { - DrawPokeVertices(m_depth_poke_vertices.data(), m_depth_poke_vertices.size(), false, true); - m_depth_poke_vertices.clear(); - } -} - -void FramebufferManager::DrawPokeVertices(const EFBPokeVertex* vertices, size_t vertex_count, - bool write_color, bool write_depth) -{ - // Relatively simple since we don't have any bindings. - VkCommandBuffer command_buffer = g_command_buffer_mgr->GetCurrentCommandBuffer(); - - // We don't use the utility shader in order to keep the vertices compact. - PipelineInfo pipeline_info = {}; - pipeline_info.vertex_format = m_poke_vertex_format.get(); - pipeline_info.pipeline_layout = g_object_cache->GetPipelineLayout(PIPELINE_LAYOUT_STANDARD); - pipeline_info.vs = m_poke_vertex_shader; - pipeline_info.gs = (GetEFBLayers() > 1) ? 
m_poke_geometry_shader : VK_NULL_HANDLE; - pipeline_info.ps = m_poke_fragment_shader; - pipeline_info.render_pass = m_efb_load_render_pass; - pipeline_info.rasterization_state.hex = RenderState::GetNoCullRasterizationState().hex; - pipeline_info.rasterization_state.primitive = m_poke_primitive; - pipeline_info.multisampling_state.hex = GetEFBMultisamplingState().hex; - pipeline_info.depth_state.hex = RenderState::GetNoDepthTestingDepthStencilState().hex; - pipeline_info.blend_state.hex = RenderState::GetNoBlendingBlendState().hex; - pipeline_info.blend_state.colorupdate = write_color; - pipeline_info.blend_state.alphaupdate = write_color; - if (write_depth) - { - pipeline_info.depth_state.testenable = true; - pipeline_info.depth_state.updateenable = true; - pipeline_info.depth_state.func = ZMode::ALWAYS; - } - - VkPipeline pipeline = g_shader_cache->GetPipeline(pipeline_info); - if (pipeline == VK_NULL_HANDLE) - { - PanicAlert("Failed to get pipeline for EFB poke draw"); - return; - } - - // Populate vertex buffer. - size_t vertices_size = sizeof(EFBPokeVertex) * m_color_poke_vertices.size(); - if (!m_poke_vertex_stream_buffer->ReserveMemory(vertices_size, sizeof(EfbPokeData), true, true, - false)) - { - // Kick a command buffer first. - WARN_LOG(VIDEO, "Kicking command buffer due to no EFB poke space."); - Util::ExecuteCurrentCommandsAndRestoreState(false); - command_buffer = g_command_buffer_mgr->GetCurrentCommandBuffer(); - - if (!m_poke_vertex_stream_buffer->ReserveMemory(vertices_size, sizeof(EfbPokeData), true, true, - false)) - { - PanicAlert("Failed to get space for EFB poke vertices"); - return; - } - } - VkBuffer vb_buffer = m_poke_vertex_stream_buffer->GetBuffer(); - VkDeviceSize vb_offset = m_poke_vertex_stream_buffer->GetCurrentOffset(); - memcpy(m_poke_vertex_stream_buffer->GetCurrentHostPointer(), vertices, vertices_size); - m_poke_vertex_stream_buffer->CommitMemory(vertices_size); - - // Set up state. 
- StateTracker::GetInstance()->EndClearRenderPass(); - StateTracker::GetInstance()->BeginRenderPass(); - StateTracker::GetInstance()->SetPendingRebind(); - Util::SetViewportAndScissor(command_buffer, 0, 0, GetEFBWidth(), GetEFBHeight()); - vkCmdBindPipeline(command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline); - vkCmdBindVertexBuffers(command_buffer, 0, 1, &vb_buffer, &vb_offset); - vkCmdDraw(command_buffer, static_cast(vertex_count), 1, 0, 0); -} - -void FramebufferManager::CreatePokeVertexFormat() -{ - PortableVertexDeclaration vtx_decl = {}; - vtx_decl.position.enable = true; - vtx_decl.position.type = VAR_FLOAT; - vtx_decl.position.components = 4; - vtx_decl.position.integer = false; - vtx_decl.position.offset = offsetof(EFBPokeVertex, position); - vtx_decl.colors[0].enable = true; - vtx_decl.colors[0].type = VAR_UNSIGNED_BYTE; - vtx_decl.colors[0].components = 4; - vtx_decl.colors[0].integer = false; - vtx_decl.colors[0].offset = offsetof(EFBPokeVertex, color); - vtx_decl.stride = sizeof(EFBPokeVertex); - - m_poke_vertex_format = std::make_unique(vtx_decl); -} - -bool FramebufferManager::CreatePokeVertexBuffer() -{ - m_poke_vertex_stream_buffer = StreamBuffer::Create( - VK_BUFFER_USAGE_VERTEX_BUFFER_BIT, POKE_VERTEX_BUFFER_SIZE, POKE_VERTEX_BUFFER_SIZE); - if (!m_poke_vertex_stream_buffer) - { - ERROR_LOG(VIDEO, "Failed to create EFB poke vertex buffer"); - return false; - } - - return true; -} - -void FramebufferManager::DestroyPokeVertexBuffer() -{ - m_poke_vertex_stream_buffer.reset(); -} - -bool FramebufferManager::CompilePokeShaders() -{ - static const char POKE_VERTEX_SHADER_SOURCE[] = R"( - layout(location = 0) in vec4 ipos; - layout(location = 5) in vec4 icol0; - - layout(location = 0) out vec4 col0; - - void main() - { - gl_Position = vec4(ipos.xyz, 1.0f); - #if USE_POINT_SIZE - gl_PointSize = ipos.w; - #endif - col0 = icol0; - } - - )"; - - static const char POKE_GEOMETRY_SHADER_SOURCE[] = R"( - layout(triangles) in; - layout(triangle_strip, 
max_vertices = EFB_LAYERS * 3) out; - - VARYING_LOCATION(0) in VertexData - { - vec4 col0; - } in_data[]; - - VARYING_LOCATION(0) out VertexData - { - vec4 col0; - } out_data; - - void main() - { - for (int j = 0; j < EFB_LAYERS; j++) - { - for (int i = 0; i < 3; i++) - { - gl_Layer = j; - gl_Position = gl_in[i].gl_Position; - out_data.col0 = in_data[i].col0; - EmitVertex(); - } - EndPrimitive(); - } - } - )"; - - static const char POKE_PIXEL_SHADER_SOURCE[] = R"( - layout(location = 0) in vec4 col0; - layout(location = 0) out vec4 ocol0; - void main() - { - ocol0 = col0; - } - )"; - - std::string source = g_shader_cache->GetUtilityShaderHeader(); - if (m_poke_primitive == PrimitiveType::Points) - source += "#define USE_POINT_SIZE 1\n"; - source += POKE_VERTEX_SHADER_SOURCE; - m_poke_vertex_shader = Util::CompileAndCreateVertexShader(source); - if (m_poke_vertex_shader == VK_NULL_HANDLE) - return false; - - if (g_vulkan_context->SupportsGeometryShaders()) - { - source = g_shader_cache->GetUtilityShaderHeader() + POKE_GEOMETRY_SHADER_SOURCE; - m_poke_geometry_shader = Util::CompileAndCreateGeometryShader(source); - if (m_poke_geometry_shader == VK_NULL_HANDLE) - return false; - } - - source = g_shader_cache->GetUtilityShaderHeader() + POKE_PIXEL_SHADER_SOURCE; - m_poke_fragment_shader = Util::CompileAndCreateFragmentShader(source); - if (m_poke_fragment_shader == VK_NULL_HANDLE) - return false; - - return true; -} - -void FramebufferManager::DestroyPokeShaders() -{ - if (m_poke_vertex_shader != VK_NULL_HANDLE) - { - vkDestroyShaderModule(g_vulkan_context->GetDevice(), m_poke_vertex_shader, nullptr); - m_poke_vertex_shader = VK_NULL_HANDLE; - } - if (m_poke_geometry_shader != VK_NULL_HANDLE) - { - vkDestroyShaderModule(g_vulkan_context->GetDevice(), m_poke_geometry_shader, nullptr); - m_poke_geometry_shader = VK_NULL_HANDLE; - } - if (m_poke_fragment_shader != VK_NULL_HANDLE) - { - vkDestroyShaderModule(g_vulkan_context->GetDevice(), m_poke_fragment_shader, nullptr); 
- m_poke_vertex_shader = VK_NULL_HANDLE; - } -} - -} // namespace Vulkan diff --git a/Source/Core/VideoBackends/Vulkan/FramebufferManager.h b/Source/Core/VideoBackends/Vulkan/FramebufferManager.h deleted file mode 100644 index f037564e1c..0000000000 --- a/Source/Core/VideoBackends/Vulkan/FramebufferManager.h +++ /dev/null @@ -1,161 +0,0 @@ -// Copyright 2016 Dolphin Emulator Project -// Licensed under GPLv2+ -// Refer to the license.txt file included. - -#pragma once - -#include -#include - -#include "Common/CommonTypes.h" -#include "VideoBackends/Vulkan/Constants.h" -#include "VideoBackends/Vulkan/TextureCache.h" -#include "VideoCommon/FramebufferManagerBase.h" -#include "VideoCommon/RenderState.h" - -class AbstractStagingTexture; - -namespace Vulkan -{ -class StateTracker; -class StreamBuffer; -class Texture2D; -class VertexFormat; -class VKTexture; -class XFBSource; - -class FramebufferManager : public FramebufferManagerBase -{ -public: - FramebufferManager(); - ~FramebufferManager(); - - static FramebufferManager* GetInstance(); - - bool Initialize(); - - VkRenderPass GetEFBLoadRenderPass() const { return m_efb_load_render_pass; } - VkRenderPass GetEFBClearRenderPass() const { return m_efb_clear_render_pass; } - Texture2D* GetEFBColorTexture() const { return m_efb_color_texture.get(); } - Texture2D* GetEFBDepthTexture() const { return m_efb_depth_texture.get(); } - VkFramebuffer GetEFBFramebuffer() const { return m_efb_framebuffer; } - u32 GetEFBWidth() const; - u32 GetEFBHeight() const; - u32 GetEFBLayers() const; - VkSampleCountFlagBits GetEFBSamples() const; - MultisamplingState GetEFBMultisamplingState() const; - - void RecreateEFBFramebuffer(); - - // Recompile shaders, use when MSAA mode changes. - void RecompileShaders(); - - // Reinterpret pixel format of EFB color texture. - // Assumes no render pass is currently in progress. - // Swaps EFB framebuffers, so re-bind afterwards. 
- void ReinterpretPixelData(int convtype); - - // This render pass can be used for other readback operations. - VkRenderPass GetColorCopyForReadbackRenderPass() const { return m_copy_color_render_pass; } - // Resolve color/depth textures to a non-msaa texture, and return it. - Texture2D* ResolveEFBColorTexture(const VkRect2D& region); - Texture2D* ResolveEFBDepthTexture(const VkRect2D& region); - - // Returns the texture that the EFB color texture is resolved to when multisampling is enabled. - // Ensure ResolveEFBColorTexture is called before this method. - Texture2D* GetResolvedEFBColorTexture() const { return m_efb_resolve_color_texture.get(); } - // Reads a framebuffer value back from the GPU. This may block if the cache is not current. - u32 PeekEFBColor(u32 x, u32 y); - float PeekEFBDepth(u32 x, u32 y); - void InvalidatePeekCache(); - - // Writes a value to the framebuffer. This will never block, and writes will be batched. - void PokeEFBColor(u32 x, u32 y, u32 color); - void PokeEFBDepth(u32 x, u32 y, float depth); - void FlushEFBPokes(); - -private: - struct EFBPokeVertex - { - float position[4]; - u32 color; - }; - - bool CreateEFBRenderPasses(); - bool CreateEFBFramebuffer(); - void DestroyEFBFramebuffer(); - - bool CompileConversionShaders(); - void DestroyConversionShaders(); - - bool CreateReadbackRenderPasses(); - bool CompileReadbackShaders(); - void DestroyReadbackShaders(); - bool CreateReadbackTextures(); - void DestroyReadbackTextures(); - bool CreateReadbackFramebuffer(); - void DestroyReadbackFramebuffer(); - - void CreatePokeVertexFormat(); - bool CreatePokeVertexBuffer(); - void DestroyPokeVertexBuffer(); - bool CompilePokeShaders(); - void DestroyPokeShaders(); - - bool PopulateColorReadbackTexture(); - bool PopulateDepthReadbackTexture(); - - void CreatePokeVertices(std::vector* destination_list, u32 x, u32 y, float z, - u32 color); - - void DrawPokeVertices(const EFBPokeVertex* vertices, size_t vertex_count, bool write_color, - bool 
write_depth); - - VkRenderPass m_efb_load_render_pass = VK_NULL_HANDLE; - VkRenderPass m_efb_clear_render_pass = VK_NULL_HANDLE; - VkRenderPass m_depth_resolve_render_pass = VK_NULL_HANDLE; - - std::unique_ptr m_efb_color_texture; - std::unique_ptr m_efb_convert_color_texture; - std::unique_ptr m_efb_depth_texture; - std::unique_ptr m_efb_resolve_color_texture; - std::unique_ptr m_efb_resolve_depth_texture; - VkFramebuffer m_efb_framebuffer = VK_NULL_HANDLE; - VkFramebuffer m_efb_convert_framebuffer = VK_NULL_HANDLE; - VkFramebuffer m_depth_resolve_framebuffer = VK_NULL_HANDLE; - - // Format conversion shaders - VkShaderModule m_ps_rgb8_to_rgba6 = VK_NULL_HANDLE; - VkShaderModule m_ps_rgba6_to_rgb8 = VK_NULL_HANDLE; - VkShaderModule m_ps_depth_resolve = VK_NULL_HANDLE; - - // EFB readback texture - std::unique_ptr m_color_copy_texture; - std::unique_ptr m_depth_copy_texture; - VkFramebuffer m_color_copy_framebuffer = VK_NULL_HANDLE; - VkFramebuffer m_depth_copy_framebuffer = VK_NULL_HANDLE; - - // CPU-side EFB readback texture - std::unique_ptr m_color_readback_texture; - std::unique_ptr m_depth_readback_texture; - bool m_color_readback_texture_valid = false; - bool m_depth_readback_texture_valid = false; - - // EFB poke drawing setup - std::unique_ptr m_poke_vertex_format; - std::unique_ptr m_poke_vertex_stream_buffer; - std::vector m_color_poke_vertices; - std::vector m_depth_poke_vertices; - PrimitiveType m_poke_primitive = PrimitiveType::TriangleStrip; - - VkRenderPass m_copy_color_render_pass = VK_NULL_HANDLE; - VkRenderPass m_copy_depth_render_pass = VK_NULL_HANDLE; - VkShaderModule m_copy_color_shader = VK_NULL_HANDLE; - VkShaderModule m_copy_depth_shader = VK_NULL_HANDLE; - - VkShaderModule m_poke_vertex_shader = VK_NULL_HANDLE; - VkShaderModule m_poke_geometry_shader = VK_NULL_HANDLE; - VkShaderModule m_poke_fragment_shader = VK_NULL_HANDLE; -}; - -} // namespace Vulkan diff --git a/Source/Core/VideoBackends/Vulkan/ObjectCache.cpp 
b/Source/Core/VideoBackends/Vulkan/ObjectCache.cpp index 1de68b8349..59690118aa 100644 --- a/Source/Core/VideoBackends/Vulkan/ObjectCache.cpp +++ b/Source/Core/VideoBackends/Vulkan/ObjectCache.cpp @@ -19,7 +19,7 @@ #include "VideoBackends/Vulkan/CommandBufferManager.h" #include "VideoBackends/Vulkan/ShaderCompiler.h" #include "VideoBackends/Vulkan/StreamBuffer.h" -#include "VideoBackends/Vulkan/Util.h" +#include "VideoBackends/Vulkan/VKTexture.h" #include "VideoBackends/Vulkan/VertexFormat.h" #include "VideoBackends/Vulkan/VulkanContext.h" #include "VideoCommon/Statistics.h" @@ -28,16 +28,16 @@ namespace Vulkan { std::unique_ptr g_object_cache; -ObjectCache::ObjectCache() -{ -} +ObjectCache::ObjectCache() = default; ObjectCache::~ObjectCache() { + DestroyPipelineCache(); DestroySamplers(); DestroyPipelineLayouts(); DestroyDescriptorSetLayouts(); DestroyRenderPassCache(); + m_dummy_texture.reset(); } bool ObjectCache::Initialize() @@ -48,44 +48,37 @@ bool ObjectCache::Initialize() if (!CreatePipelineLayouts()) return false; - if (!CreateUtilityShaderVertexFormat()) - return false; - if (!CreateStaticSamplers()) return false; m_texture_upload_buffer = - StreamBuffer::Create(VK_BUFFER_USAGE_TRANSFER_SRC_BIT, INITIAL_TEXTURE_UPLOAD_BUFFER_SIZE, - MAXIMUM_TEXTURE_UPLOAD_BUFFER_SIZE); + StreamBuffer::Create(VK_BUFFER_USAGE_TRANSFER_SRC_BIT, TEXTURE_UPLOAD_BUFFER_SIZE); if (!m_texture_upload_buffer) { PanicAlert("Failed to create texture upload buffer"); return false; } - m_utility_shader_vertex_buffer = - StreamBuffer::Create(VK_BUFFER_USAGE_VERTEX_BUFFER_BIT, 1024 * 1024, 4 * 1024 * 1024); - m_utility_shader_uniform_buffer = - StreamBuffer::Create(VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, 1024, 4 * 1024 * 1024); - if (!m_utility_shader_vertex_buffer || !m_utility_shader_uniform_buffer) - return false; - - m_dummy_texture = Texture2D::Create(1, 1, 1, 1, VK_FORMAT_R8G8B8A8_UNORM, VK_SAMPLE_COUNT_1_BIT, - VK_IMAGE_VIEW_TYPE_2D_ARRAY, VK_IMAGE_TILING_OPTIMAL, - 
VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT); - m_dummy_texture->TransitionToLayout(g_command_buffer_mgr->GetCurrentInitCommandBuffer(), - VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); - VkClearColorValue clear_color = {}; - VkImageSubresourceRange clear_range = {VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1}; - vkCmdClearColorImage(g_command_buffer_mgr->GetCurrentInitCommandBuffer(), - m_dummy_texture->GetImage(), m_dummy_texture->GetLayout(), &clear_color, 1, - &clear_range); - m_dummy_texture->TransitionToLayout(g_command_buffer_mgr->GetCurrentInitCommandBuffer(), - VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); + if (g_ActiveConfig.bShaderCache) + { + if (!LoadPipelineCache()) + return false; + } + else + { + if (!CreatePipelineCache()) + return false; + } return true; } +void ObjectCache::Shutdown() +{ + if (g_ActiveConfig.bShaderCache && m_pipeline_cache != VK_NULL_HANDLE) + SavePipelineCache(); +} + void ObjectCache::ClearSamplerCache() { for (const auto& it : m_sampler_cache) @@ -115,13 +108,9 @@ void ObjectCache::DestroySamplers() bool ObjectCache::CreateDescriptorSetLayouts() { - static const VkDescriptorSetLayoutBinding single_ubo_set_bindings[] = { - 0, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC, 1, - VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_GEOMETRY_BIT | VK_SHADER_STAGE_FRAGMENT_BIT}; - // The geometry shader buffer must be last in this binding set, as we don't include it // if geometry shaders are not supported by the device. See the decrement below. 
- static const VkDescriptorSetLayoutBinding per_stage_ubo_set_bindings[] = { + static const VkDescriptorSetLayoutBinding standard_ubo_bindings[] = { {UBO_DESCRIPTOR_SET_BINDING_PS, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC, 1, VK_SHADER_STAGE_FRAGMENT_BIT}, {UBO_DESCRIPTOR_SET_BINDING_VS, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC, 1, @@ -129,45 +118,56 @@ bool ObjectCache::CreateDescriptorSetLayouts() {UBO_DESCRIPTOR_SET_BINDING_GS, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC, 1, VK_SHADER_STAGE_GEOMETRY_BIT}}; - static const VkDescriptorSetLayoutBinding sampler_set_bindings[] = { + static const VkDescriptorSetLayoutBinding standard_sampler_bindings[] = { {0, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, static_cast(NUM_PIXEL_SHADER_SAMPLERS), VK_SHADER_STAGE_FRAGMENT_BIT}}; - static const VkDescriptorSetLayoutBinding ssbo_set_bindings[] = { + static const VkDescriptorSetLayoutBinding standard_ssbo_bindings[] = { {0, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1, VK_SHADER_STAGE_FRAGMENT_BIT}}; - static const VkDescriptorSetLayoutBinding texel_buffer_set_bindings[] = { - {0, VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, 1, VK_SHADER_STAGE_FRAGMENT_BIT}, + static const VkDescriptorSetLayoutBinding utility_ubo_bindings[] = { + 0, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC, 1, + VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_GEOMETRY_BIT | VK_SHADER_STAGE_FRAGMENT_BIT}; + + // Utility samplers aren't dynamically indexed. 
+ static const VkDescriptorSetLayoutBinding utility_sampler_bindings[] = { + {0, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, 1, VK_SHADER_STAGE_FRAGMENT_BIT}, + {1, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, 1, VK_SHADER_STAGE_FRAGMENT_BIT}, + {2, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, 1, VK_SHADER_STAGE_FRAGMENT_BIT}, + {3, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, 1, VK_SHADER_STAGE_FRAGMENT_BIT}, + {4, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, 1, VK_SHADER_STAGE_FRAGMENT_BIT}, + {5, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, 1, VK_SHADER_STAGE_FRAGMENT_BIT}, + {6, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, 1, VK_SHADER_STAGE_FRAGMENT_BIT}, + {7, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, 1, VK_SHADER_STAGE_FRAGMENT_BIT}, + {8, VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, 1, VK_SHADER_STAGE_FRAGMENT_BIT}, }; static const VkDescriptorSetLayoutBinding compute_set_bindings[] = { {0, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC, 1, VK_SHADER_STAGE_COMPUTE_BIT}, {1, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, 1, VK_SHADER_STAGE_COMPUTE_BIT}, {2, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, 1, VK_SHADER_STAGE_COMPUTE_BIT}, - {3, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, 1, VK_SHADER_STAGE_COMPUTE_BIT}, - {4, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, 1, VK_SHADER_STAGE_COMPUTE_BIT}, - {5, VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, 1, VK_SHADER_STAGE_COMPUTE_BIT}, - {6, VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, 1, VK_SHADER_STAGE_COMPUTE_BIT}, - {7, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, 1, VK_SHADER_STAGE_COMPUTE_BIT}, + {3, VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, 1, VK_SHADER_STAGE_COMPUTE_BIT}, + {4, VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, 1, VK_SHADER_STAGE_COMPUTE_BIT}, + {5, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, 1, VK_SHADER_STAGE_COMPUTE_BIT}, }; VkDescriptorSetLayoutCreateInfo create_infos[NUM_DESCRIPTOR_SET_LAYOUTS] = { {VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, nullptr, 0, - static_cast(ArraySize(single_ubo_set_bindings)), single_ubo_set_bindings}, + 
static_cast(ArraySize(standard_ubo_bindings)), standard_ubo_bindings}, {VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, nullptr, 0, - static_cast(ArraySize(per_stage_ubo_set_bindings)), per_stage_ubo_set_bindings}, + static_cast(ArraySize(standard_sampler_bindings)), standard_sampler_bindings}, {VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, nullptr, 0, - static_cast(ArraySize(sampler_set_bindings)), sampler_set_bindings}, + static_cast(ArraySize(standard_ssbo_bindings)), standard_ssbo_bindings}, {VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, nullptr, 0, - static_cast(ArraySize(ssbo_set_bindings)), ssbo_set_bindings}, + static_cast(ArraySize(utility_ubo_bindings)), utility_ubo_bindings}, {VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, nullptr, 0, - static_cast(ArraySize(texel_buffer_set_bindings)), texel_buffer_set_bindings}, + static_cast(ArraySize(utility_sampler_bindings)), utility_sampler_bindings}, {VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, nullptr, 0, static_cast(ArraySize(compute_set_bindings)), compute_set_bindings}}; // Don't set the GS bit if geometry shaders aren't available. - if (!g_vulkan_context->SupportsGeometryShaders()) - create_infos[DESCRIPTOR_SET_LAYOUT_PER_STAGE_UNIFORM_BUFFERS].bindingCount--; + if (!g_ActiveConfig.backend_info.bSupportsGeometryShaders) + create_infos[DESCRIPTOR_SET_LAYOUT_STANDARD_UNIFORM_BUFFERS].bindingCount--; for (size_t i = 0; i < NUM_DESCRIPTOR_SET_LAYOUTS; i++) { @@ -199,22 +199,15 @@ bool ObjectCache::CreatePipelineLayouts() // Descriptor sets for each pipeline layout. // In the standard set, the SSBO must be the last descriptor, as we do not include it // when fragment stores and atomics are not supported by the device. 
- VkDescriptorSetLayout standard_sets[] = { - m_descriptor_set_layouts[DESCRIPTOR_SET_LAYOUT_PER_STAGE_UNIFORM_BUFFERS], - m_descriptor_set_layouts[DESCRIPTOR_SET_LAYOUT_PIXEL_SHADER_SAMPLERS], - m_descriptor_set_layouts[DESCRIPTOR_SET_LAYOUT_SHADER_STORAGE_BUFFERS]}; - VkDescriptorSetLayout texture_conversion_sets[] = { - m_descriptor_set_layouts[DESCRIPTOR_SET_LAYOUT_PER_STAGE_UNIFORM_BUFFERS], - m_descriptor_set_layouts[DESCRIPTOR_SET_LAYOUT_PIXEL_SHADER_SAMPLERS], - m_descriptor_set_layouts[DESCRIPTOR_SET_LAYOUT_TEXEL_BUFFERS]}; - VkDescriptorSetLayout utility_sets[] = { - m_descriptor_set_layouts[DESCRIPTOR_SET_LAYOUT_SINGLE_UNIFORM_BUFFER], - m_descriptor_set_layouts[DESCRIPTOR_SET_LAYOUT_PIXEL_SHADER_SAMPLERS]}; - VkDescriptorSetLayout compute_sets[] = {m_descriptor_set_layouts[DESCRIPTOR_SET_LAYOUT_COMPUTE]}; - VkPushConstantRange push_constant_range = { - VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT, 0, PUSH_CONSTANT_BUFFER_SIZE}; - VkPushConstantRange compute_push_constant_range = {VK_SHADER_STAGE_COMPUTE_BIT, 0, - PUSH_CONSTANT_BUFFER_SIZE}; + const VkDescriptorSetLayout standard_sets[] = { + m_descriptor_set_layouts[DESCRIPTOR_SET_LAYOUT_STANDARD_UNIFORM_BUFFERS], + m_descriptor_set_layouts[DESCRIPTOR_SET_LAYOUT_STANDARD_SAMPLERS], + m_descriptor_set_layouts[DESCRIPTOR_SET_LAYOUT_STANDARD_SHADER_STORAGE_BUFFERS]}; + const VkDescriptorSetLayout utility_sets[] = { + m_descriptor_set_layouts[DESCRIPTOR_SET_LAYOUT_UTILITY_UNIFORM_BUFFER], + m_descriptor_set_layouts[DESCRIPTOR_SET_LAYOUT_UTILITY_SAMPLERS]}; + const VkDescriptorSetLayout compute_sets[] = { + m_descriptor_set_layouts[DESCRIPTOR_SET_LAYOUT_COMPUTE]}; // Info for each pipeline layout VkPipelineLayoutCreateInfo pipeline_layout_info[NUM_PIPELINE_LAYOUTS] = { @@ -222,25 +215,16 @@ bool ObjectCache::CreatePipelineLayouts() {VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, nullptr, 0, static_cast(ArraySize(standard_sets)), standard_sets, 0, nullptr}, - // Push Constant - 
{VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, nullptr, 0, - static_cast(ArraySize(standard_sets)), standard_sets, 1, &push_constant_range}, - - // Texture Conversion - {VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, nullptr, 0, - static_cast(ArraySize(texture_conversion_sets)), texture_conversion_sets, 1, - &push_constant_range}, - - // Texture Conversion + // Utility {VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, nullptr, 0, static_cast(ArraySize(utility_sets)), utility_sets, 0, nullptr}, // Compute {VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, nullptr, 0, - static_cast(ArraySize(compute_sets)), compute_sets, 1, &compute_push_constant_range}}; + static_cast(ArraySize(compute_sets)), compute_sets, 0, nullptr}}; // If bounding box is unsupported, don't bother with the SSBO descriptor set. - if (!g_vulkan_context->SupportsBoundingBox()) + if (!g_ActiveConfig.backend_info.bSupportsBBox) pipeline_layout_info[PIPELINE_LAYOUT_STANDARD].setLayoutCount--; for (size_t i = 0; i < NUM_PIPELINE_LAYOUTS; i++) @@ -265,30 +249,6 @@ void ObjectCache::DestroyPipelineLayouts() } } -bool ObjectCache::CreateUtilityShaderVertexFormat() -{ - PortableVertexDeclaration vtx_decl = {}; - vtx_decl.position.enable = true; - vtx_decl.position.type = VAR_FLOAT; - vtx_decl.position.components = 4; - vtx_decl.position.integer = false; - vtx_decl.position.offset = offsetof(UtilityShaderVertex, Position); - vtx_decl.texcoords[0].enable = true; - vtx_decl.texcoords[0].type = VAR_FLOAT; - vtx_decl.texcoords[0].components = 4; - vtx_decl.texcoords[0].integer = false; - vtx_decl.texcoords[0].offset = offsetof(UtilityShaderVertex, TexCoord); - vtx_decl.colors[0].enable = true; - vtx_decl.colors[0].type = VAR_UNSIGNED_BYTE; - vtx_decl.colors[0].components = 4; - vtx_decl.colors[0].integer = false; - vtx_decl.colors[0].offset = offsetof(UtilityShaderVertex, Color); - vtx_decl.stride = sizeof(UtilityShaderVertex); - - m_utility_shader_vertex_format = std::make_unique(vtx_decl); - return true; -} - 
bool ObjectCache::CreateStaticSamplers() { VkSamplerCreateInfo create_info = { @@ -472,4 +432,199 @@ void ObjectCache::DestroyRenderPassCache() vkDestroyRenderPass(g_vulkan_context->GetDevice(), it.second, nullptr); m_render_pass_cache.clear(); } + +class PipelineCacheReadCallback : public LinearDiskCacheReader +{ +public: + PipelineCacheReadCallback(std::vector* data) : m_data(data) {} + void Read(const u32& key, const u8* value, u32 value_size) override + { + m_data->resize(value_size); + if (value_size > 0) + memcpy(m_data->data(), value, value_size); + } + +private: + std::vector* m_data; +}; + +class PipelineCacheReadIgnoreCallback : public LinearDiskCacheReader +{ +public: + void Read(const u32& key, const u8* value, u32 value_size) override {} +}; + +bool ObjectCache::CreatePipelineCache() +{ + // Vulkan pipeline caches can be shared between games for shader compile time reduction. + // This assumes that drivers don't create all pipelines in the cache on load time, only + // when a lookup occurs that matches a pipeline (or pipeline data) in the cache. + m_pipeline_cache_filename = GetDiskShaderCacheFileName(APIType::Vulkan, "Pipeline", false, true); + + VkPipelineCacheCreateInfo info = { + VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO, // VkStructureType sType + nullptr, // const void* pNext + 0, // VkPipelineCacheCreateFlags flags + 0, // size_t initialDataSize + nullptr // const void* pInitialData + }; + + VkResult res = + vkCreatePipelineCache(g_vulkan_context->GetDevice(), &info, nullptr, &m_pipeline_cache); + if (res == VK_SUCCESS) + return true; + + LOG_VULKAN_ERROR(res, "vkCreatePipelineCache failed: "); + return false; } + +bool ObjectCache::LoadPipelineCache() +{ + // We have to keep the pipeline cache file name around since when we save it + // we delete the old one, by which time the game's unique ID is already cleared. 
+ m_pipeline_cache_filename = GetDiskShaderCacheFileName(APIType::Vulkan, "Pipeline", false, true); + + std::vector disk_data; + LinearDiskCache disk_cache; + PipelineCacheReadCallback read_callback(&disk_data); + if (disk_cache.OpenAndRead(m_pipeline_cache_filename, read_callback) != 1) + disk_data.clear(); + + if (!disk_data.empty() && !ValidatePipelineCache(disk_data.data(), disk_data.size())) + { + // Don't use this data. In fact, we should delete it to prevent it from being used next time. + File::Delete(m_pipeline_cache_filename); + return CreatePipelineCache(); + } + + VkPipelineCacheCreateInfo info = { + VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO, // VkStructureType sType + nullptr, // const void* pNext + 0, // VkPipelineCacheCreateFlags flags + disk_data.size(), // size_t initialDataSize + disk_data.data() // const void* pInitialData + }; + + VkResult res = + vkCreatePipelineCache(g_vulkan_context->GetDevice(), &info, nullptr, &m_pipeline_cache); + if (res == VK_SUCCESS) + return true; + + // Failed to create pipeline cache, try with it empty. + LOG_VULKAN_ERROR(res, "vkCreatePipelineCache failed, trying empty cache: "); + return CreatePipelineCache(); +} + +// Based on Vulkan 1.0 specification, +// Table 9.1. Layout for pipeline cache header version VK_PIPELINE_CACHE_HEADER_VERSION_ONE +// NOTE: This data is assumed to be in little-endian format. 
+#pragma pack(push, 4) +struct VK_PIPELINE_CACHE_HEADER +{ + u32 header_length; + u32 header_version; + u32 vendor_id; + u32 device_id; + u8 uuid[VK_UUID_SIZE]; +}; +#pragma pack(pop) +static_assert(std::is_trivially_copyable::value, + "VK_PIPELINE_CACHE_HEADER must be trivially copyable"); + +bool ObjectCache::ValidatePipelineCache(const u8* data, size_t data_length) +{ + if (data_length < sizeof(VK_PIPELINE_CACHE_HEADER)) + { + ERROR_LOG(VIDEO, "Pipeline cache failed validation: Invalid header"); + return false; + } + + VK_PIPELINE_CACHE_HEADER header; + std::memcpy(&header, data, sizeof(header)); + if (header.header_length < sizeof(VK_PIPELINE_CACHE_HEADER)) + { + ERROR_LOG(VIDEO, "Pipeline cache failed validation: Invalid header length"); + return false; + } + + if (header.header_version != VK_PIPELINE_CACHE_HEADER_VERSION_ONE) + { + ERROR_LOG(VIDEO, "Pipeline cache failed validation: Invalid header version"); + return false; + } + + if (header.vendor_id != g_vulkan_context->GetDeviceProperties().vendorID) + { + ERROR_LOG(VIDEO, + "Pipeline cache failed validation: Incorrect vendor ID (file: 0x%X, device: 0x%X)", + header.vendor_id, g_vulkan_context->GetDeviceProperties().vendorID); + return false; + } + + if (header.device_id != g_vulkan_context->GetDeviceProperties().deviceID) + { + ERROR_LOG(VIDEO, + "Pipeline cache failed validation: Incorrect device ID (file: 0x%X, device: 0x%X)", + header.device_id, g_vulkan_context->GetDeviceProperties().deviceID); + return false; + } + + if (std::memcmp(header.uuid, g_vulkan_context->GetDeviceProperties().pipelineCacheUUID, + VK_UUID_SIZE) != 0) + { + ERROR_LOG(VIDEO, "Pipeline cache failed validation: Incorrect UUID"); + return false; + } + + return true; +} + +void ObjectCache::DestroyPipelineCache() +{ + vkDestroyPipelineCache(g_vulkan_context->GetDevice(), m_pipeline_cache, nullptr); + m_pipeline_cache = VK_NULL_HANDLE; +} + +void ObjectCache::SavePipelineCache() +{ + size_t data_size; + VkResult res = + 
vkGetPipelineCacheData(g_vulkan_context->GetDevice(), m_pipeline_cache, &data_size, nullptr); + if (res != VK_SUCCESS) + { + LOG_VULKAN_ERROR(res, "vkGetPipelineCacheData failed: "); + return; + } + + std::vector data(data_size); + res = vkGetPipelineCacheData(g_vulkan_context->GetDevice(), m_pipeline_cache, &data_size, + data.data()); + if (res != VK_SUCCESS) + { + LOG_VULKAN_ERROR(res, "vkGetPipelineCacheData failed: "); + return; + } + + // Delete the old cache and re-create. + File::Delete(m_pipeline_cache_filename); + + // We write a single key of 1, with the entire pipeline cache data. + // Not ideal, but our disk cache class does not support just writing a single blob + // of data without specifying a key. + LinearDiskCache disk_cache; + PipelineCacheReadIgnoreCallback callback; + disk_cache.OpenAndRead(m_pipeline_cache_filename, callback); + disk_cache.Append(1, data.data(), static_cast(data.size())); + disk_cache.Close(); +} + +void ObjectCache::ReloadPipelineCache() +{ + SavePipelineCache(); + + if (g_ActiveConfig.bShaderCache) + LoadPipelineCache(); + else + CreatePipelineCache(); +} +} // namespace Vulkan diff --git a/Source/Core/VideoBackends/Vulkan/ObjectCache.h b/Source/Core/VideoBackends/Vulkan/ObjectCache.h index c3502f7c66..794c52c0f0 100644 --- a/Source/Core/VideoBackends/Vulkan/ObjectCache.h +++ b/Source/Core/VideoBackends/Vulkan/ObjectCache.h @@ -16,7 +16,6 @@ #include "Common/LinearDiskCache.h" #include "VideoBackends/Vulkan/Constants.h" -#include "VideoBackends/Vulkan/Texture2D.h" #include "VideoCommon/GeometryShaderGen.h" #include "VideoCommon/PixelShaderGen.h" @@ -27,6 +26,7 @@ namespace Vulkan { class CommandBufferManager; class VertexFormat; +class VKTexture; class StreamBuffer; class ObjectCache @@ -35,29 +35,23 @@ public: ObjectCache(); ~ObjectCache(); + // Perform at startup, create descriptor layouts, compiles all static shaders. + bool Initialize(); + void Shutdown(); + // Descriptor set layout accessor. 
Used for allocating descriptor sets. VkDescriptorSetLayout GetDescriptorSetLayout(DESCRIPTOR_SET_LAYOUT layout) const { return m_descriptor_set_layouts[layout]; } + // Pipeline layout accessor. Used to fill in required field in PipelineInfo. VkPipelineLayout GetPipelineLayout(PIPELINE_LAYOUT layout) const { return m_pipeline_layouts[layout]; } - // Shared utility shader resources - VertexFormat* GetUtilityShaderVertexFormat() const - { - return m_utility_shader_vertex_format.get(); - } - StreamBuffer* GetUtilityShaderVertexBuffer() const - { - return m_utility_shader_vertex_buffer.get(); - } - StreamBuffer* GetUtilityShaderUniformBuffer() const - { - return m_utility_shader_uniform_buffer.get(); - } + + // Staging buffer for textures. StreamBuffer* GetTextureUploadBuffer() const { return m_texture_upload_buffer.get(); } // Static samplers @@ -65,36 +59,39 @@ public: VkSampler GetLinearSampler() const { return m_linear_sampler; } VkSampler GetSampler(const SamplerState& info); - // Dummy image for samplers that are unbound - Texture2D* GetDummyImage() const { return m_dummy_texture.get(); } - VkImageView GetDummyImageView() const { return m_dummy_texture->GetView(); } // Render pass cache. VkRenderPass GetRenderPass(VkFormat color_format, VkFormat depth_format, u32 multisamples, VkAttachmentLoadOp load_op); - // Perform at startup, create descriptor layouts, compiles all static shaders. - bool Initialize(); + // Pipeline cache. Used when creating pipelines for drivers to store compiled programs. + VkPipelineCache GetPipelineCache() const { return m_pipeline_cache; } // Clear sampler cache, use when anisotropy mode changes // WARNING: Ensure none of the objects from here are in use when calling void ClearSamplerCache(); + // Saves the pipeline cache to disk. Call when shutting down. + void SavePipelineCache(); + + // Reload pipeline cache. Call when host config changes. 
+ void ReloadPipelineCache(); + private: bool CreateDescriptorSetLayouts(); void DestroyDescriptorSetLayouts(); bool CreatePipelineLayouts(); void DestroyPipelineLayouts(); - bool CreateUtilityShaderVertexFormat(); bool CreateStaticSamplers(); void DestroySamplers(); void DestroyRenderPassCache(); + bool CreatePipelineCache(); + bool LoadPipelineCache(); + bool ValidatePipelineCache(const u8* data, size_t data_length); + void DestroyPipelineCache(); std::array m_descriptor_set_layouts = {}; std::array m_pipeline_layouts = {}; - std::unique_ptr m_utility_shader_vertex_format; - std::unique_ptr m_utility_shader_vertex_buffer; - std::unique_ptr m_utility_shader_uniform_buffer; std::unique_ptr m_texture_upload_buffer; VkSampler m_point_sampler = VK_NULL_HANDLE; @@ -103,11 +100,15 @@ private: std::map m_sampler_cache; // Dummy image for samplers that are unbound - std::unique_ptr m_dummy_texture; + std::unique_ptr m_dummy_texture; // Render pass cache using RenderPassCacheKey = std::tuple; std::map m_render_pass_cache; + + // pipeline cache + VkPipelineCache m_pipeline_cache = VK_NULL_HANDLE; + std::string m_pipeline_cache_filename; }; extern std::unique_ptr g_object_cache; diff --git a/Source/Core/VideoBackends/Vulkan/PerfQuery.cpp b/Source/Core/VideoBackends/Vulkan/PerfQuery.cpp index 25334e3eb4..3a84f2e571 100644 --- a/Source/Core/VideoBackends/Vulkan/PerfQuery.cpp +++ b/Source/Core/VideoBackends/Vulkan/PerfQuery.cpp @@ -13,20 +13,18 @@ #include "Common/MsgHandler.h" #include "VideoBackends/Vulkan/CommandBufferManager.h" +#include "VideoBackends/Vulkan/Renderer.h" #include "VideoBackends/Vulkan/StagingBuffer.h" #include "VideoBackends/Vulkan/StateTracker.h" -#include "VideoBackends/Vulkan/Util.h" #include "VideoBackends/Vulkan/VulkanContext.h" namespace Vulkan { -PerfQuery::PerfQuery() -{ -} +PerfQuery::PerfQuery() = default; PerfQuery::~PerfQuery() { - g_command_buffer_mgr->RemoveFencePointCallback(this); + g_command_buffer_mgr->RemoveFenceSignaledCallback(this); if 
(m_query_pool != VK_NULL_HANDLE) vkDestroyQueryPool(g_vulkan_context->GetDevice(), m_query_pool, nullptr); @@ -51,11 +49,8 @@ bool PerfQuery::Initialize() return false; } - g_command_buffer_mgr->AddFencePointCallback( - this, - std::bind(&PerfQuery::OnCommandBufferQueued, this, std::placeholders::_1, - std::placeholders::_2), - std::bind(&PerfQuery::OnCommandBufferExecuted, this, std::placeholders::_1)); + g_command_buffer_mgr->AddFenceSignaledCallback( + this, std::bind(&PerfQuery::OnFenceSignaled, this, std::placeholders::_1)); return true; } @@ -92,9 +87,6 @@ void PerfQuery::EnableQuery(PerfQueryGroup type) // TODO: Is this needed? StateTracker::GetInstance()->BeginRenderPass(); vkCmdBeginQuery(g_command_buffer_mgr->GetCurrentCommandBuffer(), m_query_pool, index, flags); - - // Prevent background command buffer submission while the query is active. - StateTracker::GetInstance()->SetBackgroundCommandBufferExecution(false); } } @@ -105,8 +97,6 @@ void PerfQuery::DisableQuery(PerfQueryGroup type) // DisableQuery should be called for each EnableQuery, so subtract one to get the previous one. u32 index = (m_query_read_pos + m_query_count - 1) % PERF_QUERY_BUFFER_SIZE; vkCmdEndQuery(g_command_buffer_mgr->GetCurrentCommandBuffer(), m_query_pool, index); - StateTracker::GetInstance()->SetBackgroundCommandBufferExecution(true); - DEBUG_LOG(VIDEO, "end query %u", index); } } @@ -198,40 +188,42 @@ bool PerfQuery::CreateReadbackBuffer() return true; } -void PerfQuery::QueueCopyQueryResults(VkCommandBuffer command_buffer, VkFence fence, - u32 start_index, u32 query_count) +void PerfQuery::QueueCopyQueryResults(u32 start_index, u32 query_count) { DEBUG_LOG(VIDEO, "queue copy of queries %u-%u", start_index, start_index + query_count - 1); // Transition buffer for GPU write // TODO: Is this needed? 
- m_readback_buffer->PrepareForGPUWrite(command_buffer, VK_ACCESS_TRANSFER_WRITE_BIT, + m_readback_buffer->PrepareForGPUWrite(g_command_buffer_mgr->GetCurrentCommandBuffer(), + VK_ACCESS_TRANSFER_WRITE_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT); // Copy from queries -> buffer - vkCmdCopyQueryPoolResults(command_buffer, m_query_pool, start_index, query_count, - m_readback_buffer->GetBuffer(), start_index * sizeof(PerfQueryDataType), - sizeof(PerfQueryDataType), VK_QUERY_RESULT_WAIT_BIT); + vkCmdCopyQueryPoolResults(g_command_buffer_mgr->GetCurrentCommandBuffer(), m_query_pool, + start_index, query_count, m_readback_buffer->GetBuffer(), + start_index * sizeof(PerfQueryDataType), sizeof(PerfQueryDataType), + VK_QUERY_RESULT_WAIT_BIT); // Prepare for host readback - m_readback_buffer->FlushGPUCache(command_buffer, VK_ACCESS_TRANSFER_WRITE_BIT, - VK_PIPELINE_STAGE_TRANSFER_BIT); + m_readback_buffer->FlushGPUCache(g_command_buffer_mgr->GetCurrentCommandBuffer(), + VK_ACCESS_TRANSFER_WRITE_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT); // Reset queries so they're ready to use again - vkCmdResetQueryPool(command_buffer, m_query_pool, start_index, query_count); + vkCmdResetQueryPool(g_command_buffer_mgr->GetCurrentCommandBuffer(), m_query_pool, start_index, + query_count); // Flag all queries as available, but with a fence that has to be completed first for (u32 i = 0; i < query_count; i++) { u32 index = start_index + i; ActiveQuery& entry = m_query_buffer[index]; - entry.pending_fence = fence; + entry.pending_fence = g_command_buffer_mgr->GetCurrentCommandBufferFence(); entry.available = true; entry.active = false; } } -void PerfQuery::OnCommandBufferQueued(VkCommandBuffer command_buffer, VkFence fence) +void PerfQuery::FlushQueries() { // Flag all pending queries that aren't available as available after execution. 
u32 copy_start_index = 0; @@ -254,7 +246,7 @@ void PerfQuery::OnCommandBufferQueued(VkCommandBuffer command_buffer, VkFence fe ASSERT(entry.active); if (index < copy_start_index) { - QueueCopyQueryResults(command_buffer, fence, copy_start_index, copy_count); + QueueCopyQueryResults(copy_start_index, copy_count); copy_start_index = index; copy_count = 0; } @@ -266,10 +258,10 @@ void PerfQuery::OnCommandBufferQueued(VkCommandBuffer command_buffer, VkFence fe } if (copy_count > 0) - QueueCopyQueryResults(command_buffer, fence, copy_start_index, copy_count); + QueueCopyQueryResults(copy_start_index, copy_count); } -void PerfQuery::OnCommandBufferExecuted(VkFence fence) +void PerfQuery::OnFenceSignaled(VkFence fence) { // Need to save these since ProcessResults will modify them. u32 query_read_pos = m_query_read_pos; @@ -350,7 +342,7 @@ void PerfQuery::NonBlockingPartialFlush() // Submit a command buffer in the background if the front query is not bound to one. // Ideally this will complete before the buffer fills. if (m_query_buffer[m_query_read_pos].pending_fence == VK_NULL_HANDLE) - Util::ExecuteCurrentCommandsAndRestoreState(true, false); + Renderer::GetInstance()->ExecuteCommandBuffer(true, false); } void PerfQuery::BlockingPartialFlush() @@ -364,7 +356,7 @@ void PerfQuery::BlockingPartialFlush() { // This will callback OnCommandBufferQueued which will set the fence on the entry. // We wait for completion, which will also call OnCommandBufferExecuted, and clear the fence. 
- Util::ExecuteCurrentCommandsAndRestoreState(false, true); + Renderer::GetInstance()->ExecuteCommandBuffer(false, true); } else { @@ -373,4 +365,4 @@ void PerfQuery::BlockingPartialFlush() g_command_buffer_mgr->WaitForFence(entry.pending_fence); } } -} +} // namespace Vulkan diff --git a/Source/Core/VideoBackends/Vulkan/PerfQuery.h b/Source/Core/VideoBackends/Vulkan/PerfQuery.h index c5f5d13e90..47ccf22a66 100644 --- a/Source/Core/VideoBackends/Vulkan/PerfQuery.h +++ b/Source/Core/VideoBackends/Vulkan/PerfQuery.h @@ -24,6 +24,7 @@ public: static PerfQuery* GetInstance(); bool Initialize(); + void FlushQueries(); void EnableQuery(PerfQueryGroup type) override; void DisableQuery(PerfQueryGroup type) override; @@ -43,12 +44,11 @@ private: bool CreateQueryPool(); bool CreateReadbackBuffer(); - void QueueCopyQueryResults(VkCommandBuffer command_buffer, VkFence fence, u32 start_index, - u32 query_count); + void QueueCopyQueryResults(u32 start_index, u32 query_count); void ProcessResults(u32 start_index, u32 query_count); void OnCommandBufferQueued(VkCommandBuffer command_buffer, VkFence fence); - void OnCommandBufferExecuted(VkFence fence); + void OnFenceSignaled(VkFence fence); void NonBlockingPartialFlush(); void BlockingPartialFlush(); diff --git a/Source/Core/VideoBackends/Vulkan/PostProcessing.cpp b/Source/Core/VideoBackends/Vulkan/PostProcessing.cpp deleted file mode 100644 index 57dbd3e391..0000000000 --- a/Source/Core/VideoBackends/Vulkan/PostProcessing.cpp +++ /dev/null @@ -1,316 +0,0 @@ -// Copyright 2017 Dolphin Emulator Project -// Licensed under GPLv2+ -// Refer to the license.txt file included. 
- -#include "VideoBackends/Vulkan/PostProcessing.h" -#include - -#include "Common/Assert.h" -#include "Common/StringUtil.h" - -#include "VideoBackends/Vulkan/CommandBufferManager.h" -#include "VideoBackends/Vulkan/ObjectCache.h" -#include "VideoBackends/Vulkan/ShaderCache.h" -#include "VideoBackends/Vulkan/Texture2D.h" -#include "VideoBackends/Vulkan/Util.h" -#include "VideoBackends/Vulkan/VulkanContext.h" - -#include "VideoCommon/VideoCommon.h" -#include "VideoCommon/VideoConfig.h" - -namespace Vulkan -{ -VulkanPostProcessing::~VulkanPostProcessing() -{ - if (m_default_fragment_shader != VK_NULL_HANDLE) - vkDestroyShaderModule(g_vulkan_context->GetDevice(), m_default_fragment_shader, nullptr); - if (m_fragment_shader != VK_NULL_HANDLE) - vkDestroyShaderModule(g_vulkan_context->GetDevice(), m_fragment_shader, nullptr); -} - -bool VulkanPostProcessing::Initialize() -{ - if (!CompileDefaultShader()) - return false; - - RecompileShader(); - return true; -} - -void VulkanPostProcessing::BlitFromTexture(const TargetRectangle& dst, const TargetRectangle& src, - const Texture2D* src_tex, int src_layer, - VkRenderPass render_pass) -{ - // If the source layer is negative we simply copy all available layers. - VkShaderModule geometry_shader = - src_layer < 0 ? g_shader_cache->GetPassthroughGeometryShader() : VK_NULL_HANDLE; - VkShaderModule fragment_shader = - m_fragment_shader != VK_NULL_HANDLE ? m_fragment_shader : m_default_fragment_shader; - UtilityShaderDraw draw(g_command_buffer_mgr->GetCurrentCommandBuffer(), - g_object_cache->GetPipelineLayout(PIPELINE_LAYOUT_STANDARD), render_pass, - g_shader_cache->GetPassthroughVertexShader(), geometry_shader, - fragment_shader); - - // Source is always bound. - draw.SetPSSampler(0, src_tex->GetView(), g_object_cache->GetLinearSampler()); - - // No need to allocate uniforms for the default shader. - // The config will also still contain the invalid shader at this point. 
- if (fragment_shader != m_default_fragment_shader) - { - size_t uniforms_size = CalculateUniformsSize(); - u8* uniforms = draw.AllocatePSUniforms(uniforms_size); - FillUniformBuffer(uniforms, src, src_tex, src_layer); - draw.CommitPSUniforms(uniforms_size); - } - - draw.DrawQuad(dst.left, dst.top, dst.GetWidth(), dst.GetHeight(), src.left, src.top, src_layer, - src.GetWidth(), src.GetHeight(), static_cast(src_tex->GetWidth()), - static_cast(src_tex->GetHeight())); -} - -struct BuiltinUniforms -{ - float resolution[4]; - float src_rect[4]; - u32 time; - u32 unused[3]; -}; - -size_t VulkanPostProcessing::CalculateUniformsSize() const -{ - // Allocate a vec4 for each uniform to simplify allocation. - return sizeof(BuiltinUniforms) + m_config.GetOptions().size() * sizeof(float) * 4; -} - -void VulkanPostProcessing::FillUniformBuffer(u8* buf, const TargetRectangle& src, - const Texture2D* src_tex, int src_layer) -{ - float src_width_float = static_cast(src_tex->GetWidth()); - float src_height_float = static_cast(src_tex->GetHeight()); - BuiltinUniforms builtin_uniforms = { - {src_width_float, src_height_float, 1.0f / src_width_float, 1.0f / src_height_float}, - {static_cast(src.left) / src_width_float, - static_cast(src.top) / src_height_float, - static_cast(src.GetWidth()) / src_width_float, - static_cast(src.GetHeight()) / src_height_float}, - static_cast(m_timer.GetTimeElapsed())}; - - std::memcpy(buf, &builtin_uniforms, sizeof(builtin_uniforms)); - buf += sizeof(builtin_uniforms); - - for (const auto& it : m_config.GetOptions()) - { - union - { - u32 as_bool[4]; - s32 as_int[4]; - float as_float[4]; - } value = {}; - - switch (it.second.m_type) - { - case PostProcessingShaderConfiguration::ConfigurationOption::OptionType::OPTION_BOOL: - value.as_bool[0] = it.second.m_bool_value ? 
1 : 0; - break; - - case PostProcessingShaderConfiguration::ConfigurationOption::OptionType::OPTION_INTEGER: - ASSERT(it.second.m_integer_values.size() < 4); - std::copy_n(it.second.m_integer_values.begin(), it.second.m_integer_values.size(), - value.as_int); - break; - - case PostProcessingShaderConfiguration::ConfigurationOption::OptionType::OPTION_FLOAT: - ASSERT(it.second.m_float_values.size() < 4); - std::copy_n(it.second.m_float_values.begin(), it.second.m_float_values.size(), - value.as_float); - break; - } - - std::memcpy(buf, &value, sizeof(value)); - buf += sizeof(value); - } -} - -constexpr char DEFAULT_FRAGMENT_SHADER_SOURCE[] = R"( - layout(set = 1, binding = 0) uniform sampler2DArray samp0; - - layout(location = 0) in float3 uv0; - layout(location = 1) in float4 col0; - layout(location = 0) out float4 ocol0; - - void main() - { - ocol0 = float4(texture(samp0, uv0).xyz, 1.0); - } -)"; - -constexpr char POSTPROCESSING_SHADER_HEADER[] = R"( - SAMPLER_BINDING(0) uniform sampler2DArray samp0; - SAMPLER_BINDING(1) uniform sampler2DArray samp1; - - layout(location = 0) in float3 uv0; - layout(location = 1) in float4 col0; - layout(location = 0) out float4 ocol0; - - // Interfacing functions - // The EFB may have a zero alpha value, which we don't want to write to the frame dump, so set it to one here. 
- float4 Sample() - { - return float4(texture(samp0, uv0).xyz, 1.0); - } - - float4 SampleLocation(float2 location) - { - return float4(texture(samp0, float3(location, uv0.z)).xyz, 1.0); - } - - float4 SampleLayer(int layer) - { - return float4(texture(samp0, float3(uv0.xy, float(layer))).xyz, 1.0); - } - - #define SampleOffset(offset) float4(textureOffset(samp0, uv0, offset).xyz, 1.0) - - float2 GetResolution() - { - return options.resolution.xy; - } - - float2 GetInvResolution() - { - return options.resolution.zw; - } - - float2 GetCoordinates() - { - return uv0.xy; - } - - uint GetTime() - { - return options.time; - } - - void SetOutput(float4 color) - { - ocol0 = color; - } - - #define GetOption(x) (options.x) - #define OptionEnabled(x) (options.x != 0) - - // Workaround because there is no getter function for src rect/layer. - float4 src_rect = options.src_rect; - int layer = int(uv0.z); -)"; - -void VulkanPostProcessing::UpdateConfig() -{ - if (m_config.GetShader() == g_ActiveConfig.sPostProcessingShader) - return; - - RecompileShader(); -} - -bool VulkanPostProcessing::CompileDefaultShader() -{ - m_default_fragment_shader = Util::CompileAndCreateFragmentShader(DEFAULT_FRAGMENT_SHADER_SOURCE); - if (m_default_fragment_shader == VK_NULL_HANDLE) - { - PanicAlert("Failed to compile default post-processing shader."); - return false; - } - - return true; -} - -bool VulkanPostProcessing::RecompileShader() -{ - // As a driver can return the same new module pointer when destroying a shader and re-compiling, - // we need to wipe out the pipeline cache, otherwise we risk using old pipelines with old shaders. - // We can't just clear a single pipeline, because we don't know which render pass is going to be - // used here either. 
- if (m_fragment_shader != VK_NULL_HANDLE) - { - g_command_buffer_mgr->WaitForGPUIdle(); - g_shader_cache->ClearPipelineCache(); - vkDestroyShaderModule(g_vulkan_context->GetDevice(), m_fragment_shader, nullptr); - m_fragment_shader = VK_NULL_HANDLE; - } - - // If post-processing is disabled, just use the default shader. - // This way we don't need to allocate uniforms. - if (g_ActiveConfig.sPostProcessingShader.empty()) - return true; - - // Generate GLSL and compile the new shader. - std::string main_code = m_config.LoadShader(); - std::string options_code = GetGLSLUniformBlock(); - std::string code = options_code + POSTPROCESSING_SHADER_HEADER + main_code; - m_fragment_shader = Util::CompileAndCreateFragmentShader(code); - if (m_fragment_shader == VK_NULL_HANDLE) - { - // BlitFromTexture will use the default shader as a fallback. - PanicAlert("Failed to compile post-processing shader %s", m_config.GetShader().c_str()); - return false; - } - - return true; -} - -std::string VulkanPostProcessing::GetGLSLUniformBlock() const -{ - std::stringstream ss; - u32 unused_counter = 1; - ss << "UBO_BINDING(std140, 1) uniform PSBlock {\n"; - - // Builtin uniforms - ss << " float4 resolution;\n"; - ss << " float4 src_rect;\n"; - ss << " uint time;\n"; - for (u32 i = 0; i < 3; i++) - ss << " uint unused" << unused_counter++ << ";\n\n"; - - // Custom options/uniforms - for (const auto& it : m_config.GetOptions()) - { - if (it.second.m_type == - PostProcessingShaderConfiguration::ConfigurationOption::OptionType::OPTION_BOOL) - { - ss << StringFromFormat(" int %s;\n", it.first.c_str()); - for (u32 i = 0; i < 3; i++) - ss << " int unused" << unused_counter++ << ";\n"; - } - else if (it.second.m_type == - PostProcessingShaderConfiguration::ConfigurationOption::OptionType::OPTION_INTEGER) - { - u32 count = static_cast(it.second.m_integer_values.size()); - if (count == 1) - ss << StringFromFormat(" int %s;\n", it.first.c_str()); - else - ss << StringFromFormat(" int%u %s;\n", count, 
it.first.c_str()); - - for (u32 i = count; i < 4; i++) - ss << " int unused" << unused_counter++ << ";\n"; - } - else if (it.second.m_type == - PostProcessingShaderConfiguration::ConfigurationOption::OptionType::OPTION_FLOAT) - { - u32 count = static_cast(it.second.m_float_values.size()); - if (count == 1) - ss << StringFromFormat(" float %s;\n", it.first.c_str()); - else - ss << StringFromFormat(" float%u %s;\n", count, it.first.c_str()); - - for (u32 i = count; i < 4; i++) - ss << " float unused" << unused_counter++ << ";\n"; - } - } - - ss << "} options;\n\n"; - - return ss.str(); -} - -} // namespace Vulkan diff --git a/Source/Core/VideoBackends/Vulkan/PostProcessing.h b/Source/Core/VideoBackends/Vulkan/PostProcessing.h deleted file mode 100644 index e9c18b9d2d..0000000000 --- a/Source/Core/VideoBackends/Vulkan/PostProcessing.h +++ /dev/null @@ -1,44 +0,0 @@ -// Copyright 2017 Dolphin Emulator Project -// Licensed under GPLv2+ -// Refer to the license.txt file included. - -#pragma once - -#include - -#include "VideoBackends/Vulkan/VulkanContext.h" - -#include "VideoCommon/PostProcessing.h" -#include "VideoCommon/VideoCommon.h" - -namespace Vulkan -{ -class Texture2D; - -class VulkanPostProcessing : public PostProcessingShaderImplementation -{ -public: - VulkanPostProcessing() = default; - ~VulkanPostProcessing(); - - bool Initialize(); - - void BlitFromTexture(const TargetRectangle& dst, const TargetRectangle& src, - const Texture2D* src_tex, int src_layer, VkRenderPass render_pass); - - void UpdateConfig(); - -private: - size_t CalculateUniformsSize() const; - void FillUniformBuffer(u8* buf, const TargetRectangle& src, const Texture2D* src_tex, - int src_layer); - - bool CompileDefaultShader(); - bool RecompileShader(); - std::string GetGLSLUniformBlock() const; - - VkShaderModule m_fragment_shader = VK_NULL_HANDLE; - VkShaderModule m_default_fragment_shader = VK_NULL_HANDLE; -}; - -} // namespace diff --git a/Source/Core/VideoBackends/Vulkan/Renderer.cpp 
b/Source/Core/VideoBackends/Vulkan/Renderer.cpp index a3085aa486..0254f128a8 100644 --- a/Source/Core/VideoBackends/Vulkan/Renderer.cpp +++ b/Source/Core/VideoBackends/Vulkan/Renderer.cpp @@ -18,27 +18,22 @@ #include "VideoBackends/Vulkan/BoundingBox.h" #include "VideoBackends/Vulkan/CommandBufferManager.h" -#include "VideoBackends/Vulkan/FramebufferManager.h" #include "VideoBackends/Vulkan/ObjectCache.h" -#include "VideoBackends/Vulkan/PostProcessing.h" +#include "VideoBackends/Vulkan/PerfQuery.h" #include "VideoBackends/Vulkan/Renderer.h" #include "VideoBackends/Vulkan/StateTracker.h" #include "VideoBackends/Vulkan/StreamBuffer.h" #include "VideoBackends/Vulkan/SwapChain.h" -#include "VideoBackends/Vulkan/TextureCache.h" -#include "VideoBackends/Vulkan/Util.h" #include "VideoBackends/Vulkan/VKPipeline.h" #include "VideoBackends/Vulkan/VKShader.h" #include "VideoBackends/Vulkan/VKTexture.h" +#include "VideoBackends/Vulkan/VertexFormat.h" #include "VideoBackends/Vulkan/VulkanContext.h" -#include "VideoCommon/BPFunctions.h" -#include "VideoCommon/BPMemory.h" #include "VideoCommon/DriverDetails.h" -#include "VideoCommon/OnScreenDisplay.h" -#include "VideoCommon/PixelEngine.h" +#include "VideoCommon/FramebufferManager.h" #include "VideoCommon/RenderState.h" -#include "VideoCommon/TextureCacheBase.h" +#include "VideoCommon/VertexManagerBase.h" #include "VideoCommon/VideoBackendBase.h" #include "VideoCommon/VideoCommon.h" #include "VideoCommon/VideoConfig.h" @@ -59,11 +54,6 @@ Renderer::Renderer(std::unique_ptr swap_chain, float backbuffer_scale Renderer::~Renderer() = default; -Renderer* Renderer::GetInstance() -{ - return static_cast(g_renderer.get()); -} - bool Renderer::IsHeadless() const { return m_swap_chain == nullptr; @@ -74,8 +64,6 @@ bool Renderer::Initialize() if (!::Renderer::Initialize()) return false; - BindEFBToStateTracker(); - m_bounding_box = std::make_unique(); if (!m_bounding_box->Initialize()) { @@ -83,34 +71,16 @@ bool Renderer::Initialize() return 
false; } - if (g_vulkan_context->SupportsBoundingBox()) - { - // Bind bounding box to state tracker - StateTracker::GetInstance()->SetBBoxBuffer(m_bounding_box->GetGPUBuffer(), - m_bounding_box->GetGPUBufferOffset(), - m_bounding_box->GetGPUBufferSize()); - } - - // Initialize post processing. - m_post_processor = std::make_unique(); - if (!static_cast(m_post_processor.get())->Initialize()) - { - PanicAlert("failed to initialize post processor."); - return false; - } - // Various initialization routines will have executed commands on the command buffer. // Execute what we have done before beginning the first frame. - g_command_buffer_mgr->PrepareToSubmitCommandBuffer(); - g_command_buffer_mgr->SubmitCommandBuffer(false); - BeginFrame(); - + ExecuteCommandBuffer(true, false); return true; } void Renderer::Shutdown() { ::Renderer::Shutdown(); + m_swap_chain.reset(); } std::unique_ptr Renderer::CreateTexture(const TextureConfig& config) @@ -136,17 +106,22 @@ std::unique_ptr Renderer::CreateShaderFromBinary(ShaderStage sta return VKShader::CreateFromBinary(stage, data, length); } +std::unique_ptr +Renderer::CreateNativeVertexFormat(const PortableVertexDeclaration& vtx_decl) +{ + return std::make_unique(vtx_decl); +} + std::unique_ptr Renderer::CreatePipeline(const AbstractPipelineConfig& config) { return VKPipeline::Create(config); } -std::unique_ptr -Renderer::CreateFramebuffer(const AbstractTexture* color_attachment, - const AbstractTexture* depth_attachment) +std::unique_ptr Renderer::CreateFramebuffer(AbstractTexture* color_attachment, + AbstractTexture* depth_attachment) { - return VKFramebuffer::Create(static_cast(color_attachment), - static_cast(depth_attachment)); + return VKFramebuffer::Create(static_cast(color_attachment), + static_cast(depth_attachment)); } void Renderer::SetPipeline(const AbstractPipeline* pipeline) @@ -154,90 +129,6 @@ void Renderer::SetPipeline(const AbstractPipeline* pipeline) 
StateTracker::GetInstance()->SetPipeline(static_cast(pipeline)); } -u32 Renderer::AccessEFB(EFBAccessType type, u32 x, u32 y, u32 poke_data) -{ - if (type == EFBAccessType::PeekColor) - { - u32 color = FramebufferManager::GetInstance()->PeekEFBColor(x, y); - - // a little-endian value is expected to be returned - color = ((color & 0xFF00FF00) | ((color >> 16) & 0xFF) | ((color << 16) & 0xFF0000)); - - // check what to do with the alpha channel (GX_PokeAlphaRead) - PixelEngine::UPEAlphaReadReg alpha_read_mode = PixelEngine::GetAlphaReadMode(); - - if (bpmem.zcontrol.pixel_format == PEControl::RGBA6_Z24) - { - color = RGBA8ToRGBA6ToRGBA8(color); - } - else if (bpmem.zcontrol.pixel_format == PEControl::RGB565_Z16) - { - color = RGBA8ToRGB565ToRGBA8(color); - } - if (bpmem.zcontrol.pixel_format != PEControl::RGBA6_Z24) - { - color |= 0xFF000000; - } - - if (alpha_read_mode.ReadMode == 2) - { - return color; // GX_READ_NONE - } - else if (alpha_read_mode.ReadMode == 1) - { - return color | 0xFF000000; // GX_READ_FF - } - else /*if(alpha_read_mode.ReadMode == 0)*/ - { - return color & 0x00FFFFFF; // GX_READ_00 - } - } - else // if (type == EFBAccessType::PeekZ) - { - // Depth buffer is inverted for improved precision near far plane - float depth = 1.0f - FramebufferManager::GetInstance()->PeekEFBDepth(x, y); - u32 ret = 0; - - if (bpmem.zcontrol.pixel_format == PEControl::RGB565_Z16) - { - // if Z is in 16 bit format you must return a 16 bit integer - ret = MathUtil::Clamp(static_cast(depth * 65536.0f), 0, 0xFFFF); - } - else - { - ret = MathUtil::Clamp(static_cast(depth * 16777216.0f), 0, 0xFFFFFF); - } - - return ret; - } -} - -void Renderer::PokeEFB(EFBAccessType type, const EfbPokeData* points, size_t num_points) -{ - if (type == EFBAccessType::PokeColor) - { - for (size_t i = 0; i < num_points; i++) - { - // Convert to expected format (BGRA->RGBA) - // TODO: Check alpha, depending on mode? 
- const EfbPokeData& point = points[i]; - u32 color = ((point.data & 0xFF00FF00) | ((point.data >> 16) & 0xFF) | - ((point.data << 16) & 0xFF0000)); - FramebufferManager::GetInstance()->PokeEFBColor(point.x, point.y, color); - } - } - else // if (type == EFBAccessType::PokeZ) - { - for (size_t i = 0; i < num_points; i++) - { - // Convert to floating-point depth. - const EfbPokeData& point = points[i]; - float depth = (1.0f - float(point.data & 0xFFFFFF) / 16777216.0f); - FramebufferManager::GetInstance()->PokeEFBDepth(point.x, point.y, depth); - } - } -} - u16 Renderer::BBoxRead(int index) { s32 value = m_bounding_box->Get(static_cast(index)); @@ -285,31 +176,18 @@ void Renderer::BBoxWrite(int index, u16 value) m_bounding_box->Set(static_cast(index), scaled_value); } -TargetRectangle Renderer::ConvertEFBRectangle(const EFBRectangle& rc) +void Renderer::BBoxFlush() { - TargetRectangle result; - result.left = EFBToScaledX(rc.left); - result.top = EFBToScaledY(rc.top); - result.right = EFBToScaledX(rc.right); - result.bottom = EFBToScaledY(rc.bottom); - return result; -} - -void Renderer::BeginFrame() -{ - // Activate a new command list, and restore state ready for the next draw - g_command_buffer_mgr->ActivateCommandBuffer(); - - // Ensure that the state tracker rebinds everything, and allocates a new set - // of descriptors out of the next pool. 
- StateTracker::GetInstance()->InvalidateDescriptorSets(); - StateTracker::GetInstance()->InvalidateConstants(); - StateTracker::GetInstance()->SetPendingRebind(); + m_bounding_box->Flush(); + m_bounding_box->Invalidate(); } void Renderer::ClearScreen(const EFBRectangle& rc, bool color_enable, bool alpha_enable, bool z_enable, u32 color, u32 z) { + g_framebuffer_manager->FlushEFBPokes(); + g_framebuffer_manager->InvalidatePeekCache(); + // Native -> EFB coordinates TargetRectangle target_rc = Renderer::ConvertEFBRectangle(rc); @@ -340,7 +218,9 @@ void Renderer::ClearScreen(const EFBRectangle& rc, bool color_enable, bool alpha clear_color_value.color.float32[1] = static_cast((color >> 8) & 0xFF) / 255.0f; clear_color_value.color.float32[2] = static_cast((color >> 0) & 0xFF) / 255.0f; clear_color_value.color.float32[3] = static_cast((color >> 24) & 0xFF) / 255.0f; - clear_depth_value.depthStencil.depth = (1.0f - (static_cast(z & 0xFFFFFF) / 16777216.0f)); + clear_depth_value.depthStencil.depth = static_cast(z & 0xFFFFFF) / 16777216.0f; + if (!g_ActiveConfig.backend_info.bSupportsReversedDepthRange) + clear_depth_value.depthStencil.depth = 1.0f - clear_depth_value.depthStencil.depth; // If we're not in a render pass (start of the frame), we can use a clear render pass // to discard the data, rather than loading and then clearing. 
@@ -396,7 +276,7 @@ void Renderer::ClearScreen(const EFBRectangle& rc, bool color_enable, bool alpha } if (num_clear_attachments > 0) { - VkClearRect vk_rect = {target_vk_rc, 0, FramebufferManager::GetInstance()->GetEFBLayers()}; + VkClearRect vk_rect = {target_vk_rc, 0, g_framebuffer_manager->GetEFBLayers()}; if (!StateTracker::GetInstance()->IsWithinRenderArea( target_vk_rc.offset.x, target_vk_rc.offset.y, target_vk_rc.extent.width, target_vk_rc.extent.height)) @@ -414,57 +294,17 @@ void Renderer::ClearScreen(const EFBRectangle& rc, bool color_enable, bool alpha if (!color_enable && !alpha_enable && !z_enable) return; - // Clearing must occur within a render pass. - if (!StateTracker::GetInstance()->IsWithinRenderArea(target_vk_rc.offset.x, target_vk_rc.offset.y, - target_vk_rc.extent.width, - target_vk_rc.extent.height)) - { - StateTracker::GetInstance()->EndClearRenderPass(); - } - StateTracker::GetInstance()->BeginRenderPass(); - StateTracker::GetInstance()->SetPendingRebind(); - - // Mask away the appropriate colors and use a shader - BlendingState blend_state = RenderState::GetNoBlendingBlendState(); - blend_state.colorupdate = color_enable; - blend_state.alphaupdate = alpha_enable; - - DepthState depth_state = RenderState::GetNoDepthTestingDepthStencilState(); - depth_state.testenable = z_enable; - depth_state.updateenable = z_enable; - depth_state.func = ZMode::ALWAYS; - - // No need to start a new render pass, but we do need to restore viewport state - UtilityShaderDraw draw(g_command_buffer_mgr->GetCurrentCommandBuffer(), - g_object_cache->GetPipelineLayout(PIPELINE_LAYOUT_STANDARD), - FramebufferManager::GetInstance()->GetEFBLoadRenderPass(), - g_shader_cache->GetPassthroughVertexShader(), - g_shader_cache->GetPassthroughGeometryShader(), - g_shader_cache->GetClearFragmentShader()); - - draw.SetMultisamplingState(FramebufferManager::GetInstance()->GetEFBMultisamplingState()); - draw.SetDepthState(depth_state); - draw.SetBlendState(blend_state); - - 
draw.DrawColoredQuad(target_rc.left, target_rc.top, target_rc.GetWidth(), target_rc.GetHeight(), - clear_color_value.color.float32[0], clear_color_value.color.float32[1], - clear_color_value.color.float32[2], clear_color_value.color.float32[3], - clear_depth_value.depthStencil.depth); -} - -void Renderer::ReinterpretPixelData(unsigned int convtype) -{ - StateTracker::GetInstance()->EndRenderPass(); - StateTracker::GetInstance()->SetPendingRebind(); - FramebufferManager::GetInstance()->ReinterpretPixelData(convtype); - - // EFB framebuffer has now changed, so update accordingly. - BindEFBToStateTracker(); + g_framebuffer_manager->ClearEFB(rc, color_enable, alpha_enable, z_enable, color, z); } void Renderer::Flush() { - Util::ExecuteCurrentCommandsAndRestoreState(true, false); + ExecuteCommandBuffer(true, false); +} + +void Renderer::WaitForGPUIdle() +{ + ExecuteCommandBuffer(false, true); } void Renderer::BindBackbuffer(const ClearColor& clear_color) @@ -475,35 +315,13 @@ void Renderer::BindBackbuffer(const ClearColor& clear_color) CheckForSurfaceChange(); CheckForSurfaceResize(); - // Ensure the worker thread is not still submitting a previous command buffer. - // In other words, the last frame has been submitted (otherwise the next call would - // be a race, as the image may not have been consumed yet). - g_command_buffer_mgr->PrepareToSubmitCommandBuffer(); - - VkResult res; - if (!g_command_buffer_mgr->CheckLastPresentFail()) - { - // Grab the next image from the swap chain in preparation for drawing the window. - res = m_swap_chain->AcquireNextImage(); - } - else - { - // If the last present failed, we need to recreate the swap chain. - res = VK_ERROR_OUT_OF_DATE_KHR; - } - + VkResult res = g_command_buffer_mgr->CheckLastPresentFail() ? VK_ERROR_OUT_OF_DATE_KHR : + m_swap_chain->AcquireNextImage(); if (res == VK_SUBOPTIMAL_KHR || res == VK_ERROR_OUT_OF_DATE_KHR) { - // There's an issue here. 
We can't resize the swap chain while the GPU is still busy with it, - // but calling WaitForGPUIdle would create a deadlock as PrepareToSubmitCommandBuffer has been - // called by SwapImpl. WaitForGPUIdle waits on the semaphore, which PrepareToSubmitCommandBuffer - // has already done, so it blocks indefinitely. To work around this, we submit the current - // command buffer, resize the swap chain (which calls WaitForGPUIdle), and then finally call - // PrepareToSubmitCommandBuffer to return to the state that the caller expects. - g_command_buffer_mgr->SubmitCommandBuffer(false); + // Execute cmdbuffer before resizing, as the last frame could still be presenting. + ExecuteCommandBuffer(false, true); m_swap_chain->ResizeSwapChain(); - BeginFrame(); - g_command_buffer_mgr->PrepareToSubmitCommandBuffer(); res = m_swap_chain->AcquireNextImage(); } if (res != VK_SUCCESS) @@ -512,30 +330,18 @@ void Renderer::BindBackbuffer(const ClearColor& clear_color) // Transition from undefined (or present src, but it can be substituted) to // color attachment ready for writing. These transitions must occur outside // a render pass, unless the render pass declares a self-dependency. - Texture2D* backbuffer = m_swap_chain->GetCurrentTexture(); - backbuffer->OverrideImageLayout(VK_IMAGE_LAYOUT_UNDEFINED); - backbuffer->TransitionToLayout(g_command_buffer_mgr->GetCurrentCommandBuffer(), - VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL); - m_current_framebuffer = nullptr; - m_current_framebuffer_width = backbuffer->GetWidth(); - m_current_framebuffer_height = backbuffer->GetHeight(); - - // Draw to the backbuffer. - VkRect2D region = {{0, 0}, {backbuffer->GetWidth(), backbuffer->GetHeight()}}; - StateTracker::GetInstance()->SetRenderPass(m_swap_chain->GetLoadRenderPass(), - m_swap_chain->GetClearRenderPass()); - StateTracker::GetInstance()->SetFramebuffer(m_swap_chain->GetCurrentFramebuffer(), region); - - // Begin render pass for rendering to the swap chain. 
- VkClearValue clear_value = {{{0.0f, 0.0f, 0.0f, 1.0f}}}; - StateTracker::GetInstance()->BeginClearRenderPass(region, &clear_value, 1); + m_swap_chain->GetCurrentTexture()->OverrideImageLayout(VK_IMAGE_LAYOUT_UNDEFINED); + m_swap_chain->GetCurrentTexture()->TransitionToLayout( + g_command_buffer_mgr->GetCurrentCommandBuffer(), VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL); + SetAndClearFramebuffer(m_swap_chain->GetCurrentFramebuffer(), + ClearColor{{0.0f, 0.0f, 0.0f, 1.0f}}); } void Renderer::PresentBackbuffer() { // End drawing to backbuffer StateTracker::GetInstance()->EndRenderPass(); - StateTracker::GetInstance()->OnEndFrame(); + PerfQuery::GetInstance()->FlushQueries(); // Transition the backbuffer to PRESENT_SRC to ensure all commands drawing // to it have finished before present. @@ -546,47 +352,25 @@ void Renderer::PresentBackbuffer() // Because this final command buffer is rendering to the swap chain, we need to wait for // the available semaphore to be signaled before executing the buffer. This final submission // can happen off-thread in the background while we're preparing the next frame. - g_command_buffer_mgr->SubmitCommandBuffer(true, m_swap_chain->GetImageAvailableSemaphore(), - m_swap_chain->GetRenderingFinishedSemaphore(), - m_swap_chain->GetSwapChain(), + g_command_buffer_mgr->SubmitCommandBuffer(true, m_swap_chain->GetSwapChain(), m_swap_chain->GetCurrentImageIndex()); - BeginFrame(); + + // New cmdbuffer, so invalidate state. 
+ StateTracker::GetInstance()->InvalidateCachedState(); } -void Renderer::RenderXFBToScreen(const AbstractTexture* texture, const EFBRectangle& rc) +void Renderer::ExecuteCommandBuffer(bool submit_off_thread, bool wait_for_completion) { - const TargetRectangle target_rc = GetTargetRectangle(); + StateTracker::GetInstance()->EndRenderPass(); + PerfQuery::GetInstance()->FlushQueries(); - VulkanPostProcessing* post_processor = static_cast(m_post_processor.get()); - if (g_ActiveConfig.stereo_mode == StereoMode::SBS || - g_ActiveConfig.stereo_mode == StereoMode::TAB) - { - TargetRectangle left_rect; - TargetRectangle right_rect; - std::tie(left_rect, right_rect) = ConvertStereoRectangle(target_rc); + // If we're waiting for completion, don't bother waking the worker thread. + const VkFence pending_fence = g_command_buffer_mgr->GetCurrentCommandBufferFence(); + g_command_buffer_mgr->SubmitCommandBuffer(submit_off_thread && wait_for_completion); + if (wait_for_completion) + g_command_buffer_mgr->WaitForFence(pending_fence); - post_processor->BlitFromTexture(left_rect, rc, - static_cast(texture)->GetRawTexIdentifier(), - 0, m_swap_chain->GetLoadRenderPass()); - post_processor->BlitFromTexture(right_rect, rc, - static_cast(texture)->GetRawTexIdentifier(), - 1, m_swap_chain->GetLoadRenderPass()); - } - else if (g_ActiveConfig.stereo_mode == StereoMode::QuadBuffer) - { - post_processor->BlitFromTexture(target_rc, rc, - static_cast(texture)->GetRawTexIdentifier(), - -1, m_swap_chain->GetLoadRenderPass()); - } - else - { - post_processor->BlitFromTexture(target_rc, rc, - static_cast(texture)->GetRawTexIdentifier(), - 0, m_swap_chain->GetLoadRenderPass()); - } - - // The post-processor uses the old-style Vulkan draws, which mess with the tracked state. 
- StateTracker::GetInstance()->SetPendingRebind(); + StateTracker::GetInstance()->InvalidateCachedState(); } void Renderer::CheckForSurfaceChange() @@ -595,8 +379,7 @@ void Renderer::CheckForSurfaceChange() return; // Submit the current draws up until rendering the XFB. - g_command_buffer_mgr->ExecuteCommandBuffer(false, false); - g_command_buffer_mgr->WaitForGPUIdle(); + ExecuteCommandBuffer(false, true); // Clear the present failed flag, since we don't want to resize after recreating. g_command_buffer_mgr->CheckLastPresentFail(); @@ -624,8 +407,7 @@ void Renderer::CheckForSurfaceResize() } // Wait for the GPU to catch up since we're going to destroy the swap chain. - g_command_buffer_mgr->ExecuteCommandBuffer(false, false); - g_command_buffer_mgr->WaitForGPUIdle(); + ExecuteCommandBuffer(false, true); // Clear the present failed flag, since we don't want to resize after recreating. g_command_buffer_mgr->CheckLastPresentFail(); @@ -637,45 +419,29 @@ void Renderer::CheckForSurfaceResize() void Renderer::OnConfigChanged(u32 bits) { - // Update texture cache settings with any changed options. - TextureCache::GetInstance()->OnConfigChanged(g_ActiveConfig); - - // Handle settings that can cause the EFB framebuffer to change. - if (bits & CONFIG_CHANGE_BIT_TARGET_SIZE) - RecreateEFBFramebuffer(); - - // MSAA samples changed, we need to recreate the EFB render pass. - // If the stereoscopy mode changed, we need to recreate the buffers as well. - // SSAA changed on/off, we have to recompile shaders. - // Changing stereoscopy from off<->on also requires shaders to be recompiled. 
- if (bits & (CONFIG_CHANGE_BIT_HOST_CONFIG | CONFIG_CHANGE_BIT_MULTISAMPLES)) - { - RecreateEFBFramebuffer(); - FramebufferManager::GetInstance()->RecompileShaders(); - g_shader_cache->ReloadPipelineCache(); - g_shader_cache->RecompileSharedShaders(); - } + if (bits & CONFIG_CHANGE_BIT_HOST_CONFIG) + g_object_cache->ReloadPipelineCache(); // For vsync, we need to change the present mode, which means recreating the swap chain. if (m_swap_chain && bits & CONFIG_CHANGE_BIT_VSYNC) { - g_command_buffer_mgr->WaitForGPUIdle(); + ExecuteCommandBuffer(false, true); m_swap_chain->SetVSync(g_ActiveConfig.bVSyncActive); } // For quad-buffered stereo we need to change the layer count, so recreate the swap chain. if (m_swap_chain && bits & CONFIG_CHANGE_BIT_STEREO_MODE) { - g_command_buffer_mgr->WaitForGPUIdle(); + ExecuteCommandBuffer(false, true); m_swap_chain->RecreateSwapChain(); } // Wipe sampler cache if force texture filtering or anisotropy changes. if (bits & (CONFIG_CHANGE_BIT_ANISOTROPY | CONFIG_CHANGE_BIT_FORCE_TEXTURE_FILTERING)) + { + ExecuteCommandBuffer(false, true); ResetSamplerStates(); - - // Check for a changed post-processing shader and recompile if needed. 
- static_cast(m_post_processor.get())->UpdateConfig(); + } } void Renderer::OnSwapChainResized() @@ -684,103 +450,55 @@ void Renderer::OnSwapChainResized() m_backbuffer_height = m_swap_chain->GetHeight(); } -void Renderer::BindEFBToStateTracker() -{ - // Update framebuffer in state tracker - VkRect2D framebuffer_size = {{0, 0}, - {FramebufferManager::GetInstance()->GetEFBWidth(), - FramebufferManager::GetInstance()->GetEFBHeight()}}; - StateTracker::GetInstance()->SetRenderPass( - FramebufferManager::GetInstance()->GetEFBLoadRenderPass(), - FramebufferManager::GetInstance()->GetEFBClearRenderPass()); - StateTracker::GetInstance()->SetFramebuffer( - FramebufferManager::GetInstance()->GetEFBFramebuffer(), framebuffer_size); - m_current_framebuffer = nullptr; - m_current_framebuffer_width = FramebufferManager::GetInstance()->GetEFBWidth(); - m_current_framebuffer_height = FramebufferManager::GetInstance()->GetEFBHeight(); -} - -void Renderer::RecreateEFBFramebuffer() -{ - // Ensure the GPU is finished with the current EFB textures. - g_command_buffer_mgr->WaitForGPUIdle(); - FramebufferManager::GetInstance()->RecreateEFBFramebuffer(); - BindEFBToStateTracker(); - - // Viewport and scissor rect have to be reset since they will be scaled differently. 
- BPFunctions::SetViewport(); - BPFunctions::SetScissor(); -} - -void Renderer::ApplyState() -{ -} - -void Renderer::ResetAPIState() -{ - // End the EFB render pass if active - StateTracker::GetInstance()->EndRenderPass(); -} - -void Renderer::RestoreAPIState() +void Renderer::BindFramebuffer(VKFramebuffer* fb) { StateTracker::GetInstance()->EndRenderPass(); - if (m_current_framebuffer) - static_cast(m_current_framebuffer)->TransitionForSample(); - BindEFBToStateTracker(); - BPFunctions::SetViewport(); - BPFunctions::SetScissor(); - - // Instruct the state tracker to re-bind everything before the next draw - StateTracker::GetInstance()->SetPendingRebind(); -} - -void Renderer::BindFramebuffer(const VKFramebuffer* fb) -{ - const VkRect2D render_area = {static_cast(fb->GetWidth()), - static_cast(fb->GetHeight())}; - - StateTracker::GetInstance()->EndRenderPass(); - if (m_current_framebuffer) - static_cast(m_current_framebuffer)->TransitionForSample(); + // Shouldn't be bound as a texture. 
+ if (fb->GetColorAttachment()) + { + StateTracker::GetInstance()->UnbindTexture( + static_cast(fb->GetColorAttachment())->GetView()); + } + if (fb->GetDepthAttachment()) + { + StateTracker::GetInstance()->UnbindTexture( + static_cast(fb->GetDepthAttachment())->GetView()); + } fb->TransitionForRender(); - StateTracker::GetInstance()->SetFramebuffer(fb->GetFB(), render_area); - StateTracker::GetInstance()->SetRenderPass(fb->GetLoadRenderPass(), fb->GetClearRenderPass()); + StateTracker::GetInstance()->SetFramebuffer(fb); m_current_framebuffer = fb; - m_current_framebuffer_width = fb->GetWidth(); - m_current_framebuffer_height = fb->GetHeight(); } -void Renderer::SetFramebuffer(const AbstractFramebuffer* framebuffer) +void Renderer::SetFramebuffer(AbstractFramebuffer* framebuffer) { - const VKFramebuffer* vkfb = static_cast(framebuffer); + if (m_current_framebuffer == framebuffer) + return; + + VKFramebuffer* vkfb = static_cast(framebuffer); BindFramebuffer(vkfb); - StateTracker::GetInstance()->BeginRenderPass(); } -void Renderer::SetAndDiscardFramebuffer(const AbstractFramebuffer* framebuffer) +void Renderer::SetAndDiscardFramebuffer(AbstractFramebuffer* framebuffer) { - const VKFramebuffer* vkfb = static_cast(framebuffer); + if (m_current_framebuffer == framebuffer) + return; + + VKFramebuffer* vkfb = static_cast(framebuffer); BindFramebuffer(vkfb); // If we're discarding, begin the discard pass, then switch to a load pass. // This way if the command buffer is flushed, we don't start another discard pass. 
- StateTracker::GetInstance()->SetRenderPass(vkfb->GetDiscardRenderPass(), - vkfb->GetClearRenderPass()); - StateTracker::GetInstance()->BeginRenderPass(); - StateTracker::GetInstance()->SetRenderPass(vkfb->GetLoadRenderPass(), vkfb->GetClearRenderPass()); + StateTracker::GetInstance()->BeginDiscardRenderPass(); } -void Renderer::SetAndClearFramebuffer(const AbstractFramebuffer* framebuffer, +void Renderer::SetAndClearFramebuffer(AbstractFramebuffer* framebuffer, const ClearColor& color_value, float depth_value) { - const VKFramebuffer* vkfb = static_cast(framebuffer); + VKFramebuffer* vkfb = static_cast(framebuffer); BindFramebuffer(vkfb); - const VkRect2D render_area = {static_cast(vkfb->GetWidth()), - static_cast(vkfb->GetHeight())}; std::array clear_values; u32 num_clear_values = 0; if (vkfb->GetColorFormat() != AbstractTextureFormat::Undefined) @@ -795,7 +513,7 @@ void Renderer::SetAndClearFramebuffer(const AbstractFramebuffer* framebuffer, clear_values[num_clear_values].depthStencil.stencil = 0; num_clear_values++; } - StateTracker::GetInstance()->BeginClearRenderPass(render_area, clear_values.data(), + StateTracker::GetInstance()->BeginClearRenderPass(vkfb->GetRect(), clear_values.data(), num_clear_values); } @@ -803,9 +521,27 @@ void Renderer::SetTexture(u32 index, const AbstractTexture* texture) { // Texture should always be in SHADER_READ_ONLY layout prior to use. // This is so we don't need to transition during render passes. - auto* tex = texture ? static_cast(texture)->GetRawTexIdentifier() : nullptr; - DEBUG_ASSERT(!tex || tex->GetLayout() == VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); - StateTracker::GetInstance()->SetTexture(index, tex ? 
tex->GetView() : VK_NULL_HANDLE); + const VKTexture* tex = static_cast(texture); + if (tex) + { + if (tex->GetLayout() != VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL) + { + if (StateTracker::GetInstance()->InRenderPass()) + { + WARN_LOG(VIDEO, "Transitioning image in render pass in Renderer::SetTexture()"); + StateTracker::GetInstance()->EndRenderPass(); + } + + tex->TransitionToLayout(g_command_buffer_mgr->GetCurrentCommandBuffer(), + VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); + } + + StateTracker::GetInstance()->SetTexture(index, tex->GetView()); + } + else + { + StateTracker::GetInstance()->SetTexture(0, VK_NULL_HANDLE); + } } void Renderer::SetSamplerState(u32 index, const SamplerState& state) @@ -826,10 +562,27 @@ void Renderer::SetSamplerState(u32 index, const SamplerState& state) m_sampler_states[index].hex = state.hex; } +void Renderer::SetComputeImageTexture(AbstractTexture* texture, bool read, bool write) +{ + VKTexture* vk_texture = static_cast(texture); + if (vk_texture) + { + StateTracker::GetInstance()->EndRenderPass(); + StateTracker::GetInstance()->SetImageTexture(vk_texture->GetView()); + vk_texture->TransitionToLayout(g_command_buffer_mgr->GetCurrentCommandBuffer(), + read ? (write ? VKTexture::ComputeImageLayout::ReadWrite : + VKTexture::ComputeImageLayout::ReadOnly) : + VKTexture::ComputeImageLayout::WriteOnly); + } + else + { + StateTracker::GetInstance()->SetImageTexture(VK_NULL_HANDLE); + } +} + void Renderer::UnbindTexture(const AbstractTexture* texture) { - StateTracker::GetInstance()->UnbindTexture( - static_cast(texture)->GetRawTexIdentifier()->GetView()); + StateTracker::GetInstance()->UnbindTexture(static_cast(texture)->GetView()); } void Renderer::ResetSamplerStates() @@ -839,7 +592,7 @@ void Renderer::ResetSamplerStates() g_command_buffer_mgr->WaitForGPUIdle(); // Invalidate all sampler states, next draw will re-initialize them. 
- for (size_t i = 0; i < m_sampler_states.size(); i++) + for (u32 i = 0; i < m_sampler_states.size(); i++) { m_sampler_states[i].hex = RenderState::GetPointSamplerState().hex; StateTracker::GetInstance()->SetSampler(i, g_object_cache->GetPointSampler()); @@ -849,10 +602,6 @@ void Renderer::ResetSamplerStates() g_object_cache->ClearSamplerCache(); } -void Renderer::SetInterlacingMode() -{ -} - void Renderer::SetScissorRect(const MathUtil::Rectangle& rc) { VkRect2D scissor = {{rc.left, rc.top}, @@ -863,14 +612,13 @@ void Renderer::SetScissorRect(const MathUtil::Rectangle& rc) void Renderer::SetViewport(float x, float y, float width, float height, float near_depth, float far_depth) { - VkViewport viewport = {x, y, std::max(width, 1.0f), std::max(height, 1.0f), - near_depth, far_depth}; + VkViewport viewport = {x, y, width, height, near_depth, far_depth}; StateTracker::GetInstance()->SetViewport(viewport); } void Renderer::Draw(u32 base_vertex, u32 num_vertices) { - if (StateTracker::GetInstance()->Bind()) + if (!StateTracker::GetInstance()->Bind()) return; vkCmdDraw(g_command_buffer_mgr->GetCurrentCommandBuffer(), num_vertices, 1, base_vertex, 0); @@ -884,4 +632,13 @@ void Renderer::DrawIndexed(u32 base_index, u32 num_indices, u32 base_vertex) vkCmdDrawIndexed(g_command_buffer_mgr->GetCurrentCommandBuffer(), num_indices, 1, base_index, base_vertex, 0); } + +void Renderer::DispatchComputeShader(const AbstractShader* shader, u32 groups_x, u32 groups_y, + u32 groups_z) +{ + StateTracker::GetInstance()->SetComputeShader(static_cast(shader)); + if (StateTracker::GetInstance()->BindCompute()) + vkCmdDispatch(g_command_buffer_mgr->GetCurrentCommandBuffer(), groups_x, groups_y, groups_z); +} + } // namespace Vulkan diff --git a/Source/Core/VideoBackends/Vulkan/Renderer.h b/Source/Core/VideoBackends/Vulkan/Renderer.h index 43e97279f0..2b3390b263 100644 --- a/Source/Core/VideoBackends/Vulkan/Renderer.h +++ b/Source/Core/VideoBackends/Vulkan/Renderer.h @@ -17,10 +17,8 @@ struct 
XFBSourceBase; namespace Vulkan { class BoundingBox; -class FramebufferManager; class SwapChain; class StagingTexture2D; -class Texture2D; class VKFramebuffer; class VKPipeline; class VKTexture; @@ -31,7 +29,7 @@ public: Renderer(std::unique_ptr swap_chain, float backbuffer_scale); ~Renderer() override; - static Renderer* GetInstance(); + static Renderer* GetInstance() { return static_cast(g_renderer.get()); } bool IsHeadless() const override; @@ -42,67 +40,60 @@ public: std::unique_ptr CreateStagingTexture(StagingTextureType type, const TextureConfig& config) override; std::unique_ptr - CreateFramebuffer(const AbstractTexture* color_attachment, - const AbstractTexture* depth_attachment) override; + CreateFramebuffer(AbstractTexture* color_attachment, AbstractTexture* depth_attachment) override; std::unique_ptr CreateShaderFromSource(ShaderStage stage, const char* source, size_t length) override; std::unique_ptr CreateShaderFromBinary(ShaderStage stage, const void* data, size_t length) override; + std::unique_ptr + CreateNativeVertexFormat(const PortableVertexDeclaration& vtx_decl) override; std::unique_ptr CreatePipeline(const AbstractPipelineConfig& config) override; SwapChain* GetSwapChain() const { return m_swap_chain.get(); } BoundingBox* GetBoundingBox() const { return m_bounding_box.get(); } - u32 AccessEFB(EFBAccessType type, u32 x, u32 y, u32 poke_data) override; - void PokeEFB(EFBAccessType type, const EfbPokeData* points, size_t num_points) override; u16 BBoxRead(int index) override; void BBoxWrite(int index, u16 value) override; - TargetRectangle ConvertEFBRectangle(const EFBRectangle& rc) override; + void BBoxFlush() override; void Flush() override; - void RenderXFBToScreen(const AbstractTexture* texture, const EFBRectangle& rc) override; + void WaitForGPUIdle() override; void OnConfigChanged(u32 bits) override; void ClearScreen(const EFBRectangle& rc, bool color_enable, bool alpha_enable, bool z_enable, u32 color, u32 z) override; - void 
ReinterpretPixelData(unsigned int convtype) override; - - void ApplyState() override; - - void ResetAPIState() override; - void RestoreAPIState() override; - void SetPipeline(const AbstractPipeline* pipeline) override; - void SetFramebuffer(const AbstractFramebuffer* framebuffer) override; - void SetAndDiscardFramebuffer(const AbstractFramebuffer* framebuffer) override; - void SetAndClearFramebuffer(const AbstractFramebuffer* framebuffer, - const ClearColor& color_value = {}, + void SetFramebuffer(AbstractFramebuffer* framebuffer) override; + void SetAndDiscardFramebuffer(AbstractFramebuffer* framebuffer) override; + void SetAndClearFramebuffer(AbstractFramebuffer* framebuffer, const ClearColor& color_value = {}, float depth_value = 0.0f) override; void SetScissorRect(const MathUtil::Rectangle& rc) override; void SetTexture(u32 index, const AbstractTexture* texture) override; void SetSamplerState(u32 index, const SamplerState& state) override; + void SetComputeImageTexture(AbstractTexture* texture, bool read, bool write) override; void UnbindTexture(const AbstractTexture* texture) override; - void SetInterlacingMode() override; void SetViewport(float x, float y, float width, float height, float near_depth, float far_depth) override; void Draw(u32 base_vertex, u32 num_vertices) override; void DrawIndexed(u32 base_index, u32 num_indices, u32 base_vertex) override; + void DispatchComputeShader(const AbstractShader* shader, u32 groups_x, u32 groups_y, + u32 groups_z) override; void BindBackbuffer(const ClearColor& clear_color = {}) override; void PresentBackbuffer() override; -private: - void BeginFrame(); + // Completes the current render pass, executes the command buffer, and restores state ready for + // next render. Use when you want to kick the current buffer to make room for new data. 
+ void ExecuteCommandBuffer(bool execute_off_thread, bool wait_for_completion = false); +private: void CheckForSurfaceChange(); void CheckForSurfaceResize(); void ResetSamplerStates(); void OnSwapChainResized(); - void BindEFBToStateTracker(); - void RecreateEFBFramebuffer(); - void BindFramebuffer(const VKFramebuffer* fb); + void BindFramebuffer(VKFramebuffer* fb); std::unique_ptr m_swap_chain; std::unique_ptr m_bounding_box; @@ -110,4 +101,4 @@ private: // Keep a copy of sampler states to avoid cache lookups every draw std::array m_sampler_states = {}; }; -} +} // namespace Vulkan diff --git a/Source/Core/VideoBackends/Vulkan/ShaderCache.cpp b/Source/Core/VideoBackends/Vulkan/ShaderCache.cpp deleted file mode 100644 index 33181e920c..0000000000 --- a/Source/Core/VideoBackends/Vulkan/ShaderCache.cpp +++ /dev/null @@ -1,859 +0,0 @@ -// Copyright 2016 Dolphin Emulator Project -// Licensed under GPLv2+ -// Refer to the license.txt file included. - -#include "VideoBackends/Vulkan/ShaderCache.h" - -#include -#include -#include -#include - -#include "Common/Assert.h" -#include "Common/CommonFuncs.h" -#include "Common/LinearDiskCache.h" -#include "Common/MsgHandler.h" - -#include "Core/ConfigManager.h" -#include "Core/Host.h" - -#include "VideoBackends/Vulkan/FramebufferManager.h" -#include "VideoBackends/Vulkan/ShaderCompiler.h" -#include "VideoBackends/Vulkan/StreamBuffer.h" -#include "VideoBackends/Vulkan/Util.h" -#include "VideoBackends/Vulkan/VertexFormat.h" -#include "VideoBackends/Vulkan/VulkanContext.h" -#include "VideoCommon/Statistics.h" - -namespace Vulkan -{ -std::unique_ptr g_shader_cache; - -ShaderCache::ShaderCache() -{ -} - -ShaderCache::~ShaderCache() -{ - DestroyPipelineCache(); - DestroySharedShaders(); -} - -bool ShaderCache::Initialize() -{ - if (g_ActiveConfig.bShaderCache) - { - if (!LoadPipelineCache()) - return false; - } - else - { - if (!CreatePipelineCache()) - return false; - } - - if (!CompileSharedShaders()) - return false; - - return true; 
-} - -void ShaderCache::Shutdown() -{ - if (g_ActiveConfig.bShaderCache && m_pipeline_cache != VK_NULL_HANDLE) - SavePipelineCache(); -} - -static bool IsStripPrimitiveTopology(VkPrimitiveTopology topology) -{ - return topology == VK_PRIMITIVE_TOPOLOGY_LINE_STRIP || - topology == VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP || - topology == VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY || - topology == VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY; -} - -static VkPipelineRasterizationStateCreateInfo -GetVulkanRasterizationState(const RasterizationState& state) -{ - static constexpr std::array cull_modes = { - {VK_CULL_MODE_NONE, VK_CULL_MODE_BACK_BIT, VK_CULL_MODE_FRONT_BIT, - VK_CULL_MODE_FRONT_AND_BACK}}; - - bool depth_clamp = g_ActiveConfig.backend_info.bSupportsDepthClamp; - - return { - VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO, // VkStructureType sType - nullptr, // const void* pNext - 0, // VkPipelineRasterizationStateCreateFlags flags - depth_clamp, // VkBool32 depthClampEnable - VK_FALSE, // VkBool32 rasterizerDiscardEnable - VK_POLYGON_MODE_FILL, // VkPolygonMode polygonMode - cull_modes[state.cullmode], // VkCullModeFlags cullMode - VK_FRONT_FACE_CLOCKWISE, // VkFrontFace frontFace - VK_FALSE, // VkBool32 depthBiasEnable - 0.0f, // float depthBiasConstantFactor - 0.0f, // float depthBiasClamp - 0.0f, // float depthBiasSlopeFactor - 1.0f // float lineWidth - }; -} - -static VkPipelineMultisampleStateCreateInfo -GetVulkanMultisampleState(const MultisamplingState& state) -{ - return { - VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO, // VkStructureType sType - nullptr, // const void* pNext - 0, // VkPipelineMultisampleStateCreateFlags flags - static_cast( - state.samples.Value()), // VkSampleCountFlagBits rasterizationSamples - state.per_sample_shading, // VkBool32 sampleShadingEnable - 1.0f, // float minSampleShading - nullptr, // const VkSampleMask* pSampleMask; - VK_FALSE, // VkBool32 alphaToCoverageEnable - VK_FALSE // 
VkBool32 alphaToOneEnable - }; -} - -static VkPipelineDepthStencilStateCreateInfo GetVulkanDepthStencilState(const DepthState& state) -{ - // Less/greater are swapped due to inverted depth. - static constexpr std::array funcs = { - {VK_COMPARE_OP_NEVER, VK_COMPARE_OP_GREATER, VK_COMPARE_OP_EQUAL, - VK_COMPARE_OP_GREATER_OR_EQUAL, VK_COMPARE_OP_LESS, VK_COMPARE_OP_NOT_EQUAL, - VK_COMPARE_OP_LESS_OR_EQUAL, VK_COMPARE_OP_ALWAYS}}; - - return { - VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO, // VkStructureType sType - nullptr, // const void* pNext - 0, // VkPipelineDepthStencilStateCreateFlags flags - state.testenable, // VkBool32 depthTestEnable - state.updateenable, // VkBool32 depthWriteEnable - funcs[state.func], // VkCompareOp depthCompareOp - VK_FALSE, // VkBool32 depthBoundsTestEnable - VK_FALSE, // VkBool32 stencilTestEnable - {}, // VkStencilOpState front - {}, // VkStencilOpState back - 0.0f, // float minDepthBounds - 1.0f // float maxDepthBounds - }; -} - -static VkPipelineColorBlendAttachmentState GetVulkanAttachmentBlendState(const BlendingState& state) -{ - VkPipelineColorBlendAttachmentState vk_state = {}; - vk_state.blendEnable = static_cast(state.blendenable); - vk_state.colorBlendOp = state.subtract ? VK_BLEND_OP_REVERSE_SUBTRACT : VK_BLEND_OP_ADD; - vk_state.alphaBlendOp = state.subtractAlpha ? 
VK_BLEND_OP_REVERSE_SUBTRACT : VK_BLEND_OP_ADD; - - if (state.usedualsrc && g_vulkan_context->SupportsDualSourceBlend()) - { - static constexpr std::array src_factors = { - {VK_BLEND_FACTOR_ZERO, VK_BLEND_FACTOR_ONE, VK_BLEND_FACTOR_DST_COLOR, - VK_BLEND_FACTOR_ONE_MINUS_DST_COLOR, VK_BLEND_FACTOR_SRC1_ALPHA, - VK_BLEND_FACTOR_ONE_MINUS_SRC1_ALPHA, VK_BLEND_FACTOR_DST_ALPHA, - VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA}}; - static constexpr std::array dst_factors = { - {VK_BLEND_FACTOR_ZERO, VK_BLEND_FACTOR_ONE, VK_BLEND_FACTOR_SRC_COLOR, - VK_BLEND_FACTOR_ONE_MINUS_SRC_COLOR, VK_BLEND_FACTOR_SRC1_ALPHA, - VK_BLEND_FACTOR_ONE_MINUS_SRC1_ALPHA, VK_BLEND_FACTOR_DST_ALPHA, - VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA}}; - - vk_state.srcColorBlendFactor = src_factors[state.srcfactor]; - vk_state.srcAlphaBlendFactor = src_factors[state.srcfactoralpha]; - vk_state.dstColorBlendFactor = dst_factors[state.dstfactor]; - vk_state.dstAlphaBlendFactor = dst_factors[state.dstfactoralpha]; - } - else - { - static constexpr std::array src_factors = { - {VK_BLEND_FACTOR_ZERO, VK_BLEND_FACTOR_ONE, VK_BLEND_FACTOR_DST_COLOR, - VK_BLEND_FACTOR_ONE_MINUS_DST_COLOR, VK_BLEND_FACTOR_SRC_ALPHA, - VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA, VK_BLEND_FACTOR_DST_ALPHA, - VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA}}; - - static constexpr std::array dst_factors = { - {VK_BLEND_FACTOR_ZERO, VK_BLEND_FACTOR_ONE, VK_BLEND_FACTOR_SRC_COLOR, - VK_BLEND_FACTOR_ONE_MINUS_SRC_COLOR, VK_BLEND_FACTOR_SRC_ALPHA, - VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA, VK_BLEND_FACTOR_DST_ALPHA, - VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA}}; - - vk_state.srcColorBlendFactor = src_factors[state.srcfactor]; - vk_state.srcAlphaBlendFactor = src_factors[state.srcfactoralpha]; - vk_state.dstColorBlendFactor = dst_factors[state.dstfactor]; - vk_state.dstAlphaBlendFactor = dst_factors[state.dstfactoralpha]; - } - - if (state.colorupdate) - { - vk_state.colorWriteMask = - VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT | VK_COLOR_COMPONENT_B_BIT; - } - else - 
{ - vk_state.colorWriteMask = 0; - } - - if (state.alphaupdate) - vk_state.colorWriteMask |= VK_COLOR_COMPONENT_A_BIT; - - return vk_state; -} - -static VkPipelineColorBlendStateCreateInfo -GetVulkanColorBlendState(const BlendingState& state, - const VkPipelineColorBlendAttachmentState* attachments, - uint32_t num_attachments) -{ - static constexpr std::array vk_logic_ops = { - {VK_LOGIC_OP_CLEAR, VK_LOGIC_OP_AND, VK_LOGIC_OP_AND_REVERSE, VK_LOGIC_OP_COPY, - VK_LOGIC_OP_AND_INVERTED, VK_LOGIC_OP_NO_OP, VK_LOGIC_OP_XOR, VK_LOGIC_OP_OR, - VK_LOGIC_OP_NOR, VK_LOGIC_OP_EQUIVALENT, VK_LOGIC_OP_INVERT, VK_LOGIC_OP_OR_REVERSE, - VK_LOGIC_OP_COPY_INVERTED, VK_LOGIC_OP_OR_INVERTED, VK_LOGIC_OP_NAND, VK_LOGIC_OP_SET}}; - - VkBool32 vk_logic_op_enable = static_cast(state.logicopenable); - if (vk_logic_op_enable && !g_vulkan_context->SupportsLogicOps()) - { - // At the time of writing, Adreno and Mali drivers didn't support logic ops. - // The "emulation" through blending path has been removed, so just disable it completely. - // These drivers don't support dual-source blend either, so issues are to be expected. - vk_logic_op_enable = VK_FALSE; - } - - VkLogicOp vk_logic_op = vk_logic_op_enable ? 
vk_logic_ops[state.logicmode] : VK_LOGIC_OP_CLEAR; - - VkPipelineColorBlendStateCreateInfo vk_state = { - VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO, // VkStructureType sType - nullptr, // const void* pNext - 0, // VkPipelineColorBlendStateCreateFlags flags - vk_logic_op_enable, // VkBool32 logicOpEnable - vk_logic_op, // VkLogicOp logicOp - num_attachments, // uint32_t attachmentCount - attachments, // const VkPipelineColorBlendAttachmentState* pAttachments - {1.0f, 1.0f, 1.0f, 1.0f} // float blendConstants[4] - }; - - return vk_state; -} - -VkPipeline ShaderCache::CreatePipeline(const PipelineInfo& info) -{ - // Declare descriptors for empty vertex buffers/attributes - static const VkPipelineVertexInputStateCreateInfo empty_vertex_input_state = { - VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, // VkStructureType sType - nullptr, // const void* pNext - 0, // VkPipelineVertexInputStateCreateFlags flags - 0, // uint32_t vertexBindingDescriptionCount - nullptr, // const VkVertexInputBindingDescription* pVertexBindingDescriptions - 0, // uint32_t vertexAttributeDescriptionCount - nullptr // const VkVertexInputAttributeDescription* pVertexAttributeDescriptions - }; - - // Vertex inputs - const VkPipelineVertexInputStateCreateInfo& vertex_input_state = - info.vertex_format ? 
info.vertex_format->GetVertexInputStateInfo() : empty_vertex_input_state; - - // Input assembly - static constexpr std::array vk_primitive_topologies = { - {VK_PRIMITIVE_TOPOLOGY_POINT_LIST, VK_PRIMITIVE_TOPOLOGY_LINE_LIST, - VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST, VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP}}; - VkPipelineInputAssemblyStateCreateInfo input_assembly_state = { - VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO, nullptr, 0, - vk_primitive_topologies[static_cast(info.rasterization_state.primitive.Value())], - VK_FALSE}; - - // See Vulkan spec, section 19: - // If topology is VK_PRIMITIVE_TOPOLOGY_POINT_LIST, VK_PRIMITIVE_TOPOLOGY_LINE_LIST, - // VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST, VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY, - // VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY or VK_PRIMITIVE_TOPOLOGY_PATCH_LIST, - // primitiveRestartEnable must be VK_FALSE - if (g_ActiveConfig.backend_info.bSupportsPrimitiveRestart && - IsStripPrimitiveTopology(input_assembly_state.topology)) - { - input_assembly_state.primitiveRestartEnable = VK_TRUE; - } - - // Shaders to stages - VkPipelineShaderStageCreateInfo shader_stages[3]; - uint32_t num_shader_stages = 0; - if (info.vs != VK_NULL_HANDLE) - { - shader_stages[num_shader_stages++] = {VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, - nullptr, - 0, - VK_SHADER_STAGE_VERTEX_BIT, - info.vs, - "main"}; - } - if (info.gs != VK_NULL_HANDLE) - { - shader_stages[num_shader_stages++] = {VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, - nullptr, - 0, - VK_SHADER_STAGE_GEOMETRY_BIT, - info.gs, - "main"}; - } - if (info.ps != VK_NULL_HANDLE) - { - shader_stages[num_shader_stages++] = {VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, - nullptr, - 0, - VK_SHADER_STAGE_FRAGMENT_BIT, - info.ps, - "main"}; - } - - // Fill in Vulkan descriptor structs from our state structures. 
- VkPipelineRasterizationStateCreateInfo rasterization_state = - GetVulkanRasterizationState(info.rasterization_state); - VkPipelineMultisampleStateCreateInfo multisample_state = - GetVulkanMultisampleState(info.multisampling_state); - VkPipelineDepthStencilStateCreateInfo depth_stencil_state = - GetVulkanDepthStencilState(info.depth_state); - VkPipelineColorBlendAttachmentState blend_attachment_state = - GetVulkanAttachmentBlendState(info.blend_state); - VkPipelineColorBlendStateCreateInfo blend_state = - GetVulkanColorBlendState(info.blend_state, &blend_attachment_state, 1); - - // This viewport isn't used, but needs to be specified anyway. - static const VkViewport viewport = {0.0f, 0.0f, 1.0f, 1.0f, 0.0f, 1.0f}; - static const VkRect2D scissor = {{0, 0}, {1, 1}}; - static const VkPipelineViewportStateCreateInfo viewport_state = { - VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO, - nullptr, - 0, // VkPipelineViewportStateCreateFlags flags; - 1, // uint32_t viewportCount - &viewport, // const VkViewport* pViewports - 1, // uint32_t scissorCount - &scissor // const VkRect2D* pScissors - }; - - // Set viewport and scissor dynamic state so we can change it elsewhere. - static const VkDynamicState dynamic_states[] = {VK_DYNAMIC_STATE_VIEWPORT, - VK_DYNAMIC_STATE_SCISSOR}; - static const VkPipelineDynamicStateCreateInfo dynamic_state = { - VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO, nullptr, - 0, // VkPipelineDynamicStateCreateFlags flags - static_cast(ArraySize(dynamic_states)), // uint32_t dynamicStateCount - dynamic_states // const VkDynamicState* pDynamicStates - }; - - // Combine to full pipeline info structure. 
- VkGraphicsPipelineCreateInfo pipeline_info = { - VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, - nullptr, // VkStructureType sType - 0, // VkPipelineCreateFlags flags - num_shader_stages, // uint32_t stageCount - shader_stages, // const VkPipelineShaderStageCreateInfo* pStages - &vertex_input_state, // const VkPipelineVertexInputStateCreateInfo* pVertexInputState - &input_assembly_state, // const VkPipelineInputAssemblyStateCreateInfo* pInputAssemblyState - nullptr, // const VkPipelineTessellationStateCreateInfo* pTessellationState - &viewport_state, // const VkPipelineViewportStateCreateInfo* pViewportState - &rasterization_state, // const VkPipelineRasterizationStateCreateInfo* pRasterizationState - &multisample_state, // const VkPipelineMultisampleStateCreateInfo* pMultisampleState - &depth_stencil_state, // const VkPipelineDepthStencilStateCreateInfo* pDepthStencilState - &blend_state, // const VkPipelineColorBlendStateCreateInfo* pColorBlendState - &dynamic_state, // const VkPipelineDynamicStateCreateInfo* pDynamicState - info.pipeline_layout, // VkPipelineLayout layout - info.render_pass, // VkRenderPass renderPass - 0, // uint32_t subpass - VK_NULL_HANDLE, // VkPipeline basePipelineHandle - -1 // int32_t basePipelineIndex - }; - - VkPipeline pipeline; - VkResult res = vkCreateGraphicsPipelines(g_vulkan_context->GetDevice(), m_pipeline_cache, 1, - &pipeline_info, nullptr, &pipeline); - if (res != VK_SUCCESS) - { - LOG_VULKAN_ERROR(res, "vkCreateGraphicsPipelines failed: "); - return VK_NULL_HANDLE; - } - - return pipeline; -} - -VkPipeline ShaderCache::GetPipeline(const PipelineInfo& info) -{ - auto iter = m_pipeline_objects.find(info); - if (iter != m_pipeline_objects.end()) - return iter->second; - - VkPipeline pipeline = CreatePipeline(info); - m_pipeline_objects.emplace(info, pipeline); - return pipeline; -} - -VkPipeline ShaderCache::CreateComputePipeline(const ComputePipelineInfo& info) -{ - VkComputePipelineCreateInfo pipeline_info = 
{VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, - nullptr, - 0, - {VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, - nullptr, 0, VK_SHADER_STAGE_COMPUTE_BIT, info.cs, - "main", nullptr}, - info.pipeline_layout, - VK_NULL_HANDLE, - -1}; - - VkPipeline pipeline; - VkResult res = vkCreateComputePipelines(g_vulkan_context->GetDevice(), VK_NULL_HANDLE, 1, - &pipeline_info, nullptr, &pipeline); - if (res != VK_SUCCESS) - { - LOG_VULKAN_ERROR(res, "vkCreateComputePipelines failed: "); - return VK_NULL_HANDLE; - } - - return pipeline; -} - -VkPipeline ShaderCache::GetComputePipeline(const ComputePipelineInfo& info) -{ - auto iter = m_compute_pipeline_objects.find(info); - if (iter != m_compute_pipeline_objects.end()) - return iter->second; - - VkPipeline pipeline = CreateComputePipeline(info); - m_compute_pipeline_objects.emplace(info, pipeline); - return pipeline; -} - -void ShaderCache::ClearPipelineCache() -{ - for (const auto& it : m_pipeline_objects) - { - if (it.second != VK_NULL_HANDLE) - vkDestroyPipeline(g_vulkan_context->GetDevice(), it.second, nullptr); - } - m_pipeline_objects.clear(); - - for (const auto& it : m_compute_pipeline_objects) - { - if (it.second != VK_NULL_HANDLE) - vkDestroyPipeline(g_vulkan_context->GetDevice(), it.second, nullptr); - } - m_compute_pipeline_objects.clear(); -} - -class PipelineCacheReadCallback : public LinearDiskCacheReader -{ -public: - PipelineCacheReadCallback(std::vector* data) : m_data(data) {} - void Read(const u32& key, const u8* value, u32 value_size) override - { - m_data->resize(value_size); - if (value_size > 0) - memcpy(m_data->data(), value, value_size); - } - -private: - std::vector* m_data; -}; - -class PipelineCacheReadIgnoreCallback : public LinearDiskCacheReader -{ -public: - void Read(const u32& key, const u8* value, u32 value_size) override {} -}; - -bool ShaderCache::CreatePipelineCache() -{ - // Vulkan pipeline caches can be shared between games for shader compile time reduction. 
- // This assumes that drivers don't create all pipelines in the cache on load time, only - // when a lookup occurs that matches a pipeline (or pipeline data) in the cache. - m_pipeline_cache_filename = GetDiskShaderCacheFileName(APIType::Vulkan, "Pipeline", false, true); - - VkPipelineCacheCreateInfo info = { - VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO, // VkStructureType sType - nullptr, // const void* pNext - 0, // VkPipelineCacheCreateFlags flags - 0, // size_t initialDataSize - nullptr // const void* pInitialData - }; - - VkResult res = - vkCreatePipelineCache(g_vulkan_context->GetDevice(), &info, nullptr, &m_pipeline_cache); - if (res == VK_SUCCESS) - return true; - - LOG_VULKAN_ERROR(res, "vkCreatePipelineCache failed: "); - return false; -} - -bool ShaderCache::LoadPipelineCache() -{ - // We have to keep the pipeline cache file name around since when we save it - // we delete the old one, by which time the game's unique ID is already cleared. - m_pipeline_cache_filename = GetDiskShaderCacheFileName(APIType::Vulkan, "Pipeline", false, true); - - std::vector disk_data; - LinearDiskCache disk_cache; - PipelineCacheReadCallback read_callback(&disk_data); - if (disk_cache.OpenAndRead(m_pipeline_cache_filename, read_callback) != 1) - disk_data.clear(); - - if (!disk_data.empty() && !ValidatePipelineCache(disk_data.data(), disk_data.size())) - { - // Don't use this data. In fact, we should delete it to prevent it from being used next time. 
- File::Delete(m_pipeline_cache_filename); - return CreatePipelineCache(); - } - - VkPipelineCacheCreateInfo info = { - VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO, // VkStructureType sType - nullptr, // const void* pNext - 0, // VkPipelineCacheCreateFlags flags - disk_data.size(), // size_t initialDataSize - disk_data.data() // const void* pInitialData - }; - - VkResult res = - vkCreatePipelineCache(g_vulkan_context->GetDevice(), &info, nullptr, &m_pipeline_cache); - if (res == VK_SUCCESS) - return true; - - // Failed to create pipeline cache, try with it empty. - LOG_VULKAN_ERROR(res, "vkCreatePipelineCache failed, trying empty cache: "); - return CreatePipelineCache(); -} - -// Based on Vulkan 1.0 specification, -// Table 9.1. Layout for pipeline cache header version VK_PIPELINE_CACHE_HEADER_VERSION_ONE -// NOTE: This data is assumed to be in little-endian format. -#pragma pack(push, 4) -struct VK_PIPELINE_CACHE_HEADER -{ - u32 header_length; - u32 header_version; - u32 vendor_id; - u32 device_id; - u8 uuid[VK_UUID_SIZE]; -}; -#pragma pack(pop) -static_assert(std::is_trivially_copyable::value, - "VK_PIPELINE_CACHE_HEADER must be trivially copyable"); - -bool ShaderCache::ValidatePipelineCache(const u8* data, size_t data_length) -{ - if (data_length < sizeof(VK_PIPELINE_CACHE_HEADER)) - { - ERROR_LOG(VIDEO, "Pipeline cache failed validation: Invalid header"); - return false; - } - - VK_PIPELINE_CACHE_HEADER header; - std::memcpy(&header, data, sizeof(header)); - if (header.header_length < sizeof(VK_PIPELINE_CACHE_HEADER)) - { - ERROR_LOG(VIDEO, "Pipeline cache failed validation: Invalid header length"); - return false; - } - - if (header.header_version != VK_PIPELINE_CACHE_HEADER_VERSION_ONE) - { - ERROR_LOG(VIDEO, "Pipeline cache failed validation: Invalid header version"); - return false; - } - - if (header.vendor_id != g_vulkan_context->GetDeviceProperties().vendorID) - { - ERROR_LOG(VIDEO, - "Pipeline cache failed validation: Incorrect vendor ID (file: 0x%X, 
device: 0x%X)", - header.vendor_id, g_vulkan_context->GetDeviceProperties().vendorID); - return false; - } - - if (header.device_id != g_vulkan_context->GetDeviceProperties().deviceID) - { - ERROR_LOG(VIDEO, - "Pipeline cache failed validation: Incorrect device ID (file: 0x%X, device: 0x%X)", - header.device_id, g_vulkan_context->GetDeviceProperties().deviceID); - return false; - } - - if (std::memcmp(header.uuid, g_vulkan_context->GetDeviceProperties().pipelineCacheUUID, - VK_UUID_SIZE) != 0) - { - ERROR_LOG(VIDEO, "Pipeline cache failed validation: Incorrect UUID"); - return false; - } - - return true; -} - -void ShaderCache::DestroyPipelineCache() -{ - ClearPipelineCache(); - vkDestroyPipelineCache(g_vulkan_context->GetDevice(), m_pipeline_cache, nullptr); - m_pipeline_cache = VK_NULL_HANDLE; -} - -void ShaderCache::SavePipelineCache() -{ - size_t data_size; - VkResult res = - vkGetPipelineCacheData(g_vulkan_context->GetDevice(), m_pipeline_cache, &data_size, nullptr); - if (res != VK_SUCCESS) - { - LOG_VULKAN_ERROR(res, "vkGetPipelineCacheData failed: "); - return; - } - - std::vector data(data_size); - res = vkGetPipelineCacheData(g_vulkan_context->GetDevice(), m_pipeline_cache, &data_size, - data.data()); - if (res != VK_SUCCESS) - { - LOG_VULKAN_ERROR(res, "vkGetPipelineCacheData failed: "); - return; - } - - // Delete the old cache and re-create. - File::Delete(m_pipeline_cache_filename); - - // We write a single key of 1, with the entire pipeline cache data. - // Not ideal, but our disk cache class does not support just writing a single blob - // of data without specifying a key. 
- LinearDiskCache disk_cache; - PipelineCacheReadIgnoreCallback callback; - disk_cache.OpenAndRead(m_pipeline_cache_filename, callback); - disk_cache.Append(1, data.data(), static_cast(data.size())); - disk_cache.Close(); -} - -void ShaderCache::RecompileSharedShaders() -{ - DestroySharedShaders(); - if (!CompileSharedShaders()) - PanicAlert("Failed to recompile shared shaders."); -} - -void ShaderCache::ReloadPipelineCache() -{ - SavePipelineCache(); - DestroyPipelineCache(); - - if (g_ActiveConfig.bShaderCache) - LoadPipelineCache(); - else - CreatePipelineCache(); -} - -std::string ShaderCache::GetUtilityShaderHeader() const -{ - std::stringstream ss; - if (g_ActiveConfig.iMultisamples > 1) - { - ss << "#define MSAA_ENABLED 1" << std::endl; - ss << "#define MSAA_SAMPLES " << g_ActiveConfig.iMultisamples << std::endl; - if (g_ActiveConfig.bSSAA) - ss << "#define SSAA_ENABLED 1" << std::endl; - } - - u32 efb_layers = (g_ActiveConfig.stereo_mode != StereoMode::Off) ? 2 : 1; - ss << "#define EFB_LAYERS " << efb_layers << std::endl; - - return ss.str(); -} - -std::size_t PipelineInfoHash::operator()(const PipelineInfo& key) const -{ - return static_cast(XXH64(&key, sizeof(key), 0)); -} - -bool operator==(const PipelineInfo& lhs, const PipelineInfo& rhs) -{ - return std::memcmp(&lhs, &rhs, sizeof(lhs)) == 0; -} - -bool operator!=(const PipelineInfo& lhs, const PipelineInfo& rhs) -{ - return !operator==(lhs, rhs); -} - -bool operator<(const PipelineInfo& lhs, const PipelineInfo& rhs) -{ - return std::memcmp(&lhs, &rhs, sizeof(lhs)) < 0; -} - -bool operator>(const PipelineInfo& lhs, const PipelineInfo& rhs) -{ - return std::memcmp(&lhs, &rhs, sizeof(lhs)) > 0; -} - -std::size_t ComputePipelineInfoHash::operator()(const ComputePipelineInfo& key) const -{ - return static_cast(XXH64(&key, sizeof(key), 0)); -} - -bool operator==(const ComputePipelineInfo& lhs, const ComputePipelineInfo& rhs) -{ - return std::memcmp(&lhs, &rhs, sizeof(lhs)) == 0; -} - -bool operator!=(const 
ComputePipelineInfo& lhs, const ComputePipelineInfo& rhs) -{ - return !operator==(lhs, rhs); -} - -bool operator<(const ComputePipelineInfo& lhs, const ComputePipelineInfo& rhs) -{ - return std::memcmp(&lhs, &rhs, sizeof(lhs)) < 0; -} - -bool operator>(const ComputePipelineInfo& lhs, const ComputePipelineInfo& rhs) -{ - return std::memcmp(&lhs, &rhs, sizeof(lhs)) > 0; -} - -bool ShaderCache::CompileSharedShaders() -{ - static const char PASSTHROUGH_VERTEX_SHADER_SOURCE[] = R"( - layout(location = 0) in vec4 ipos; - layout(location = 5) in vec4 icol0; - layout(location = 8) in vec3 itex0; - - layout(location = 0) out vec3 uv0; - layout(location = 1) out vec4 col0; - - void main() - { - gl_Position = ipos; - uv0 = itex0; - col0 = icol0; - } - )"; - - static const char PASSTHROUGH_GEOMETRY_SHADER_SOURCE[] = R"( - layout(triangles) in; - layout(triangle_strip, max_vertices = EFB_LAYERS * 3) out; - - layout(location = 0) in vec3 in_uv0[]; - layout(location = 1) in vec4 in_col0[]; - - layout(location = 0) out vec3 out_uv0; - layout(location = 1) out vec4 out_col0; - - void main() - { - for (int j = 0; j < EFB_LAYERS; j++) - { - for (int i = 0; i < 3; i++) - { - gl_Layer = j; - gl_Position = gl_in[i].gl_Position; - out_uv0 = vec3(in_uv0[i].xy, float(j)); - out_col0 = in_col0[i]; - EmitVertex(); - } - EndPrimitive(); - } - } - )"; - - static const char SCREEN_QUAD_VERTEX_SHADER_SOURCE[] = R"( - layout(location = 0) out vec3 uv0; - - void main() - { - /* - * id &1 &2 clamp(*2-1) - * 0 0,0 0,0 -1,-1 TL - * 1 1,0 1,0 1,-1 TR - * 2 0,2 0,1 -1,1 BL - * 3 1,2 1,1 1,1 BR - */ - vec2 rawpos = vec2(float(gl_VertexID & 1), clamp(float(gl_VertexID & 2), 0.0f, 1.0f)); - gl_Position = vec4(rawpos * 2.0f - 1.0f, 0.0f, 1.0f); - uv0 = vec3(rawpos, 0.0f); - } - )"; - - static const char SCREEN_QUAD_GEOMETRY_SHADER_SOURCE[] = R"( - layout(triangles) in; - layout(triangle_strip, max_vertices = EFB_LAYERS * 3) out; - - layout(location = 0) in vec3 in_uv0[]; - - layout(location = 0) out vec3 
out_uv0; - - void main() - { - for (int j = 0; j < EFB_LAYERS; j++) - { - for (int i = 0; i < 3; i++) - { - gl_Layer = j; - gl_Position = gl_in[i].gl_Position; - out_uv0 = vec3(in_uv0[i].xy, float(j)); - EmitVertex(); - } - EndPrimitive(); - } - } - )"; - - static const char CLEAR_FRAGMENT_SHADER_SOURCE[] = R"( - layout(location = 0) in float3 uv0; - layout(location = 1) in float4 col0; - layout(location = 0) out float4 ocol0; - - void main() - { - ocol0 = col0; - } - - )"; - - const std::string header = GetUtilityShaderHeader(); - - m_screen_quad_vertex_shader = - Util::CompileAndCreateVertexShader(header + SCREEN_QUAD_VERTEX_SHADER_SOURCE); - m_passthrough_vertex_shader = - Util::CompileAndCreateVertexShader(header + PASSTHROUGH_VERTEX_SHADER_SOURCE); - if (m_screen_quad_vertex_shader == VK_NULL_HANDLE || - m_passthrough_vertex_shader == VK_NULL_HANDLE) - { - return false; - } - - if (g_ActiveConfig.stereo_mode != StereoMode::Off && g_vulkan_context->SupportsGeometryShaders()) - { - m_screen_quad_geometry_shader = - Util::CompileAndCreateGeometryShader(header + SCREEN_QUAD_GEOMETRY_SHADER_SOURCE); - m_passthrough_geometry_shader = - Util::CompileAndCreateGeometryShader(header + PASSTHROUGH_GEOMETRY_SHADER_SOURCE); - if (m_screen_quad_geometry_shader == VK_NULL_HANDLE || - m_passthrough_geometry_shader == VK_NULL_HANDLE) - { - return false; - } - } - - m_clear_fragment_shader = - Util::CompileAndCreateFragmentShader(header + CLEAR_FRAGMENT_SHADER_SOURCE); - if (m_clear_fragment_shader == VK_NULL_HANDLE) - return false; - - return true; -} - -void ShaderCache::DestroySharedShaders() -{ - auto DestroyShader = [this](VkShaderModule& shader) { - if (shader != VK_NULL_HANDLE) - { - vkDestroyShaderModule(g_vulkan_context->GetDevice(), shader, nullptr); - shader = VK_NULL_HANDLE; - } - }; - - DestroyShader(m_screen_quad_vertex_shader); - DestroyShader(m_passthrough_vertex_shader); - DestroyShader(m_screen_quad_geometry_shader); - 
DestroyShader(m_passthrough_geometry_shader); - DestroyShader(m_clear_fragment_shader); -} -} // namespace Vulkan diff --git a/Source/Core/VideoBackends/Vulkan/ShaderCache.h b/Source/Core/VideoBackends/Vulkan/ShaderCache.h deleted file mode 100644 index 51060358e2..0000000000 --- a/Source/Core/VideoBackends/Vulkan/ShaderCache.h +++ /dev/null @@ -1,145 +0,0 @@ -// Copyright 2016 Dolphin Emulator Project -// Licensed under GPLv2+ -// Refer to the license.txt file included. - -#pragma once - -#include -#include -#include -#include -#include -#include -#include - -#include "Common/CommonTypes.h" -#include "Common/LinearDiskCache.h" - -#include "VideoBackends/Vulkan/Constants.h" -#include "VideoBackends/Vulkan/ObjectCache.h" -#include "VideoBackends/Vulkan/ShaderCompiler.h" - -#include "VideoCommon/RenderState.h" - -namespace Vulkan -{ -class CommandBufferManager; -class VertexFormat; -class StreamBuffer; - -struct PipelineInfo -{ - // These are packed in descending order of size, to avoid any padding so that the structure - // can be copied/compared as a single block of memory. 64-bit pointer size is assumed. 
- const VertexFormat* vertex_format; - VkPipelineLayout pipeline_layout; - VkShaderModule vs; - VkShaderModule gs; - VkShaderModule ps; - VkRenderPass render_pass; - BlendingState blend_state; - RasterizationState rasterization_state; - DepthState depth_state; - MultisamplingState multisampling_state; -}; - -struct PipelineInfoHash -{ - std::size_t operator()(const PipelineInfo& key) const; -}; - -bool operator==(const PipelineInfo& lhs, const PipelineInfo& rhs); -bool operator!=(const PipelineInfo& lhs, const PipelineInfo& rhs); -bool operator<(const PipelineInfo& lhs, const PipelineInfo& rhs); -bool operator>(const PipelineInfo& lhs, const PipelineInfo& rhs); - -struct ComputePipelineInfo -{ - VkPipelineLayout pipeline_layout; - VkShaderModule cs; -}; - -struct ComputePipelineInfoHash -{ - std::size_t operator()(const ComputePipelineInfo& key) const; -}; - -bool operator==(const ComputePipelineInfo& lhs, const ComputePipelineInfo& rhs); -bool operator!=(const ComputePipelineInfo& lhs, const ComputePipelineInfo& rhs); -bool operator<(const ComputePipelineInfo& lhs, const ComputePipelineInfo& rhs); -bool operator>(const ComputePipelineInfo& lhs, const ComputePipelineInfo& rhs); - -class ShaderCache -{ -public: - ShaderCache(); - ~ShaderCache(); - - // Get utility shader header based on current config. - std::string GetUtilityShaderHeader() const; - - // Perform at startup, create descriptor layouts, compiles all static shaders. - bool Initialize(); - void Shutdown(); - - // Creates a pipeline for the specified description. The resulting pipeline, if successful - // is not stored anywhere, this is left up to the caller. - VkPipeline CreatePipeline(const PipelineInfo& info); - - // Find a pipeline by the specified description, if not found, attempts to create it. - VkPipeline GetPipeline(const PipelineInfo& info); - - // Creates a compute pipeline, and does not track the handle. 
- VkPipeline CreateComputePipeline(const ComputePipelineInfo& info); - - // Find a pipeline by the specified description, if not found, attempts to create it - VkPipeline GetComputePipeline(const ComputePipelineInfo& info); - - // Clears our pipeline cache of all objects. This is necessary when recompiling shaders, - // as drivers are free to return the same pointer again, which means that we may end up using - // and old pipeline object if they are not cleared first. Some stutter may be experienced - // while our cache is rebuilt on use, but the pipeline cache object should mitigate this. - // NOTE: Ensure that none of these objects are in use before calling. - void ClearPipelineCache(); - - // Saves the pipeline cache to disk. Call when shutting down. - void SavePipelineCache(); - - // Recompile shared shaders, call when stereo mode changes. - void RecompileSharedShaders(); - - // Reload pipeline cache. This will destroy all pipelines. - void ReloadPipelineCache(); - - // Shared shader accessors - VkShaderModule GetScreenQuadVertexShader() const { return m_screen_quad_vertex_shader; } - VkShaderModule GetPassthroughVertexShader() const { return m_passthrough_vertex_shader; } - VkShaderModule GetScreenQuadGeometryShader() const { return m_screen_quad_geometry_shader; } - VkShaderModule GetPassthroughGeometryShader() const { return m_passthrough_geometry_shader; } - VkShaderModule GetClearFragmentShader() const { return m_clear_fragment_shader; } - -private: - bool CreatePipelineCache(); - bool LoadPipelineCache(); - bool ValidatePipelineCache(const u8* data, size_t data_length); - void DestroyPipelineCache(); - bool CompileSharedShaders(); - void DestroySharedShaders(); - - std::unordered_map m_pipeline_objects; - std::unordered_map - m_compute_pipeline_objects; - VkPipelineCache m_pipeline_cache = VK_NULL_HANDLE; - std::string m_pipeline_cache_filename; - - // Utility/shared shaders - VkShaderModule m_screen_quad_vertex_shader = VK_NULL_HANDLE; - VkShaderModule 
m_passthrough_vertex_shader = VK_NULL_HANDLE; - VkShaderModule m_screen_quad_geometry_shader = VK_NULL_HANDLE; - VkShaderModule m_passthrough_geometry_shader = VK_NULL_HANDLE; - VkShaderModule m_clear_fragment_shader = VK_NULL_HANDLE; -}; - -extern std::unique_ptr g_shader_cache; - -} // namespace Vulkan diff --git a/Source/Core/VideoBackends/Vulkan/ShaderCompiler.cpp b/Source/Core/VideoBackends/Vulkan/ShaderCompiler.cpp index d430c8bab9..abe6df2653 100644 --- a/Source/Core/VideoBackends/Vulkan/ShaderCompiler.cpp +++ b/Source/Core/VideoBackends/Vulkan/ShaderCompiler.cpp @@ -53,12 +53,13 @@ static const char SHADER_HEADER[] = R"( #define FRAGMENT_OUTPUT_LOCATION_INDEXED(x, y) layout(location = x, index = y) #define UBO_BINDING(packing, x) layout(packing, set = 0, binding = (x - 1)) #define SAMPLER_BINDING(x) layout(set = 1, binding = x) + #define TEXEL_BUFFER_BINDING(x) layout(set = 1, binding = (x + 8)) #define SSBO_BINDING(x) layout(set = 2, binding = x) - #define TEXEL_BUFFER_BINDING(x) layout(set = 2, binding = x) #define VARYING_LOCATION(x) layout(location = x) #define FORCE_EARLY_Z layout(early_fragment_tests) in // hlsl to glsl function translation + #define API_VULKAN 1 #define float2 vec2 #define float3 vec3 #define float4 vec4 @@ -79,12 +80,13 @@ static const char COMPUTE_SHADER_HEADER[] = R"( // Target GLSL 4.5. #version 450 core // All resources are packed into one descriptor set for compute. 
- #define UBO_BINDING(packing, x) layout(packing, set = 0, binding = (0 + x)) + #define UBO_BINDING(packing, x) layout(packing, set = 0, binding = (x - 1)) #define SAMPLER_BINDING(x) layout(set = 0, binding = (1 + x)) - #define TEXEL_BUFFER_BINDING(x) layout(set = 0, binding = (5 + x)) - #define IMAGE_BINDING(format, x) layout(format, set = 0, binding = (7 + x)) + #define TEXEL_BUFFER_BINDING(x) layout(set = 0, binding = (3 + x)) + #define IMAGE_BINDING(format, x) layout(format, set = 0, binding = (5 + x)) // hlsl to glsl function translation + #define API_VULKAN 1 #define float2 vec2 #define float3 vec3 #define float4 vec4 diff --git a/Source/Core/VideoBackends/Vulkan/StagingBuffer.cpp b/Source/Core/VideoBackends/Vulkan/StagingBuffer.cpp index 7a2c238489..50d7c2f16a 100644 --- a/Source/Core/VideoBackends/Vulkan/StagingBuffer.cpp +++ b/Source/Core/VideoBackends/Vulkan/StagingBuffer.cpp @@ -9,7 +9,6 @@ #include "VideoBackends/Vulkan/CommandBufferManager.h" #include "VideoBackends/Vulkan/StagingBuffer.h" -#include "VideoBackends/Vulkan/Util.h" #include "VideoBackends/Vulkan/VulkanContext.h" namespace Vulkan @@ -30,6 +29,28 @@ StagingBuffer::~StagingBuffer() g_command_buffer_mgr->DeferBufferDestruction(m_buffer); } +void StagingBuffer::BufferMemoryBarrier(VkCommandBuffer command_buffer, VkBuffer buffer, + VkAccessFlags src_access_mask, + VkAccessFlags dst_access_mask, VkDeviceSize offset, + VkDeviceSize size, VkPipelineStageFlags src_stage_mask, + VkPipelineStageFlags dst_stage_mask) +{ + VkBufferMemoryBarrier buffer_info = { + VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, // VkStructureType sType + nullptr, // const void* pNext + src_access_mask, // VkAccessFlags srcAccessMask + dst_access_mask, // VkAccessFlags dstAccessMask + VK_QUEUE_FAMILY_IGNORED, // uint32_t srcQueueFamilyIndex + VK_QUEUE_FAMILY_IGNORED, // uint32_t dstQueueFamilyIndex + buffer, // VkBuffer buffer + offset, // VkDeviceSize offset + size // VkDeviceSize size + }; + + 
vkCmdPipelineBarrier(command_buffer, src_stage_mask, dst_stage_mask, 0, 0, nullptr, 1, + &buffer_info, 0, nullptr); +} + bool StagingBuffer::Map(VkDeviceSize offset, VkDeviceSize size) { m_map_offset = offset; @@ -84,8 +105,8 @@ void StagingBuffer::InvalidateGPUCache(VkCommandBuffer command_buffer, return; ASSERT((offset + size) <= m_size || (offset < m_size && size == VK_WHOLE_SIZE)); - Util::BufferMemoryBarrier(command_buffer, m_buffer, VK_ACCESS_HOST_WRITE_BIT, dest_access_flags, - offset, size, VK_PIPELINE_STAGE_HOST_BIT, dest_pipeline_stage); + BufferMemoryBarrier(command_buffer, m_buffer, VK_ACCESS_HOST_WRITE_BIT, dest_access_flags, offset, + size, VK_PIPELINE_STAGE_HOST_BIT, dest_pipeline_stage); } void StagingBuffer::PrepareForGPUWrite(VkCommandBuffer command_buffer, @@ -97,8 +118,8 @@ void StagingBuffer::PrepareForGPUWrite(VkCommandBuffer command_buffer, return; ASSERT((offset + size) <= m_size || (offset < m_size && size == VK_WHOLE_SIZE)); - Util::BufferMemoryBarrier(command_buffer, m_buffer, 0, dst_access_flags, offset, size, - VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, dst_pipeline_stage); + BufferMemoryBarrier(command_buffer, m_buffer, 0, dst_access_flags, offset, size, + VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, dst_pipeline_stage); } void StagingBuffer::FlushGPUCache(VkCommandBuffer command_buffer, VkAccessFlagBits src_access_flags, @@ -109,8 +130,8 @@ void StagingBuffer::FlushGPUCache(VkCommandBuffer command_buffer, VkAccessFlagBi return; ASSERT((offset + size) <= m_size || (offset < m_size && size == VK_WHOLE_SIZE)); - Util::BufferMemoryBarrier(command_buffer, m_buffer, src_access_flags, VK_ACCESS_HOST_READ_BIT, - offset, size, src_pipeline_stage, VK_PIPELINE_STAGE_HOST_BIT); + BufferMemoryBarrier(command_buffer, m_buffer, src_access_flags, VK_ACCESS_HOST_READ_BIT, offset, + size, src_pipeline_stage, VK_PIPELINE_STAGE_HOST_BIT); } void StagingBuffer::InvalidateCPUCache(VkDeviceSize offset, VkDeviceSize size) diff --git 
a/Source/Core/VideoBackends/Vulkan/StagingBuffer.h b/Source/Core/VideoBackends/Vulkan/StagingBuffer.h index 2ecb21cb22..bf2f9fb2e5 100644 --- a/Source/Core/VideoBackends/Vulkan/StagingBuffer.h +++ b/Source/Core/VideoBackends/Vulkan/StagingBuffer.h @@ -63,6 +63,13 @@ public: static bool AllocateBuffer(STAGING_BUFFER_TYPE type, VkDeviceSize size, VkBufferUsageFlags usage, VkBuffer* out_buffer, VkDeviceMemory* out_memory, bool* out_coherent); + // Wrapper for creating an barrier on a buffer + static void BufferMemoryBarrier(VkCommandBuffer command_buffer, VkBuffer buffer, + VkAccessFlags src_access_mask, VkAccessFlags dst_access_mask, + VkDeviceSize offset, VkDeviceSize size, + VkPipelineStageFlags src_stage_mask, + VkPipelineStageFlags dst_stage_mask); + protected: STAGING_BUFFER_TYPE m_type; VkBuffer m_buffer; @@ -74,4 +81,4 @@ protected: VkDeviceSize m_map_offset = 0; VkDeviceSize m_map_size = 0; }; -} +} // namespace Vulkan diff --git a/Source/Core/VideoBackends/Vulkan/StateTracker.cpp b/Source/Core/VideoBackends/Vulkan/StateTracker.cpp index 089b385ffe..6bb1b8981d 100644 --- a/Source/Core/VideoBackends/Vulkan/StateTracker.cpp +++ b/Source/Core/VideoBackends/Vulkan/StateTracker.cpp @@ -4,32 +4,25 @@ #include "VideoBackends/Vulkan/StateTracker.h" -#include - -#include "Common/Align.h" #include "Common/Assert.h" #include "VideoBackends/Vulkan/CommandBufferManager.h" -#include "VideoBackends/Vulkan/Constants.h" #include "VideoBackends/Vulkan/ObjectCache.h" -#include "VideoBackends/Vulkan/ShaderCache.h" -#include "VideoBackends/Vulkan/StreamBuffer.h" -#include "VideoBackends/Vulkan/Util.h" +#include "VideoBackends/Vulkan/Renderer.h" #include "VideoBackends/Vulkan/VKPipeline.h" +#include "VideoBackends/Vulkan/VKShader.h" +#include "VideoBackends/Vulkan/VKTexture.h" #include "VideoBackends/Vulkan/VertexFormat.h" #include "VideoBackends/Vulkan/VulkanContext.h" -#include "VideoCommon/GeometryShaderManager.h" -#include "VideoCommon/PixelShaderManager.h" -#include 
"VideoCommon/Statistics.h" -#include "VideoCommon/VertexLoaderManager.h" -#include "VideoCommon/VertexShaderManager.h" -#include "VideoCommon/VideoConfig.h" - namespace Vulkan { static std::unique_ptr s_state_tracker; +StateTracker::StateTracker() = default; + +StateTracker::~StateTracker() = default; + StateTracker* StateTracker::GetInstance() { return s_state_tracker.get(); @@ -49,46 +42,37 @@ bool StateTracker::CreateInstance() void StateTracker::DestroyInstance() { + if (!s_state_tracker) + return; + + // When the dummy texture is destroyed, it unbinds itself, then references itself. + // Clear everything out so this doesn't happen. + for (auto& it : s_state_tracker->m_bindings.samplers) + it.imageView = VK_NULL_HANDLE; + s_state_tracker->m_bindings.image_texture.imageView = VK_NULL_HANDLE; + s_state_tracker->m_dummy_texture.reset(); + s_state_tracker.reset(); } bool StateTracker::Initialize() { + // Create a dummy texture which can be used in place of a real binding. + m_dummy_texture = + VKTexture::Create(TextureConfig(1, 1, 1, 1, 1, AbstractTextureFormat::RGBA8, 0)); + if (!m_dummy_texture) + return false; + // Initialize all samplers to point by default for (size_t i = 0; i < NUM_PIXEL_SHADER_SAMPLERS; i++) { - m_bindings.ps_samplers[i].imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; - m_bindings.ps_samplers[i].imageView = g_object_cache->GetDummyImageView(); - m_bindings.ps_samplers[i].sampler = g_object_cache->GetPointSampler(); + m_bindings.samplers[i].imageLayout = VK_IMAGE_LAYOUT_UNDEFINED; + m_bindings.samplers[i].imageView = m_dummy_texture->GetView(); + m_bindings.samplers[i].sampler = g_object_cache->GetPointSampler(); } - // Create the streaming uniform buffer - m_uniform_stream_buffer = - StreamBuffer::Create(VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, INITIAL_UNIFORM_STREAM_BUFFER_SIZE, - MAXIMUM_UNIFORM_STREAM_BUFFER_SIZE); - if (!m_uniform_stream_buffer) - { - PanicAlert("Failed to create uniform stream buffer"); - return false; - } - - // The 
validation layer complains if max(offsets) + max(ubo_ranges) >= ubo_size. - // To work around this we reserve the maximum buffer size at all times, but only commit - // as many bytes as we use. - m_uniform_buffer_reserve_size = sizeof(PixelShaderConstants); - m_uniform_buffer_reserve_size = Common::AlignUp(m_uniform_buffer_reserve_size, - g_vulkan_context->GetUniformBufferAlignment()) + - sizeof(VertexShaderConstants); - m_uniform_buffer_reserve_size = Common::AlignUp(m_uniform_buffer_reserve_size, - g_vulkan_context->GetUniformBufferAlignment()) + - sizeof(GeometryShaderConstants); - // Default dirty flags include all descriptors - InvalidateDescriptorSets(); - SetPendingRebind(); - - // Set default constants - UploadAllConstants(); + InvalidateCachedState(); return true; } @@ -113,20 +97,11 @@ void StateTracker::SetIndexBuffer(VkBuffer buffer, VkDeviceSize offset, VkIndexT m_dirty_flags |= DIRTY_FLAG_INDEX_BUFFER; } -void StateTracker::SetRenderPass(VkRenderPass load_render_pass, VkRenderPass clear_render_pass) -{ - // Should not be changed within a render pass. - ASSERT(!InRenderPass()); - m_load_render_pass = load_render_pass; - m_clear_render_pass = clear_render_pass; -} - -void StateTracker::SetFramebuffer(VkFramebuffer framebuffer, const VkRect2D& render_area) +void StateTracker::SetFramebuffer(VKFramebuffer* framebuffer) { // Should not be changed within a render pass. ASSERT(!InRenderPass()); m_framebuffer = framebuffer; - m_framebuffer_size = render_area; } void StateTracker::SetPipeline(const VKPipeline* pipeline) @@ -134,264 +109,143 @@ void StateTracker::SetPipeline(const VKPipeline* pipeline) if (m_pipeline == pipeline) return; + // If the usage changes, we need to re-bind everything, as the layout is different. 
const bool new_usage = pipeline && (!m_pipeline || m_pipeline->GetUsage() != pipeline->GetUsage()); m_pipeline = pipeline; m_dirty_flags |= DIRTY_FLAG_PIPELINE; if (new_usage) - m_dirty_flags |= DIRTY_FLAG_ALL_DESCRIPTOR_SETS; + m_dirty_flags |= DIRTY_FLAG_DESCRIPTOR_SETS; } -void StateTracker::UpdateVertexShaderConstants() +void StateTracker::SetComputeShader(const VKShader* shader) { - if (!VertexShaderManager::dirty || !ReserveConstantStorage()) + if (m_compute_shader == shader) return; - // Buffer allocation changed? - if (m_uniform_stream_buffer->GetBuffer() != - m_bindings.uniform_buffer_bindings[UBO_DESCRIPTOR_SET_BINDING_VS].buffer) - { - m_bindings.uniform_buffer_bindings[UBO_DESCRIPTOR_SET_BINDING_VS].buffer = - m_uniform_stream_buffer->GetBuffer(); - m_dirty_flags |= DIRTY_FLAG_VS_UBO; - } - - m_bindings.uniform_buffer_offsets[UBO_DESCRIPTOR_SET_BINDING_VS] = - static_cast(m_uniform_stream_buffer->GetCurrentOffset()); - m_dirty_flags |= DIRTY_FLAG_DYNAMIC_OFFSETS; - - memcpy(m_uniform_stream_buffer->GetCurrentHostPointer(), &VertexShaderManager::constants, - sizeof(VertexShaderConstants)); - ADDSTAT(stats.thisFrame.bytesUniformStreamed, sizeof(VertexShaderConstants)); - m_uniform_stream_buffer->CommitMemory(sizeof(VertexShaderConstants)); - VertexShaderManager::dirty = false; + m_compute_shader = shader; + m_dirty_flags |= DIRTY_FLAG_COMPUTE_SHADER; } -void StateTracker::UpdateGeometryShaderConstants() +void StateTracker::SetGXUniformBuffer(u32 index, VkBuffer buffer, u32 offset, u32 size) { - if (!GeometryShaderManager::dirty || !ReserveConstantStorage()) + auto& binding = m_bindings.gx_ubo_bindings[index]; + if (binding.buffer != buffer || binding.range != size) + { + binding.buffer = buffer; + binding.range = size; + m_dirty_flags |= DIRTY_FLAG_GX_UBOS; + } + + if (m_bindings.gx_ubo_offsets[index] != offset) + { + m_bindings.gx_ubo_offsets[index] = offset; + m_dirty_flags |= DIRTY_FLAG_GX_UBO_OFFSETS; + } +} + +void 
StateTracker::SetUtilityUniformBuffer(VkBuffer buffer, u32 offset, u32 size) +{ + auto& binding = m_bindings.utility_ubo_binding; + if (binding.buffer != buffer || binding.range != size) + { + binding.buffer = buffer; + binding.range = size; + m_dirty_flags |= DIRTY_FLAG_UTILITY_UBO; + } + + if (m_bindings.utility_ubo_offset != offset) + { + m_bindings.utility_ubo_offset = offset; + m_dirty_flags |= DIRTY_FLAG_UTILITY_UBO_OFFSET | DIRTY_FLAG_COMPUTE_DESCRIPTOR_SET; + } +} + +void StateTracker::SetTexture(u32 index, VkImageView view) +{ + if (m_bindings.samplers[index].imageView == view) return; - // Buffer allocation changed? - if (m_uniform_stream_buffer->GetBuffer() != - m_bindings.uniform_buffer_bindings[UBO_DESCRIPTOR_SET_BINDING_GS].buffer) - { - m_bindings.uniform_buffer_bindings[UBO_DESCRIPTOR_SET_BINDING_GS].buffer = - m_uniform_stream_buffer->GetBuffer(); - m_dirty_flags |= DIRTY_FLAG_GS_UBO; - } - - m_bindings.uniform_buffer_offsets[UBO_DESCRIPTOR_SET_BINDING_GS] = - static_cast(m_uniform_stream_buffer->GetCurrentOffset()); - m_dirty_flags |= DIRTY_FLAG_DYNAMIC_OFFSETS; - - memcpy(m_uniform_stream_buffer->GetCurrentHostPointer(), &GeometryShaderManager::constants, - sizeof(GeometryShaderConstants)); - ADDSTAT(stats.thisFrame.bytesUniformStreamed, sizeof(GeometryShaderConstants)); - m_uniform_stream_buffer->CommitMemory(sizeof(GeometryShaderConstants)); - GeometryShaderManager::dirty = false; + m_bindings.samplers[index].imageView = view; + m_bindings.samplers[index].imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; + m_dirty_flags |= + DIRTY_FLAG_GX_SAMPLERS | DIRTY_FLAG_UTILITY_BINDINGS | DIRTY_FLAG_COMPUTE_BINDINGS; } -void StateTracker::UpdatePixelShaderConstants() +void StateTracker::SetSampler(u32 index, VkSampler sampler) { - if (!PixelShaderManager::dirty || !ReserveConstantStorage()) + if (m_bindings.samplers[index].sampler == sampler) return; - // Buffer allocation changed? 
- if (m_uniform_stream_buffer->GetBuffer() != - m_bindings.uniform_buffer_bindings[UBO_DESCRIPTOR_SET_BINDING_PS].buffer) - { - m_bindings.uniform_buffer_bindings[UBO_DESCRIPTOR_SET_BINDING_PS].buffer = - m_uniform_stream_buffer->GetBuffer(); - m_dirty_flags |= DIRTY_FLAG_PS_UBO; - } - - m_bindings.uniform_buffer_offsets[UBO_DESCRIPTOR_SET_BINDING_PS] = - static_cast(m_uniform_stream_buffer->GetCurrentOffset()); - m_dirty_flags |= DIRTY_FLAG_DYNAMIC_OFFSETS; - - memcpy(m_uniform_stream_buffer->GetCurrentHostPointer(), &PixelShaderManager::constants, - sizeof(PixelShaderConstants)); - ADDSTAT(stats.thisFrame.bytesUniformStreamed, sizeof(PixelShaderConstants)); - m_uniform_stream_buffer->CommitMemory(sizeof(PixelShaderConstants)); - PixelShaderManager::dirty = false; + m_bindings.samplers[index].sampler = sampler; + m_dirty_flags |= + DIRTY_FLAG_GX_SAMPLERS | DIRTY_FLAG_UTILITY_BINDINGS | DIRTY_FLAG_COMPUTE_BINDINGS; } -void StateTracker::UpdateConstants(const void* data, u32 data_size) +void StateTracker::SetSSBO(VkBuffer buffer, VkDeviceSize offset, VkDeviceSize range) { - if (!m_uniform_stream_buffer->ReserveMemory( - data_size, g_vulkan_context->GetUniformBufferAlignment(), true, true, false)) - { - WARN_LOG(VIDEO, "Executing command buffer while waiting for ext space in uniform buffer"); - Util::ExecuteCurrentCommandsAndRestoreState(false); - } - - for (u32 binding = 0; binding < NUM_UBO_DESCRIPTOR_SET_BINDINGS; binding++) - { - if (m_bindings.uniform_buffer_bindings[binding].buffer != m_uniform_stream_buffer->GetBuffer()) - { - m_bindings.uniform_buffer_bindings[binding].buffer = m_uniform_stream_buffer->GetBuffer(); - m_dirty_flags |= DIRTY_FLAG_VS_UBO << binding; - } - m_bindings.uniform_buffer_offsets[binding] = - static_cast(m_uniform_stream_buffer->GetCurrentOffset()); - } - m_dirty_flags |= DIRTY_FLAG_DYNAMIC_OFFSETS; - - std::memcpy(m_uniform_stream_buffer->GetCurrentHostPointer(), data, data_size); - ADDSTAT(stats.thisFrame.bytesUniformStreamed, 
data_size); - m_uniform_stream_buffer->CommitMemory(data_size); - - // Cached data is now out-of-sync. - VertexShaderManager::dirty = true; - GeometryShaderManager::dirty = true; - PixelShaderManager::dirty = true; -} - -bool StateTracker::ReserveConstantStorage() -{ - // Since we invalidate all constants on command buffer execution, it doesn't matter if this - // causes the stream buffer to be resized. - if (m_uniform_stream_buffer->ReserveMemory(m_uniform_buffer_reserve_size, - g_vulkan_context->GetUniformBufferAlignment(), true, - true, false)) - { - return true; - } - - // The only places that call constant updates are safe to have state restored. - WARN_LOG(VIDEO, "Executing command buffer while waiting for space in uniform buffer"); - Util::ExecuteCurrentCommandsAndRestoreState(false); - - // Since we are on a new command buffer, all constants have been invalidated, and we need - // to reupload them. We may as well do this now, since we're issuing a draw anyway. - UploadAllConstants(); - return false; -} - -void StateTracker::UploadAllConstants() -{ - // We are free to re-use parts of the buffer now since we're uploading all constants. - size_t ub_alignment = g_vulkan_context->GetUniformBufferAlignment(); - size_t pixel_constants_offset = 0; - size_t vertex_constants_offset = - Common::AlignUp(pixel_constants_offset + sizeof(PixelShaderConstants), ub_alignment); - size_t geometry_constants_offset = - Common::AlignUp(vertex_constants_offset + sizeof(VertexShaderConstants), ub_alignment); - size_t allocation_size = geometry_constants_offset + sizeof(GeometryShaderConstants); - - // Allocate everything at once. - // We should only be here if the buffer was full and a command buffer was submitted anyway. 
- if (!m_uniform_stream_buffer->ReserveMemory(allocation_size, ub_alignment, true, true, false)) - { - PanicAlert("Failed to allocate space for constants in streaming buffer"); - return; - } - - // Update bindings - for (size_t i = 0; i < NUM_UBO_DESCRIPTOR_SET_BINDINGS; i++) - { - m_bindings.uniform_buffer_bindings[i].buffer = m_uniform_stream_buffer->GetBuffer(); - m_bindings.uniform_buffer_bindings[i].offset = 0; - } - m_bindings.uniform_buffer_bindings[UBO_DESCRIPTOR_SET_BINDING_PS].range = - sizeof(PixelShaderConstants); - m_bindings.uniform_buffer_bindings[UBO_DESCRIPTOR_SET_BINDING_VS].range = - sizeof(VertexShaderConstants); - m_bindings.uniform_buffer_bindings[UBO_DESCRIPTOR_SET_BINDING_GS].range = - sizeof(GeometryShaderConstants); - - // Update dynamic offsets - m_bindings.uniform_buffer_offsets[UBO_DESCRIPTOR_SET_BINDING_PS] = - static_cast(m_uniform_stream_buffer->GetCurrentOffset() + pixel_constants_offset); - - m_bindings.uniform_buffer_offsets[UBO_DESCRIPTOR_SET_BINDING_VS] = - static_cast(m_uniform_stream_buffer->GetCurrentOffset() + vertex_constants_offset); - - m_bindings.uniform_buffer_offsets[UBO_DESCRIPTOR_SET_BINDING_GS] = static_cast( - m_uniform_stream_buffer->GetCurrentOffset() + geometry_constants_offset); - - m_dirty_flags |= DIRTY_FLAG_ALL_DESCRIPTOR_SETS | DIRTY_FLAG_DYNAMIC_OFFSETS | DIRTY_FLAG_VS_UBO | - DIRTY_FLAG_GS_UBO | DIRTY_FLAG_PS_UBO; - - // Copy the actual data in - memcpy(m_uniform_stream_buffer->GetCurrentHostPointer() + pixel_constants_offset, - &PixelShaderManager::constants, sizeof(PixelShaderConstants)); - memcpy(m_uniform_stream_buffer->GetCurrentHostPointer() + vertex_constants_offset, - &VertexShaderManager::constants, sizeof(VertexShaderConstants)); - memcpy(m_uniform_stream_buffer->GetCurrentHostPointer() + geometry_constants_offset, - &GeometryShaderManager::constants, sizeof(GeometryShaderConstants)); - - // Finally, flush buffer memory after copying - m_uniform_stream_buffer->CommitMemory(allocation_size); - - 
// Clear dirty flags - VertexShaderManager::dirty = false; - GeometryShaderManager::dirty = false; - PixelShaderManager::dirty = false; -} - -void StateTracker::SetTexture(size_t index, VkImageView view) -{ - if (m_bindings.ps_samplers[index].imageView == view) - return; - - m_bindings.ps_samplers[index].imageView = view; - m_bindings.ps_samplers[index].imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; - m_dirty_flags |= DIRTY_FLAG_PS_SAMPLERS; -} - -void StateTracker::SetSampler(size_t index, VkSampler sampler) -{ - if (m_bindings.ps_samplers[index].sampler == sampler) - return; - - m_bindings.ps_samplers[index].sampler = sampler; - m_dirty_flags |= DIRTY_FLAG_PS_SAMPLERS; -} - -void StateTracker::SetBBoxBuffer(VkBuffer buffer, VkDeviceSize offset, VkDeviceSize range) -{ - if (m_bindings.ps_ssbo.buffer == buffer && m_bindings.ps_ssbo.offset == offset && - m_bindings.ps_ssbo.range == range) + if (m_bindings.ssbo.buffer == buffer && m_bindings.ssbo.offset == offset && + m_bindings.ssbo.range == range) { return; } - m_bindings.ps_ssbo.buffer = buffer; - m_bindings.ps_ssbo.offset = offset; - m_bindings.ps_ssbo.range = range; - m_dirty_flags |= DIRTY_FLAG_PS_SSBO; + m_bindings.ssbo.buffer = buffer; + m_bindings.ssbo.offset = offset; + m_bindings.ssbo.range = range; + m_dirty_flags |= DIRTY_FLAG_GX_SSBO; +} + +void StateTracker::SetTexelBuffer(u32 index, VkBufferView view) +{ + if (m_bindings.texel_buffers[index] == view) + return; + + m_bindings.texel_buffers[index] = view; + m_dirty_flags |= DIRTY_FLAG_UTILITY_BINDINGS | DIRTY_FLAG_COMPUTE_BINDINGS; +} + +void StateTracker::SetImageTexture(VkImageView view) +{ + if (m_bindings.image_texture.imageView == view) + return; + + m_bindings.image_texture.imageView = view; + m_bindings.image_texture.imageLayout = VK_IMAGE_LAYOUT_GENERAL; + m_dirty_flags |= DIRTY_FLAG_COMPUTE_BINDINGS; } void StateTracker::UnbindTexture(VkImageView view) { - for (VkDescriptorImageInfo& it : m_bindings.ps_samplers) + for 
(VkDescriptorImageInfo& it : m_bindings.samplers) { if (it.imageView == view) - it.imageView = g_object_cache->GetDummyImageView(); + { + it.imageView = m_dummy_texture->GetView(); + it.imageLayout = VK_IMAGE_LAYOUT_UNDEFINED; + } + } + + if (m_bindings.image_texture.imageView == view) + { + m_bindings.image_texture.imageView = m_dummy_texture->GetView(); + m_bindings.image_texture.imageLayout = VK_IMAGE_LAYOUT_UNDEFINED; } } -void StateTracker::InvalidateDescriptorSets() +void StateTracker::InvalidateCachedState() { - m_descriptor_sets.fill(VK_NULL_HANDLE); - m_dirty_flags |= DIRTY_FLAG_ALL_DESCRIPTOR_SETS; -} - -void StateTracker::InvalidateConstants() -{ - VertexShaderManager::dirty = true; - GeometryShaderManager::dirty = true; - PixelShaderManager::dirty = true; -} - -void StateTracker::SetPendingRebind() -{ - m_dirty_flags |= DIRTY_FLAG_DYNAMIC_OFFSETS | DIRTY_FLAG_DESCRIPTOR_SET_BINDING | - DIRTY_FLAG_VERTEX_BUFFER | DIRTY_FLAG_INDEX_BUFFER | DIRTY_FLAG_VIEWPORT | - DIRTY_FLAG_SCISSOR | DIRTY_FLAG_PIPELINE; + m_gx_descriptor_sets.fill(VK_NULL_HANDLE); + m_utility_descriptor_sets.fill(VK_NULL_HANDLE); + m_compute_descriptor_set = VK_NULL_HANDLE; + m_dirty_flags |= DIRTY_FLAG_ALL_DESCRIPTORS | DIRTY_FLAG_VIEWPORT | DIRTY_FLAG_SCISSOR | + DIRTY_FLAG_PIPELINE | DIRTY_FLAG_COMPUTE_SHADER | DIRTY_FLAG_DESCRIPTOR_SETS | + DIRTY_FLAG_COMPUTE_DESCRIPTOR_SET; + if (m_vertex_buffer != VK_NULL_HANDLE) + m_dirty_flags |= DIRTY_FLAG_VERTEX_BUFFER; + if (m_index_buffer != VK_NULL_HANDLE) + m_dirty_flags |= DIRTY_FLAG_INDEX_BUFFER; } void StateTracker::BeginRenderPass() @@ -399,13 +253,33 @@ void StateTracker::BeginRenderPass() if (InRenderPass()) return; - m_current_render_pass = m_load_render_pass; - m_framebuffer_render_area = m_framebuffer_size; + m_current_render_pass = m_framebuffer->GetLoadRenderPass(); + m_framebuffer_render_area = m_framebuffer->GetRect(); VkRenderPassBeginInfo begin_info = {VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO, nullptr, m_current_render_pass, 
- m_framebuffer, + m_framebuffer->GetFB(), + m_framebuffer_render_area, + 0, + nullptr}; + + vkCmdBeginRenderPass(g_command_buffer_mgr->GetCurrentCommandBuffer(), &begin_info, + VK_SUBPASS_CONTENTS_INLINE); +} + +void StateTracker::BeginDiscardRenderPass() +{ + if (InRenderPass()) + return; + + m_current_render_pass = m_framebuffer->GetDiscardRenderPass(); + m_framebuffer_render_area = m_framebuffer->GetRect(); + + VkRenderPassBeginInfo begin_info = {VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO, + nullptr, + m_current_render_pass, + m_framebuffer->GetFB(), m_framebuffer_render_area, 0, nullptr}; @@ -428,13 +302,13 @@ void StateTracker::BeginClearRenderPass(const VkRect2D& area, const VkClearValue { ASSERT(!InRenderPass()); - m_current_render_pass = m_clear_render_pass; + m_current_render_pass = m_framebuffer->GetClearRenderPass(); m_framebuffer_render_area = area; VkRenderPassBeginInfo begin_info = {VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO, nullptr, m_current_render_pass, - m_framebuffer, + m_framebuffer->GetFB(), m_framebuffer_render_area, num_clear_values, clear_values}; @@ -461,22 +335,22 @@ void StateTracker::SetScissor(const VkRect2D& scissor) m_dirty_flags |= DIRTY_FLAG_SCISSOR; } -bool StateTracker::Bind(bool rebind_all /*= false*/) +bool StateTracker::Bind() { // Must have a pipeline. if (!m_pipeline) return false; // Check the render area if we were in a clear pass. - if (m_current_render_pass == m_clear_render_pass && !IsViewportWithinRenderArea()) + if (m_current_render_pass == m_framebuffer->GetClearRenderPass() && !IsViewportWithinRenderArea()) EndRenderPass(); // Get a new descriptor set if any parts have changed - if (m_dirty_flags & DIRTY_FLAG_ALL_DESCRIPTOR_SETS && !UpdateDescriptorSet()) + if (!UpdateDescriptorSet()) { // We can fail to allocate descriptors if we exhaust the pool for this command buffer. 
WARN_LOG(VIDEO, "Failed to get a descriptor set, executing buffer"); - Util::ExecuteCurrentCommandsAndRestoreState(false, false); + Renderer::GetInstance()->ExecuteCommandBuffer(false, false); if (!UpdateDescriptorSet()) { // Something strange going on. @@ -490,151 +364,57 @@ bool StateTracker::Bind(bool rebind_all /*= false*/) BeginRenderPass(); // Re-bind parts of the pipeline - VkCommandBuffer command_buffer = g_command_buffer_mgr->GetCurrentCommandBuffer(); - if (m_dirty_flags & DIRTY_FLAG_VERTEX_BUFFER || rebind_all) + const VkCommandBuffer command_buffer = g_command_buffer_mgr->GetCurrentCommandBuffer(); + if (m_dirty_flags & DIRTY_FLAG_VERTEX_BUFFER) vkCmdBindVertexBuffers(command_buffer, 0, 1, &m_vertex_buffer, &m_vertex_buffer_offset); - if (m_dirty_flags & DIRTY_FLAG_INDEX_BUFFER || rebind_all) + if (m_dirty_flags & DIRTY_FLAG_INDEX_BUFFER) vkCmdBindIndexBuffer(command_buffer, m_index_buffer, m_index_buffer_offset, m_index_type); - if (m_dirty_flags & DIRTY_FLAG_PIPELINE || rebind_all) + if (m_dirty_flags & DIRTY_FLAG_PIPELINE) vkCmdBindPipeline(command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, m_pipeline->GetVkPipeline()); - if (m_dirty_flags & DIRTY_FLAG_DESCRIPTOR_SET_BINDING || rebind_all) - { - vkCmdBindDescriptorSets(command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, - m_pipeline->GetVkPipelineLayout(), 0, m_num_active_descriptor_sets, - m_descriptor_sets.data(), m_num_dynamic_offsets, - m_bindings.uniform_buffer_offsets.data()); - } - else if (m_dirty_flags & DIRTY_FLAG_DYNAMIC_OFFSETS) - { - vkCmdBindDescriptorSets(command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, - m_pipeline->GetVkPipelineLayout(), - DESCRIPTOR_SET_BIND_POINT_UNIFORM_BUFFERS, 1, - &m_descriptor_sets[DESCRIPTOR_SET_BIND_POINT_UNIFORM_BUFFERS], - m_num_dynamic_offsets, m_bindings.uniform_buffer_offsets.data()); - } - - if (m_dirty_flags & DIRTY_FLAG_VIEWPORT || rebind_all) + if (m_dirty_flags & DIRTY_FLAG_VIEWPORT) vkCmdSetViewport(command_buffer, 0, 1, &m_viewport); - if 
(m_dirty_flags & DIRTY_FLAG_SCISSOR || rebind_all) + if (m_dirty_flags & DIRTY_FLAG_SCISSOR) vkCmdSetScissor(command_buffer, 0, 1, &m_scissor); - m_dirty_flags = 0; + m_dirty_flags &= ~(DIRTY_FLAG_VERTEX_BUFFER | DIRTY_FLAG_INDEX_BUFFER | DIRTY_FLAG_PIPELINE | + DIRTY_FLAG_VIEWPORT | DIRTY_FLAG_SCISSOR); return true; } -void StateTracker::OnDraw() +bool StateTracker::BindCompute() { - m_draw_counter++; + if (!m_compute_shader) + return false; - // If we didn't have any CPU access last frame, do nothing. - if (m_scheduled_command_buffer_kicks.empty() || !m_allow_background_execution) - return; + // Can't kick compute in a render pass. + if (InRenderPass()) + EndRenderPass(); - // Check if this draw is scheduled to kick a command buffer. - // The draw counters will always be sorted so a binary search is possible here. - if (std::binary_search(m_scheduled_command_buffer_kicks.begin(), - m_scheduled_command_buffer_kicks.end(), m_draw_counter)) + const VkCommandBuffer command_buffer = g_command_buffer_mgr->GetCurrentCommandBuffer(); + if (m_dirty_flags & DIRTY_FLAG_COMPUTE_SHADER) { - // Kick a command buffer on the background thread. - Util::ExecuteCurrentCommandsAndRestoreState(true); - } -} - -void StateTracker::OnCPUEFBAccess() -{ - // Check this isn't another access without any draws inbetween. - if (!m_cpu_accesses_this_frame.empty() && m_cpu_accesses_this_frame.back() == m_draw_counter) - return; - - // Store the current draw counter for scheduling in OnEndFrame. - m_cpu_accesses_this_frame.emplace_back(m_draw_counter); -} - -void StateTracker::OnEFBCopyToRAM() -{ - // If we're not deferring, try to preempt it next frame. - if (!g_ActiveConfig.bDeferEFBCopies) - { - OnCPUEFBAccess(); - return; + vkCmdBindPipeline(command_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, + m_compute_shader->GetComputePipeline()); } - // Otherwise, only execute if we have at least 10 objects between us and the last copy. 
- const u32 diff = m_draw_counter - m_last_efb_copy_draw_counter; - m_last_efb_copy_draw_counter = m_draw_counter; - if (diff < MINIMUM_DRAW_CALLS_PER_COMMAND_BUFFER_FOR_READBACK) - return; - - Util::ExecuteCurrentCommandsAndRestoreState(true); -} - -void StateTracker::OnEndFrame() -{ - m_draw_counter = 0; - m_last_efb_copy_draw_counter = 0; - m_scheduled_command_buffer_kicks.clear(); - - // If we have no CPU access at all, leave everything in the one command buffer for maximum - // parallelism between CPU/GPU, at the cost of slightly higher latency. - if (m_cpu_accesses_this_frame.empty()) - return; - - // In order to reduce CPU readback latency, we want to kick a command buffer roughly halfway - // between the draw counters that invoked the readback, or every 250 draws, whichever is smaller. - if (g_ActiveConfig.iCommandBufferExecuteInterval > 0) + if (!UpdateComputeDescriptorSet()) { - u32 last_draw_counter = 0; - u32 interval = static_cast(g_ActiveConfig.iCommandBufferExecuteInterval); - for (u32 draw_counter : m_cpu_accesses_this_frame) + WARN_LOG(VIDEO, "Failed to get a compute descriptor set, executing buffer"); + Renderer::GetInstance()->ExecuteCommandBuffer(false, false); + if (!UpdateComputeDescriptorSet()) { - // We don't want to waste executing command buffers for only a few draws, so set a minimum. - // Leave last_draw_counter as-is, so we get the correct number of draws between submissions. - u32 draw_count = draw_counter - last_draw_counter; - if (draw_count < MINIMUM_DRAW_CALLS_PER_COMMAND_BUFFER_FOR_READBACK) - continue; - - if (draw_count <= interval) - { - u32 mid_point = draw_count / 2; - m_scheduled_command_buffer_kicks.emplace_back(last_draw_counter + mid_point); - } - else - { - u32 counter = interval; - while (counter < draw_count) - { - m_scheduled_command_buffer_kicks.emplace_back(last_draw_counter + counter); - counter += interval; - } - } - - last_draw_counter = draw_counter; + // Something strange going on. 
+ ERROR_LOG(VIDEO, "Failed to get descriptor set, skipping dispatch"); + return false; } } -#if 0 - { - std::stringstream ss; - std::for_each(m_cpu_accesses_this_frame.begin(), m_cpu_accesses_this_frame.end(), [&ss](u32 idx) { ss << idx << ","; }); - WARN_LOG(VIDEO, "CPU EFB accesses in last frame: %s", ss.str().c_str()); - } - { - std::stringstream ss; - std::for_each(m_scheduled_command_buffer_kicks.begin(), m_scheduled_command_buffer_kicks.end(), [&ss](u32 idx) { ss << idx << ","; }); - WARN_LOG(VIDEO, "Scheduled command buffer kicks: %s", ss.str().c_str()); - } -#endif - - m_cpu_accesses_this_frame.clear(); -} - -void StateTracker::SetBackgroundCommandBufferExecution(bool enabled) -{ - m_allow_background_execution = enabled; + m_dirty_flags &= ~DIRTY_FLAG_COMPUTE_SHADER; + return true; } bool StateTracker::IsWithinRenderArea(s32 x, s32 y, u32 width, u32 height) const @@ -661,7 +441,7 @@ bool StateTracker::IsViewportWithinRenderArea() const void StateTracker::EndClearRenderPass() { - if (m_current_render_pass != m_clear_render_pass) + if (m_current_render_pass != m_framebuffer->GetClearRenderPass()) return; // End clear render pass. 
Bind() will call BeginRenderPass() which @@ -685,135 +465,244 @@ bool StateTracker::UpdateGXDescriptorSet() std::array writes; u32 num_writes = 0; - if (m_dirty_flags & (DIRTY_FLAG_VS_UBO | DIRTY_FLAG_GS_UBO | DIRTY_FLAG_PS_UBO) || - m_descriptor_sets[DESCRIPTOR_SET_BIND_POINT_UNIFORM_BUFFERS] == VK_NULL_HANDLE) + if (m_dirty_flags & DIRTY_FLAG_GX_UBOS || m_gx_descriptor_sets[0] == VK_NULL_HANDLE) { - VkDescriptorSetLayout layout = - g_object_cache->GetDescriptorSetLayout(DESCRIPTOR_SET_LAYOUT_PER_STAGE_UNIFORM_BUFFERS); - VkDescriptorSet set = g_command_buffer_mgr->AllocateDescriptorSet(layout); - if (set == VK_NULL_HANDLE) + m_gx_descriptor_sets[0] = g_command_buffer_mgr->AllocateDescriptorSet( + g_object_cache->GetDescriptorSetLayout(DESCRIPTOR_SET_LAYOUT_STANDARD_UNIFORM_BUFFERS)); + if (m_gx_descriptor_sets[0] == VK_NULL_HANDLE) return false; for (size_t i = 0; i < NUM_UBO_DESCRIPTOR_SET_BINDINGS; i++) { - if (i == UBO_DESCRIPTOR_SET_BINDING_GS && !g_vulkan_context->SupportsGeometryShaders()) + if (i == UBO_DESCRIPTOR_SET_BINDING_GS && + !g_ActiveConfig.backend_info.bSupportsGeometryShaders) + { continue; + } writes[num_writes++] = {VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, nullptr, - set, + m_gx_descriptor_sets[0], static_cast(i), 0, 1, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC, nullptr, - &m_bindings.uniform_buffer_bindings[i], + &m_bindings.gx_ubo_bindings[i], nullptr}; } - m_descriptor_sets[DESCRIPTOR_SET_BIND_POINT_UNIFORM_BUFFERS] = set; - m_dirty_flags |= DIRTY_FLAG_DESCRIPTOR_SET_BINDING; + m_dirty_flags = (m_dirty_flags & ~DIRTY_FLAG_GX_UBOS) | DIRTY_FLAG_DESCRIPTOR_SETS; } - if (m_dirty_flags & DIRTY_FLAG_PS_SAMPLERS || - m_descriptor_sets[DESCRIPTOR_SET_BIND_POINT_PIXEL_SHADER_SAMPLERS] == VK_NULL_HANDLE) + if (m_dirty_flags & DIRTY_FLAG_GX_SAMPLERS || m_gx_descriptor_sets[1] == VK_NULL_HANDLE) { - VkDescriptorSetLayout layout = - g_object_cache->GetDescriptorSetLayout(DESCRIPTOR_SET_LAYOUT_PIXEL_SHADER_SAMPLERS); - VkDescriptorSet set = 
g_command_buffer_mgr->AllocateDescriptorSet(layout); - if (set == VK_NULL_HANDLE) + m_gx_descriptor_sets[1] = g_command_buffer_mgr->AllocateDescriptorSet( + g_object_cache->GetDescriptorSetLayout(DESCRIPTOR_SET_LAYOUT_STANDARD_SAMPLERS)); + if (m_gx_descriptor_sets[1] == VK_NULL_HANDLE) return false; writes[num_writes++] = {VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, nullptr, - set, + m_gx_descriptor_sets[1], 0, 0, static_cast(NUM_PIXEL_SHADER_SAMPLERS), VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, - m_bindings.ps_samplers.data(), + m_bindings.samplers.data(), nullptr, nullptr}; - - m_descriptor_sets[DESCRIPTOR_SET_BIND_POINT_PIXEL_SHADER_SAMPLERS] = set; - m_dirty_flags |= DIRTY_FLAG_DESCRIPTOR_SET_BINDING; + m_dirty_flags = (m_dirty_flags & ~DIRTY_FLAG_GX_SAMPLERS) | DIRTY_FLAG_DESCRIPTOR_SETS; } - if (g_vulkan_context->SupportsBoundingBox() && - (m_dirty_flags & DIRTY_FLAG_PS_SSBO || - m_descriptor_sets[DESCRIPTOR_SET_BIND_POINT_STORAGE_OR_TEXEL_BUFFER] == VK_NULL_HANDLE)) + if (g_ActiveConfig.backend_info.bSupportsBBox && + (m_dirty_flags & DIRTY_FLAG_GX_SSBO || m_gx_descriptor_sets[2] == VK_NULL_HANDLE)) { - VkDescriptorSetLayout layout = - g_object_cache->GetDescriptorSetLayout(DESCRIPTOR_SET_LAYOUT_SHADER_STORAGE_BUFFERS); - VkDescriptorSet set = g_command_buffer_mgr->AllocateDescriptorSet(layout); - if (set == VK_NULL_HANDLE) + m_gx_descriptor_sets[2] = + g_command_buffer_mgr->AllocateDescriptorSet(g_object_cache->GetDescriptorSetLayout( + DESCRIPTOR_SET_LAYOUT_STANDARD_SHADER_STORAGE_BUFFERS)); + if (m_gx_descriptor_sets[2] == VK_NULL_HANDLE) return false; - writes[num_writes++] = {VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, - nullptr, - set, - 0, - 0, - 1, - VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, - nullptr, - &m_bindings.ps_ssbo, - nullptr}; - - m_descriptor_sets[DESCRIPTOR_SET_BIND_POINT_STORAGE_OR_TEXEL_BUFFER] = set; - m_dirty_flags |= DIRTY_FLAG_DESCRIPTOR_SET_BINDING; + writes[num_writes++] = { + VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, nullptr, 
m_gx_descriptor_sets[2], 0, 0, 1, + VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, nullptr, &m_bindings.ssbo, nullptr}; + m_dirty_flags = (m_dirty_flags & ~DIRTY_FLAG_GX_SSBO) | DIRTY_FLAG_DESCRIPTOR_SETS; } if (num_writes > 0) vkUpdateDescriptorSets(g_vulkan_context->GetDevice(), num_writes, writes.data(), 0, nullptr); - m_num_active_descriptor_sets = g_vulkan_context->SupportsBoundingBox() ? - NUM_GX_DRAW_DESCRIPTOR_SETS_SSBO : - NUM_GX_DRAW_DESCRIPTOR_SETS; - m_num_dynamic_offsets = NUM_UBO_DESCRIPTOR_SET_BINDINGS; + if (m_dirty_flags & DIRTY_FLAG_DESCRIPTOR_SETS) + { + vkCmdBindDescriptorSets(g_command_buffer_mgr->GetCurrentCommandBuffer(), + VK_PIPELINE_BIND_POINT_GRAPHICS, m_pipeline->GetVkPipelineLayout(), 0, + g_ActiveConfig.backend_info.bSupportsBBox ? + NUM_GX_DESCRIPTOR_SETS : + (NUM_GX_DESCRIPTOR_SETS - 1), + m_gx_descriptor_sets.data(), NUM_UBO_DESCRIPTOR_SET_BINDINGS, + m_bindings.gx_ubo_offsets.data()); + m_dirty_flags &= ~(DIRTY_FLAG_DESCRIPTOR_SETS | DIRTY_FLAG_GX_UBO_OFFSETS); + } + else if (m_dirty_flags & DIRTY_FLAG_GX_UBO_OFFSETS) + { + vkCmdBindDescriptorSets(g_command_buffer_mgr->GetCurrentCommandBuffer(), + VK_PIPELINE_BIND_POINT_GRAPHICS, m_pipeline->GetVkPipelineLayout(), 0, + 1, m_gx_descriptor_sets.data(), NUM_UBO_DESCRIPTOR_SET_BINDINGS, + m_bindings.gx_ubo_offsets.data()); + m_dirty_flags &= ~DIRTY_FLAG_GX_UBO_OFFSETS; + } + return true; } bool StateTracker::UpdateUtilityDescriptorSet() { + // Max number of updates - UBO, Samplers, TexelBuffer + std::array dswrites; + u32 writes = 0; + // Allocate descriptor sets. 
- m_descriptor_sets[0] = g_command_buffer_mgr->AllocateDescriptorSet( - g_object_cache->GetDescriptorSetLayout(DESCRIPTOR_SET_LAYOUT_SINGLE_UNIFORM_BUFFER)); - m_descriptor_sets[1] = g_command_buffer_mgr->AllocateDescriptorSet( - g_object_cache->GetDescriptorSetLayout(DESCRIPTOR_SET_LAYOUT_PIXEL_SHADER_SAMPLERS)); - if (m_descriptor_sets[0] == VK_NULL_HANDLE || m_descriptor_sets[1] == VK_NULL_HANDLE) + if (m_dirty_flags & DIRTY_FLAG_UTILITY_UBO || m_utility_descriptor_sets[0] == VK_NULL_HANDLE) { - return false; + m_utility_descriptor_sets[0] = g_command_buffer_mgr->AllocateDescriptorSet( + g_object_cache->GetDescriptorSetLayout(DESCRIPTOR_SET_LAYOUT_UTILITY_UNIFORM_BUFFER)); + if (!m_utility_descriptor_sets[0]) + return false; + + dswrites[writes++] = {VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, + nullptr, + m_utility_descriptor_sets[0], + 0, + 0, + 1, + VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC, + nullptr, + &m_bindings.utility_ubo_binding, + nullptr}; + + m_dirty_flags = (m_dirty_flags & ~DIRTY_FLAG_UTILITY_UBO) | DIRTY_FLAG_DESCRIPTOR_SETS; } - // Build UBO descriptor set. 
- std::array dswrites; - dswrites[0] = {VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, - nullptr, - m_descriptor_sets[0], - 0, - 0, - 1, - VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC, - nullptr, - &m_bindings.uniform_buffer_bindings[UBO_DESCRIPTOR_SET_BINDING_VS], - nullptr}; - dswrites[1] = {VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, - nullptr, - m_descriptor_sets[1], - 0, - 0, - NUM_PIXEL_SHADER_SAMPLERS, - VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, - m_bindings.ps_samplers.data(), - nullptr, - nullptr}; + if (m_dirty_flags & DIRTY_FLAG_UTILITY_BINDINGS || m_utility_descriptor_sets[1] == VK_NULL_HANDLE) + { + m_utility_descriptor_sets[1] = g_command_buffer_mgr->AllocateDescriptorSet( + g_object_cache->GetDescriptorSetLayout(DESCRIPTOR_SET_LAYOUT_UTILITY_SAMPLERS)); + if (!m_utility_descriptor_sets[1]) + return false; + + dswrites[writes++] = {VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, + nullptr, + m_utility_descriptor_sets[1], + 0, + 0, + NUM_PIXEL_SHADER_SAMPLERS, + VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, + m_bindings.samplers.data(), + nullptr, + nullptr}; + dswrites[writes++] = {VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, + nullptr, + m_utility_descriptor_sets[1], + 8, + 0, + 1, + VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, + nullptr, + nullptr, + m_bindings.texel_buffers.data()}; + + m_dirty_flags = (m_dirty_flags & ~DIRTY_FLAG_UTILITY_BINDINGS) | DIRTY_FLAG_DESCRIPTOR_SETS; + } + + if (writes > 0) + vkUpdateDescriptorSets(g_vulkan_context->GetDevice(), writes, dswrites.data(), 0, nullptr); + + if (m_dirty_flags & DIRTY_FLAG_DESCRIPTOR_SETS) + { + vkCmdBindDescriptorSets(g_command_buffer_mgr->GetCurrentCommandBuffer(), + VK_PIPELINE_BIND_POINT_GRAPHICS, m_pipeline->GetVkPipelineLayout(), 0, + NUM_UTILITY_DESCRIPTOR_SETS, m_utility_descriptor_sets.data(), 1, + &m_bindings.utility_ubo_offset); + m_dirty_flags &= ~(DIRTY_FLAG_DESCRIPTOR_SETS | DIRTY_FLAG_UTILITY_UBO_OFFSET); + } + else if (m_dirty_flags & DIRTY_FLAG_UTILITY_UBO_OFFSET) + { + 
vkCmdBindDescriptorSets(g_command_buffer_mgr->GetCurrentCommandBuffer(), + VK_PIPELINE_BIND_POINT_GRAPHICS, m_pipeline->GetVkPipelineLayout(), 0, + 1, m_utility_descriptor_sets.data(), 1, &m_bindings.utility_ubo_offset); + m_dirty_flags &= ~(DIRTY_FLAG_DESCRIPTOR_SETS | DIRTY_FLAG_UTILITY_UBO_OFFSET); + } + + return true; +} + +bool StateTracker::UpdateComputeDescriptorSet() +{ + // Max number of updates - UBO, Samplers, TexelBuffer, Image + std::array dswrites; + + // Allocate descriptor sets. + if (m_dirty_flags & DIRTY_FLAG_COMPUTE_BINDINGS) + { + m_compute_descriptor_set = g_command_buffer_mgr->AllocateDescriptorSet( + g_object_cache->GetDescriptorSetLayout(DESCRIPTOR_SET_LAYOUT_COMPUTE)); + dswrites[0] = {VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, + nullptr, + m_compute_descriptor_set, + 0, + 0, + 1, + VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC, + nullptr, + &m_bindings.utility_ubo_binding, + nullptr}; + dswrites[1] = {VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, + nullptr, + m_compute_descriptor_set, + 1, + 0, + NUM_COMPUTE_SHADER_SAMPLERS, + VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, + m_bindings.samplers.data(), + nullptr, + nullptr}; + dswrites[2] = {VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, + nullptr, + m_compute_descriptor_set, + 3, + 0, + NUM_COMPUTE_TEXEL_BUFFERS, + VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, + nullptr, + nullptr, + m_bindings.texel_buffers.data()}; + dswrites[3] = {VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, + nullptr, + m_compute_descriptor_set, + 5, + 0, + 1, + VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, + &m_bindings.image_texture, + nullptr, + nullptr}; + + vkUpdateDescriptorSets(g_vulkan_context->GetDevice(), static_cast(dswrites.size()), + dswrites.data(), 0, nullptr); + m_dirty_flags = + (m_dirty_flags & ~DIRTY_FLAG_COMPUTE_BINDINGS) | DIRTY_FLAG_COMPUTE_DESCRIPTOR_SET; + } + + if (m_dirty_flags & DIRTY_FLAG_COMPUTE_DESCRIPTOR_SET) + { + vkCmdBindDescriptorSets(g_command_buffer_mgr->GetCurrentCommandBuffer(), + VK_PIPELINE_BIND_POINT_COMPUTE, + 
g_object_cache->GetPipelineLayout(PIPELINE_LAYOUT_COMPUTE), 0, 1, + &m_compute_descriptor_set, 1, &m_bindings.utility_ubo_offset); + m_dirty_flags &= ~DIRTY_FLAG_COMPUTE_DESCRIPTOR_SET; + } - vkUpdateDescriptorSets(g_vulkan_context->GetDevice(), static_cast(dswrites.size()), - dswrites.data(), 0, nullptr); - m_num_active_descriptor_sets = NUM_UTILITY_DRAW_DESCRIPTOR_SETS; - m_num_dynamic_offsets = 1; - m_dirty_flags |= DIRTY_FLAG_DESCRIPTOR_SET_BINDING; return true; } diff --git a/Source/Core/VideoBackends/Vulkan/StateTracker.h b/Source/Core/VideoBackends/Vulkan/StateTracker.h index f4cbc8e180..10e804d69f 100644 --- a/Source/Core/VideoBackends/Vulkan/StateTracker.h +++ b/Source/Core/VideoBackends/Vulkan/StateTracker.h @@ -10,64 +10,53 @@ #include "Common/CommonTypes.h" #include "VideoBackends/Vulkan/Constants.h" -#include "VideoBackends/Vulkan/ShaderCache.h" -#include "VideoCommon/NativeVertexFormat.h" #include "VideoCommon/RenderBase.h" namespace Vulkan { +class VKFramebuffer; +class VKShader; class VKPipeline; +class VKTexture; class StreamBuffer; class VertexFormat; class StateTracker { public: - StateTracker() = default; - ~StateTracker() = default; + StateTracker(); + ~StateTracker(); static StateTracker* GetInstance(); static bool CreateInstance(); static void DestroyInstance(); - VkFramebuffer GetFramebuffer() const { return m_framebuffer; } + VKFramebuffer* GetFramebuffer() const { return m_framebuffer; } const VKPipeline* GetPipeline() const { return m_pipeline; } void SetVertexBuffer(VkBuffer buffer, VkDeviceSize offset); void SetIndexBuffer(VkBuffer buffer, VkDeviceSize offset, VkIndexType type); - - void SetRenderPass(VkRenderPass load_render_pass, VkRenderPass clear_render_pass); - void SetFramebuffer(VkFramebuffer framebuffer, const VkRect2D& render_area); + void SetFramebuffer(VKFramebuffer* framebuffer); void SetPipeline(const VKPipeline* pipeline); - - void UpdateVertexShaderConstants(); - void UpdateGeometryShaderConstants(); - void 
UpdatePixelShaderConstants(); - - // Updates constants from external data, e.g. utility draws. - void UpdateConstants(const void* data, u32 data_size); - - void SetTexture(size_t index, VkImageView view); - void SetSampler(size_t index, VkSampler sampler); - - void SetBBoxBuffer(VkBuffer buffer, VkDeviceSize offset, VkDeviceSize range); + void SetComputeShader(const VKShader* shader); + void SetGXUniformBuffer(u32 index, VkBuffer buffer, u32 offset, u32 size); + void SetUtilityUniformBuffer(VkBuffer buffer, u32 offset, u32 size); + void SetTexture(u32 index, VkImageView view); + void SetSampler(u32 index, VkSampler sampler); + void SetSSBO(VkBuffer buffer, VkDeviceSize offset, VkDeviceSize range); + void SetTexelBuffer(u32 index, VkBufferView view); + void SetImageTexture(VkImageView view); void UnbindTexture(VkImageView view); - // When executing a command buffer, we want to recreate the descriptor set, as it will - // now be in a different pool for the new command buffer. - void InvalidateDescriptorSets(); - - // Same with the uniforms, as the current storage will belong to the previous command buffer. - void InvalidateConstants(); - // Set dirty flags on everything to force re-bind at next draw time. - void SetPendingRebind(); + void InvalidateCachedState(); // Ends a render pass if we're currently in one. // When Bind() is next called, the pass will be restarted. // Calling this function is allowed even if a pass has not begun. bool InRenderPass() const { return m_current_render_pass != VK_NULL_HANDLE; } void BeginRenderPass(); + void BeginDiscardRenderPass(); void EndRenderPass(); // Ends the current render pass if it was a clear render pass. @@ -78,53 +67,48 @@ public: void SetViewport(const VkViewport& viewport); void SetScissor(const VkRect2D& scissor); - bool Bind(bool rebind_all = false); + // Binds all dirty state to the commmand buffer. + // If this returns false, you should not issue the draw. 
+ bool Bind(); - // CPU Access Tracking - // Call after a draw call is made. - void OnDraw(); - - // Call after CPU access is requested. - void OnCPUEFBAccess(); - - // Call after an EFB copy to RAM. If true, the current command buffer should be executed. - void OnEFBCopyToRAM(); - - // Call at the end of a frame. - void OnEndFrame(); - - // Prevent/allow background command buffer execution. - // Use when queries are active. - void SetBackgroundCommandBufferExecution(bool enabled); + // Binds all dirty compute state to the command buffer. + // If this returns false, you should not dispatch the shader. + bool BindCompute(); + // Returns true if the specified rectangle is inside the current render area (used for clears). bool IsWithinRenderArea(s32 x, s32 y, u32 width, u32 height) const; private: // Number of descriptor sets for game draws. enum { - NUM_GX_DRAW_DESCRIPTOR_SETS = DESCRIPTOR_SET_BIND_POINT_PIXEL_SHADER_SAMPLERS + 1, - NUM_GX_DRAW_DESCRIPTOR_SETS_SSBO = DESCRIPTOR_SET_BIND_POINT_STORAGE_OR_TEXEL_BUFFER + 1, - NUM_UTILITY_DRAW_DESCRIPTOR_SETS = 2 + NUM_GX_DESCRIPTOR_SETS = 3, + NUM_UTILITY_DESCRIPTOR_SETS = 2, + NUM_COMPUTE_DESCRIPTOR_SETS = 1 }; - enum DITRY_FLAG : u32 + enum DIRTY_FLAG : u32 { - DIRTY_FLAG_VS_UBO = (1 << 0), - DIRTY_FLAG_GS_UBO = (1 << 1), - DIRTY_FLAG_PS_UBO = (1 << 2), - DIRTY_FLAG_PS_SAMPLERS = (1 << 3), - DIRTY_FLAG_PS_SSBO = (1 << 4), - DIRTY_FLAG_DYNAMIC_OFFSETS = (1 << 5), - DIRTY_FLAG_VERTEX_BUFFER = (1 << 6), - DIRTY_FLAG_INDEX_BUFFER = (1 << 7), - DIRTY_FLAG_VIEWPORT = (1 << 8), - DIRTY_FLAG_SCISSOR = (1 << 9), - DIRTY_FLAG_PIPELINE = (1 << 10), - DIRTY_FLAG_DESCRIPTOR_SET_BINDING = (1 << 11), + DIRTY_FLAG_GX_UBOS = (1 << 0), + DIRTY_FLAG_GX_UBO_OFFSETS = (1 << 1), + DIRTY_FLAG_GX_SAMPLERS = (1 << 4), + DIRTY_FLAG_GX_SSBO = (1 << 5), + DIRTY_FLAG_UTILITY_UBO = (1 << 2), + DIRTY_FLAG_UTILITY_UBO_OFFSET = (1 << 3), + DIRTY_FLAG_UTILITY_BINDINGS = (1 << 6), + DIRTY_FLAG_COMPUTE_BINDINGS = (1 << 7), + DIRTY_FLAG_VERTEX_BUFFER = 
(1 << 8), + DIRTY_FLAG_INDEX_BUFFER = (1 << 9), + DIRTY_FLAG_VIEWPORT = (1 << 10), + DIRTY_FLAG_SCISSOR = (1 << 11), + DIRTY_FLAG_PIPELINE = (1 << 12), + DIRTY_FLAG_COMPUTE_SHADER = (1 << 13), + DIRTY_FLAG_DESCRIPTOR_SETS = (1 << 14), + DIRTY_FLAG_COMPUTE_DESCRIPTOR_SET = (1 << 15), - DIRTY_FLAG_ALL_DESCRIPTOR_SETS = DIRTY_FLAG_VS_UBO | DIRTY_FLAG_GS_UBO | DIRTY_FLAG_PS_UBO | - DIRTY_FLAG_PS_SAMPLERS | DIRTY_FLAG_PS_SSBO + DIRTY_FLAG_ALL_DESCRIPTORS = DIRTY_FLAG_GX_UBOS | DIRTY_FLAG_UTILITY_UBO | + DIRTY_FLAG_GX_SAMPLERS | DIRTY_FLAG_GX_SSBO | + DIRTY_FLAG_UTILITY_BINDINGS | DIRTY_FLAG_COMPUTE_BINDINGS }; bool Initialize(); @@ -136,12 +120,7 @@ private: bool UpdateDescriptorSet(); bool UpdateGXDescriptorSet(); bool UpdateUtilityDescriptorSet(); - - // Allocates storage in the uniform buffer of the specified size. If this storage cannot be - // allocated immediately, the current command buffer will be submitted and all stage's - // constants will be re-uploaded. false will be returned in this case, otherwise true. - bool ReserveConstantStorage(); - void UploadAllConstants(); + bool UpdateComputeDescriptorSet(); // Which bindings/state has to be updated before the next draw. 
u32 m_dirty_flags = 0; @@ -155,42 +134,33 @@ private: // pipeline state const VKPipeline* m_pipeline = nullptr; + const VKShader* m_compute_shader = nullptr; // shader bindings - std::array m_descriptor_sets = {}; struct { - std::array uniform_buffer_bindings = - {}; - std::array uniform_buffer_offsets = {}; - - std::array ps_samplers = {}; - - VkDescriptorBufferInfo ps_ssbo = {}; - } m_bindings; - size_t m_uniform_buffer_reserve_size = 0; - u32 m_num_active_descriptor_sets = 0; - u32 m_num_dynamic_offsets = 0; + std::array gx_ubo_bindings; + std::array gx_ubo_offsets; + VkDescriptorBufferInfo utility_ubo_binding; + u32 utility_ubo_offset; + std::array samplers; + std::array texel_buffers; + VkDescriptorBufferInfo ssbo; + VkDescriptorImageInfo image_texture; + } m_bindings = {}; + std::array m_gx_descriptor_sets = {}; + std::array m_utility_descriptor_sets = {}; + VkDescriptorSet m_compute_descriptor_set = VK_NULL_HANDLE; // rasterization VkViewport m_viewport = {0.0f, 0.0f, 1.0f, 1.0f, 0.0f, 1.0f}; VkRect2D m_scissor = {{0, 0}, {1, 1}}; // uniform buffers - std::unique_ptr m_uniform_stream_buffer; + std::unique_ptr m_dummy_texture; - VkFramebuffer m_framebuffer = VK_NULL_HANDLE; - VkRenderPass m_load_render_pass = VK_NULL_HANDLE; - VkRenderPass m_clear_render_pass = VK_NULL_HANDLE; + VKFramebuffer* m_framebuffer = nullptr; VkRenderPass m_current_render_pass = VK_NULL_HANDLE; - VkRect2D m_framebuffer_size = {}; VkRect2D m_framebuffer_render_area = {}; - - // CPU access tracking - u32 m_draw_counter = 0; - u32 m_last_efb_copy_draw_counter = 0; - std::vector m_cpu_accesses_this_frame; - std::vector m_scheduled_command_buffer_kicks; - bool m_allow_background_execution = true; }; } // namespace Vulkan diff --git a/Source/Core/VideoBackends/Vulkan/StreamBuffer.cpp b/Source/Core/VideoBackends/Vulkan/StreamBuffer.cpp index cb8e660af2..ea610f09cf 100644 --- a/Source/Core/VideoBackends/Vulkan/StreamBuffer.cpp +++ b/Source/Core/VideoBackends/Vulkan/StreamBuffer.cpp @@ -8,29 
+8,24 @@ #include #include +#include "Common/Align.h" #include "Common/Assert.h" #include "Common/MsgHandler.h" #include "VideoBackends/Vulkan/CommandBufferManager.h" -#include "VideoBackends/Vulkan/Util.h" #include "VideoBackends/Vulkan/VulkanContext.h" namespace Vulkan { -StreamBuffer::StreamBuffer(VkBufferUsageFlags usage, size_t max_size) - : m_usage(usage), m_maximum_size(max_size) +StreamBuffer::StreamBuffer(VkBufferUsageFlags usage, u32 size) : m_usage(usage), m_size(size) { - // Add a callback that fires on fence point creation and signal - g_command_buffer_mgr->AddFencePointCallback( - this, - std::bind(&StreamBuffer::OnCommandBufferQueued, this, std::placeholders::_1, - std::placeholders::_2), - std::bind(&StreamBuffer::OnCommandBufferExecuted, this, std::placeholders::_1)); + g_command_buffer_mgr->AddFenceSignaledCallback( + this, std::bind(&StreamBuffer::OnFenceSignaled, this, std::placeholders::_1)); } StreamBuffer::~StreamBuffer() { - g_command_buffer_mgr->RemoveFencePointCallback(this); + g_command_buffer_mgr->RemoveFenceSignaledCallback(this); if (m_host_pointer) vkUnmapMemory(g_vulkan_context->GetDevice(), m_memory); @@ -41,24 +36,23 @@ StreamBuffer::~StreamBuffer() g_command_buffer_mgr->DeferDeviceMemoryDestruction(m_memory); } -std::unique_ptr StreamBuffer::Create(VkBufferUsageFlags usage, size_t initial_size, - size_t max_size) +std::unique_ptr StreamBuffer::Create(VkBufferUsageFlags usage, u32 size) { - std::unique_ptr buffer = std::make_unique(usage, max_size); - if (!buffer->ResizeBuffer(initial_size)) + std::unique_ptr buffer = std::make_unique(usage, size); + if (!buffer->AllocateBuffer()) return nullptr; return buffer; } -bool StreamBuffer::ResizeBuffer(size_t size) +bool StreamBuffer::AllocateBuffer() { // Create the buffer descriptor VkBufferCreateInfo buffer_create_info = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, // VkStructureType sType nullptr, // const void* pNext 0, // VkBufferCreateFlags flags - static_cast(size), // VkDeviceSize 
size + static_cast(m_size), // VkDeviceSize size m_usage, // VkBufferUsageFlags usage VK_SHARING_MODE_EXCLUSIVE, // VkSharingMode sharingMode 0, // uint32_t queueFamilyIndexCount @@ -110,7 +104,7 @@ bool StreamBuffer::ResizeBuffer(size_t size) // Map this buffer into user-space void* mapped_ptr = nullptr; - res = vkMapMemory(g_vulkan_context->GetDevice(), memory, 0, size, 0, &mapped_ptr); + res = vkMapMemory(g_vulkan_context->GetDevice(), memory, 0, m_size, 0, &mapped_ptr); if (res != VK_SUCCESS) { LOG_VULKAN_ERROR(res, "vkMapMemory failed: "); @@ -133,36 +127,34 @@ bool StreamBuffer::ResizeBuffer(size_t size) m_buffer = buffer; m_memory = memory; m_host_pointer = reinterpret_cast(mapped_ptr); - m_current_size = size; m_current_offset = 0; m_current_gpu_position = 0; m_tracked_fences.clear(); return true; } -bool StreamBuffer::ReserveMemory(size_t num_bytes, size_t alignment, bool allow_reuse /* = true */, - bool allow_growth /* = true */, - bool reallocate_if_full /* = false */) +bool StreamBuffer::ReserveMemory(u32 num_bytes, u32 alignment) { - size_t required_bytes = num_bytes + alignment; + const u32 required_bytes = num_bytes + alignment; // Check for sane allocations - if (required_bytes > m_maximum_size) + if (required_bytes > m_size) { PanicAlert("Attempting to allocate %u bytes from a %u byte stream buffer", - static_cast(num_bytes), static_cast(m_maximum_size)); + static_cast(num_bytes), static_cast(m_size)); return false; } // Is the GPU behind or up to date with our current offset? + UpdateCurrentFencePosition(); if (m_current_offset >= m_current_gpu_position) { - size_t remaining_bytes = m_current_size - m_current_offset; + const u32 remaining_bytes = m_size - m_current_offset; if (required_bytes <= remaining_bytes) { // Place at the current position, after the GPU position. 
- m_current_offset = Util::AlignBufferOffset(m_current_offset, alignment); + m_current_offset = Common::AlignUp(m_current_offset, alignment); m_last_allocation_size = num_bytes; return true; } @@ -171,7 +163,7 @@ bool StreamBuffer::ReserveMemory(size_t num_bytes, size_t alignment, bool allow_ // We use < here because we don't want to have the case of m_current_offset == // m_current_gpu_position. That would mean the code above would assume the // GPU has caught up to us, which it hasn't. - if (allow_reuse && required_bytes < m_current_gpu_position) + if (required_bytes < m_current_gpu_position) { // Reset offset to zero, since we're allocating behind the gpu now m_current_offset = 0; @@ -184,56 +176,35 @@ bool StreamBuffer::ReserveMemory(size_t num_bytes, size_t alignment, bool allow_ if (m_current_offset < m_current_gpu_position) { // We have from m_current_offset..m_current_gpu_position space to use. - size_t remaining_bytes = m_current_gpu_position - m_current_offset; + const u32 remaining_bytes = m_current_gpu_position - m_current_offset; if (required_bytes < remaining_bytes) { // Place at the current position, since this is still behind the GPU. - m_current_offset = Util::AlignBufferOffset(m_current_offset, alignment); + m_current_offset = Common::AlignUp(m_current_offset, alignment); m_last_allocation_size = num_bytes; return true; } } - // Try to grow the buffer up to the maximum size before waiting. - // Double each time until the maximum size is reached. - if (allow_growth && m_current_size < m_maximum_size) - { - size_t new_size = std::min(std::max(num_bytes, m_current_size * 2), m_maximum_size); - if (ResizeBuffer(new_size)) - { - // Allocating from the start of the buffer. - m_last_allocation_size = new_size; - return true; - } - } - // Can we find a fence to wait on that will give us enough memory? 
- if (allow_reuse && WaitForClearSpace(required_bytes)) + if (WaitForClearSpace(required_bytes)) { ASSERT(m_current_offset == m_current_gpu_position || (m_current_offset + required_bytes) < m_current_gpu_position); - m_current_offset = Util::AlignBufferOffset(m_current_offset, alignment); + m_current_offset = Common::AlignUp(m_current_offset, alignment); m_last_allocation_size = num_bytes; return true; } - // If we are not allowed to execute in our current state (e.g. in the middle of a render pass), - // as a last resort, reallocate the buffer. This will incur a performance hit and is not - // encouraged. - if (reallocate_if_full && ResizeBuffer(m_current_size)) - { - m_last_allocation_size = num_bytes; - return true; - } - - // We tried everything we could, and still couldn't get anything. If we're not at a point - // where the state is known and can be resumed, this is probably a fatal error. + // We tried everything we could, and still couldn't get anything. This means that too much space + // in the buffer is being used by the command buffer currently being recorded. Therefore, the + // only option is to execute it, and wait until it's done. return false; } -void StreamBuffer::CommitMemory(size_t final_num_bytes) +void StreamBuffer::CommitMemory(u32 final_num_bytes) { - ASSERT((m_current_offset + final_num_bytes) <= m_current_size); + ASSERT((m_current_offset + final_num_bytes) <= m_size); ASSERT(final_num_bytes <= m_last_allocation_size); // For non-coherent mappings, flush the memory range @@ -247,23 +218,25 @@ void StreamBuffer::CommitMemory(size_t final_num_bytes) m_current_offset += final_num_bytes; } -void StreamBuffer::OnCommandBufferQueued(VkCommandBuffer command_buffer, VkFence fence) +void StreamBuffer::UpdateCurrentFencePosition() { // Don't create a tracking entry if the GPU is caught up with the buffer. if (m_current_offset == m_current_gpu_position) return; // Has the offset changed since the last fence? 
- if (!m_tracked_fences.empty() && m_tracked_fences.back().second == m_current_offset) + const VkFence fence = g_command_buffer_mgr->GetCurrentCommandBufferFence(); + if (!m_tracked_fences.empty() && m_tracked_fences.back().first == fence) { - // No need to track the new fence, the old one is sufficient. + // Still haven't executed a command buffer, so just update the offset. + m_tracked_fences.back().second = m_current_offset; return; } m_tracked_fences.emplace_back(fence, m_current_offset); } -void StreamBuffer::OnCommandBufferExecuted(VkFence fence) +void StreamBuffer::OnFenceSignaled(VkFence fence) { // Locate the entry for this fence (if any, we may have been forced to wait already) auto iter = std::find_if(m_tracked_fences.begin(), m_tracked_fences.end(), @@ -279,10 +252,9 @@ void StreamBuffer::OnCommandBufferExecuted(VkFence fence) } } -bool StreamBuffer::WaitForClearSpace(size_t num_bytes) +bool StreamBuffer::WaitForClearSpace(u32 num_bytes) { - size_t new_offset = 0; - size_t new_gpu_position = 0; + u32 new_offset = 0; auto iter = m_tracked_fences.begin(); for (; iter != m_tracked_fences.end(); iter++) { @@ -290,12 +262,11 @@ bool StreamBuffer::WaitForClearSpace(size_t num_bytes) // This is the "last resort" case, where a command buffer execution has been forced // after no additional data has been written to it, so we can assume that after the // fence has been signaled the entire buffer is now consumed. - size_t gpu_position = iter->second; + u32 gpu_position = iter->second; if (m_current_offset == gpu_position) { // Start at the start of the buffer again. new_offset = 0; - new_gpu_position = 0; break; } @@ -308,7 +279,6 @@ bool StreamBuffer::WaitForClearSpace(size_t num_bytes) if (gpu_position > num_bytes) { new_offset = 0; - new_gpu_position = gpu_position; break; } } @@ -317,31 +287,27 @@ bool StreamBuffer::WaitForClearSpace(size_t num_bytes) // We're currently allocating behind the GPU. 
This would give us between the current // offset and the GPU position worth of space to work with. Again, > because we can't // align the GPU position with the buffer offset. - size_t available_space_inbetween = gpu_position - m_current_offset; + u32 available_space_inbetween = gpu_position - m_current_offset; if (available_space_inbetween > num_bytes) { // Leave the offset as-is, but update the GPU position. new_offset = m_current_offset; - new_gpu_position = gpu_position; break; } } } // Did any fences satisfy this condition? - if (iter == m_tracked_fences.end()) + // Has the command buffer been executed yet? If not, the caller should execute it. + if (iter == m_tracked_fences.end() || + iter->first == g_command_buffer_mgr->GetCurrentCommandBufferFence()) + { return false; + } - // Wait until this fence is signaled. - VkResult res = - vkWaitForFences(g_vulkan_context->GetDevice(), 1, &iter->first, VK_TRUE, UINT64_MAX); - if (res != VK_SUCCESS) - LOG_VULKAN_ERROR(res, "vkWaitForFences failed: "); - - // Update GPU position, and remove all fences up to (and including) this fence. + // Wait until this fence is signaled. This will fire the callback, updating the GPU position. 
+ g_command_buffer_mgr->WaitForFence(iter->first); m_current_offset = new_offset; - m_current_gpu_position = new_gpu_position; - m_tracked_fences.erase(m_tracked_fences.begin(), ++iter); return true; } diff --git a/Source/Core/VideoBackends/Vulkan/StreamBuffer.h b/Source/Core/VideoBackends/Vulkan/StreamBuffer.h index ce1b02164f..b52ce6cd35 100644 --- a/Source/Core/VideoBackends/Vulkan/StreamBuffer.h +++ b/Source/Core/VideoBackends/Vulkan/StreamBuffer.h @@ -17,43 +17,40 @@ namespace Vulkan class StreamBuffer { public: - StreamBuffer(VkBufferUsageFlags usage, size_t max_size); + StreamBuffer(VkBufferUsageFlags usage, u32 size); ~StreamBuffer(); VkBuffer GetBuffer() const { return m_buffer; } VkDeviceMemory GetDeviceMemory() const { return m_memory; } u8* GetHostPointer() const { return m_host_pointer; } u8* GetCurrentHostPointer() const { return m_host_pointer + m_current_offset; } - size_t GetCurrentSize() const { return m_current_size; } - size_t GetCurrentOffset() const { return m_current_offset; } - bool ReserveMemory(size_t num_bytes, size_t alignment, bool allow_reuse = true, - bool allow_growth = true, bool reallocate_if_full = false); - void CommitMemory(size_t final_num_bytes); + u32 GetCurrentSize() const { return m_size; } + u32 GetCurrentOffset() const { return m_current_offset; } + bool ReserveMemory(u32 num_bytes, u32 alignment); + void CommitMemory(u32 final_num_bytes); - static std::unique_ptr Create(VkBufferUsageFlags usage, size_t initial_size, - size_t max_size); + static std::unique_ptr Create(VkBufferUsageFlags usage, u32 size); private: - bool ResizeBuffer(size_t size); - void OnCommandBufferQueued(VkCommandBuffer command_buffer, VkFence fence); - void OnCommandBufferExecuted(VkFence fence); + bool AllocateBuffer(); + void UpdateCurrentFencePosition(); + void OnFenceSignaled(VkFence fence); // Waits for as many fences as needed to allocate num_bytes bytes from the buffer. 
- bool WaitForClearSpace(size_t num_bytes); + bool WaitForClearSpace(u32 num_bytes); VkBufferUsageFlags m_usage; - size_t m_current_size = 0; - size_t m_maximum_size; - size_t m_current_offset = 0; - size_t m_current_gpu_position = 0; - size_t m_last_allocation_size = 0; + u32 m_size; + u32 m_current_offset = 0; + u32 m_current_gpu_position = 0; + u32 m_last_allocation_size = 0; VkBuffer m_buffer = VK_NULL_HANDLE; VkDeviceMemory m_memory = VK_NULL_HANDLE; u8* m_host_pointer = nullptr; // List of fences and the corresponding positions in the buffer - std::deque> m_tracked_fences; + std::deque> m_tracked_fences; bool m_coherent_mapping = false; }; diff --git a/Source/Core/VideoBackends/Vulkan/SwapChain.cpp b/Source/Core/VideoBackends/Vulkan/SwapChain.cpp index 94cd0ade67..ee53005b93 100644 --- a/Source/Core/VideoBackends/Vulkan/SwapChain.cpp +++ b/Source/Core/VideoBackends/Vulkan/SwapChain.cpp @@ -13,6 +13,8 @@ #include "Common/MsgHandler.h" #include "VideoBackends/Vulkan/CommandBufferManager.h" +#include "VideoBackends/Vulkan/ObjectCache.h" +#include "VideoBackends/Vulkan/VKTexture.h" #include "VideoBackends/Vulkan/VulkanContext.h" #include "VideoCommon/RenderBase.h" @@ -32,7 +34,6 @@ SwapChain::~SwapChain() DestroySwapChainImages(); DestroySwapChain(); DestroySurface(); - DestroySemaphores(); } VkSurfaceKHR SwapChain::CreateVulkanSurface(VkInstance instance, const WindowSystemInfo& wsi) @@ -130,53 +131,12 @@ std::unique_ptr SwapChain::Create(const WindowSystemInfo& wsi, VkSurf bool vsync) { std::unique_ptr swap_chain = std::make_unique(wsi, surface, vsync); - if (!swap_chain->CreateSemaphores() || !swap_chain->CreateSwapChain() || - !swap_chain->SetupSwapChainImages()) - { + if (!swap_chain->CreateSwapChain() || !swap_chain->SetupSwapChainImages()) return nullptr; - } return swap_chain; } -bool SwapChain::CreateSemaphores() -{ - // Create two semaphores, one that is triggered when the swapchain buffer is ready, another after - // submit and before present - 
VkSemaphoreCreateInfo semaphore_info = { - VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO, // VkStructureType sType - nullptr, // const void* pNext - 0 // VkSemaphoreCreateFlags flags - }; - - VkResult res; - if ((res = vkCreateSemaphore(g_vulkan_context->GetDevice(), &semaphore_info, nullptr, - &m_image_available_semaphore)) != VK_SUCCESS || - (res = vkCreateSemaphore(g_vulkan_context->GetDevice(), &semaphore_info, nullptr, - &m_rendering_finished_semaphore)) != VK_SUCCESS) - { - LOG_VULKAN_ERROR(res, "vkCreateSemaphore failed: "); - return false; - } - - return true; -} - -void SwapChain::DestroySemaphores() -{ - if (m_image_available_semaphore) - { - vkDestroySemaphore(g_vulkan_context->GetDevice(), m_image_available_semaphore, nullptr); - m_image_available_semaphore = VK_NULL_HANDLE; - } - - if (m_rendering_finished_semaphore) - { - vkDestroySemaphore(g_vulkan_context->GetDevice(), m_rendering_finished_semaphore, nullptr); - m_rendering_finished_semaphore = VK_NULL_HANDLE; - } -} - bool SwapChain::SelectSurfaceFormat() { u32 format_count; @@ -207,7 +167,7 @@ bool SwapChain::SelectSurfaceFormat() // Some drivers seem to return a SRGB format here (Intel Mesa). // This results in gamma correction when presenting to the screen, which we don't want. // Use a linear format instead, if this is the case. 
- VkFormat format = Util::GetLinearFormat(surface_format.format); + VkFormat format = VKTexture::GetLinearFormat(surface_format.format); if (format == VK_FORMAT_R8G8B8A8_UNORM) m_texture_format = AbstractTextureFormat::RGBA8; else if (format == VK_FORMAT_B8G8R8A8_UNORM) @@ -399,11 +359,13 @@ bool SwapChain::SetupSwapChainImages() images.data()); ASSERT(res == VK_SUCCESS); - m_render_pass = g_object_cache->GetRenderPass(m_surface_format.format, VK_FORMAT_UNDEFINED, 1, - VK_ATTACHMENT_LOAD_OP_LOAD); - m_clear_render_pass = g_object_cache->GetRenderPass(m_surface_format.format, VK_FORMAT_UNDEFINED, - 1, VK_ATTACHMENT_LOAD_OP_CLEAR); - if (m_render_pass == VK_NULL_HANDLE || m_clear_render_pass == VK_NULL_HANDLE) + const TextureConfig texture_config(TextureConfig( + m_width, m_height, 1, m_layers, 1, m_texture_format, AbstractTextureFlag_RenderTarget)); + const VkRenderPass load_render_pass = g_object_cache->GetRenderPass( + m_surface_format.format, VK_FORMAT_UNDEFINED, 1, VK_ATTACHMENT_LOAD_OP_LOAD); + const VkRenderPass clear_render_pass = g_object_cache->GetRenderPass( + m_surface_format.format, VK_FORMAT_UNDEFINED, 1, VK_ATTACHMENT_LOAD_OP_CLEAR); + if (load_render_pass == VK_NULL_HANDLE || clear_render_pass == VK_NULL_HANDLE) { PanicAlert("Failed to get swap chain render passes."); return false; @@ -416,26 +378,17 @@ bool SwapChain::SetupSwapChainImages() image.image = images[i]; // Create texture object, which creates a view of the backbuffer - image.texture = Texture2D::CreateFromExistingImage( - m_width, m_height, 1, 1, m_surface_format.format, VK_SAMPLE_COUNT_1_BIT, - VK_IMAGE_VIEW_TYPE_2D, image.image); + image.texture = + VKTexture::CreateAdopted(texture_config, image.image, + m_layers > 1 ? 
VK_IMAGE_VIEW_TYPE_2D_ARRAY : VK_IMAGE_VIEW_TYPE_2D, + VK_IMAGE_LAYOUT_UNDEFINED); + if (!image.texture) + return false; - VkImageView view = image.texture->GetView(); - VkFramebufferCreateInfo framebuffer_info = {VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO, - nullptr, - 0, - m_render_pass, - 1, - &view, - m_width, - m_height, - m_layers}; - - res = vkCreateFramebuffer(g_vulkan_context->GetDevice(), &framebuffer_info, nullptr, - &image.framebuffer); - if (res != VK_SUCCESS) + image.framebuffer = VKFramebuffer::Create(image.texture.get(), nullptr); + if (!image.framebuffer) { - LOG_VULKAN_ERROR(res, "vkCreateFramebuffer failed: "); + image.texture.reset(); return false; } @@ -447,10 +400,11 @@ bool SwapChain::SetupSwapChainImages() void SwapChain::DestroySwapChainImages() { - for (const auto& it : m_swap_chain_images) + for (auto& it : m_swap_chain_images) { // Images themselves are cleaned up by the swap chain object - vkDestroyFramebuffer(g_vulkan_context->GetDevice(), it.framebuffer, nullptr); + it.framebuffer.reset(); + it.texture.reset(); } m_swap_chain_images.clear(); } @@ -467,8 +421,8 @@ void SwapChain::DestroySwapChain() VkResult SwapChain::AcquireNextImage() { VkResult res = vkAcquireNextImageKHR(g_vulkan_context->GetDevice(), m_swap_chain, UINT64_MAX, - m_image_available_semaphore, VK_NULL_HANDLE, - &m_current_swap_chain_image_index); + g_command_buffer_mgr->GetCurrentCommandBufferSemaphore(), + VK_NULL_HANDLE, &m_current_swap_chain_image_index); if (res != VK_SUCCESS && res != VK_ERROR_OUT_OF_DATE_KHR && res != VK_SUBOPTIMAL_KHR) LOG_VULKAN_ERROR(res, "vkAcquireNextImageKHR failed: "); diff --git a/Source/Core/VideoBackends/Vulkan/SwapChain.h b/Source/Core/VideoBackends/Vulkan/SwapChain.h index 0687249acd..4359f4663c 100644 --- a/Source/Core/VideoBackends/Vulkan/SwapChain.h +++ b/Source/Core/VideoBackends/Vulkan/SwapChain.h @@ -10,13 +10,14 @@ #include "Common/CommonTypes.h" #include "Common/WindowSystemInfo.h" #include "VideoBackends/Vulkan/Constants.h" 
-#include "VideoBackends/Vulkan/Texture2D.h" #include "VideoCommon/TextureConfig.h" namespace Vulkan { class CommandBufferManager; class ObjectCache; +class VKTexture; +class VKFramebuffer; class SwapChain { @@ -44,19 +45,14 @@ public: { return m_swap_chain_images[m_current_swap_chain_image_index].image; } - Texture2D* GetCurrentTexture() const + VKTexture* GetCurrentTexture() const { return m_swap_chain_images[m_current_swap_chain_image_index].texture.get(); } - VkFramebuffer GetCurrentFramebuffer() const + VKFramebuffer* GetCurrentFramebuffer() const { - return m_swap_chain_images[m_current_swap_chain_image_index].framebuffer; + return m_swap_chain_images[m_current_swap_chain_image_index].framebuffer.get(); } - VkRenderPass GetLoadRenderPass() const { return m_render_pass; } - VkRenderPass GetClearRenderPass() const { return m_clear_render_pass; } - VkSemaphore GetImageAvailableSemaphore() const { return m_image_available_semaphore; } - VkSemaphore GetRenderingFinishedSemaphore() const { return m_rendering_finished_semaphore; } - VkResult AcquireNextImage(); bool RecreateSurface(void* native_handle); @@ -67,9 +63,6 @@ public: bool SetVSync(bool enabled); private: - bool CreateSemaphores(); - void DestroySemaphores(); - bool SelectSurfaceFormat(); bool SelectPresentMode(); @@ -84,8 +77,8 @@ private: struct SwapChainImage { VkImage image; - std::unique_ptr texture; - VkFramebuffer framebuffer; + std::unique_ptr texture; + std::unique_ptr framebuffer; }; WindowSystemInfo m_wsi; @@ -99,12 +92,6 @@ private: std::vector m_swap_chain_images; u32 m_current_swap_chain_image_index = 0; - VkSemaphore m_image_available_semaphore = VK_NULL_HANDLE; - VkSemaphore m_rendering_finished_semaphore = VK_NULL_HANDLE; - - VkRenderPass m_render_pass = VK_NULL_HANDLE; - VkRenderPass m_clear_render_pass = VK_NULL_HANDLE; - u32 m_width = 0; u32 m_height = 0; u32 m_layers = 0; diff --git a/Source/Core/VideoBackends/Vulkan/Texture2D.cpp b/Source/Core/VideoBackends/Vulkan/Texture2D.cpp 
deleted file mode 100644 index 96bfba6538..0000000000 --- a/Source/Core/VideoBackends/Vulkan/Texture2D.cpp +++ /dev/null @@ -1,404 +0,0 @@ -// Copyright 2016 Dolphin Emulator Project -// Licensed under GPLv2+ -// Refer to the license.txt file included. - -#include - -#include "Common/Assert.h" -#include "VideoBackends/Vulkan/CommandBufferManager.h" -#include "VideoBackends/Vulkan/Texture2D.h" -#include "VideoBackends/Vulkan/VulkanContext.h" - -namespace Vulkan -{ -Texture2D::Texture2D(u32 width, u32 height, u32 levels, u32 layers, VkFormat format, - VkSampleCountFlagBits samples, VkImageViewType view_type, VkImage image, - VkDeviceMemory device_memory, VkImageView view) - : m_width(width), m_height(height), m_levels(levels), m_layers(layers), m_format(format), - m_samples(samples), m_view_type(view_type), m_image(image), m_device_memory(device_memory), - m_view(view) -{ -} - -Texture2D::~Texture2D() -{ - g_command_buffer_mgr->DeferImageViewDestruction(m_view); - - // If we don't have device memory allocated, the image is not owned by us (e.g. 
swapchain) - if (m_device_memory != VK_NULL_HANDLE) - { - g_command_buffer_mgr->DeferImageDestruction(m_image); - g_command_buffer_mgr->DeferDeviceMemoryDestruction(m_device_memory); - } -} - -std::unique_ptr Texture2D::Create(u32 width, u32 height, u32 levels, u32 layers, - VkFormat format, VkSampleCountFlagBits samples, - VkImageViewType view_type, VkImageTiling tiling, - VkImageUsageFlags usage) -{ - VkImageCreateInfo image_info = {VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, - nullptr, - 0, - VK_IMAGE_TYPE_2D, - format, - {width, height, 1}, - levels, - layers, - samples, - tiling, - usage, - VK_SHARING_MODE_EXCLUSIVE, - 0, - nullptr, - VK_IMAGE_LAYOUT_UNDEFINED}; - - VkImage image = VK_NULL_HANDLE; - VkResult res = vkCreateImage(g_vulkan_context->GetDevice(), &image_info, nullptr, &image); - if (res != VK_SUCCESS) - { - LOG_VULKAN_ERROR(res, "vkCreateImage failed: "); - return nullptr; - } - - // Allocate memory to back this texture, we want device local memory in this case - VkMemoryRequirements memory_requirements; - vkGetImageMemoryRequirements(g_vulkan_context->GetDevice(), image, &memory_requirements); - - VkMemoryAllocateInfo memory_info = { - VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, nullptr, memory_requirements.size, - g_vulkan_context->GetMemoryType(memory_requirements.memoryTypeBits, - VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT)}; - - VkDeviceMemory device_memory; - res = vkAllocateMemory(g_vulkan_context->GetDevice(), &memory_info, nullptr, &device_memory); - if (res != VK_SUCCESS) - { - LOG_VULKAN_ERROR(res, "vkAllocateMemory failed: "); - vkDestroyImage(g_vulkan_context->GetDevice(), image, nullptr); - return nullptr; - } - - res = vkBindImageMemory(g_vulkan_context->GetDevice(), image, device_memory, 0); - if (res != VK_SUCCESS) - { - LOG_VULKAN_ERROR(res, "vkBindImageMemory failed: "); - vkDestroyImage(g_vulkan_context->GetDevice(), image, nullptr); - vkFreeMemory(g_vulkan_context->GetDevice(), device_memory, nullptr); - return nullptr; - } - - 
VkImageViewCreateInfo view_info = { - VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, - nullptr, - 0, - image, - view_type, - format, - {VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY, - VK_COMPONENT_SWIZZLE_IDENTITY}, - {Util::IsDepthFormat(format) ? static_cast(VK_IMAGE_ASPECT_DEPTH_BIT) : - static_cast(VK_IMAGE_ASPECT_COLOR_BIT), - 0, levels, 0, layers}}; - - VkImageView view = VK_NULL_HANDLE; - res = vkCreateImageView(g_vulkan_context->GetDevice(), &view_info, nullptr, &view); - if (res != VK_SUCCESS) - { - LOG_VULKAN_ERROR(res, "vkCreateImageView failed: "); - vkDestroyImage(g_vulkan_context->GetDevice(), image, nullptr); - vkFreeMemory(g_vulkan_context->GetDevice(), device_memory, nullptr); - return nullptr; - } - - return std::make_unique(width, height, levels, layers, format, samples, view_type, - image, device_memory, view); -} - -std::unique_ptr Texture2D::CreateFromExistingImage(u32 width, u32 height, u32 levels, - u32 layers, VkFormat format, - VkSampleCountFlagBits samples, - VkImageViewType view_type, - VkImage existing_image) -{ - // Only need to create the image view, this is mainly for swap chains. - VkImageViewCreateInfo view_info = { - VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, - nullptr, - 0, - existing_image, - view_type, - format, - {VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY, - VK_COMPONENT_SWIZZLE_IDENTITY}, - {Util::IsDepthFormat(format) ? static_cast(VK_IMAGE_ASPECT_DEPTH_BIT) : - static_cast(VK_IMAGE_ASPECT_COLOR_BIT), - 0, levels, 0, layers}}; - - // Memory is managed by the owner of the image. 
- VkDeviceMemory memory = VK_NULL_HANDLE; - VkImageView view = VK_NULL_HANDLE; - VkResult res = vkCreateImageView(g_vulkan_context->GetDevice(), &view_info, nullptr, &view); - if (res != VK_SUCCESS) - { - LOG_VULKAN_ERROR(res, "vkCreateImageView failed: "); - return nullptr; - } - - return std::make_unique(width, height, levels, layers, format, samples, view_type, - existing_image, memory, view); -} - -void Texture2D::OverrideImageLayout(VkImageLayout new_layout) -{ - m_layout = new_layout; -} - -void Texture2D::TransitionToLayout(VkCommandBuffer command_buffer, VkImageLayout new_layout) -{ - if (m_layout == new_layout) - return; - - VkImageMemoryBarrier barrier = { - VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, // VkStructureType sType - nullptr, // const void* pNext - 0, // VkAccessFlags srcAccessMask - 0, // VkAccessFlags dstAccessMask - m_layout, // VkImageLayout oldLayout - new_layout, // VkImageLayout newLayout - VK_QUEUE_FAMILY_IGNORED, // uint32_t srcQueueFamilyIndex - VK_QUEUE_FAMILY_IGNORED, // uint32_t dstQueueFamilyIndex - m_image, // VkImage image - {Util::GetImageAspectForFormat(m_format), 0, m_levels, 0, - m_layers} // VkImageSubresourceRange subresourceRange - }; - - // srcStageMask -> Stages that must complete before the barrier - // dstStageMask -> Stages that must wait for after the barrier before beginning - VkPipelineStageFlags srcStageMask, dstStageMask; - switch (m_layout) - { - case VK_IMAGE_LAYOUT_UNDEFINED: - // Layout undefined therefore contents undefined, and we don't care what happens to it. - barrier.srcAccessMask = 0; - srcStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT; - break; - - case VK_IMAGE_LAYOUT_PREINITIALIZED: - // Image has been pre-initialized by the host, so ensure all writes have completed. 
- barrier.srcAccessMask = VK_ACCESS_HOST_WRITE_BIT; - srcStageMask = VK_PIPELINE_STAGE_HOST_BIT; - break; - - case VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL: - // Image was being used as a color attachment, so ensure all writes have completed. - barrier.srcAccessMask = - VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; - srcStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; - break; - - case VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL: - // Image was being used as a depthstencil attachment, so ensure all writes have completed. - barrier.srcAccessMask = - VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT; - srcStageMask = - VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT; - break; - - case VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL: - // Image was being used as a shader resource, make sure all reads have finished. - barrier.srcAccessMask = VK_ACCESS_SHADER_READ_BIT; - srcStageMask = VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT; - break; - - case VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL: - // Image was being used as a copy source, ensure all reads have finished. - barrier.srcAccessMask = VK_ACCESS_TRANSFER_READ_BIT; - srcStageMask = VK_PIPELINE_STAGE_TRANSFER_BIT; - break; - - case VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL: - // Image was being used as a copy destination, ensure all writes have finished. 
- barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; - srcStageMask = VK_PIPELINE_STAGE_TRANSFER_BIT; - break; - - default: - srcStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT; - break; - } - - switch (new_layout) - { - case VK_IMAGE_LAYOUT_UNDEFINED: - barrier.dstAccessMask = 0; - dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT; - break; - - case VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL: - barrier.dstAccessMask = - VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; - dstStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; - break; - - case VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL: - barrier.dstAccessMask = - VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT; - dstStageMask = - VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT; - break; - - case VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL: - barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT; - dstStageMask = VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT; - break; - - case VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL: - barrier.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT; - dstStageMask = VK_PIPELINE_STAGE_TRANSFER_BIT; - break; - - case VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL: - barrier.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; - dstStageMask = VK_PIPELINE_STAGE_TRANSFER_BIT; - break; - - case VK_IMAGE_LAYOUT_PRESENT_SRC_KHR: - srcStageMask = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT; - dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT; - break; - - default: - dstStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT; - break; - } - - // If we were using a compute layout, the stages need to reflect that - switch (m_compute_layout) - { - case ComputeImageLayout::Undefined: - break; - case ComputeImageLayout::ReadOnly: - barrier.srcAccessMask = VK_ACCESS_SHADER_READ_BIT; - srcStageMask = VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT; - break; - case ComputeImageLayout::WriteOnly: - barrier.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT; 
- srcStageMask = VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT; - break; - case ComputeImageLayout::ReadWrite: - barrier.srcAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT; - srcStageMask = VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT; - break; - } - m_compute_layout = ComputeImageLayout::Undefined; - - vkCmdPipelineBarrier(command_buffer, srcStageMask, dstStageMask, 0, 0, nullptr, 0, nullptr, 1, - &barrier); - - m_layout = new_layout; -} - -void Texture2D::TransitionToLayout(VkCommandBuffer command_buffer, ComputeImageLayout new_layout) -{ - ASSERT(new_layout != ComputeImageLayout::Undefined); - if (m_compute_layout == new_layout) - return; - - VkImageMemoryBarrier barrier = { - VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, // VkStructureType sType - nullptr, // const void* pNext - 0, // VkAccessFlags srcAccessMask - 0, // VkAccessFlags dstAccessMask - m_layout, // VkImageLayout oldLayout - VK_IMAGE_LAYOUT_GENERAL, // VkImageLayout newLayout - VK_QUEUE_FAMILY_IGNORED, // uint32_t srcQueueFamilyIndex - VK_QUEUE_FAMILY_IGNORED, // uint32_t dstQueueFamilyIndex - m_image, // VkImage image - {Util::GetImageAspectForFormat(m_format), 0, m_levels, 0, - m_layers} // VkImageSubresourceRange subresourceRange - }; - - VkPipelineStageFlags srcStageMask, dstStageMask; - switch (m_layout) - { - case VK_IMAGE_LAYOUT_UNDEFINED: - // Layout undefined therefore contents undefined, and we don't care what happens to it. - barrier.srcAccessMask = 0; - srcStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT; - break; - - case VK_IMAGE_LAYOUT_PREINITIALIZED: - // Image has been pre-initialized by the host, so ensure all writes have completed. - barrier.srcAccessMask = VK_ACCESS_HOST_WRITE_BIT; - srcStageMask = VK_PIPELINE_STAGE_HOST_BIT; - break; - - case VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL: - // Image was being used as a color attachment, so ensure all writes have completed. 
- barrier.srcAccessMask = - VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; - srcStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; - break; - - case VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL: - // Image was being used as a depthstencil attachment, so ensure all writes have completed. - barrier.srcAccessMask = - VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT; - srcStageMask = - VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT; - break; - - case VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL: - // Image was being used as a shader resource, make sure all reads have finished. - barrier.srcAccessMask = VK_ACCESS_SHADER_READ_BIT; - srcStageMask = VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT; - break; - - case VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL: - // Image was being used as a copy source, ensure all reads have finished. - barrier.srcAccessMask = VK_ACCESS_TRANSFER_READ_BIT; - srcStageMask = VK_PIPELINE_STAGE_TRANSFER_BIT; - break; - - case VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL: - // Image was being used as a copy destination, ensure all writes have finished. 
- barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; - srcStageMask = VK_PIPELINE_STAGE_TRANSFER_BIT; - break; - - default: - srcStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT; - break; - } - - switch (new_layout) - { - case ComputeImageLayout::ReadOnly: - barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT; - barrier.newLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; - dstStageMask = VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT; - break; - case ComputeImageLayout::WriteOnly: - barrier.dstAccessMask = VK_ACCESS_SHADER_WRITE_BIT; - barrier.newLayout = VK_IMAGE_LAYOUT_GENERAL; - dstStageMask = VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT; - break; - case ComputeImageLayout::ReadWrite: - barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT; - barrier.newLayout = VK_IMAGE_LAYOUT_GENERAL; - dstStageMask = VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT; - break; - default: - dstStageMask = 0; - break; - } - - m_layout = barrier.newLayout; - m_compute_layout = new_layout; - - vkCmdPipelineBarrier(command_buffer, srcStageMask, dstStageMask, 0, 0, nullptr, 0, nullptr, 1, - &barrier); -} - -} // namespace Vulkan diff --git a/Source/Core/VideoBackends/Vulkan/Texture2D.h b/Source/Core/VideoBackends/Vulkan/Texture2D.h deleted file mode 100644 index 3fce48d758..0000000000 --- a/Source/Core/VideoBackends/Vulkan/Texture2D.h +++ /dev/null @@ -1,79 +0,0 @@ -// Copyright 2016 Dolphin Emulator Project -// Licensed under GPLv2+ -// Refer to the license.txt file included. 
- -#pragma once - -#include - -#include "Common/CommonTypes.h" -#include "VideoBackends/Vulkan/Constants.h" - -namespace Vulkan -{ -class CommandBufferManager; -class ObjectCache; - -class Texture2D -{ -public: - // Custom image layouts, mainly used for switching to/from compute - enum class ComputeImageLayout - { - Undefined, - ReadOnly, - WriteOnly, - ReadWrite - }; - - Texture2D(u32 width, u32 height, u32 levels, u32 layers, VkFormat format, - VkSampleCountFlagBits samples, VkImageViewType view_type, VkImage image, - VkDeviceMemory device_memory, VkImageView view); - ~Texture2D(); - - static std::unique_ptr Create(u32 width, u32 height, u32 levels, u32 layers, - VkFormat format, VkSampleCountFlagBits samples, - VkImageViewType view_type, VkImageTiling tiling, - VkImageUsageFlags usage); - - static std::unique_ptr CreateFromExistingImage(u32 width, u32 height, u32 levels, - u32 layers, VkFormat format, - VkSampleCountFlagBits samples, - VkImageViewType view_type, - VkImage existing_image); - - u32 GetWidth() const { return m_width; } - u32 GetHeight() const { return m_height; } - u32 GetLevels() const { return m_levels; } - u32 GetLayers() const { return m_layers; } - VkFormat GetFormat() const { return m_format; } - VkSampleCountFlagBits GetSamples() const { return m_samples; } - VkImageLayout GetLayout() const { return m_layout; } - VkImageViewType GetViewType() const { return m_view_type; } - VkImage GetImage() const { return m_image; } - VkDeviceMemory GetDeviceMemory() const { return m_device_memory; } - VkImageView GetView() const { return m_view; } - // Used when the render pass is changing the image layout, or to force it to - // VK_IMAGE_LAYOUT_UNDEFINED, if the existing contents of the image is - // irrelevant and will not be loaded. 
- void OverrideImageLayout(VkImageLayout new_layout); - - void TransitionToLayout(VkCommandBuffer command_buffer, VkImageLayout new_layout); - void TransitionToLayout(VkCommandBuffer command_buffer, ComputeImageLayout new_layout); - -private: - u32 m_width; - u32 m_height; - u32 m_levels; - u32 m_layers; - VkFormat m_format; - VkSampleCountFlagBits m_samples; - VkImageViewType m_view_type; - VkImageLayout m_layout = VK_IMAGE_LAYOUT_UNDEFINED; - ComputeImageLayout m_compute_layout = ComputeImageLayout::Undefined; - - VkImage m_image; - VkDeviceMemory m_device_memory; - VkImageView m_view; -}; -} diff --git a/Source/Core/VideoBackends/Vulkan/TextureCache.cpp b/Source/Core/VideoBackends/Vulkan/TextureCache.cpp deleted file mode 100644 index 3f26b28a7c..0000000000 --- a/Source/Core/VideoBackends/Vulkan/TextureCache.cpp +++ /dev/null @@ -1,316 +0,0 @@ -// Copyright 2016 Dolphin Emulator Project -// Licensed under GPLv2+ -// Refer to the license.txt file included. - -#include "VideoBackends/Vulkan/TextureCache.h" - -#include -#include -#include -#include - -#include "Common/Assert.h" -#include "Common/CommonFuncs.h" -#include "Common/Logging/Log.h" -#include "Common/MsgHandler.h" - -#include "VideoBackends/Vulkan/CommandBufferManager.h" -#include "VideoBackends/Vulkan/FramebufferManager.h" -#include "VideoBackends/Vulkan/ObjectCache.h" -#include "VideoBackends/Vulkan/Renderer.h" -#include "VideoBackends/Vulkan/StateTracker.h" -#include "VideoBackends/Vulkan/StreamBuffer.h" -#include "VideoBackends/Vulkan/Texture2D.h" -#include "VideoBackends/Vulkan/TextureConverter.h" -#include "VideoBackends/Vulkan/Util.h" -#include "VideoBackends/Vulkan/VKTexture.h" -#include "VideoBackends/Vulkan/VulkanContext.h" - -#include "VideoCommon/ImageWrite.h" -#include "VideoCommon/TextureConfig.h" - -namespace Vulkan -{ -TextureCache::TextureCache() -{ -} - -TextureCache::~TextureCache() -{ - TextureCache::DeleteShaders(); -} - -VkShaderModule TextureCache::GetCopyShader() const -{ - return 
m_copy_shader; -} - -TextureCache* TextureCache::GetInstance() -{ - return static_cast(g_texture_cache.get()); -} - -bool TextureCache::Initialize() -{ - m_texture_converter = std::make_unique(); - if (!m_texture_converter->Initialize()) - { - PanicAlert("Failed to initialize texture converter"); - return false; - } - - if (!CompileShaders()) - { - PanicAlert("Failed to compile one or more shaders"); - return false; - } - - return true; -} - -void TextureCache::ConvertTexture(TCacheEntry* destination, TCacheEntry* source, - const void* palette, TLUTFormat format) -{ - m_texture_converter->ConvertTexture(destination, source, palette, format); - - // Ensure both textures remain in the SHADER_READ_ONLY layout so they can be bound. - static_cast(source->texture.get()) - ->GetRawTexIdentifier() - ->TransitionToLayout(g_command_buffer_mgr->GetCurrentCommandBuffer(), - VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); - static_cast(destination->texture.get()) - ->GetRawTexIdentifier() - ->TransitionToLayout(g_command_buffer_mgr->GetCurrentCommandBuffer(), - VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); -} - -void TextureCache::CopyEFB(AbstractStagingTexture* dst, const EFBCopyParams& params, - u32 native_width, u32 bytes_per_row, u32 num_blocks_y, u32 memory_stride, - const EFBRectangle& src_rect, bool scale_by_half, float y_scale, - float gamma, bool clamp_top, bool clamp_bottom, - const CopyFilterCoefficientArray& filter_coefficients) -{ - // Flush EFB pokes first, as they're expected to be included. - FramebufferManager::GetInstance()->FlushEFBPokes(); - - // MSAA case where we need to resolve first. - // An out-of-bounds source region is valid here, and fine for the draw (since it is converted - // to texture coordinates), but it's not valid to resolve an out-of-range rectangle. 
- TargetRectangle scaled_src_rect = g_renderer->ConvertEFBRectangle(src_rect); - VkRect2D region = {{scaled_src_rect.left, scaled_src_rect.top}, - {static_cast(scaled_src_rect.GetWidth()), - static_cast(scaled_src_rect.GetHeight())}}; - region = Util::ClampRect2D(region, FramebufferManager::GetInstance()->GetEFBWidth(), - FramebufferManager::GetInstance()->GetEFBHeight()); - Texture2D* src_texture; - if (params.depth) - src_texture = FramebufferManager::GetInstance()->ResolveEFBDepthTexture(region); - else - src_texture = FramebufferManager::GetInstance()->ResolveEFBColorTexture(region); - - // End render pass before barrier (since we have no self-dependencies). - // The barrier has to happen after the render pass, not inside it, as we are going to be - // reading from the texture immediately afterwards. - StateTracker::GetInstance()->EndRenderPass(); - - // Transition to shader resource before reading. - VkImageLayout original_layout = src_texture->GetLayout(); - src_texture->TransitionToLayout(g_command_buffer_mgr->GetCurrentCommandBuffer(), - VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); - - m_texture_converter->EncodeTextureToMemory( - src_texture->GetView(), dst, params, native_width, bytes_per_row, num_blocks_y, memory_stride, - src_rect, scale_by_half, y_scale, gamma, clamp_top, clamp_bottom, filter_coefficients); - - // Transition back to original state - src_texture->TransitionToLayout(g_command_buffer_mgr->GetCurrentCommandBuffer(), original_layout); - - StateTracker::GetInstance()->OnEFBCopyToRAM(); -} - -bool TextureCache::SupportsGPUTextureDecode(TextureFormat format, TLUTFormat palette_format) -{ - return m_texture_converter->SupportsTextureDecoding(format, palette_format); -} - -void TextureCache::DecodeTextureOnGPU(TCacheEntry* entry, u32 dst_level, const u8* data, - size_t data_size, TextureFormat format, u32 width, u32 height, - u32 aligned_width, u32 aligned_height, u32 row_stride, - const u8* palette, TLUTFormat palette_format) -{ - // Group 
compute shader dispatches together in the init command buffer. That way we don't have to - // pay a penalty for switching from graphics->compute, or end/restart our render pass. - VkCommandBuffer command_buffer = g_command_buffer_mgr->GetCurrentInitCommandBuffer(); - m_texture_converter->DecodeTexture(command_buffer, entry, dst_level, data, data_size, format, - width, height, aligned_width, aligned_height, row_stride, - palette, palette_format); - - // Last mip level? Ensure the texture is ready for use. - if (dst_level == (entry->GetNumLevels() - 1)) - { - static_cast(entry->texture.get()) - ->GetRawTexIdentifier() - ->TransitionToLayout(command_buffer, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); - } -} - -bool TextureCache::CompileShaders() -{ - static const char COPY_SHADER_SOURCE[] = R"( - layout(set = 1, binding = 0) uniform sampler2DArray samp0; - - layout(location = 0) in float3 uv0; - layout(location = 1) in float4 col0; - layout(location = 0) out float4 ocol0; - - void main() - { - ocol0 = texture(samp0, uv0); - } - )"; - - std::string header = g_shader_cache->GetUtilityShaderHeader(); - std::string source = header + COPY_SHADER_SOURCE; - - m_copy_shader = Util::CompileAndCreateFragmentShader(source); - - return m_copy_shader != VK_NULL_HANDLE; -} - -void TextureCache::DeleteShaders() -{ - // It is safe to destroy shader modules after they are consumed by creating a pipeline. - // Therefore, no matter where this function is called from, it won't cause an issue due to - // pending commands, although at the time of writing should only be called at the end of - // a frame. See Vulkan spec, section 2.3.1. Object Lifetime. 
- if (m_copy_shader != VK_NULL_HANDLE) - { - vkDestroyShaderModule(g_vulkan_context->GetDevice(), m_copy_shader, nullptr); - m_copy_shader = VK_NULL_HANDLE; - } - - for (auto& shader : m_efb_copy_to_tex_shaders) - { - vkDestroyShaderModule(g_vulkan_context->GetDevice(), shader.second, nullptr); - } - m_efb_copy_to_tex_shaders.clear(); -} - -void TextureCache::CopyEFBToCacheEntry(TCacheEntry* entry, bool is_depth_copy, - const EFBRectangle& src_rect, bool scale_by_half, - EFBCopyFormat dst_format, bool is_intensity, float gamma, - bool clamp_top, bool clamp_bottom, - const CopyFilterCoefficientArray& filter_coefficients) -{ - VKTexture* texture = static_cast(entry->texture.get()); - - // A better way of doing this would be nice. - FramebufferManager* framebuffer_mgr = - static_cast(g_framebuffer_manager.get()); - TargetRectangle scaled_src_rect = g_renderer->ConvertEFBRectangle(src_rect); - - // Flush EFB pokes first, as they're expected to be included. - framebuffer_mgr->FlushEFBPokes(); - - // Has to be flagged as a render target. - ASSERT(texture->GetFramebuffer() != VK_NULL_HANDLE); - - // Can't be done in a render pass, since we're doing our own render pass! - VkCommandBuffer command_buffer = g_command_buffer_mgr->GetCurrentCommandBuffer(); - StateTracker::GetInstance()->EndRenderPass(); - - // Fill uniform buffer. - struct PixelUniforms - { - float filter_coefficients[3]; - float gamma_rcp; - float clamp_top; - float clamp_bottom; - float pixel_height; - u32 padding; - }; - PixelUniforms uniforms; - for (size_t i = 0; i < filter_coefficients.size(); i++) - uniforms.filter_coefficients[i] = filter_coefficients[i]; - uniforms.gamma_rcp = 1.0f / gamma; - uniforms.clamp_top = clamp_top ? src_rect.top / float(EFB_HEIGHT) : 0.0f; - uniforms.clamp_bottom = clamp_bottom ? src_rect.bottom / float(EFB_HEIGHT) : 1.0f; - uniforms.pixel_height = - g_ActiveConfig.bCopyEFBScaled ? 
1.0f / g_renderer->GetTargetHeight() : 1.0f / EFB_HEIGHT; - uniforms.padding = 0; - - // Transition EFB to shader resource before binding. - // An out-of-bounds source region is valid here, and fine for the draw (since it is converted - // to texture coordinates), but it's not valid to resolve an out-of-range rectangle. - VkRect2D region = {{scaled_src_rect.left, scaled_src_rect.top}, - {static_cast(scaled_src_rect.GetWidth()), - static_cast(scaled_src_rect.GetHeight())}}; - region = Util::ClampRect2D(region, FramebufferManager::GetInstance()->GetEFBWidth(), - FramebufferManager::GetInstance()->GetEFBHeight()); - Texture2D* src_texture; - if (is_depth_copy) - src_texture = FramebufferManager::GetInstance()->ResolveEFBDepthTexture(region); - else - src_texture = FramebufferManager::GetInstance()->ResolveEFBColorTexture(region); - - VkSampler src_sampler = - scale_by_half ? g_object_cache->GetLinearSampler() : g_object_cache->GetPointSampler(); - VkImageLayout original_layout = src_texture->GetLayout(); - src_texture->TransitionToLayout(command_buffer, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); - texture->GetRawTexIdentifier()->TransitionToLayout(command_buffer, - VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL); - - auto uid = TextureConversionShaderGen::GetShaderUid(dst_format, is_depth_copy, is_intensity, - scale_by_half, - NeedsCopyFilterInShader(filter_coefficients)); - - auto it = m_efb_copy_to_tex_shaders.emplace(uid, VkShaderModule(VK_NULL_HANDLE)); - VkShaderModule& shader = it.first->second; - bool created = it.second; - - if (created) - { - std::string source = g_shader_cache->GetUtilityShaderHeader(); - source += - TextureConversionShaderGen::GenerateShader(APIType::Vulkan, uid.GetUidData()).GetBuffer(); - - shader = Util::CompileAndCreateFragmentShader(source); - } - - VkRenderPass render_pass = g_object_cache->GetRenderPass( - texture->GetRawTexIdentifier()->GetFormat(), VK_FORMAT_UNDEFINED, - texture->GetRawTexIdentifier()->GetSamples(), 
VK_ATTACHMENT_LOAD_OP_DONT_CARE); - - UtilityShaderDraw draw(command_buffer, - g_object_cache->GetPipelineLayout(PIPELINE_LAYOUT_STANDARD), render_pass, - g_shader_cache->GetPassthroughVertexShader(), - g_shader_cache->GetPassthroughGeometryShader(), shader); - - u8* ubo_ptr = draw.AllocatePSUniforms(sizeof(PixelUniforms)); - std::memcpy(ubo_ptr, &uniforms, sizeof(PixelUniforms)); - draw.CommitPSUniforms(sizeof(PixelUniforms)); - - draw.SetPSSampler(0, src_texture->GetView(), src_sampler); - - VkRect2D dest_region = {{0, 0}, {texture->GetConfig().width, texture->GetConfig().height}}; - - draw.BeginRenderPass(texture->GetFramebuffer(), dest_region); - - draw.DrawQuad(0, 0, texture->GetConfig().width, texture->GetConfig().height, scaled_src_rect.left, - scaled_src_rect.top, 0, scaled_src_rect.GetWidth(), scaled_src_rect.GetHeight(), - framebuffer_mgr->GetEFBWidth(), framebuffer_mgr->GetEFBHeight()); - - draw.EndRenderPass(); - - // We touched everything, so put it back. - StateTracker::GetInstance()->SetPendingRebind(); - - // Transition the EFB back to its original layout. - src_texture->TransitionToLayout(command_buffer, original_layout); - - // Ensure texture is in SHADER_READ_ONLY layout, ready for usage. - texture->GetRawTexIdentifier()->TransitionToLayout(command_buffer, - VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); -} - -} // namespace Vulkan diff --git a/Source/Core/VideoBackends/Vulkan/TextureCache.h b/Source/Core/VideoBackends/Vulkan/TextureCache.h deleted file mode 100644 index 35b3b6c360..0000000000 --- a/Source/Core/VideoBackends/Vulkan/TextureCache.h +++ /dev/null @@ -1,65 +0,0 @@ -// Copyright 2016 Dolphin Emulator Project -// Licensed under GPLv2+ -// Refer to the license.txt file included. 
- -#pragma once - -#include -#include - -#include "Common/CommonTypes.h" -#include "VideoBackends/Vulkan/StreamBuffer.h" -#include "VideoCommon/TextureCacheBase.h" -#include "VideoCommon/TextureConverterShaderGen.h" - -namespace Vulkan -{ -class TextureConverter; -class StateTracker; -class Texture2D; -class VKTexture; - -class TextureCache : public TextureCacheBase -{ -public: - TextureCache(); - ~TextureCache(); - - static TextureCache* GetInstance(); - - TextureConverter* GetTextureConverter() const { return m_texture_converter.get(); } - bool Initialize(); - - bool CompileShaders() override; - void DeleteShaders() override; - - void ConvertTexture(TCacheEntry* destination, TCacheEntry* source, const void* palette, - TLUTFormat format) override; - - void CopyEFB(AbstractStagingTexture* dst, const EFBCopyParams& params, u32 native_width, - u32 bytes_per_row, u32 num_blocks_y, u32 memory_stride, const EFBRectangle& src_rect, - bool scale_by_half, float y_scale, float gamma, bool clamp_top, bool clamp_bottom, - const CopyFilterCoefficientArray& filter_coefficients) override; - - bool SupportsGPUTextureDecode(TextureFormat format, TLUTFormat palette_format) override; - - void DecodeTextureOnGPU(TCacheEntry* entry, u32 dst_level, const u8* data, size_t data_size, - TextureFormat format, u32 width, u32 height, u32 aligned_width, - u32 aligned_height, u32 row_stride, const u8* palette, - TLUTFormat palette_format) override; - - VkShaderModule GetCopyShader() const; - -private: - void CopyEFBToCacheEntry(TCacheEntry* entry, bool is_depth_copy, const EFBRectangle& src_rect, - bool scale_by_half, EFBCopyFormat dst_format, bool is_intensity, - float gamma, bool clamp_top, bool clamp_bottom, - const CopyFilterCoefficientArray& filter_coefficients) override; - - std::unique_ptr m_texture_converter; - - VkShaderModule m_copy_shader = VK_NULL_HANDLE; - std::map m_efb_copy_to_tex_shaders; -}; - -} // namespace Vulkan diff --git 
a/Source/Core/VideoBackends/Vulkan/TextureConverter.cpp b/Source/Core/VideoBackends/Vulkan/TextureConverter.cpp deleted file mode 100644 index c5acdc8e7f..0000000000 --- a/Source/Core/VideoBackends/Vulkan/TextureConverter.cpp +++ /dev/null @@ -1,634 +0,0 @@ -// Copyright 2016 Dolphin Emulator Project -// Licensed under GPLv2+ -// Refer to the license.txt file included. - -#include "VideoBackends/Vulkan/TextureConverter.h" - -#include -#include -#include -#include -#include - -#include "Common/Assert.h" -#include "Common/CommonFuncs.h" -#include "Common/CommonTypes.h" -#include "Common/Logging/Log.h" -#include "Common/MsgHandler.h" - -#include "VideoBackends/Vulkan/CommandBufferManager.h" -#include "VideoBackends/Vulkan/FramebufferManager.h" -#include "VideoBackends/Vulkan/ObjectCache.h" -#include "VideoBackends/Vulkan/StateTracker.h" -#include "VideoBackends/Vulkan/StreamBuffer.h" -#include "VideoBackends/Vulkan/Texture2D.h" -#include "VideoBackends/Vulkan/Util.h" -#include "VideoBackends/Vulkan/VKTexture.h" -#include "VideoBackends/Vulkan/VulkanContext.h" - -#include "VideoCommon/TextureConversionShader.h" -#include "VideoCommon/TextureDecoder.h" -#include "VideoCommon/VideoConfig.h" - -namespace Vulkan -{ -namespace -{ -struct EFBEncodeParams -{ - std::array position_uniform; - float y_scale; - float gamma_rcp; - float clamp_top; - float clamp_bottom; - float filter_coefficients[3]; - u32 padding; -}; -} // namespace -TextureConverter::TextureConverter() -{ -} - -TextureConverter::~TextureConverter() -{ - for (const auto& it : m_palette_conversion_shaders) - { - if (it != VK_NULL_HANDLE) - vkDestroyShaderModule(g_vulkan_context->GetDevice(), it, nullptr); - } - - if (m_texel_buffer_view_r8_uint != VK_NULL_HANDLE) - vkDestroyBufferView(g_vulkan_context->GetDevice(), m_texel_buffer_view_r8_uint, nullptr); - if (m_texel_buffer_view_r16_uint != VK_NULL_HANDLE) - vkDestroyBufferView(g_vulkan_context->GetDevice(), m_texel_buffer_view_r16_uint, nullptr); - if 
(m_texel_buffer_view_r32g32_uint != VK_NULL_HANDLE) - vkDestroyBufferView(g_vulkan_context->GetDevice(), m_texel_buffer_view_r32g32_uint, nullptr); - if (m_texel_buffer_view_rgba8_unorm != VK_NULL_HANDLE) - vkDestroyBufferView(g_vulkan_context->GetDevice(), m_texel_buffer_view_rgba8_unorm, nullptr); - if (m_texel_buffer_view_rgba8_uint != VK_NULL_HANDLE) - vkDestroyBufferView(g_vulkan_context->GetDevice(), m_texel_buffer_view_rgba8_uint, nullptr); - - for (auto& it : m_encoding_shaders) - vkDestroyShaderModule(g_vulkan_context->GetDevice(), it.second, nullptr); - - for (const auto& it : m_decoding_pipelines) - { - if (it.second.compute_shader != VK_NULL_HANDLE) - vkDestroyShaderModule(g_vulkan_context->GetDevice(), it.second.compute_shader, nullptr); - } -} - -bool TextureConverter::Initialize() -{ - if (!CreateTexelBuffer()) - { - PanicAlert("Failed to create uniform buffer"); - return false; - } - - if (!CompilePaletteConversionShaders()) - { - PanicAlert("Failed to compile palette conversion shaders"); - return false; - } - - if (!CreateEncodingTexture()) - { - PanicAlert("Failed to create encoding texture"); - return false; - } - - if (!CreateDecodingTexture()) - { - PanicAlert("Failed to create decoding texture"); - return false; - } - - return true; -} - -bool TextureConverter::ReserveTexelBufferStorage(size_t size, size_t alignment) -{ - // Enforce the minimum alignment for texture buffers on the device. - size_t actual_alignment = - std::max(static_cast(g_vulkan_context->GetTexelBufferAlignment()), alignment); - if (m_texel_buffer->ReserveMemory(size, actual_alignment)) - return true; - - WARN_LOG(VIDEO, "Executing command list while waiting for space in palette buffer"); - Util::ExecuteCurrentCommandsAndRestoreState(false); - - // This next call should never fail, since a command buffer is now in-flight and we can - // wait on the fence for the GPU to finish. If this returns false, it's probably because - // the device has been lost, which is fatal anyway. 
- if (!m_texel_buffer->ReserveMemory(size, actual_alignment)) - { - PanicAlert("Failed to allocate space for texture conversion"); - return false; - } - - return true; -} - -VkCommandBuffer -TextureConverter::GetCommandBufferForTextureConversion(const TextureCache::TCacheEntry* src_entry) -{ - // EFB copies can be used as paletted textures as well. For these, we can't assume them to be - // contain the correct data before the frame begins (when the init command buffer is executed), - // so we must convert them at the appropriate time, during the drawing command buffer. - if (src_entry->IsCopy()) - { - StateTracker::GetInstance()->EndRenderPass(); - StateTracker::GetInstance()->SetPendingRebind(); - return g_command_buffer_mgr->GetCurrentCommandBuffer(); - } - else - { - // Use initialization command buffer and perform conversion before the drawing commands. - return g_command_buffer_mgr->GetCurrentInitCommandBuffer(); - } -} - -void TextureConverter::ConvertTexture(TextureCacheBase::TCacheEntry* dst_entry, - TextureCacheBase::TCacheEntry* src_entry, const void* palette, - TLUTFormat palette_format) -{ - struct PSUniformBlock - { - float multiplier; - int texel_buffer_offset; - int pad[2]; - }; - - VKTexture* source_texture = static_cast(src_entry->texture.get()); - VKTexture* destination_texture = static_cast(dst_entry->texture.get()); - - ASSERT(static_cast(palette_format) < NUM_PALETTE_CONVERSION_SHADERS); - ASSERT(destination_texture->GetConfig().rendertarget); - - // We want to align to 2 bytes (R16) or the device's texel buffer alignment, whichever is greater. - size_t palette_size = src_entry->format == TextureFormat::I4 ? 32 : 512; - if (!ReserveTexelBufferStorage(palette_size, sizeof(u16))) - return; - - // Copy in palette to texel buffer. 
- u32 palette_offset = static_cast(m_texel_buffer->GetCurrentOffset()); - memcpy(m_texel_buffer->GetCurrentHostPointer(), palette, palette_size); - m_texel_buffer->CommitMemory(palette_size); - - VkCommandBuffer command_buffer = GetCommandBufferForTextureConversion(src_entry); - source_texture->GetRawTexIdentifier()->TransitionToLayout( - command_buffer, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); - destination_texture->GetRawTexIdentifier()->TransitionToLayout( - command_buffer, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL); - - // Bind and draw to the destination. - VkRenderPass render_pass = g_object_cache->GetRenderPass( - destination_texture->GetRawTexIdentifier()->GetFormat(), VK_FORMAT_UNDEFINED, - destination_texture->GetRawTexIdentifier()->GetSamples(), VK_ATTACHMENT_LOAD_OP_DONT_CARE); - UtilityShaderDraw draw(command_buffer, - g_object_cache->GetPipelineLayout(PIPELINE_LAYOUT_TEXTURE_CONVERSION), - render_pass, g_shader_cache->GetScreenQuadVertexShader(), VK_NULL_HANDLE, - m_palette_conversion_shaders[static_cast(palette_format)]); - - VkRect2D region = {{0, 0}, {dst_entry->GetWidth(), dst_entry->GetHeight()}}; - draw.BeginRenderPass(destination_texture->GetFramebuffer(), region); - - PSUniformBlock uniforms = {}; - uniforms.multiplier = src_entry->format == TextureFormat::I4 ? 
15.0f : 255.0f; - uniforms.texel_buffer_offset = static_cast(palette_offset / sizeof(u16)); - draw.SetPushConstants(&uniforms, sizeof(uniforms)); - draw.SetPSSampler(0, source_texture->GetRawTexIdentifier()->GetView(), - g_object_cache->GetPointSampler()); - draw.SetPSTexelBuffer(m_texel_buffer_view_r16_uint); - draw.SetViewportAndScissor(0, 0, dst_entry->GetWidth(), dst_entry->GetHeight()); - draw.DrawWithoutVertexBuffer(4); - draw.EndRenderPass(); -} - -void TextureConverter::EncodeTextureToMemory( - VkImageView src_texture, AbstractStagingTexture* dest, const EFBCopyParams& params, - u32 native_width, u32 bytes_per_row, u32 num_blocks_y, u32 memory_stride, - const EFBRectangle& src_rect, bool scale_by_half, float y_scale, float gamma, bool clamp_top, - bool clamp_bottom, const TextureCacheBase::CopyFilterCoefficientArray& filter_coefficients) -{ - VkShaderModule shader = GetEncodingShader(params); - if (shader == VK_NULL_HANDLE) - { - ERROR_LOG(VIDEO, "Missing encoding fragment shader for format %u->%u", - static_cast(params.efb_format), static_cast(params.copy_format)); - return; - } - - // Can't do our own draw within a render pass. 
- StateTracker::GetInstance()->EndRenderPass(); - - static_cast(m_encoding_render_texture.get()) - ->GetRawTexIdentifier() - ->TransitionToLayout(g_command_buffer_mgr->GetCurrentCommandBuffer(), - VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL); - - VkRenderPass render_pass = g_object_cache->GetRenderPass( - Util::GetVkFormatForHostTextureFormat(m_encoding_render_texture->GetConfig().format), - VK_FORMAT_UNDEFINED, 1, VK_ATTACHMENT_LOAD_OP_DONT_CARE); - UtilityShaderDraw draw(g_command_buffer_mgr->GetCurrentCommandBuffer(), - g_object_cache->GetPipelineLayout(PIPELINE_LAYOUT_PUSH_CONSTANT), - render_pass, g_shader_cache->GetScreenQuadVertexShader(), VK_NULL_HANDLE, - shader); - - // Uniform - int4 of left,top,native_width,scale - EFBEncodeParams encoder_params; - encoder_params.position_uniform[0] = src_rect.left; - encoder_params.position_uniform[1] = src_rect.top; - encoder_params.position_uniform[2] = static_cast(native_width); - encoder_params.position_uniform[3] = scale_by_half ? 2 : 1; - encoder_params.y_scale = y_scale; - encoder_params.gamma_rcp = 1.0f / gamma; - encoder_params.clamp_top = clamp_top ? src_rect.top / float(EFB_HEIGHT) : 0.0f; - encoder_params.clamp_bottom = clamp_bottom ? src_rect.bottom / float(EFB_HEIGHT) : 1.0f; - for (size_t i = 0; i < filter_coefficients.size(); i++) - encoder_params.filter_coefficients[i] = filter_coefficients[i]; - u8* ubo_ptr = draw.AllocatePSUniforms(sizeof(EFBEncodeParams)); - std::memcpy(ubo_ptr, &encoder_params, sizeof(EFBEncodeParams)); - draw.CommitPSUniforms(sizeof(EFBEncodeParams)); - - // We also linear filtering for both box filtering and downsampling higher resolutions to 1x - // TODO: This only produces perfect downsampling for 2x IR, other resolutions will need more - // complex down filtering to average all pixels and produce the correct result. - bool linear_filter = - (scale_by_half && !params.depth) || g_renderer->GetEFBScale() != 1 || y_scale > 1.0f; - draw.SetPSSampler(0, src_texture, - linear_filter ? 
g_object_cache->GetLinearSampler() : - g_object_cache->GetPointSampler()); - - u32 render_width = bytes_per_row / sizeof(u32); - u32 render_height = num_blocks_y; - Util::SetViewportAndScissor(g_command_buffer_mgr->GetCurrentCommandBuffer(), 0, 0, render_width, - render_height); - - VkRect2D render_region = {{0, 0}, {render_width, render_height}}; - draw.BeginRenderPass(static_cast(m_encoding_render_texture.get())->GetFramebuffer(), - render_region); - draw.DrawWithoutVertexBuffer(4); - draw.EndRenderPass(); - - MathUtil::Rectangle copy_rect(0, 0, render_width, render_height); - dest->CopyFromTexture(m_encoding_render_texture.get(), copy_rect, 0, 0, copy_rect); -} - -bool TextureConverter::SupportsTextureDecoding(TextureFormat format, TLUTFormat palette_format) -{ - auto key = std::make_pair(format, palette_format); - auto iter = m_decoding_pipelines.find(key); - if (iter != m_decoding_pipelines.end()) - return iter->second.valid; - - TextureDecodingPipeline pipeline; - pipeline.base_info = TextureConversionShaderTiled::GetDecodingShaderInfo(format); - pipeline.compute_shader = VK_NULL_HANDLE; - pipeline.valid = false; - - if (!pipeline.base_info) - { - m_decoding_pipelines.emplace(key, pipeline); - return false; - } - - std::string shader_source = - TextureConversionShaderTiled::GenerateDecodingShader(format, palette_format, APIType::Vulkan); - - pipeline.compute_shader = Util::CompileAndCreateComputeShader(shader_source); - if (pipeline.compute_shader == VK_NULL_HANDLE) - { - m_decoding_pipelines.emplace(key, pipeline); - return false; - } - - pipeline.valid = true; - m_decoding_pipelines.emplace(key, pipeline); - return true; -} - -void TextureConverter::DecodeTexture(VkCommandBuffer command_buffer, - TextureCache::TCacheEntry* entry, u32 dst_level, - const u8* data, size_t data_size, TextureFormat format, - u32 width, u32 height, u32 aligned_width, u32 aligned_height, - u32 row_stride, const u8* palette, TLUTFormat palette_format) -{ - VKTexture* 
destination_texture = static_cast(entry->texture.get()); - auto key = std::make_pair(format, palette_format); - auto iter = m_decoding_pipelines.find(key); - if (iter == m_decoding_pipelines.end()) - return; - - struct PushConstants - { - u32 dst_size[2]; - u32 src_size[2]; - u32 src_offset; - u32 src_row_stride; - u32 palette_offset; - }; - - // Copy to GPU-visible buffer, aligned to the data type - auto info = iter->second; - u32 bytes_per_buffer_elem = - TextureConversionShaderTiled::GetBytesPerBufferElement(info.base_info->buffer_format); - - // Calculate total data size, including palette. - // Only copy palette if it is required. - u32 total_upload_size = static_cast(data_size); - u32 palette_size = iter->second.base_info->palette_size; - u32 palette_offset = total_upload_size; - bool has_palette = palette_size > 0; - if (has_palette) - { - // Align to u16. - if ((total_upload_size % sizeof(u16)) != 0) - { - total_upload_size++; - palette_offset++; - } - - total_upload_size += palette_size; - } - - // Allocate space for upload, if it fails, execute the buffer. - if (!m_texel_buffer->ReserveMemory(total_upload_size, bytes_per_buffer_elem)) - { - Util::ExecuteCurrentCommandsAndRestoreState(true, false); - if (!m_texel_buffer->ReserveMemory(total_upload_size, bytes_per_buffer_elem)) - PanicAlert("Failed to reserve memory for encoded texture upload"); - } - - // Copy/commit upload buffer. 
- u32 texel_buffer_offset = static_cast(m_texel_buffer->GetCurrentOffset()); - - Util::BufferMemoryBarrier(g_command_buffer_mgr->GetCurrentCommandBuffer(), - m_texel_buffer->GetBuffer(), VK_ACCESS_SHADER_READ_BIT, - VK_ACCESS_HOST_WRITE_BIT, texel_buffer_offset, total_upload_size, - VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_HOST_BIT); - - std::memcpy(m_texel_buffer->GetCurrentHostPointer(), data, data_size); - if (has_palette) - std::memcpy(m_texel_buffer->GetCurrentHostPointer() + palette_offset, palette, palette_size); - m_texel_buffer->CommitMemory(total_upload_size); - - Util::BufferMemoryBarrier(g_command_buffer_mgr->GetCurrentCommandBuffer(), - m_texel_buffer->GetBuffer(), VK_ACCESS_HOST_WRITE_BIT, - VK_ACCESS_SHADER_READ_BIT, texel_buffer_offset, total_upload_size, - VK_PIPELINE_STAGE_HOST_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT); - - // Determine uniforms. - PushConstants constants = { - {width, height}, - {aligned_width, aligned_height}, - texel_buffer_offset / bytes_per_buffer_elem, - row_stride / bytes_per_buffer_elem, - static_cast((texel_buffer_offset + palette_offset) / sizeof(u16))}; - - // Determine view to use for texel buffers. - VkBufferView data_view = VK_NULL_HANDLE; - switch (iter->second.base_info->buffer_format) - { - case TextureConversionShaderTiled::BUFFER_FORMAT_R8_UINT: - data_view = m_texel_buffer_view_r8_uint; - break; - case TextureConversionShaderTiled::BUFFER_FORMAT_R16_UINT: - data_view = m_texel_buffer_view_r16_uint; - break; - case TextureConversionShaderTiled::BUFFER_FORMAT_R32G32_UINT: - data_view = m_texel_buffer_view_r32g32_uint; - break; - case TextureConversionShaderTiled::BUFFER_FORMAT_RGBA8_UINT: - data_view = m_texel_buffer_view_rgba8_uint; - break; - default: - break; - } - - // Dispatch compute to temporary texture. 
- ComputeShaderDispatcher dispatcher(command_buffer, - g_object_cache->GetPipelineLayout(PIPELINE_LAYOUT_COMPUTE), - iter->second.compute_shader); - m_decoding_texture->TransitionToLayout(command_buffer, Texture2D::ComputeImageLayout::WriteOnly); - dispatcher.SetPushConstants(&constants, sizeof(constants)); - dispatcher.SetStorageImage(m_decoding_texture->GetView(), m_decoding_texture->GetLayout()); - dispatcher.SetTexelBuffer(0, data_view); - if (has_palette) - dispatcher.SetTexelBuffer(1, m_texel_buffer_view_r16_uint); - auto groups = TextureConversionShaderTiled::GetDispatchCount(iter->second.base_info, - aligned_width, aligned_height); - dispatcher.Dispatch(groups.first, groups.second, 1); - - // Copy from temporary texture to final destination. - Texture2D* vulkan_tex_identifier = destination_texture->GetRawTexIdentifier(); - m_decoding_texture->TransitionToLayout(command_buffer, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL); - vulkan_tex_identifier->TransitionToLayout(command_buffer, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); - VkImageCopy image_copy = {{VK_IMAGE_ASPECT_COLOR_BIT, 0, 0, 1}, - {0, 0, 0}, - {VK_IMAGE_ASPECT_COLOR_BIT, dst_level, 0, 1}, - {0, 0, 0}, - {width, height, 1}}; - vkCmdCopyImage(command_buffer, m_decoding_texture->GetImage(), - VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, vulkan_tex_identifier->GetImage(), - VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, &image_copy); -} - -bool TextureConverter::CreateTexelBuffer() -{ - // Prefer an 8MB buffer if possible, but use less if the device doesn't support this. - // This buffer is potentially going to be addressed as R8s in the future, so we assume - // that one element is one byte. 
- m_texel_buffer_size = - std::min(TEXTURE_CONVERSION_TEXEL_BUFFER_SIZE, - static_cast(g_vulkan_context->GetDeviceLimits().maxTexelBufferElements)); - - m_texel_buffer = StreamBuffer::Create(VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT, - m_texel_buffer_size, m_texel_buffer_size); - if (!m_texel_buffer) - return false; - - // Create views of the formats that we will be using. - m_texel_buffer_view_r8_uint = CreateTexelBufferView(VK_FORMAT_R8_UINT); - m_texel_buffer_view_r16_uint = CreateTexelBufferView(VK_FORMAT_R16_UINT); - m_texel_buffer_view_r32g32_uint = CreateTexelBufferView(VK_FORMAT_R32G32_UINT); - m_texel_buffer_view_rgba8_unorm = CreateTexelBufferView(VK_FORMAT_R8G8B8A8_UNORM); - m_texel_buffer_view_rgba8_uint = CreateTexelBufferView(VK_FORMAT_R8G8B8A8_UINT); - return m_texel_buffer_view_r8_uint != VK_NULL_HANDLE && - m_texel_buffer_view_r16_uint != VK_NULL_HANDLE && - m_texel_buffer_view_r32g32_uint != VK_NULL_HANDLE && - m_texel_buffer_view_rgba8_unorm != VK_NULL_HANDLE && - m_texel_buffer_view_rgba8_uint != VK_NULL_HANDLE; -} - -VkBufferView TextureConverter::CreateTexelBufferView(VkFormat format) const -{ - // Create a view of the whole buffer, we'll offset our texel load into it - VkBufferViewCreateInfo view_info = { - VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO, // VkStructureType sType - nullptr, // const void* pNext - 0, // VkBufferViewCreateFlags flags - m_texel_buffer->GetBuffer(), // VkBuffer buffer - format, // VkFormat format - 0, // VkDeviceSize offset - m_texel_buffer_size // VkDeviceSize range - }; - - VkBufferView view; - VkResult res = vkCreateBufferView(g_vulkan_context->GetDevice(), &view_info, nullptr, &view); - if (res != VK_SUCCESS) - { - LOG_VULKAN_ERROR(res, "vkCreateBufferView failed: "); - return VK_NULL_HANDLE; - } - - return view; -} - -bool TextureConverter::CompilePaletteConversionShaders() -{ - static const char PALETTE_CONVERSION_FRAGMENT_SHADER_SOURCE[] = R"( - layout(std140, push_constant) uniform PCBlock - { - float 
multiplier; - int texture_buffer_offset; - } PC; - - SAMPLER_BINDING(0) uniform sampler2DArray samp0; - TEXEL_BUFFER_BINDING(0) uniform usamplerBuffer samp1; - - layout(location = 0) in vec3 f_uv0; - layout(location = 0) out vec4 ocol0; - - int Convert3To8(int v) - { - // Swizzle bits: 00000123 -> 12312312 - return (v << 5) | (v << 2) | (v >> 1); - } - int Convert4To8(int v) - { - // Swizzle bits: 00001234 -> 12341234 - return (v << 4) | v; - } - int Convert5To8(int v) - { - // Swizzle bits: 00012345 -> 12345123 - return (v << 3) | (v >> 2); - } - int Convert6To8(int v) - { - // Swizzle bits: 00123456 -> 12345612 - return (v << 2) | (v >> 4); - } - float4 DecodePixel_RGB5A3(int val) - { - int r,g,b,a; - if ((val&0x8000) > 0) - { - r=Convert5To8((val>>10) & 0x1f); - g=Convert5To8((val>>5 ) & 0x1f); - b=Convert5To8((val ) & 0x1f); - a=0xFF; - } - else - { - a=Convert3To8((val>>12) & 0x7); - r=Convert4To8((val>>8 ) & 0xf); - g=Convert4To8((val>>4 ) & 0xf); - b=Convert4To8((val ) & 0xf); - } - return float4(r, g, b, a) / 255.0; - } - float4 DecodePixel_RGB565(int val) - { - int r, g, b, a; - r = Convert5To8((val >> 11) & 0x1f); - g = Convert6To8((val >> 5) & 0x3f); - b = Convert5To8((val) & 0x1f); - a = 0xFF; - return float4(r, g, b, a) / 255.0; - } - float4 DecodePixel_IA8(int val) - { - int i = val & 0xFF; - int a = val >> 8; - return float4(i, i, i, a) / 255.0; - } - void main() - { - int src = int(round(texture(samp0, f_uv0).r * PC.multiplier)); - src = int(texelFetch(samp1, src + PC.texture_buffer_offset).r); - src = ((src << 8) & 0xFF00) | (src >> 8); - ocol0 = DECODE(src); - } - - )"; - - std::string palette_ia8_program = StringFromFormat("%s\n%s", "#define DECODE DecodePixel_IA8", - PALETTE_CONVERSION_FRAGMENT_SHADER_SOURCE); - std::string palette_rgb565_program = StringFromFormat( - "%s\n%s", "#define DECODE DecodePixel_RGB565", PALETTE_CONVERSION_FRAGMENT_SHADER_SOURCE); - std::string palette_rgb5a3_program = StringFromFormat( - "%s\n%s", "#define DECODE 
DecodePixel_RGB5A3", PALETTE_CONVERSION_FRAGMENT_SHADER_SOURCE); - - m_palette_conversion_shaders[static_cast(TLUTFormat::IA8)] = - Util::CompileAndCreateFragmentShader(palette_ia8_program); - m_palette_conversion_shaders[static_cast(TLUTFormat::RGB565)] = - Util::CompileAndCreateFragmentShader(palette_rgb565_program); - m_palette_conversion_shaders[static_cast(TLUTFormat::RGB5A3)] = - Util::CompileAndCreateFragmentShader(palette_rgb5a3_program); - - return m_palette_conversion_shaders[static_cast(TLUTFormat::IA8)] != VK_NULL_HANDLE && - m_palette_conversion_shaders[static_cast(TLUTFormat::RGB565)] != VK_NULL_HANDLE && - m_palette_conversion_shaders[static_cast(TLUTFormat::RGB5A3)] != VK_NULL_HANDLE; -} - -VkShaderModule TextureConverter::CompileEncodingShader(const EFBCopyParams& params) -{ - const char* shader = - TextureConversionShaderTiled::GenerateEncodingShader(params, APIType::Vulkan); - VkShaderModule module = Util::CompileAndCreateFragmentShader(shader); - if (module == VK_NULL_HANDLE) - PanicAlert("Failed to compile texture encoding shader."); - - return module; -} - -VkShaderModule TextureConverter::GetEncodingShader(const EFBCopyParams& params) -{ - auto iter = m_encoding_shaders.find(params); - if (iter != m_encoding_shaders.end()) - return iter->second; - - VkShaderModule shader = CompileEncodingShader(params); - m_encoding_shaders.emplace(params, shader); - return shader; -} - -bool TextureConverter::CreateEncodingTexture() -{ - m_encoding_render_texture = g_renderer->CreateTexture(TextureCache::GetEncodingTextureConfig()); - return m_encoding_render_texture != nullptr; -} - -bool TextureConverter::CreateDecodingTexture() -{ - m_decoding_texture = Texture2D::Create( - DECODING_TEXTURE_WIDTH, DECODING_TEXTURE_HEIGHT, 1, 1, VK_FORMAT_R8G8B8A8_UNORM, - VK_SAMPLE_COUNT_1_BIT, VK_IMAGE_VIEW_TYPE_2D_ARRAY, VK_IMAGE_TILING_OPTIMAL, - VK_IMAGE_USAGE_STORAGE_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT | - VK_IMAGE_USAGE_TRANSFER_SRC_BIT); - if 
(!m_decoding_texture) - return false; - - VkClearColorValue clear_value = {{0.0f, 0.0f, 0.0f, 1.0f}}; - VkImageSubresourceRange clear_range = {VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1}; - m_decoding_texture->TransitionToLayout(g_command_buffer_mgr->GetCurrentInitCommandBuffer(), - VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); - vkCmdClearColorImage(g_command_buffer_mgr->GetCurrentInitCommandBuffer(), - m_decoding_texture->GetImage(), VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, - &clear_value, 1, &clear_range); - return true; -} -} // namespace Vulkan diff --git a/Source/Core/VideoBackends/Vulkan/TextureConverter.h b/Source/Core/VideoBackends/Vulkan/TextureConverter.h deleted file mode 100644 index 9c035296b2..0000000000 --- a/Source/Core/VideoBackends/Vulkan/TextureConverter.h +++ /dev/null @@ -1,109 +0,0 @@ -// Copyright 2016 Dolphin Emulator Project -// Licensed under GPLv2+ -// Refer to the license.txt file included. - -#pragma once - -#include -#include -#include -#include - -#include "Common/CommonTypes.h" -#include "VideoBackends/Vulkan/StreamBuffer.h" -#include "VideoBackends/Vulkan/TextureCache.h" -#include "VideoCommon/TextureConversionShader.h" -#include "VideoCommon/TextureDecoder.h" -#include "VideoCommon/VideoCommon.h" - -class AbstractTexture; -class AbstractStagingTexture; - -namespace Vulkan -{ -class Texture2D; -class VKTexture; - -class TextureConverter -{ -public: - TextureConverter(); - ~TextureConverter(); - - bool Initialize(); - - // Applies palette to dst_entry, using indices from src_entry. - void ConvertTexture(TextureCacheBase::TCacheEntry* dst_entry, - TextureCache::TCacheEntry* src_entry, const void* palette, - TLUTFormat palette_format); - - // Uses an encoding shader to copy src_texture to dest. 
- void EncodeTextureToMemory( - VkImageView src_texture, AbstractStagingTexture* dest, const EFBCopyParams& params, - u32 native_width, u32 bytes_per_row, u32 num_blocks_y, u32 memory_stride, - const EFBRectangle& src_rect, bool scale_by_half, float y_scale, float gamma, bool clamp_top, - bool clamp_bottom, const TextureCacheBase::CopyFilterCoefficientArray& filter_coefficients); - - bool SupportsTextureDecoding(TextureFormat format, TLUTFormat palette_format); - void DecodeTexture(VkCommandBuffer command_buffer, TextureCache::TCacheEntry* entry, - u32 dst_level, const u8* data, size_t data_size, TextureFormat format, - u32 width, u32 height, u32 aligned_width, u32 aligned_height, u32 row_stride, - const u8* palette, TLUTFormat palette_format); - -private: - static const size_t NUM_PALETTE_CONVERSION_SHADERS = 3; - - // Maximum size of a texture based on BP registers. - static const u32 DECODING_TEXTURE_WIDTH = 1024; - static const u32 DECODING_TEXTURE_HEIGHT = 1024; - - bool CreateTexelBuffer(); - VkBufferView CreateTexelBufferView(VkFormat format) const; - - bool CompilePaletteConversionShaders(); - - VkShaderModule CompileEncodingShader(const EFBCopyParams& params); - VkShaderModule GetEncodingShader(const EFBCopyParams& params); - - bool CreateEncodingTexture(); - bool CreateDecodingTexture(); - - // Allocates storage in the texel command buffer of the specified size. - // If the buffer does not have enough space, executes the current command buffer and tries again. - // If this is done, g_command_buffer_mgr->GetCurrentCommandBuffer() will return a different value, - // so it always should be re-obtained after calling this method. - // Once the data copy is done, call m_texel_buffer->CommitMemory(size). - bool ReserveTexelBufferStorage(size_t size, size_t alignment); - - // Returns the command buffer that the texture conversion should occur in for the given texture. - // This can be the initialization/copy command buffer, or the drawing command buffer. 
- VkCommandBuffer GetCommandBufferForTextureConversion(const TextureCache::TCacheEntry* src_entry); - - // Shared between conversion types - std::unique_ptr m_texel_buffer; - VkBufferView m_texel_buffer_view_r8_uint = VK_NULL_HANDLE; - VkBufferView m_texel_buffer_view_r16_uint = VK_NULL_HANDLE; - VkBufferView m_texel_buffer_view_r32g32_uint = VK_NULL_HANDLE; - VkBufferView m_texel_buffer_view_rgba8_uint = VK_NULL_HANDLE; - VkBufferView m_texel_buffer_view_rgba8_unorm = VK_NULL_HANDLE; - size_t m_texel_buffer_size = 0; - - // Palette conversion - taking an indexed texture and applying palette - std::array m_palette_conversion_shaders = {}; - - // Texture encoding - RGBA8->GX format in memory - std::map m_encoding_shaders; - std::unique_ptr m_encoding_render_texture; - - // Texture decoding - GX format in memory->RGBA8 - struct TextureDecodingPipeline - { - const TextureConversionShaderTiled::DecodingShaderInfo* base_info; - VkShaderModule compute_shader; - bool valid; - }; - std::map, TextureDecodingPipeline> m_decoding_pipelines; - std::unique_ptr m_decoding_texture; -}; - -} // namespace Vulkan diff --git a/Source/Core/VideoBackends/Vulkan/Util.cpp b/Source/Core/VideoBackends/Vulkan/Util.cpp deleted file mode 100644 index 35c9bcfbaf..0000000000 --- a/Source/Core/VideoBackends/Vulkan/Util.cpp +++ /dev/null @@ -1,925 +0,0 @@ -// Copyright 2016 Dolphin Emulator Project -// Licensed under GPLv2+ -// Refer to the license.txt file included. 
- -#include "VideoBackends/Vulkan/Util.h" - -#include "Common/Align.h" -#include "Common/Assert.h" -#include "Common/CommonFuncs.h" -#include "Common/MathUtil.h" -#include "Common/MsgHandler.h" - -#include "VideoBackends/Vulkan/CommandBufferManager.h" -#include "VideoBackends/Vulkan/ObjectCache.h" -#include "VideoBackends/Vulkan/ShaderCache.h" -#include "VideoBackends/Vulkan/ShaderCompiler.h" -#include "VideoBackends/Vulkan/StateTracker.h" -#include "VideoBackends/Vulkan/StreamBuffer.h" -#include "VideoBackends/Vulkan/VulkanContext.h" - -namespace Vulkan -{ -namespace Util -{ -size_t AlignBufferOffset(size_t offset, size_t alignment) -{ - // Assume an offset of zero is already aligned to a value larger than alignment. - if (offset == 0) - return 0; - - return Common::AlignUp(offset, alignment); -} - -u32 MakeRGBA8Color(float r, float g, float b, float a) -{ - return (static_cast(MathUtil::Clamp(static_cast(r * 255.0f), 0, 255)) << 0) | - (static_cast(MathUtil::Clamp(static_cast(g * 255.0f), 0, 255)) << 8) | - (static_cast(MathUtil::Clamp(static_cast(b * 255.0f), 0, 255)) << 16) | - (static_cast(MathUtil::Clamp(static_cast(a * 255.0f), 0, 255)) << 24); -} - -bool IsDepthFormat(VkFormat format) -{ - switch (format) - { - case VK_FORMAT_D16_UNORM: - case VK_FORMAT_D16_UNORM_S8_UINT: - case VK_FORMAT_D24_UNORM_S8_UINT: - case VK_FORMAT_D32_SFLOAT: - case VK_FORMAT_D32_SFLOAT_S8_UINT: - return true; - default: - return false; - } -} - -bool IsCompressedFormat(VkFormat format) -{ - switch (format) - { - case VK_FORMAT_BC1_RGBA_UNORM_BLOCK: - case VK_FORMAT_BC2_UNORM_BLOCK: - case VK_FORMAT_BC3_UNORM_BLOCK: - case VK_FORMAT_BC7_UNORM_BLOCK: - return true; - - default: - return false; - } -} - -VkFormat GetLinearFormat(VkFormat format) -{ - switch (format) - { - case VK_FORMAT_R8_SRGB: - return VK_FORMAT_R8_UNORM; - case VK_FORMAT_R8G8_SRGB: - return VK_FORMAT_R8G8_UNORM; - case VK_FORMAT_R8G8B8_SRGB: - return VK_FORMAT_R8G8B8_UNORM; - case VK_FORMAT_R8G8B8A8_SRGB: - 
return VK_FORMAT_R8G8B8A8_UNORM; - case VK_FORMAT_B8G8R8_SRGB: - return VK_FORMAT_B8G8R8_UNORM; - case VK_FORMAT_B8G8R8A8_SRGB: - return VK_FORMAT_B8G8R8A8_UNORM; - default: - return format; - } -} - -VkFormat GetVkFormatForHostTextureFormat(AbstractTextureFormat format) -{ - switch (format) - { - case AbstractTextureFormat::DXT1: - return VK_FORMAT_BC1_RGBA_UNORM_BLOCK; - - case AbstractTextureFormat::DXT3: - return VK_FORMAT_BC2_UNORM_BLOCK; - - case AbstractTextureFormat::DXT5: - return VK_FORMAT_BC3_UNORM_BLOCK; - - case AbstractTextureFormat::BPTC: - return VK_FORMAT_BC7_UNORM_BLOCK; - - case AbstractTextureFormat::RGBA8: - return VK_FORMAT_R8G8B8A8_UNORM; - - case AbstractTextureFormat::BGRA8: - return VK_FORMAT_B8G8R8A8_UNORM; - - case AbstractTextureFormat::R16: - return VK_FORMAT_R16_UNORM; - - case AbstractTextureFormat::D16: - return VK_FORMAT_D16_UNORM; - - case AbstractTextureFormat::D24_S8: - return VK_FORMAT_D24_UNORM_S8_UINT; - - case AbstractTextureFormat::R32F: - return VK_FORMAT_R32_SFLOAT; - - case AbstractTextureFormat::D32F: - return VK_FORMAT_D32_SFLOAT; - - case AbstractTextureFormat::D32F_S8: - return VK_FORMAT_D32_SFLOAT_S8_UINT; - - case AbstractTextureFormat::Undefined: - return VK_FORMAT_UNDEFINED; - - default: - PanicAlert("Unhandled texture format."); - return VK_FORMAT_R8G8B8A8_UNORM; - } -} - -VkImageAspectFlags GetImageAspectForFormat(VkFormat format) -{ - switch (format) - { - case VK_FORMAT_D16_UNORM_S8_UINT: - case VK_FORMAT_D24_UNORM_S8_UINT: - case VK_FORMAT_D32_SFLOAT_S8_UINT: - return VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT; - - case VK_FORMAT_D16_UNORM: - case VK_FORMAT_D32_SFLOAT: - return VK_IMAGE_ASPECT_DEPTH_BIT; - - default: - return VK_IMAGE_ASPECT_COLOR_BIT; - } -} - -u32 GetTexelSize(VkFormat format) -{ - // Only contains pixel formats we use. 
- switch (format) - { - case VK_FORMAT_R32_SFLOAT: - return 4; - - case VK_FORMAT_D32_SFLOAT: - return 4; - - case VK_FORMAT_R8G8B8A8_UNORM: - return 4; - - case VK_FORMAT_B8G8R8A8_UNORM: - return 4; - - case VK_FORMAT_BC1_RGBA_UNORM_BLOCK: - return 8; - - case VK_FORMAT_BC2_UNORM_BLOCK: - case VK_FORMAT_BC3_UNORM_BLOCK: - case VK_FORMAT_BC7_UNORM_BLOCK: - return 16; - - default: - PanicAlert("Unhandled pixel format"); - return 1; - } -} - -u32 GetBlockSize(VkFormat format) -{ - switch (format) - { - case VK_FORMAT_BC1_RGBA_UNORM_BLOCK: - case VK_FORMAT_BC2_UNORM_BLOCK: - case VK_FORMAT_BC3_UNORM_BLOCK: - case VK_FORMAT_BC7_UNORM_BLOCK: - return 4; - - default: - return 1; - } -} - -VkRect2D ClampRect2D(const VkRect2D& rect, u32 width, u32 height) -{ - VkRect2D out; - out.offset.x = MathUtil::Clamp(rect.offset.x, 0, static_cast(width - 1)); - out.offset.y = MathUtil::Clamp(rect.offset.y, 0, static_cast(height - 1)); - out.extent.width = std::min(rect.extent.width, width - static_cast(rect.offset.x)); - out.extent.height = std::min(rect.extent.height, height - static_cast(rect.offset.y)); - return out; -} - -VkBlendFactor GetAlphaBlendFactor(VkBlendFactor factor) -{ - switch (factor) - { - case VK_BLEND_FACTOR_SRC_COLOR: - return VK_BLEND_FACTOR_SRC_ALPHA; - case VK_BLEND_FACTOR_ONE_MINUS_SRC_COLOR: - return VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA; - case VK_BLEND_FACTOR_DST_COLOR: - return VK_BLEND_FACTOR_DST_ALPHA; - case VK_BLEND_FACTOR_ONE_MINUS_DST_COLOR: - return VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA; - default: - return factor; - } -} - -void SetViewportAndScissor(VkCommandBuffer command_buffer, int x, int y, int width, int height, - float min_depth /*= 0.0f*/, float max_depth /*= 1.0f*/) -{ - VkViewport viewport = {static_cast(x), - static_cast(y), - static_cast(width), - static_cast(height), - min_depth, - max_depth}; - - VkRect2D scissor = {{x, y}, {static_cast(width), static_cast(height)}}; - - vkCmdSetViewport(command_buffer, 0, 1, &viewport); - 
vkCmdSetScissor(command_buffer, 0, 1, &scissor); -} - -void BufferMemoryBarrier(VkCommandBuffer command_buffer, VkBuffer buffer, - VkAccessFlags src_access_mask, VkAccessFlags dst_access_mask, - VkDeviceSize offset, VkDeviceSize size, - VkPipelineStageFlags src_stage_mask, VkPipelineStageFlags dst_stage_mask) -{ - VkBufferMemoryBarrier buffer_info = { - VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, // VkStructureType sType - nullptr, // const void* pNext - src_access_mask, // VkAccessFlags srcAccessMask - dst_access_mask, // VkAccessFlags dstAccessMask - VK_QUEUE_FAMILY_IGNORED, // uint32_t srcQueueFamilyIndex - VK_QUEUE_FAMILY_IGNORED, // uint32_t dstQueueFamilyIndex - buffer, // VkBuffer buffer - offset, // VkDeviceSize offset - size // VkDeviceSize size - }; - - vkCmdPipelineBarrier(command_buffer, src_stage_mask, dst_stage_mask, 0, 0, nullptr, 1, - &buffer_info, 0, nullptr); -} - -void ExecuteCurrentCommandsAndRestoreState(bool execute_off_thread, bool wait_for_completion) -{ - StateTracker::GetInstance()->EndRenderPass(); - g_command_buffer_mgr->ExecuteCommandBuffer(execute_off_thread, wait_for_completion); - StateTracker::GetInstance()->InvalidateDescriptorSets(); - StateTracker::GetInstance()->InvalidateConstants(); - StateTracker::GetInstance()->SetPendingRebind(); -} - -VkShaderModule CreateShaderModule(const u32* spv, size_t spv_word_count) -{ - VkShaderModuleCreateInfo info = {}; - info.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO; - info.codeSize = spv_word_count * sizeof(u32); - info.pCode = spv; - - VkShaderModule module; - VkResult res = vkCreateShaderModule(g_vulkan_context->GetDevice(), &info, nullptr, &module); - if (res != VK_SUCCESS) - { - LOG_VULKAN_ERROR(res, "vkCreateShaderModule failed: "); - return VK_NULL_HANDLE; - } - - return module; -} - -VkShaderModule CompileAndCreateVertexShader(const std::string& source_code) -{ - ShaderCompiler::SPIRVCodeVector code; - if (!ShaderCompiler::CompileVertexShader(&code, source_code.c_str(), 
source_code.length())) - return VK_NULL_HANDLE; - - return CreateShaderModule(code.data(), code.size()); -} - -VkShaderModule CompileAndCreateGeometryShader(const std::string& source_code) -{ - ShaderCompiler::SPIRVCodeVector code; - if (!ShaderCompiler::CompileGeometryShader(&code, source_code.c_str(), source_code.length())) - return VK_NULL_HANDLE; - - return CreateShaderModule(code.data(), code.size()); -} - -VkShaderModule CompileAndCreateFragmentShader(const std::string& source_code) -{ - ShaderCompiler::SPIRVCodeVector code; - if (!ShaderCompiler::CompileFragmentShader(&code, source_code.c_str(), source_code.length())) - return VK_NULL_HANDLE; - - return CreateShaderModule(code.data(), code.size()); -} - -VkShaderModule CompileAndCreateComputeShader(const std::string& source_code) -{ - ShaderCompiler::SPIRVCodeVector code; - if (!ShaderCompiler::CompileComputeShader(&code, source_code.c_str(), source_code.length())) - return VK_NULL_HANDLE; - - return CreateShaderModule(code.data(), code.size()); -} - -} // namespace Util - -UtilityShaderDraw::UtilityShaderDraw(VkCommandBuffer command_buffer, - VkPipelineLayout pipeline_layout, VkRenderPass render_pass, - VkShaderModule vertex_shader, VkShaderModule geometry_shader, - VkShaderModule pixel_shader, PrimitiveType primitive) - : m_command_buffer(command_buffer) -{ - // Populate minimal pipeline state - m_pipeline_info.vertex_format = g_object_cache->GetUtilityShaderVertexFormat(); - m_pipeline_info.pipeline_layout = pipeline_layout; - m_pipeline_info.render_pass = render_pass; - m_pipeline_info.vs = vertex_shader; - m_pipeline_info.gs = geometry_shader; - m_pipeline_info.ps = pixel_shader; - m_pipeline_info.rasterization_state.hex = RenderState::GetNoCullRasterizationState().hex; - m_pipeline_info.rasterization_state.primitive = primitive; - m_pipeline_info.depth_state.hex = RenderState::GetNoDepthTestingDepthStencilState().hex; - m_pipeline_info.blend_state.hex = RenderState::GetNoBlendingBlendState().hex; - 
m_pipeline_info.multisampling_state.per_sample_shading = false; - m_pipeline_info.multisampling_state.samples = 1; -} - -UtilityShaderVertex* UtilityShaderDraw::ReserveVertices(size_t count) -{ - if (!g_object_cache->GetUtilityShaderVertexBuffer()->ReserveMemory( - sizeof(UtilityShaderVertex) * count, sizeof(UtilityShaderVertex), true, true, true)) - PanicAlert("Failed to allocate space for vertices in backend shader"); - - m_vertex_buffer = g_object_cache->GetUtilityShaderVertexBuffer()->GetBuffer(); - m_vertex_buffer_offset = g_object_cache->GetUtilityShaderVertexBuffer()->GetCurrentOffset(); - - return reinterpret_cast( - g_object_cache->GetUtilityShaderVertexBuffer()->GetCurrentHostPointer()); -} - -void UtilityShaderDraw::CommitVertices(size_t count) -{ - g_object_cache->GetUtilityShaderVertexBuffer()->CommitMemory(sizeof(UtilityShaderVertex) * count); - m_vertex_count = static_cast(count); -} - -void UtilityShaderDraw::UploadVertices(UtilityShaderVertex* vertices, size_t count) -{ - UtilityShaderVertex* upload_vertices = ReserveVertices(count); - memcpy(upload_vertices, vertices, sizeof(UtilityShaderVertex) * count); - CommitVertices(count); -} - -u8* UtilityShaderDraw::AllocateVSUniforms(size_t size) -{ - if (!g_object_cache->GetUtilityShaderUniformBuffer()->ReserveMemory( - size, g_vulkan_context->GetUniformBufferAlignment(), true, true, true)) - PanicAlert("Failed to allocate util uniforms"); - - return g_object_cache->GetUtilityShaderUniformBuffer()->GetCurrentHostPointer(); -} - -void UtilityShaderDraw::CommitVSUniforms(size_t size) -{ - m_vs_uniform_buffer.buffer = g_object_cache->GetUtilityShaderUniformBuffer()->GetBuffer(); - m_vs_uniform_buffer.offset = 0; - m_vs_uniform_buffer.range = size; - m_ubo_offsets[UBO_DESCRIPTOR_SET_BINDING_VS] = - static_cast(g_object_cache->GetUtilityShaderUniformBuffer()->GetCurrentOffset()); - - g_object_cache->GetUtilityShaderUniformBuffer()->CommitMemory(size); -} - -u8* UtilityShaderDraw::AllocatePSUniforms(size_t 
size) -{ - if (!g_object_cache->GetUtilityShaderUniformBuffer()->ReserveMemory( - size, g_vulkan_context->GetUniformBufferAlignment(), true, true, true)) - PanicAlert("Failed to allocate util uniforms"); - - return g_object_cache->GetUtilityShaderUniformBuffer()->GetCurrentHostPointer(); -} - -void UtilityShaderDraw::CommitPSUniforms(size_t size) -{ - m_ps_uniform_buffer.buffer = g_object_cache->GetUtilityShaderUniformBuffer()->GetBuffer(); - m_ps_uniform_buffer.offset = 0; - m_ps_uniform_buffer.range = size; - m_ubo_offsets[UBO_DESCRIPTOR_SET_BINDING_PS] = - static_cast(g_object_cache->GetUtilityShaderUniformBuffer()->GetCurrentOffset()); - - g_object_cache->GetUtilityShaderUniformBuffer()->CommitMemory(size); -} - -void UtilityShaderDraw::SetPushConstants(const void* data, size_t data_size) -{ - ASSERT(static_cast(data_size) < PUSH_CONSTANT_BUFFER_SIZE); - - vkCmdPushConstants(m_command_buffer, m_pipeline_info.pipeline_layout, - VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT, 0, - static_cast(data_size), data); -} - -void UtilityShaderDraw::SetPSSampler(size_t index, VkImageView view, VkSampler sampler) -{ - m_ps_samplers[index].sampler = sampler; - m_ps_samplers[index].imageView = view; - m_ps_samplers[index].imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; -} - -void UtilityShaderDraw::SetPSTexelBuffer(VkBufferView view) -{ - // Should only be used with the texture conversion pipeline layout. 
- ASSERT(m_pipeline_info.pipeline_layout == - g_object_cache->GetPipelineLayout(PIPELINE_LAYOUT_TEXTURE_CONVERSION)); - - m_ps_texel_buffer = view; -} - -void UtilityShaderDraw::SetRasterizationState(const RasterizationState& state) -{ - m_pipeline_info.rasterization_state.hex = state.hex; -} - -void UtilityShaderDraw::SetMultisamplingState(const MultisamplingState& state) -{ - m_pipeline_info.multisampling_state.hex = state.hex; -} - -void UtilityShaderDraw::SetDepthState(const DepthState& state) -{ - m_pipeline_info.depth_state.hex = state.hex; -} - -void UtilityShaderDraw::SetBlendState(const BlendingState& state) -{ - m_pipeline_info.blend_state.hex = state.hex; -} - -void UtilityShaderDraw::BeginRenderPass(VkFramebuffer framebuffer, const VkRect2D& region, - const VkClearValue* clear_value) -{ - VkRenderPassBeginInfo begin_info = {VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO, - nullptr, - m_pipeline_info.render_pass, - framebuffer, - region, - clear_value ? 1u : 0u, - clear_value}; - - vkCmdBeginRenderPass(m_command_buffer, &begin_info, VK_SUBPASS_CONTENTS_INLINE); -} - -void UtilityShaderDraw::EndRenderPass() -{ - vkCmdEndRenderPass(m_command_buffer); -} - -void UtilityShaderDraw::Draw() -{ - BindVertexBuffer(); - BindDescriptors(); - if (!BindPipeline()) - return; - - vkCmdDraw(m_command_buffer, m_vertex_count, 1, 0, 0); -} - -void UtilityShaderDraw::DrawQuad(int x, int y, int width, int height, float z) -{ - UtilityShaderVertex vertices[4]; - vertices[0].SetPosition(-1.0f, 1.0f, z); - vertices[0].SetTextureCoordinates(0.0f, 1.0f); - vertices[0].SetColor(1.0f, 1.0f, 1.0f, 1.0f); - vertices[1].SetPosition(1.0f, 1.0f, z); - vertices[1].SetTextureCoordinates(1.0f, 1.0f); - vertices[1].SetColor(1.0f, 1.0f, 1.0f, 1.0f); - vertices[2].SetPosition(-1.0f, -1.0f, z); - vertices[2].SetTextureCoordinates(0.0f, 0.0f); - vertices[2].SetColor(1.0f, 1.0f, 1.0f, 1.0f); - vertices[3].SetPosition(1.0f, -1.0f, z); - vertices[3].SetTextureCoordinates(1.0f, 0.0f); - 
vertices[3].SetColor(1.0f, 1.0f, 1.0f, 1.0f); - - Util::SetViewportAndScissor(m_command_buffer, x, y, width, height); - UploadVertices(vertices, ArraySize(vertices)); - Draw(); -} - -void UtilityShaderDraw::DrawQuad(int dst_x, int dst_y, int dst_width, int dst_height, int src_x, - int src_y, int src_layer, int src_width, int src_height, - int src_full_width, int src_full_height, float z) -{ - float u0 = float(src_x) / float(src_full_width); - float v0 = float(src_y) / float(src_full_height); - float u1 = float(src_x + src_width) / float(src_full_width); - float v1 = float(src_y + src_height) / float(src_full_height); - float w = static_cast(src_layer); - - UtilityShaderVertex vertices[4]; - vertices[0].SetPosition(-1.0f, 1.0f, z); - vertices[0].SetTextureCoordinates(u0, v1, w); - vertices[0].SetColor(1.0f, 1.0f, 1.0f, 1.0f); - vertices[1].SetPosition(1.0f, 1.0f, z); - vertices[1].SetTextureCoordinates(u1, v1, w); - vertices[1].SetColor(1.0f, 1.0f, 1.0f, 1.0f); - vertices[2].SetPosition(-1.0f, -1.0f, z); - vertices[2].SetTextureCoordinates(u0, v0, w); - vertices[2].SetColor(1.0f, 1.0f, 1.0f, 1.0f); - vertices[3].SetPosition(1.0f, -1.0f, z); - vertices[3].SetTextureCoordinates(u1, v0, w); - vertices[3].SetColor(1.0f, 1.0f, 1.0f, 1.0f); - - Util::SetViewportAndScissor(m_command_buffer, dst_x, dst_y, dst_width, dst_height); - UploadVertices(vertices, ArraySize(vertices)); - Draw(); -} - -void UtilityShaderDraw::DrawColoredQuad(int x, int y, int width, int height, float r, float g, - float b, float a, float z) -{ - return DrawColoredQuad(x, y, width, height, Util::MakeRGBA8Color(r, g, b, a), z); -} - -void UtilityShaderDraw::DrawColoredQuad(int x, int y, int width, int height, u32 color, float z) -{ - UtilityShaderVertex vertices[4]; - vertices[0].SetPosition(-1.0f, 1.0f, z); - vertices[0].SetTextureCoordinates(0.0f, 1.0f); - vertices[0].SetColor(color); - vertices[1].SetPosition(1.0f, 1.0f, z); - vertices[1].SetTextureCoordinates(1.0f, 1.0f); - 
vertices[1].SetColor(color); - vertices[2].SetPosition(-1.0f, -1.0f, z); - vertices[2].SetTextureCoordinates(0.0f, 0.0f); - vertices[2].SetColor(color); - vertices[3].SetPosition(1.0f, -1.0f, z); - vertices[3].SetTextureCoordinates(1.0f, 0.0f); - vertices[3].SetColor(color); - - Util::SetViewportAndScissor(m_command_buffer, x, y, width, height); - UploadVertices(vertices, ArraySize(vertices)); - Draw(); -} - -void UtilityShaderDraw::SetViewportAndScissor(int x, int y, int width, int height) -{ - Util::SetViewportAndScissor(m_command_buffer, x, y, width, height, 0.0f, 1.0f); -} - -void UtilityShaderDraw::DrawWithoutVertexBuffer(u32 vertex_count) -{ - m_pipeline_info.vertex_format = nullptr; - - BindDescriptors(); - if (!BindPipeline()) - return; - - vkCmdDraw(m_command_buffer, vertex_count, 1, 0, 0); -} - -void UtilityShaderDraw::BindVertexBuffer() -{ - vkCmdBindVertexBuffers(m_command_buffer, 0, 1, &m_vertex_buffer, &m_vertex_buffer_offset); -} - -void UtilityShaderDraw::BindDescriptors() -{ - // TODO: This method is a mess, clean it up - std::array bind_descriptor_sets = {}; - std::array set_writes = {}; - uint32_t num_set_writes = 0; - - VkDescriptorBufferInfo dummy_uniform_buffer = { - g_object_cache->GetUtilityShaderUniformBuffer()->GetBuffer(), 0, 1}; - - // uniform buffers - if (m_vs_uniform_buffer.buffer != VK_NULL_HANDLE || m_ps_uniform_buffer.buffer != VK_NULL_HANDLE) - { - VkDescriptorSet set = g_command_buffer_mgr->AllocateDescriptorSet( - g_object_cache->GetDescriptorSetLayout(DESCRIPTOR_SET_LAYOUT_PER_STAGE_UNIFORM_BUFFERS)); - if (set == VK_NULL_HANDLE) - PanicAlert("Failed to allocate descriptor set for utility draw"); - - set_writes[num_set_writes++] = {VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, - nullptr, - set, - UBO_DESCRIPTOR_SET_BINDING_VS, - 0, - 1, - VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC, - nullptr, - (m_vs_uniform_buffer.buffer != VK_NULL_HANDLE) ? 
- &m_vs_uniform_buffer : - &dummy_uniform_buffer, - nullptr}; - - if (g_vulkan_context->SupportsGeometryShaders()) - { - set_writes[num_set_writes++] = {VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, - nullptr, - set, - UBO_DESCRIPTOR_SET_BINDING_GS, - 0, - 1, - VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC, - nullptr, - &dummy_uniform_buffer, - nullptr}; - } - - set_writes[num_set_writes++] = {VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, - nullptr, - set, - UBO_DESCRIPTOR_SET_BINDING_PS, - 0, - 1, - VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC, - nullptr, - (m_ps_uniform_buffer.buffer != VK_NULL_HANDLE) ? - &m_ps_uniform_buffer : - &dummy_uniform_buffer, - nullptr}; - - bind_descriptor_sets[DESCRIPTOR_SET_BIND_POINT_UNIFORM_BUFFERS] = set; - } - - // PS samplers - size_t first_active_sampler; - for (first_active_sampler = 0; first_active_sampler < NUM_PIXEL_SHADER_SAMPLERS; - first_active_sampler++) - { - if (m_ps_samplers[first_active_sampler].imageView != VK_NULL_HANDLE && - m_ps_samplers[first_active_sampler].sampler != VK_NULL_HANDLE) - { - break; - } - } - - // Check if we have any at all, skip the binding process entirely if we don't - if (first_active_sampler != NUM_PIXEL_SHADER_SAMPLERS) - { - // We need to fill it with non-empty images. 
- for (size_t i = 0; i < NUM_PIXEL_SHADER_SAMPLERS; i++) - { - if (m_ps_samplers[i].imageView == VK_NULL_HANDLE) - { - m_ps_samplers[i].imageView = g_object_cache->GetDummyImageView(); - m_ps_samplers[i].sampler = g_object_cache->GetPointSampler(); - } - } - - // Allocate a new descriptor set - VkDescriptorSet set = g_command_buffer_mgr->AllocateDescriptorSet( - g_object_cache->GetDescriptorSetLayout(DESCRIPTOR_SET_LAYOUT_PIXEL_SHADER_SAMPLERS)); - if (set == VK_NULL_HANDLE) - PanicAlert("Failed to allocate descriptor set for utility draw"); - - set_writes[num_set_writes++] = {VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, - nullptr, - set, - 0, - 0, - static_cast(NUM_PIXEL_SHADER_SAMPLERS), - VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, - m_ps_samplers.data(), - nullptr, - nullptr}; - - bind_descriptor_sets[DESCRIPTOR_SET_BIND_POINT_PIXEL_SHADER_SAMPLERS] = set; - } - - vkUpdateDescriptorSets(g_vulkan_context->GetDevice(), num_set_writes, set_writes.data(), 0, - nullptr); - - if (m_ps_texel_buffer != VK_NULL_HANDLE) - { - // TODO: Handle case where this fails. - // This'll only be when we do over say, 1024 allocations per frame, which shouldn't happen. - // TODO: Execute the command buffer, reset render passes and then try again. 
- VkDescriptorSet set = g_command_buffer_mgr->AllocateDescriptorSet( - g_object_cache->GetDescriptorSetLayout(DESCRIPTOR_SET_LAYOUT_TEXEL_BUFFERS)); - if (set == VK_NULL_HANDLE) - { - PanicAlert("Failed to allocate texel buffer descriptor set for utility draw"); - return; - } - - VkWriteDescriptorSet set_write = {VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, - nullptr, - set, - 0, - 0, - 1, - VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, - nullptr, - nullptr, - &m_ps_texel_buffer}; - vkUpdateDescriptorSets(g_vulkan_context->GetDevice(), 1, &set_write, 0, nullptr); - bind_descriptor_sets[DESCRIPTOR_SET_BIND_POINT_STORAGE_OR_TEXEL_BUFFER] = set; - } - - // Fast path when there are no gaps in the set bindings - u32 bind_point_index; - for (bind_point_index = 0; bind_point_index < NUM_DESCRIPTOR_SET_BIND_POINTS; bind_point_index++) - { - if (bind_descriptor_sets[bind_point_index] == VK_NULL_HANDLE) - break; - } - if (bind_point_index > 0) - { - // Bind the contiguous sets, any others after any gaps will be handled below - vkCmdBindDescriptorSets(m_command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, - m_pipeline_info.pipeline_layout, 0, bind_point_index, - &bind_descriptor_sets[0], NUM_UBO_DESCRIPTOR_SET_BINDINGS, - m_ubo_offsets.data()); - } - - // Handle any remaining sets - for (u32 i = bind_point_index; i < NUM_DESCRIPTOR_SET_BIND_POINTS; i++) - { - if (bind_descriptor_sets[i] == VK_NULL_HANDLE) - continue; - - // No need to worry about dynamic offsets here, since #0 will always be bound above. 
- vkCmdBindDescriptorSets(m_command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, - m_pipeline_info.pipeline_layout, i, 1, &bind_descriptor_sets[i], 0, - nullptr); - } -} - -bool UtilityShaderDraw::BindPipeline() -{ - VkPipeline pipeline = g_shader_cache->GetPipeline(m_pipeline_info); - if (pipeline == VK_NULL_HANDLE) - { - PanicAlert("Failed to get pipeline for backend shader draw"); - return false; - } - - vkCmdBindPipeline(m_command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline); - return true; -} - -ComputeShaderDispatcher::ComputeShaderDispatcher(VkCommandBuffer command_buffer, - VkPipelineLayout pipeline_layout, - VkShaderModule compute_shader) - : m_command_buffer(command_buffer) -{ - // Populate minimal pipeline state - m_pipeline_info.pipeline_layout = pipeline_layout; - m_pipeline_info.cs = compute_shader; -} - -u8* ComputeShaderDispatcher::AllocateUniformBuffer(size_t size) -{ - if (!g_object_cache->GetUtilityShaderUniformBuffer()->ReserveMemory( - size, g_vulkan_context->GetUniformBufferAlignment(), true, true, true)) - PanicAlert("Failed to allocate util uniforms"); - - return g_object_cache->GetUtilityShaderUniformBuffer()->GetCurrentHostPointer(); -} - -void ComputeShaderDispatcher::CommitUniformBuffer(size_t size) -{ - m_uniform_buffer.buffer = g_object_cache->GetUtilityShaderUniformBuffer()->GetBuffer(); - m_uniform_buffer.offset = 0; - m_uniform_buffer.range = size; - m_uniform_buffer_offset = - static_cast(g_object_cache->GetUtilityShaderUniformBuffer()->GetCurrentOffset()); - - g_object_cache->GetUtilityShaderUniformBuffer()->CommitMemory(size); -} - -void ComputeShaderDispatcher::SetPushConstants(const void* data, size_t data_size) -{ - ASSERT(static_cast(data_size) < PUSH_CONSTANT_BUFFER_SIZE); - - vkCmdPushConstants(m_command_buffer, m_pipeline_info.pipeline_layout, VK_SHADER_STAGE_COMPUTE_BIT, - 0, static_cast(data_size), data); -} - -void ComputeShaderDispatcher::SetSampler(size_t index, VkImageView view, VkSampler sampler) -{ - 
m_samplers[index].sampler = sampler; - m_samplers[index].imageView = view; - m_samplers[index].imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; -} - -void ComputeShaderDispatcher::SetStorageImage(VkImageView view, VkImageLayout image_layout) -{ - m_storage_image.sampler = VK_NULL_HANDLE; - m_storage_image.imageView = view; - m_storage_image.imageLayout = image_layout; -} - -void ComputeShaderDispatcher::SetTexelBuffer(size_t index, VkBufferView view) -{ - m_texel_buffers[index] = view; -} - -void ComputeShaderDispatcher::Dispatch(u32 groups_x, u32 groups_y, u32 groups_z) -{ - BindDescriptors(); - if (!BindPipeline()) - return; - - vkCmdDispatch(m_command_buffer, groups_x, groups_y, groups_z); -} - -void ComputeShaderDispatcher::BindDescriptors() -{ - VkDescriptorSet set = g_command_buffer_mgr->AllocateDescriptorSet( - g_object_cache->GetDescriptorSetLayout(DESCRIPTOR_SET_LAYOUT_COMPUTE)); - if (set == VK_NULL_HANDLE) - { - PanicAlert("Failed to allocate descriptor set for compute dispatch"); - return; - } - - // Reserve enough descriptors to write every binding. 
- std::array set_writes = {}; - u32 num_set_writes = 0; - - if (m_uniform_buffer.buffer != VK_NULL_HANDLE) - { - set_writes[num_set_writes++] = {VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, - nullptr, - set, - 0, - 0, - 1, - VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC, - nullptr, - &m_uniform_buffer, - nullptr}; - } - - // Samplers - for (size_t i = 0; i < m_samplers.size(); i++) - { - const VkDescriptorImageInfo& info = m_samplers[i]; - if (info.imageView != VK_NULL_HANDLE && info.sampler != VK_NULL_HANDLE) - { - set_writes[num_set_writes++] = {VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, - nullptr, - set, - static_cast(1 + i), - 0, - 1, - VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, - &info, - nullptr, - nullptr}; - } - } - - for (size_t i = 0; i < m_texel_buffers.size(); i++) - { - if (m_texel_buffers[i] != VK_NULL_HANDLE) - { - set_writes[num_set_writes++] = { - VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, nullptr, set, 5 + static_cast(i), 0, 1, - VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, nullptr, nullptr, &m_texel_buffers[i]}; - } - } - - if (m_storage_image.imageView != VK_NULL_HANDLE) - { - set_writes[num_set_writes++] = { - VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, nullptr, set, 7, 0, 1, - VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, &m_storage_image, nullptr, nullptr}; - } - - if (num_set_writes > 0) - { - vkUpdateDescriptorSets(g_vulkan_context->GetDevice(), num_set_writes, set_writes.data(), 0, - nullptr); - } - - vkCmdBindDescriptorSets(m_command_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, - m_pipeline_info.pipeline_layout, 0, 1, &set, 1, &m_uniform_buffer_offset); -} - -bool ComputeShaderDispatcher::BindPipeline() -{ - VkPipeline pipeline = g_shader_cache->GetComputePipeline(m_pipeline_info); - if (pipeline == VK_NULL_HANDLE) - { - PanicAlert("Failed to get pipeline for backend compute dispatch"); - return false; - } - - vkCmdBindPipeline(m_command_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline); - return true; -} - -} // namespace Vulkan diff --git 
a/Source/Core/VideoBackends/Vulkan/Util.h b/Source/Core/VideoBackends/Vulkan/Util.h deleted file mode 100644 index ced63ef06b..0000000000 --- a/Source/Core/VideoBackends/Vulkan/Util.h +++ /dev/null @@ -1,235 +0,0 @@ -// Copyright 2016 Dolphin Emulator Project -// Licensed under GPLv2+ -// Refer to the license.txt file included. - -#pragma once - -#include -#include - -#include "Common/CommonTypes.h" -#include "VideoBackends/Vulkan/Constants.h" -#include "VideoBackends/Vulkan/ObjectCache.h" -#include "VideoBackends/Vulkan/ShaderCache.h" -#include "VideoCommon/RenderState.h" -#include "VideoCommon/TextureConfig.h" - -namespace Vulkan -{ -class CommandBufferManager; -class StateTracker; - -namespace Util -{ -size_t AlignBufferOffset(size_t offset, size_t alignment); - -u32 MakeRGBA8Color(float r, float g, float b, float a); - -bool IsDepthFormat(VkFormat format); -bool IsCompressedFormat(VkFormat format); -VkFormat GetLinearFormat(VkFormat format); -VkFormat GetVkFormatForHostTextureFormat(AbstractTextureFormat format); -VkImageAspectFlags GetImageAspectForFormat(VkFormat format); -u32 GetTexelSize(VkFormat format); -u32 GetBlockSize(VkFormat format); - -// Clamps a VkRect2D to the specified dimensions. 
-VkRect2D ClampRect2D(const VkRect2D& rect, u32 width, u32 height); - -// Map {SRC,DST}_COLOR to {SRC,DST}_ALPHA -VkBlendFactor GetAlphaBlendFactor(VkBlendFactor factor); - -// Combines viewport and scissor updates -void SetViewportAndScissor(VkCommandBuffer command_buffer, int x, int y, int width, int height, - float min_depth = 0.0f, float max_depth = 1.0f); - -// Wrapper for creating an barrier on a buffer -void BufferMemoryBarrier(VkCommandBuffer command_buffer, VkBuffer buffer, - VkAccessFlags src_access_mask, VkAccessFlags dst_access_mask, - VkDeviceSize offset, VkDeviceSize size, - VkPipelineStageFlags src_stage_mask, VkPipelineStageFlags dst_stage_mask); - -// Completes the current render pass, executes the command buffer, and restores state ready for next -// render. Use when you want to kick the current buffer to make room for new data. -void ExecuteCurrentCommandsAndRestoreState(bool execute_off_thread, - bool wait_for_completion = false); - -// Create a shader module from the specified SPIR-V. -VkShaderModule CreateShaderModule(const u32* spv, size_t spv_word_count); - -// Compile a vertex shader and create a shader module, discarding the intermediate SPIR-V. -VkShaderModule CompileAndCreateVertexShader(const std::string& source_code); - -// Compile a geometry shader and create a shader module, discarding the intermediate SPIR-V. -VkShaderModule CompileAndCreateGeometryShader(const std::string& source_code); - -// Compile a fragment shader and create a shader module, discarding the intermediate SPIR-V. -VkShaderModule CompileAndCreateFragmentShader(const std::string& source_code); - -// Compile a compute shader and create a shader module, discarding the intermediate SPIR-V. 
-VkShaderModule CompileAndCreateComputeShader(const std::string& source_code); -} - -// Utility shader vertex format -#pragma pack(push, 1) -struct UtilityShaderVertex -{ - float Position[4]; - float TexCoord[4]; - u32 Color; - - void SetPosition(float x, float y) - { - Position[0] = x; - Position[1] = y; - Position[2] = 0.0f; - Position[3] = 1.0f; - } - void SetPosition(float x, float y, float z) - { - Position[0] = x; - Position[1] = y; - Position[2] = z; - Position[3] = 1.0f; - } - void SetTextureCoordinates(float u, float v) - { - TexCoord[0] = u; - TexCoord[1] = v; - TexCoord[2] = 0.0f; - TexCoord[3] = 0.0f; - } - void SetTextureCoordinates(float u, float v, float w) - { - TexCoord[0] = u; - TexCoord[1] = v; - TexCoord[2] = w; - TexCoord[3] = 0.0f; - } - void SetTextureCoordinates(float u, float v, float w, float x) - { - TexCoord[0] = u; - TexCoord[1] = v; - TexCoord[2] = w; - TexCoord[3] = x; - } - void SetColor(u32 color) { Color = color; } - void SetColor(float r, float g, float b) { Color = Util::MakeRGBA8Color(r, g, b, 1.0f); } - void SetColor(float r, float g, float b, float a) { Color = Util::MakeRGBA8Color(r, g, b, a); } -}; -#pragma pack(pop) - -class UtilityShaderDraw -{ -public: - UtilityShaderDraw(VkCommandBuffer command_buffer, VkPipelineLayout pipeline_layout, - VkRenderPass render_pass, VkShaderModule vertex_shader, - VkShaderModule geometry_shader, VkShaderModule pixel_shader, - PrimitiveType primitive = PrimitiveType::TriangleStrip); - - UtilityShaderVertex* ReserveVertices(size_t count); - void CommitVertices(size_t count); - - void UploadVertices(UtilityShaderVertex* vertices, size_t count); - - u8* AllocateVSUniforms(size_t size); - void CommitVSUniforms(size_t size); - - u8* AllocatePSUniforms(size_t size); - void CommitPSUniforms(size_t size); - - void SetPushConstants(const void* data, size_t data_size); - - void SetPSSampler(size_t index, VkImageView view, VkSampler sampler); - - void SetPSTexelBuffer(VkBufferView view); - - void 
SetRasterizationState(const RasterizationState& state); - void SetMultisamplingState(const MultisamplingState& state); - void SetDepthState(const DepthState& state); - void SetBlendState(const BlendingState& state); - - void BeginRenderPass(VkFramebuffer framebuffer, const VkRect2D& region, - const VkClearValue* clear_value = nullptr); - void EndRenderPass(); - - void Draw(); - - // NOTE: These methods alter the viewport state of the command buffer. - - // Sets texture coordinates to 0..1 - void DrawQuad(int x, int y, int width, int height, float z = 0.0f); - - // Sets texture coordinates to the specified range - void DrawQuad(int dst_x, int dst_y, int dst_width, int dst_height, int src_x, int src_y, - int src_layer, int src_width, int src_height, int src_full_width, - int src_full_height, float z = 0.0f); - - void DrawColoredQuad(int x, int y, int width, int height, u32 color, float z = 0.0f); - - void DrawColoredQuad(int x, int y, int width, int height, float r, float g, float b, float a, - float z = 0.0f); - - // Draw without a vertex buffer. Assumes viewport has been initialized separately. 
- void SetViewportAndScissor(int x, int y, int width, int height); - void DrawWithoutVertexBuffer(u32 vertex_count); - -private: - void BindVertexBuffer(); - void BindDescriptors(); - bool BindPipeline(); - - VkCommandBuffer m_command_buffer = VK_NULL_HANDLE; - VkBuffer m_vertex_buffer = VK_NULL_HANDLE; - VkDeviceSize m_vertex_buffer_offset = 0; - uint32_t m_vertex_count = 0; - - VkDescriptorBufferInfo m_vs_uniform_buffer = {}; - VkDescriptorBufferInfo m_ps_uniform_buffer = {}; - std::array m_ubo_offsets = {}; - - std::array m_ps_samplers = {}; - - VkBufferView m_ps_texel_buffer = VK_NULL_HANDLE; - - PipelineInfo m_pipeline_info = {}; -}; - -class ComputeShaderDispatcher -{ -public: - ComputeShaderDispatcher(VkCommandBuffer command_buffer, VkPipelineLayout pipeline_layout, - VkShaderModule compute_shader); - - u8* AllocateUniformBuffer(size_t size); - void CommitUniformBuffer(size_t size); - - void SetPushConstants(const void* data, size_t data_size); - - void SetSampler(size_t index, VkImageView view, VkSampler sampler); - - void SetTexelBuffer(size_t index, VkBufferView view); - - void SetStorageImage(VkImageView view, VkImageLayout image_layout); - - void Dispatch(u32 groups_x, u32 groups_y, u32 groups_z); - -private: - void BindDescriptors(); - bool BindPipeline(); - - VkCommandBuffer m_command_buffer = VK_NULL_HANDLE; - - VkDescriptorBufferInfo m_uniform_buffer = {}; - u32 m_uniform_buffer_offset = 0; - - std::array m_samplers = {}; - - std::array m_texel_buffers = {}; - - VkDescriptorImageInfo m_storage_image = {}; - - ComputePipelineInfo m_pipeline_info = {}; -}; - -} // namespace Vulkan diff --git a/Source/Core/VideoBackends/Vulkan/VKPipeline.cpp b/Source/Core/VideoBackends/Vulkan/VKPipeline.cpp index fa513cef7d..8ff1a30009 100644 --- a/Source/Core/VideoBackends/Vulkan/VKPipeline.cpp +++ b/Source/Core/VideoBackends/Vulkan/VKPipeline.cpp @@ -6,9 +6,9 @@ #include "Common/MsgHandler.h" #include "VideoBackends/Vulkan/ObjectCache.h" -#include 
"VideoBackends/Vulkan/Util.h" #include "VideoBackends/Vulkan/VKPipeline.h" #include "VideoBackends/Vulkan/VKShader.h" +#include "VideoBackends/Vulkan/VKTexture.h" #include "VideoBackends/Vulkan/VertexFormat.h" #include "VideoBackends/Vulkan/VulkanContext.h" @@ -25,14 +25,213 @@ VKPipeline::~VKPipeline() vkDestroyPipeline(g_vulkan_context->GetDevice(), m_pipeline, nullptr); } +static bool IsStripPrimitiveTopology(VkPrimitiveTopology topology) +{ + return topology == VK_PRIMITIVE_TOPOLOGY_LINE_STRIP || + topology == VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP || + topology == VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY || + topology == VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY; +} + +static VkPipelineRasterizationStateCreateInfo +GetVulkanRasterizationState(const RasterizationState& state) +{ + static constexpr std::array cull_modes = { + {VK_CULL_MODE_NONE, VK_CULL_MODE_BACK_BIT, VK_CULL_MODE_FRONT_BIT, + VK_CULL_MODE_FRONT_AND_BACK}}; + + bool depth_clamp = g_ActiveConfig.backend_info.bSupportsDepthClamp; + + return { + VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO, // VkStructureType sType + nullptr, // const void* pNext + 0, // VkPipelineRasterizationStateCreateFlags flags + depth_clamp, // VkBool32 depthClampEnable + VK_FALSE, // VkBool32 rasterizerDiscardEnable + VK_POLYGON_MODE_FILL, // VkPolygonMode polygonMode + cull_modes[state.cullmode], // VkCullModeFlags cullMode + VK_FRONT_FACE_CLOCKWISE, // VkFrontFace frontFace + VK_FALSE, // VkBool32 depthBiasEnable + 0.0f, // float depthBiasConstantFactor + 0.0f, // float depthBiasClamp + 0.0f, // float depthBiasSlopeFactor + 1.0f // float lineWidth + }; +} + +static VkPipelineMultisampleStateCreateInfo GetVulkanMultisampleState(const FramebufferState& state) +{ + return { + VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO, // VkStructureType sType + nullptr, // const void* pNext + 0, // VkPipelineMultisampleStateCreateFlags flags + static_cast( + state.samples.Value()), // 
VkSampleCountFlagBits rasterizationSamples + state.per_sample_shading, // VkBool32 sampleShadingEnable + 1.0f, // float minSampleShading + nullptr, // const VkSampleMask* pSampleMask; + VK_FALSE, // VkBool32 alphaToCoverageEnable + VK_FALSE // VkBool32 alphaToOneEnable + }; +} + +static VkPipelineDepthStencilStateCreateInfo GetVulkanDepthStencilState(const DepthState& state) +{ + // Less/greater are swapped due to inverted depth. + VkCompareOp compare_op; + bool inverted_depth = !g_ActiveConfig.backend_info.bSupportsReversedDepthRange; + switch (state.func) + { + case ZMode::NEVER: + compare_op = VK_COMPARE_OP_NEVER; + break; + case ZMode::LESS: + compare_op = inverted_depth ? VK_COMPARE_OP_GREATER : VK_COMPARE_OP_LESS; + break; + case ZMode::EQUAL: + compare_op = VK_COMPARE_OP_EQUAL; + break; + case ZMode::LEQUAL: + compare_op = inverted_depth ? VK_COMPARE_OP_GREATER_OR_EQUAL : VK_COMPARE_OP_LESS_OR_EQUAL; + break; + case ZMode::GREATER: + compare_op = inverted_depth ? VK_COMPARE_OP_LESS : VK_COMPARE_OP_GREATER; + break; + case ZMode::NEQUAL: + compare_op = VK_COMPARE_OP_NOT_EQUAL; + break; + case ZMode::GEQUAL: + compare_op = inverted_depth ? 
VK_COMPARE_OP_LESS_OR_EQUAL : VK_COMPARE_OP_GREATER_OR_EQUAL; + break; + case ZMode::ALWAYS: + compare_op = VK_COMPARE_OP_ALWAYS; + break; + default: + compare_op = VK_COMPARE_OP_ALWAYS; + break; + } + + return { + VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO, // VkStructureType sType + nullptr, // const void* pNext + 0, // VkPipelineDepthStencilStateCreateFlags flags + state.testenable, // VkBool32 depthTestEnable + state.updateenable, // VkBool32 depthWriteEnable + compare_op, // VkCompareOp depthCompareOp + VK_FALSE, // VkBool32 depthBoundsTestEnable + VK_FALSE, // VkBool32 stencilTestEnable + {}, // VkStencilOpState front + {}, // VkStencilOpState back + 0.0f, // float minDepthBounds + 1.0f // float maxDepthBounds + }; +} + +static VkPipelineColorBlendAttachmentState GetVulkanAttachmentBlendState(const BlendingState& state) +{ + VkPipelineColorBlendAttachmentState vk_state = {}; + vk_state.blendEnable = static_cast(state.blendenable); + vk_state.colorBlendOp = state.subtract ? VK_BLEND_OP_REVERSE_SUBTRACT : VK_BLEND_OP_ADD; + vk_state.alphaBlendOp = state.subtractAlpha ? 
VK_BLEND_OP_REVERSE_SUBTRACT : VK_BLEND_OP_ADD; + + if (state.usedualsrc && g_ActiveConfig.backend_info.bSupportsDualSourceBlend) + { + static constexpr std::array src_factors = { + {VK_BLEND_FACTOR_ZERO, VK_BLEND_FACTOR_ONE, VK_BLEND_FACTOR_DST_COLOR, + VK_BLEND_FACTOR_ONE_MINUS_DST_COLOR, VK_BLEND_FACTOR_SRC1_ALPHA, + VK_BLEND_FACTOR_ONE_MINUS_SRC1_ALPHA, VK_BLEND_FACTOR_DST_ALPHA, + VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA}}; + static constexpr std::array dst_factors = { + {VK_BLEND_FACTOR_ZERO, VK_BLEND_FACTOR_ONE, VK_BLEND_FACTOR_SRC_COLOR, + VK_BLEND_FACTOR_ONE_MINUS_SRC_COLOR, VK_BLEND_FACTOR_SRC1_ALPHA, + VK_BLEND_FACTOR_ONE_MINUS_SRC1_ALPHA, VK_BLEND_FACTOR_DST_ALPHA, + VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA}}; + + vk_state.srcColorBlendFactor = src_factors[state.srcfactor]; + vk_state.srcAlphaBlendFactor = src_factors[state.srcfactoralpha]; + vk_state.dstColorBlendFactor = dst_factors[state.dstfactor]; + vk_state.dstAlphaBlendFactor = dst_factors[state.dstfactoralpha]; + } + else + { + static constexpr std::array src_factors = { + {VK_BLEND_FACTOR_ZERO, VK_BLEND_FACTOR_ONE, VK_BLEND_FACTOR_DST_COLOR, + VK_BLEND_FACTOR_ONE_MINUS_DST_COLOR, VK_BLEND_FACTOR_SRC_ALPHA, + VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA, VK_BLEND_FACTOR_DST_ALPHA, + VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA}}; + + static constexpr std::array dst_factors = { + {VK_BLEND_FACTOR_ZERO, VK_BLEND_FACTOR_ONE, VK_BLEND_FACTOR_SRC_COLOR, + VK_BLEND_FACTOR_ONE_MINUS_SRC_COLOR, VK_BLEND_FACTOR_SRC_ALPHA, + VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA, VK_BLEND_FACTOR_DST_ALPHA, + VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA}}; + + vk_state.srcColorBlendFactor = src_factors[state.srcfactor]; + vk_state.srcAlphaBlendFactor = src_factors[state.srcfactoralpha]; + vk_state.dstColorBlendFactor = dst_factors[state.dstfactor]; + vk_state.dstAlphaBlendFactor = dst_factors[state.dstfactoralpha]; + } + + if (state.colorupdate) + { + vk_state.colorWriteMask = + VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT | VK_COLOR_COMPONENT_B_BIT; + } 
+ else + { + vk_state.colorWriteMask = 0; + } + + if (state.alphaupdate) + vk_state.colorWriteMask |= VK_COLOR_COMPONENT_A_BIT; + + return vk_state; +} + +static VkPipelineColorBlendStateCreateInfo +GetVulkanColorBlendState(const BlendingState& state, + const VkPipelineColorBlendAttachmentState* attachments, + uint32_t num_attachments) +{ + static constexpr std::array vk_logic_ops = { + {VK_LOGIC_OP_CLEAR, VK_LOGIC_OP_AND, VK_LOGIC_OP_AND_REVERSE, VK_LOGIC_OP_COPY, + VK_LOGIC_OP_AND_INVERTED, VK_LOGIC_OP_NO_OP, VK_LOGIC_OP_XOR, VK_LOGIC_OP_OR, + VK_LOGIC_OP_NOR, VK_LOGIC_OP_EQUIVALENT, VK_LOGIC_OP_INVERT, VK_LOGIC_OP_OR_REVERSE, + VK_LOGIC_OP_COPY_INVERTED, VK_LOGIC_OP_OR_INVERTED, VK_LOGIC_OP_NAND, VK_LOGIC_OP_SET}}; + + VkBool32 vk_logic_op_enable = static_cast(state.logicopenable); + if (vk_logic_op_enable && !g_ActiveConfig.backend_info.bSupportsLogicOp) + { + // At the time of writing, Adreno and Mali drivers didn't support logic ops. + // The "emulation" through blending path has been removed, so just disable it completely. + // These drivers don't support dual-source blend either, so issues are to be expected. + vk_logic_op_enable = VK_FALSE; + } + + VkLogicOp vk_logic_op = vk_logic_op_enable ? vk_logic_ops[state.logicmode] : VK_LOGIC_OP_CLEAR; + + VkPipelineColorBlendStateCreateInfo vk_state = { + VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO, // VkStructureType sType + nullptr, // const void* pNext + 0, // VkPipelineColorBlendStateCreateFlags flags + vk_logic_op_enable, // VkBool32 logicOpEnable + vk_logic_op, // VkLogicOp logicOp + num_attachments, // uint32_t attachmentCount + attachments, // const VkPipelineColorBlendAttachmentState* pAttachments + {1.0f, 1.0f, 1.0f, 1.0f} // float blendConstants[4] + }; + + return vk_state; +} + std::unique_ptr VKPipeline::Create(const AbstractPipelineConfig& config) { DEBUG_ASSERT(config.vertex_shader && config.pixel_shader); // Get render pass for config. 
VkRenderPass render_pass = g_object_cache->GetRenderPass( - Util::GetVkFormatForHostTextureFormat(config.framebuffer_state.color_texture_format), - Util::GetVkFormatForHostTextureFormat(config.framebuffer_state.depth_texture_format), + VKTexture::GetVkFormatForHostTextureFormat(config.framebuffer_state.color_texture_format), + VKTexture::GetVkFormatForHostTextureFormat(config.framebuffer_state.depth_texture_format), config.framebuffer_state.samples, VK_ATTACHMENT_LOAD_OP_LOAD); // Get pipeline layout. @@ -50,26 +249,144 @@ std::unique_ptr VKPipeline::Create(const AbstractPipelineConfig& con return nullptr; } - // TODO: Move ShaderCache stuff to here. - PipelineInfo pinfo; - pinfo.vertex_format = static_cast(config.vertex_format); - pinfo.pipeline_layout = pipeline_layout; - pinfo.vs = static_cast(config.vertex_shader)->GetShaderModule(); - pinfo.ps = static_cast(config.pixel_shader)->GetShaderModule(); - pinfo.gs = config.geometry_shader ? - static_cast(config.geometry_shader)->GetShaderModule() : - VK_NULL_HANDLE; - pinfo.render_pass = render_pass; - pinfo.rasterization_state.hex = config.rasterization_state.hex; - pinfo.depth_state.hex = config.depth_state.hex; - pinfo.blend_state.hex = config.blending_state.hex; - pinfo.multisampling_state.hex = 0; - pinfo.multisampling_state.samples = config.framebuffer_state.samples; - pinfo.multisampling_state.per_sample_shading = config.framebuffer_state.per_sample_shading; + // Declare descriptors for empty vertex buffers/attributes + static const VkPipelineVertexInputStateCreateInfo empty_vertex_input_state = { + VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, // VkStructureType sType + nullptr, // const void* pNext + 0, // VkPipelineVertexInputStateCreateFlags flags + 0, // uint32_t vertexBindingDescriptionCount + nullptr, // const VkVertexInputBindingDescription* pVertexBindingDescriptions + 0, // uint32_t vertexAttributeDescriptionCount + nullptr // const VkVertexInputAttributeDescription* 
pVertexAttributeDescriptions + }; - VkPipeline pipeline = g_shader_cache->CreatePipeline(pinfo); - if (pipeline == VK_NULL_HANDLE) - return nullptr; + // Vertex inputs + const VkPipelineVertexInputStateCreateInfo& vertex_input_state = + config.vertex_format ? + static_cast(config.vertex_format)->GetVertexInputStateInfo() : + empty_vertex_input_state; + + // Input assembly + static constexpr std::array vk_primitive_topologies = { + {VK_PRIMITIVE_TOPOLOGY_POINT_LIST, VK_PRIMITIVE_TOPOLOGY_LINE_LIST, + VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST, VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP}}; + VkPipelineInputAssemblyStateCreateInfo input_assembly_state = { + VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO, nullptr, 0, + vk_primitive_topologies[static_cast(config.rasterization_state.primitive.Value())], + VK_FALSE}; + + // See Vulkan spec, section 19: + // If topology is VK_PRIMITIVE_TOPOLOGY_POINT_LIST, VK_PRIMITIVE_TOPOLOGY_LINE_LIST, + // VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST, VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY, + // VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY or VK_PRIMITIVE_TOPOLOGY_PATCH_LIST, + // primitiveRestartEnable must be VK_FALSE + if (g_ActiveConfig.backend_info.bSupportsPrimitiveRestart && + IsStripPrimitiveTopology(input_assembly_state.topology)) + { + input_assembly_state.primitiveRestartEnable = VK_TRUE; + } + + // Shaders to stages + VkPipelineShaderStageCreateInfo shader_stages[3]; + uint32_t num_shader_stages = 0; + if (config.vertex_shader) + { + shader_stages[num_shader_stages++] = { + VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, + nullptr, + 0, + VK_SHADER_STAGE_VERTEX_BIT, + static_cast(config.vertex_shader)->GetShaderModule(), + "main"}; + } + if (config.geometry_shader) + { + shader_stages[num_shader_stages++] = { + VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, + nullptr, + 0, + VK_SHADER_STAGE_GEOMETRY_BIT, + static_cast(config.geometry_shader)->GetShaderModule(), + "main"}; + } + if (config.pixel_shader) + { + 
shader_stages[num_shader_stages++] = { + VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, + nullptr, + 0, + VK_SHADER_STAGE_FRAGMENT_BIT, + static_cast(config.pixel_shader)->GetShaderModule(), + "main"}; + } + + // Fill in Vulkan descriptor structs from our state structures. + VkPipelineRasterizationStateCreateInfo rasterization_state = + GetVulkanRasterizationState(config.rasterization_state); + VkPipelineMultisampleStateCreateInfo multisample_state = + GetVulkanMultisampleState(config.framebuffer_state); + VkPipelineDepthStencilStateCreateInfo depth_stencil_state = + GetVulkanDepthStencilState(config.depth_state); + VkPipelineColorBlendAttachmentState blend_attachment_state = + GetVulkanAttachmentBlendState(config.blending_state); + VkPipelineColorBlendStateCreateInfo blend_state = + GetVulkanColorBlendState(config.blending_state, &blend_attachment_state, 1); + + // This viewport isn't used, but needs to be specified anyway. + static const VkViewport viewport = {0.0f, 0.0f, 1.0f, 1.0f, 0.0f, 1.0f}; + static const VkRect2D scissor = {{0, 0}, {1, 1}}; + static const VkPipelineViewportStateCreateInfo viewport_state = { + VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO, + nullptr, + 0, // VkPipelineViewportStateCreateFlags flags; + 1, // uint32_t viewportCount + &viewport, // const VkViewport* pViewports + 1, // uint32_t scissorCount + &scissor // const VkRect2D* pScissors + }; + + // Set viewport and scissor dynamic state so we can change it elsewhere. + static const VkDynamicState dynamic_states[] = {VK_DYNAMIC_STATE_VIEWPORT, + VK_DYNAMIC_STATE_SCISSOR}; + static const VkPipelineDynamicStateCreateInfo dynamic_state = { + VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO, nullptr, + 0, // VkPipelineDynamicStateCreateFlags flags + static_cast(ArraySize(dynamic_states)), // uint32_t dynamicStateCount + dynamic_states // const VkDynamicState* pDynamicStates + }; + + // Combine to full pipeline info structure. 
+ VkGraphicsPipelineCreateInfo pipeline_info = { + VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, + nullptr, // VkStructureType sType + 0, // VkPipelineCreateFlags flags + num_shader_stages, // uint32_t stageCount + shader_stages, // const VkPipelineShaderStageCreateInfo* pStages + &vertex_input_state, // const VkPipelineVertexInputStateCreateInfo* pVertexInputState + &input_assembly_state, // const VkPipelineInputAssemblyStateCreateInfo* pInputAssemblyState + nullptr, // const VkPipelineTessellationStateCreateInfo* pTessellationState + &viewport_state, // const VkPipelineViewportStateCreateInfo* pViewportState + &rasterization_state, // const VkPipelineRasterizationStateCreateInfo* pRasterizationState + &multisample_state, // const VkPipelineMultisampleStateCreateInfo* pMultisampleState + &depth_stencil_state, // const VkPipelineDepthStencilStateCreateInfo* pDepthStencilState + &blend_state, // const VkPipelineColorBlendStateCreateInfo* pColorBlendState + &dynamic_state, // const VkPipelineDynamicStateCreateInfo* pDynamicState + pipeline_layout, // VkPipelineLayout layout + render_pass, // VkRenderPass renderPass + 0, // uint32_t subpass + VK_NULL_HANDLE, // VkPipeline basePipelineHandle + -1 // int32_t basePipelineIndex + }; + + VkPipeline pipeline; + VkResult res = + vkCreateGraphicsPipelines(g_vulkan_context->GetDevice(), g_object_cache->GetPipelineCache(), + 1, &pipeline_info, nullptr, &pipeline); + if (res != VK_SUCCESS) + { + LOG_VULKAN_ERROR(res, "vkCreateGraphicsPipelines failed: "); + return VK_NULL_HANDLE; + } return std::make_unique(pipeline, pipeline_layout, config.usage); } diff --git a/Source/Core/VideoBackends/Vulkan/VKShader.cpp b/Source/Core/VideoBackends/Vulkan/VKShader.cpp index 5b44ed99a4..1a95a9c1f8 100644 --- a/Source/Core/VideoBackends/Vulkan/VKShader.cpp +++ b/Source/Core/VideoBackends/Vulkan/VKShader.cpp @@ -5,8 +5,8 @@ #include "Common/Align.h" #include "Common/Assert.h" +#include "VideoBackends/Vulkan/ObjectCache.h" #include 
"VideoBackends/Vulkan/ShaderCompiler.h" -#include "VideoBackends/Vulkan/Util.h" #include "VideoBackends/Vulkan/VKShader.h" #include "VideoBackends/Vulkan/VulkanContext.h" @@ -48,26 +48,47 @@ AbstractShader::BinaryData VKShader::GetBinary() const static std::unique_ptr CreateShaderObject(ShaderStage stage, ShaderCompiler::SPIRVCodeVector spv) { - VkShaderModule mod = Util::CreateShaderModule(spv.data(), spv.size()); - if (mod == VK_NULL_HANDLE) - return nullptr; + VkShaderModuleCreateInfo info = {}; + info.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO; + info.codeSize = spv.size() * sizeof(u32); + info.pCode = spv.data(); + + VkShaderModule mod; + VkResult res = vkCreateShaderModule(g_vulkan_context->GetDevice(), &info, nullptr, &mod); + if (res != VK_SUCCESS) + { + LOG_VULKAN_ERROR(res, "vkCreateShaderModule failed: "); + return VK_NULL_HANDLE; + } // If it's a graphics shader, we defer pipeline creation. if (stage != ShaderStage::Compute) return std::make_unique(stage, std::move(spv), mod); // If it's a compute shader, we create the pipeline straight away. - ComputePipelineInfo pinfo; - pinfo.pipeline_layout = g_object_cache->GetPipelineLayout(PIPELINE_LAYOUT_COMPUTE); - pinfo.cs = mod; - VkPipeline pipeline = g_shader_cache->CreateComputePipeline(pinfo); - if (pipeline == VK_NULL_HANDLE) + const VkComputePipelineCreateInfo pipeline_info = { + VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, + nullptr, + 0, + {VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, nullptr, 0, VK_SHADER_STAGE_COMPUTE_BIT, + mod, "main", nullptr}, + g_object_cache->GetPipelineLayout(PIPELINE_LAYOUT_COMPUTE), + VK_NULL_HANDLE, + -1}; + + VkPipeline pipeline; + res = vkCreateComputePipelines(g_vulkan_context->GetDevice(), g_object_cache->GetPipelineCache(), + 1, &pipeline_info, nullptr, &pipeline); + + // Shader module is no longer needed, now it is compiled to a pipeline. 
+ vkDestroyShaderModule(g_vulkan_context->GetDevice(), mod, nullptr); + + if (res != VK_SUCCESS) { - vkDestroyShaderModule(g_vulkan_context->GetDevice(), mod, nullptr); + LOG_VULKAN_ERROR(res, "vkCreateComputePipelines failed: "); return nullptr; } - // Shader module is no longer needed, now it is compiled to a pipeline. return std::make_unique(std::move(spv), pipeline); } diff --git a/Source/Core/VideoBackends/Vulkan/VKTexture.cpp b/Source/Core/VideoBackends/Vulkan/VKTexture.cpp index 929307edb3..a19e169412 100644 --- a/Source/Core/VideoBackends/Vulkan/VKTexture.cpp +++ b/Source/Core/VideoBackends/Vulkan/VKTexture.cpp @@ -13,115 +13,228 @@ #include "Common/MsgHandler.h" #include "VideoBackends/Vulkan/CommandBufferManager.h" -#include "VideoBackends/Vulkan/FramebufferManager.h" +#include "VideoBackends/Vulkan/ObjectCache.h" +#include "VideoBackends/Vulkan/Renderer.h" #include "VideoBackends/Vulkan/StagingBuffer.h" #include "VideoBackends/Vulkan/StateTracker.h" -#include "VideoBackends/Vulkan/Texture2D.h" -#include "VideoBackends/Vulkan/Util.h" +#include "VideoBackends/Vulkan/StreamBuffer.h" #include "VideoBackends/Vulkan/VKTexture.h" #include "VideoBackends/Vulkan/VulkanContext.h" -#include "VideoCommon/ImageWrite.h" -#include "VideoCommon/TextureConfig.h" - namespace Vulkan { -VKTexture::VKTexture(const TextureConfig& tex_config, std::unique_ptr texture, - VkFramebuffer framebuffer) - : AbstractTexture(tex_config), m_texture(std::move(texture)), m_framebuffer(framebuffer) +VKTexture::VKTexture(const TextureConfig& tex_config, VkDeviceMemory device_memory, VkImage image, + VkImageLayout layout /* = VK_IMAGE_LAYOUT_UNDEFINED */, + ComputeImageLayout compute_layout /* = ComputeImageLayout::Undefined */) + : AbstractTexture(tex_config), m_device_memory(device_memory), m_image(image), m_layout(layout), + m_compute_layout(compute_layout) { } +VKTexture::~VKTexture() +{ + StateTracker::GetInstance()->UnbindTexture(m_view); + 
g_command_buffer_mgr->DeferImageViewDestruction(m_view); + + // If we don't have device memory allocated, the image is not owned by us (e.g. swapchain) + if (m_device_memory != VK_NULL_HANDLE) + { + g_command_buffer_mgr->DeferImageDestruction(m_image); + g_command_buffer_mgr->DeferDeviceMemoryDestruction(m_device_memory); + } +} + std::unique_ptr VKTexture::Create(const TextureConfig& tex_config) { // Determine image usage, we need to flag as an attachment if it can be used as a rendertarget. VkImageUsageFlags usage = VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT; - if (tex_config.rendertarget) - usage |= VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT; - - // Allocate texture object - VkFormat vk_format = Util::GetVkFormatForHostTextureFormat(tex_config.format); - auto texture = - Texture2D::Create(tex_config.width, tex_config.height, tex_config.levels, tex_config.layers, - vk_format, static_cast(tex_config.samples), - VK_IMAGE_VIEW_TYPE_2D_ARRAY, VK_IMAGE_TILING_OPTIMAL, usage); - - if (!texture) + if (tex_config.IsRenderTarget()) { + usage |= IsDepthFormat(tex_config.format) ? 
VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT : + VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT; + } + if (tex_config.IsComputeImage()) + usage |= VK_IMAGE_USAGE_STORAGE_BIT; + + VkImageCreateInfo image_info = {VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, + nullptr, + 0, + VK_IMAGE_TYPE_2D, + GetVkFormatForHostTextureFormat(tex_config.format), + {tex_config.width, tex_config.height, 1}, + tex_config.levels, + tex_config.layers, + static_cast(tex_config.samples), + VK_IMAGE_TILING_OPTIMAL, + usage, + VK_SHARING_MODE_EXCLUSIVE, + 0, + nullptr, + VK_IMAGE_LAYOUT_UNDEFINED}; + + VkImage image = VK_NULL_HANDLE; + VkResult res = vkCreateImage(g_vulkan_context->GetDevice(), &image_info, nullptr, &image); + if (res != VK_SUCCESS) + { + LOG_VULKAN_ERROR(res, "vkCreateImage failed: "); return nullptr; } - // If this is a render target (for efb copies), allocate a framebuffer - VkFramebuffer framebuffer = VK_NULL_HANDLE; - if (tex_config.rendertarget) + // Allocate memory to back this texture, we want device local memory in this case + VkMemoryRequirements memory_requirements; + vkGetImageMemoryRequirements(g_vulkan_context->GetDevice(), image, &memory_requirements); + + VkMemoryAllocateInfo memory_info = { + VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, nullptr, memory_requirements.size, + g_vulkan_context->GetMemoryType(memory_requirements.memoryTypeBits, + VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT)}; + + VkDeviceMemory device_memory; + res = vkAllocateMemory(g_vulkan_context->GetDevice(), &memory_info, nullptr, &device_memory); + if (res != VK_SUCCESS) { - VkImageView framebuffer_attachments[] = {texture->GetView()}; - VkRenderPass render_pass = - g_object_cache->GetRenderPass(texture->GetFormat(), VK_FORMAT_UNDEFINED, tex_config.samples, - VK_ATTACHMENT_LOAD_OP_DONT_CARE); - VkFramebufferCreateInfo framebuffer_info = { - VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO, - nullptr, - 0, - render_pass, - static_cast(ArraySize(framebuffer_attachments)), - framebuffer_attachments, - texture->GetWidth(), - 
texture->GetHeight(), - texture->GetLayers()}; - - VkResult res = vkCreateFramebuffer(g_vulkan_context->GetDevice(), &framebuffer_info, nullptr, - &framebuffer); - if (res != VK_SUCCESS) - { - LOG_VULKAN_ERROR(res, "vkCreateFramebuffer failed: "); - return nullptr; - } - - if (!IsDepthFormat(tex_config.format)) - { - // Clear render targets before use to prevent reading uninitialized memory. - VkClearColorValue clear_value = {{0.0f, 0.0f, 0.0f, 1.0f}}; - VkImageSubresourceRange clear_range = {VK_IMAGE_ASPECT_COLOR_BIT, 0, tex_config.levels, 0, - tex_config.layers}; - texture->TransitionToLayout(g_command_buffer_mgr->GetCurrentInitCommandBuffer(), - VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); - vkCmdClearColorImage(g_command_buffer_mgr->GetCurrentInitCommandBuffer(), texture->GetImage(), - texture->GetLayout(), &clear_value, 1, &clear_range); - } - else - { - // Clear render targets before use to prevent reading uninitialized memory. - VkClearDepthStencilValue clear_value = {0.0f, 0}; - VkImageSubresourceRange clear_range = {Util::GetImageAspectForFormat(vk_format), 0, - tex_config.levels, 0, tex_config.layers}; - texture->TransitionToLayout(g_command_buffer_mgr->GetCurrentInitCommandBuffer(), - VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); - vkCmdClearDepthStencilImage(g_command_buffer_mgr->GetCurrentInitCommandBuffer(), - texture->GetImage(), texture->GetLayout(), &clear_value, 1, - &clear_range); - } + LOG_VULKAN_ERROR(res, "vkAllocateMemory failed: "); + vkDestroyImage(g_vulkan_context->GetDevice(), image, nullptr); + return nullptr; } - return std::unique_ptr(new VKTexture(tex_config, std::move(texture), framebuffer)); + res = vkBindImageMemory(g_vulkan_context->GetDevice(), image, device_memory, 0); + if (res != VK_SUCCESS) + { + LOG_VULKAN_ERROR(res, "vkBindImageMemory failed: "); + vkDestroyImage(g_vulkan_context->GetDevice(), image, nullptr); + vkFreeMemory(g_vulkan_context->GetDevice(), device_memory, nullptr); + return nullptr; + } + + std::unique_ptr texture = 
std::make_unique( + tex_config, device_memory, image, VK_IMAGE_LAYOUT_UNDEFINED, ComputeImageLayout::Undefined); + if (!texture->CreateView(VK_IMAGE_VIEW_TYPE_2D_ARRAY)) + return nullptr; + + return texture; } -VKTexture::~VKTexture() +std::unique_ptr VKTexture::CreateAdopted(const TextureConfig& tex_config, VkImage image, + VkImageViewType view_type, VkImageLayout layout) { - // Texture is automatically cleaned up, however, we don't want to leave it bound. - g_renderer->UnbindTexture(this); - if (m_framebuffer != VK_NULL_HANDLE) - g_command_buffer_mgr->DeferFramebufferDestruction(m_framebuffer); + std::unique_ptr texture = std::make_unique( + tex_config, nullptr, image, layout, ComputeImageLayout::Undefined); + if (!texture->CreateView(VK_IMAGE_VIEW_TYPE_2D_ARRAY)) + return nullptr; + + return texture; } -Texture2D* VKTexture::GetRawTexIdentifier() const +bool VKTexture::CreateView(VkImageViewType type) { - return m_texture.get(); + VkImageViewCreateInfo view_info = { + VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, + nullptr, + 0, + m_image, + type, + GetVkFormat(), + {VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY, + VK_COMPONENT_SWIZZLE_IDENTITY}, + {GetImageAspectForFormat(GetFormat()), 0, GetLevels(), 0, GetLayers()}}; + + VkResult res = vkCreateImageView(g_vulkan_context->GetDevice(), &view_info, nullptr, &m_view); + if (res != VK_SUCCESS) + { + LOG_VULKAN_ERROR(res, "vkCreateImageView failed: "); + return false; + } + + return true; } -VkFramebuffer VKTexture::GetFramebuffer() const + +VkFormat VKTexture::GetLinearFormat(VkFormat format) { - return m_framebuffer; + switch (format) + { + case VK_FORMAT_R8_SRGB: + return VK_FORMAT_R8_UNORM; + case VK_FORMAT_R8G8_SRGB: + return VK_FORMAT_R8G8_UNORM; + case VK_FORMAT_R8G8B8_SRGB: + return VK_FORMAT_R8G8B8_UNORM; + case VK_FORMAT_R8G8B8A8_SRGB: + return VK_FORMAT_R8G8B8A8_UNORM; + case VK_FORMAT_B8G8R8_SRGB: + return VK_FORMAT_B8G8R8_UNORM; + case VK_FORMAT_B8G8R8A8_SRGB: 
+ return VK_FORMAT_B8G8R8A8_UNORM; + default: + return format; + } +} + +VkFormat VKTexture::GetVkFormatForHostTextureFormat(AbstractTextureFormat format) +{ + switch (format) + { + case AbstractTextureFormat::DXT1: + return VK_FORMAT_BC1_RGBA_UNORM_BLOCK; + + case AbstractTextureFormat::DXT3: + return VK_FORMAT_BC2_UNORM_BLOCK; + + case AbstractTextureFormat::DXT5: + return VK_FORMAT_BC3_UNORM_BLOCK; + + case AbstractTextureFormat::BPTC: + return VK_FORMAT_BC7_UNORM_BLOCK; + + case AbstractTextureFormat::RGBA8: + return VK_FORMAT_R8G8B8A8_UNORM; + + case AbstractTextureFormat::BGRA8: + return VK_FORMAT_B8G8R8A8_UNORM; + + case AbstractTextureFormat::R16: + return VK_FORMAT_R16_UNORM; + + case AbstractTextureFormat::D16: + return VK_FORMAT_D16_UNORM; + + case AbstractTextureFormat::D24_S8: + return VK_FORMAT_D24_UNORM_S8_UINT; + + case AbstractTextureFormat::R32F: + return VK_FORMAT_R32_SFLOAT; + + case AbstractTextureFormat::D32F: + return VK_FORMAT_D32_SFLOAT; + + case AbstractTextureFormat::D32F_S8: + return VK_FORMAT_D32_SFLOAT_S8_UINT; + + case AbstractTextureFormat::Undefined: + return VK_FORMAT_UNDEFINED; + + default: + PanicAlert("Unhandled texture format."); + return VK_FORMAT_R8G8B8A8_UNORM; + } +} + +VkImageAspectFlags VKTexture::GetImageAspectForFormat(AbstractTextureFormat format) +{ + switch (format) + { + case AbstractTextureFormat::D24_S8: + case AbstractTextureFormat::D32F_S8: + return VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT; + + case AbstractTextureFormat::D16: + case AbstractTextureFormat::D32F: + return VK_IMAGE_ASPECT_DEPTH_BIT; + + default: + return VK_IMAGE_ASPECT_COLOR_BIT; + } } void VKTexture::CopyRectangleFromTexture(const AbstractTexture* src, @@ -129,7 +242,7 @@ void VKTexture::CopyRectangleFromTexture(const AbstractTexture* src, u32 src_level, const MathUtil::Rectangle& dst_rect, u32 dst_layer, u32 dst_level) { - Texture2D* src_texture = static_cast(src)->GetRawTexIdentifier(); + const VKTexture* src_texture = 
static_cast(src); ASSERT_MSG(VIDEO, static_cast(src_rect.GetWidth()) <= src_texture->GetWidth() && @@ -151,67 +264,18 @@ void VKTexture::CopyRectangleFromTexture(const AbstractTexture* src, // Must be called outside of a render pass. StateTracker::GetInstance()->EndRenderPass(); + const VkImageLayout old_src_layout = src_texture->GetLayout(); src_texture->TransitionToLayout(g_command_buffer_mgr->GetCurrentCommandBuffer(), VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL); - m_texture->TransitionToLayout(g_command_buffer_mgr->GetCurrentCommandBuffer(), - VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); + TransitionToLayout(g_command_buffer_mgr->GetCurrentCommandBuffer(), + VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); - vkCmdCopyImage(g_command_buffer_mgr->GetCurrentCommandBuffer(), src_texture->GetImage(), - VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, m_texture->GetImage(), + vkCmdCopyImage(g_command_buffer_mgr->GetCurrentCommandBuffer(), src_texture->m_image, + VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, m_image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, &image_copy); - // Ensure both textures remain in the SHADER_READ_ONLY layout so they can be bound. - src_texture->TransitionToLayout(g_command_buffer_mgr->GetCurrentCommandBuffer(), - VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); - m_texture->TransitionToLayout(g_command_buffer_mgr->GetCurrentCommandBuffer(), - VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); -} - -void VKTexture::ScaleRectangleFromTexture(const AbstractTexture* source, - const MathUtil::Rectangle& src_rect, - const MathUtil::Rectangle& dst_rect) -{ - Texture2D* src_texture = static_cast(source)->GetRawTexIdentifier(); - - // Can't do this within a game render pass. - StateTracker::GetInstance()->EndRenderPass(); - StateTracker::GetInstance()->SetPendingRebind(); - - // Can't render to a non-rendertarget (no framebuffer). 
- ASSERT_MSG(VIDEO, m_config.rendertarget, - "Destination texture for partial copy is not a rendertarget"); - - // Render pass expects dst_texture to be in COLOR_ATTACHMENT_OPTIMAL state. - // src_texture should already be in SHADER_READ_ONLY state, but transition in case (XFB). - src_texture->TransitionToLayout(g_command_buffer_mgr->GetCurrentCommandBuffer(), - VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); - m_texture->TransitionToLayout(g_command_buffer_mgr->GetCurrentCommandBuffer(), - VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL); - - VkRenderPass render_pass = g_object_cache->GetRenderPass( - m_texture->GetFormat(), VK_FORMAT_UNDEFINED, 1, VK_ATTACHMENT_LOAD_OP_DONT_CARE); - UtilityShaderDraw draw(g_command_buffer_mgr->GetCurrentCommandBuffer(), - g_object_cache->GetPipelineLayout(PIPELINE_LAYOUT_STANDARD), render_pass, - g_shader_cache->GetPassthroughVertexShader(), - g_shader_cache->GetPassthroughGeometryShader(), - TextureCache::GetInstance()->GetCopyShader()); - - VkRect2D region = { - {dst_rect.left, dst_rect.top}, - {static_cast(dst_rect.GetWidth()), static_cast(dst_rect.GetHeight())}}; - draw.BeginRenderPass(m_framebuffer, region); - draw.SetPSSampler(0, src_texture->GetView(), g_object_cache->GetLinearSampler()); - draw.DrawQuad(dst_rect.left, dst_rect.top, dst_rect.GetWidth(), dst_rect.GetHeight(), - src_rect.left, src_rect.top, 0, src_rect.GetWidth(), src_rect.GetHeight(), - static_cast(src_texture->GetWidth()), - static_cast(src_texture->GetHeight())); - draw.EndRenderPass(); - - // Ensure both textures remain in the SHADER_READ_ONLY layout so they can be bound. - src_texture->TransitionToLayout(g_command_buffer_mgr->GetCurrentCommandBuffer(), - VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); - m_texture->TransitionToLayout(g_command_buffer_mgr->GetCurrentCommandBuffer(), - VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); + // Only restore the source layout. Destination is restored by FinishedRendering(). 
+ src_texture->TransitionToLayout(g_command_buffer_mgr->GetCurrentCommandBuffer(), old_src_layout); } void VKTexture::ResolveFromTexture(const AbstractTexture* src, const MathUtil::Rectangle& rect, @@ -225,11 +289,11 @@ void VKTexture::ResolveFromTexture(const AbstractTexture* src, const MathUtil::R // Resolving is considered to be a transfer operation. StateTracker::GetInstance()->EndRenderPass(); - VkImageLayout old_src_layout = srcentry->m_texture->GetLayout(); - srcentry->m_texture->TransitionToLayout(g_command_buffer_mgr->GetCurrentCommandBuffer(), - VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL); - m_texture->TransitionToLayout(g_command_buffer_mgr->GetCurrentCommandBuffer(), - VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); + VkImageLayout old_src_layout = srcentry->m_layout; + srcentry->TransitionToLayout(g_command_buffer_mgr->GetCurrentCommandBuffer(), + VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL); + TransitionToLayout(g_command_buffer_mgr->GetCurrentCommandBuffer(), + VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); VkImageResolve resolve = { {VK_IMAGE_ASPECT_COLOR_BIT, level, layer, 1}, // srcSubresource @@ -238,23 +302,18 @@ void VKTexture::ResolveFromTexture(const AbstractTexture* src, const MathUtil::R {rect.left, rect.top, 0}, // dstOffset {static_cast(rect.GetWidth()), static_cast(rect.GetHeight()), 1} // extent }; - vkCmdResolveImage(g_command_buffer_mgr->GetCurrentCommandBuffer(), - srcentry->m_texture->GetImage(), srcentry->m_texture->GetLayout(), - m_texture->GetImage(), m_texture->GetLayout(), 1, &resolve); + vkCmdResolveImage(g_command_buffer_mgr->GetCurrentCommandBuffer(), srcentry->m_image, + srcentry->m_layout, m_image, m_layout, 1, &resolve); - // Restore old source texture layout. Destination is assumed to be bound as a shader resource. 
- srcentry->m_texture->TransitionToLayout(g_command_buffer_mgr->GetCurrentCommandBuffer(), - old_src_layout); - m_texture->TransitionToLayout(g_command_buffer_mgr->GetCurrentCommandBuffer(), - VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); + srcentry->TransitionToLayout(g_command_buffer_mgr->GetCurrentCommandBuffer(), old_src_layout); } void VKTexture::Load(u32 level, u32 width, u32 height, u32 row_length, const u8* buffer, size_t buffer_size) { // Can't copy data larger than the texture extents. - width = std::max(1u, std::min(width, m_texture->GetWidth() >> level)); - height = std::max(1u, std::min(height, m_texture->GetHeight() >> level)); + width = std::max(1u, std::min(width, GetWidth() >> level)); + height = std::max(1u, std::min(height, GetHeight() >> level)); // We don't care about the existing contents of the texture, so we could the image layout to // VK_IMAGE_LAYOUT_UNDEFINED here. However, under section 2.2.1, Queue Operation of the Vulkan @@ -272,30 +331,29 @@ void VKTexture::Load(u32 level, u32 width, u32 height, u32 row_length, const u8* // When the last mip level is uploaded, we transition to SHADER_READ_ONLY, ready for use. This is // because we can't transition in a render pass, and we don't necessarily know when this texture // is going to be used. - m_texture->TransitionToLayout(g_command_buffer_mgr->GetCurrentInitCommandBuffer(), - VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); + TransitionToLayout(g_command_buffer_mgr->GetCurrentInitCommandBuffer(), + VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); // For unaligned textures, we can save some memory in the transfer buffer by skipping the rows // that lie outside of the texture's dimensions. 
- u32 upload_alignment = static_cast(g_vulkan_context->GetBufferImageGranularity()); - u32 block_size = Util::GetBlockSize(m_texture->GetFormat()); - u32 num_rows = Common::AlignUp(height, block_size) / block_size; - size_t source_pitch = CalculateStrideForFormat(m_config.format, row_length); - size_t upload_size = source_pitch * num_rows; + const u32 upload_alignment = static_cast(g_vulkan_context->GetBufferImageGranularity()); + const u32 block_size = GetBlockSizeForFormat(GetFormat()); + const u32 num_rows = Common::AlignUp(height, block_size) / block_size; + const u32 source_pitch = CalculateStrideForFormat(m_config.format, row_length); + const u32 upload_size = source_pitch * num_rows; std::unique_ptr temp_buffer; VkBuffer upload_buffer; VkDeviceSize upload_buffer_offset; // Does this texture data fit within the streaming buffer? - if (upload_size <= STAGING_TEXTURE_UPLOAD_THRESHOLD && - upload_size <= MAXIMUM_TEXTURE_UPLOAD_BUFFER_SIZE) + if (upload_size <= STAGING_TEXTURE_UPLOAD_THRESHOLD) { StreamBuffer* stream_buffer = g_object_cache->GetTextureUploadBuffer(); if (!stream_buffer->ReserveMemory(upload_size, upload_alignment)) { // Execute the command buffer first. WARN_LOG(VIDEO, "Executing command list while waiting for space in texture upload buffer"); - Util::ExecuteCurrentCommandsAndRestoreState(false); + Renderer::GetInstance()->ExecuteCommandBuffer(false); // Try allocating again. This may cause a fence wait. if (!stream_buffer->ReserveMemory(upload_size, upload_alignment)) @@ -334,17 +392,282 @@ void VKTexture::Load(u32 level, u32 width, u32 height, u32 row_length, const u8* {width, height, 1} // VkExtent3D imageExtent }; vkCmdCopyBufferToImage(g_command_buffer_mgr->GetCurrentInitCommandBuffer(), upload_buffer, - m_texture->GetImage(), VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, - &image_copy); + m_image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, &image_copy); - // Last mip level? 
We shouldn't be doing any further uploads now, so transition for rendering. + // Preemptively transition to shader read only after uploading the last mip level, as we're + // likely finished with writes to this texture for now. We can't do this in common with a + // FinishedRendering() call because the upload happens in the init command buffer, and we + // don't want to interrupt the render pass with calls which were executed ages before. if (level == (m_config.levels - 1)) { - m_texture->TransitionToLayout(g_command_buffer_mgr->GetCurrentInitCommandBuffer(), - VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); + TransitionToLayout(g_command_buffer_mgr->GetCurrentInitCommandBuffer(), + VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); } } +void VKTexture::FinishedRendering() +{ + if (m_layout == VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL) + return; + + StateTracker::GetInstance()->EndRenderPass(); + TransitionToLayout(g_command_buffer_mgr->GetCurrentCommandBuffer(), + VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); +} + +void VKTexture::OverrideImageLayout(VkImageLayout new_layout) +{ + m_layout = new_layout; +} + +void VKTexture::TransitionToLayout(VkCommandBuffer command_buffer, VkImageLayout new_layout) const +{ + if (m_layout == new_layout) + return; + + VkImageMemoryBarrier barrier = { + VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, // VkStructureType sType + nullptr, // const void* pNext + 0, // VkAccessFlags srcAccessMask + 0, // VkAccessFlags dstAccessMask + m_layout, // VkImageLayout oldLayout + new_layout, // VkImageLayout newLayout + VK_QUEUE_FAMILY_IGNORED, // uint32_t srcQueueFamilyIndex + VK_QUEUE_FAMILY_IGNORED, // uint32_t dstQueueFamilyIndex + m_image, // VkImage image + {GetImageAspectForFormat(GetFormat()), 0, GetLevels(), 0, + GetLayers()} // VkImageSubresourceRange subresourceRange + }; + + // srcStageMask -> Stages that must complete before the barrier + // dstStageMask -> Stages that must wait for after the barrier before beginning + VkPipelineStageFlags srcStageMask, 
dstStageMask; + switch (m_layout) + { + case VK_IMAGE_LAYOUT_UNDEFINED: + // Layout undefined therefore contents undefined, and we don't care what happens to it. + barrier.srcAccessMask = 0; + srcStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT; + break; + + case VK_IMAGE_LAYOUT_PREINITIALIZED: + // Image has been pre-initialized by the host, so ensure all writes have completed. + barrier.srcAccessMask = VK_ACCESS_HOST_WRITE_BIT; + srcStageMask = VK_PIPELINE_STAGE_HOST_BIT; + break; + + case VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL: + // Image was being used as a color attachment, so ensure all writes have completed. + barrier.srcAccessMask = + VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; + srcStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; + break; + + case VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL: + // Image was being used as a depthstencil attachment, so ensure all writes have completed. + barrier.srcAccessMask = + VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT; + srcStageMask = + VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT; + break; + + case VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL: + // Image was being used as a shader resource, make sure all reads have finished. + barrier.srcAccessMask = VK_ACCESS_SHADER_READ_BIT; + srcStageMask = VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT; + break; + + case VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL: + // Image was being used as a copy source, ensure all reads have finished. + barrier.srcAccessMask = VK_ACCESS_TRANSFER_READ_BIT; + srcStageMask = VK_PIPELINE_STAGE_TRANSFER_BIT; + break; + + case VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL: + // Image was being used as a copy destination, ensure all writes have finished. 
+ barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; + srcStageMask = VK_PIPELINE_STAGE_TRANSFER_BIT; + break; + + default: + srcStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT; + break; + } + + switch (new_layout) + { + case VK_IMAGE_LAYOUT_UNDEFINED: + barrier.dstAccessMask = 0; + dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT; + break; + + case VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL: + barrier.dstAccessMask = + VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; + dstStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; + break; + + case VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL: + barrier.dstAccessMask = + VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT; + dstStageMask = + VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT; + break; + + case VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL: + barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT; + dstStageMask = VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT; + break; + + case VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL: + barrier.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT; + dstStageMask = VK_PIPELINE_STAGE_TRANSFER_BIT; + break; + + case VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL: + barrier.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; + dstStageMask = VK_PIPELINE_STAGE_TRANSFER_BIT; + break; + + case VK_IMAGE_LAYOUT_PRESENT_SRC_KHR: + srcStageMask = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT; + dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT; + break; + + default: + dstStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT; + break; + } + + // If we were using a compute layout, the stages need to reflect that + switch (m_compute_layout) + { + case ComputeImageLayout::Undefined: + break; + case ComputeImageLayout::ReadOnly: + barrier.srcAccessMask = VK_ACCESS_SHADER_READ_BIT; + srcStageMask = VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT; + break; + case ComputeImageLayout::WriteOnly: + barrier.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT; 
+ srcStageMask = VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT; + break; + case ComputeImageLayout::ReadWrite: + barrier.srcAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT; + srcStageMask = VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT; + break; + } + m_compute_layout = ComputeImageLayout::Undefined; + + vkCmdPipelineBarrier(command_buffer, srcStageMask, dstStageMask, 0, 0, nullptr, 0, nullptr, 1, + &barrier); + + m_layout = new_layout; +} + +void VKTexture::TransitionToLayout(VkCommandBuffer command_buffer, + ComputeImageLayout new_layout) const +{ + ASSERT(new_layout != ComputeImageLayout::Undefined); + if (m_compute_layout == new_layout) + return; + + VkImageMemoryBarrier barrier = { + VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, // VkStructureType sType + nullptr, // const void* pNext + 0, // VkAccessFlags srcAccessMask + 0, // VkAccessFlags dstAccessMask + m_layout, // VkImageLayout oldLayout + VK_IMAGE_LAYOUT_GENERAL, // VkImageLayout newLayout + VK_QUEUE_FAMILY_IGNORED, // uint32_t srcQueueFamilyIndex + VK_QUEUE_FAMILY_IGNORED, // uint32_t dstQueueFamilyIndex + m_image, // VkImage image + {GetImageAspectForFormat(GetFormat()), 0, GetLevels(), 0, + GetLayers()} // VkImageSubresourceRange subresourceRange + }; + + VkPipelineStageFlags srcStageMask, dstStageMask; + switch (m_layout) + { + case VK_IMAGE_LAYOUT_UNDEFINED: + // Layout undefined therefore contents undefined, and we don't care what happens to it. + barrier.srcAccessMask = 0; + srcStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT; + break; + + case VK_IMAGE_LAYOUT_PREINITIALIZED: + // Image has been pre-initialized by the host, so ensure all writes have completed. + barrier.srcAccessMask = VK_ACCESS_HOST_WRITE_BIT; + srcStageMask = VK_PIPELINE_STAGE_HOST_BIT; + break; + + case VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL: + // Image was being used as a color attachment, so ensure all writes have completed. 
+ barrier.srcAccessMask = + VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; + srcStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; + break; + + case VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL: + // Image was being used as a depthstencil attachment, so ensure all writes have completed. + barrier.srcAccessMask = + VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT; + srcStageMask = + VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT; + break; + + case VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL: + // Image was being used as a shader resource, make sure all reads have finished. + barrier.srcAccessMask = VK_ACCESS_SHADER_READ_BIT; + srcStageMask = VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT; + break; + + case VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL: + // Image was being used as a copy source, ensure all reads have finished. + barrier.srcAccessMask = VK_ACCESS_TRANSFER_READ_BIT; + srcStageMask = VK_PIPELINE_STAGE_TRANSFER_BIT; + break; + + case VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL: + // Image was being used as a copy destination, ensure all writes have finished. 
+ barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; + srcStageMask = VK_PIPELINE_STAGE_TRANSFER_BIT; + break; + + default: + srcStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT; + break; + } + + switch (new_layout) + { + case ComputeImageLayout::ReadOnly: + barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT; + barrier.newLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; + dstStageMask = VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT; + break; + case ComputeImageLayout::WriteOnly: + barrier.dstAccessMask = VK_ACCESS_SHADER_WRITE_BIT; + barrier.newLayout = VK_IMAGE_LAYOUT_GENERAL; + dstStageMask = VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT; + break; + case ComputeImageLayout::ReadWrite: + barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT; + barrier.newLayout = VK_IMAGE_LAYOUT_GENERAL; + dstStageMask = VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT; + break; + default: + dstStageMask = 0; + break; + } + + m_layout = barrier.newLayout; + m_compute_layout = new_layout; + + vkCmdPipelineBarrier(command_buffer, srcStageMask, dstStageMask, 0, 0, nullptr, 0, nullptr, 1, + &barrier); +} + VKStagingTexture::VKStagingTexture(StagingTextureType type, const TextureConfig& config, std::unique_ptr buffer) : AbstractStagingTexture(type, config), m_staging_buffer(std::move(buffer)) @@ -407,38 +730,32 @@ void VKStagingTexture::CopyFromTexture(const AbstractTexture* src, const MathUtil::Rectangle& src_rect, u32 src_layer, u32 src_level, const MathUtil::Rectangle& dst_rect) { + const VKTexture* src_tex = static_cast(src); ASSERT(m_type == StagingTextureType::Readback || m_type == StagingTextureType::Mutable); ASSERT(src_rect.GetWidth() == dst_rect.GetWidth() && src_rect.GetHeight() == dst_rect.GetHeight()); - ASSERT(src_rect.left >= 0 && static_cast(src_rect.right) <= src->GetConfig().width && - src_rect.top >= 0 && static_cast(src_rect.bottom) <= src->GetConfig().height); + ASSERT(src_rect.left >= 0 && static_cast(src_rect.right) <= src_tex->GetWidth() && + src_rect.top >= 0 && 
static_cast(src_rect.bottom) <= src_tex->GetHeight()); ASSERT(dst_rect.left >= 0 && static_cast(dst_rect.right) <= m_config.width && dst_rect.top >= 0 && static_cast(dst_rect.bottom) <= m_config.height); - Texture2D* src_tex = static_cast(src)->GetRawTexIdentifier(); - CopyFromTexture(src_tex, src_rect, src_layer, src_level, dst_rect); -} - -void VKStagingTexture::CopyFromTexture(Texture2D* src, const MathUtil::Rectangle& src_rect, - u32 src_layer, u32 src_level, - const MathUtil::Rectangle& dst_rect) -{ if (m_needs_flush) { // Drop copy before reusing it. - g_command_buffer_mgr->RemoveFencePointCallback(this); + g_command_buffer_mgr->RemoveFenceSignaledCallback(this); m_flush_fence = VK_NULL_HANDLE; m_needs_flush = false; } - VkImageLayout old_layout = src->GetLayout(); - src->TransitionToLayout(g_command_buffer_mgr->GetCurrentCommandBuffer(), - VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL); + StateTracker::GetInstance()->EndRenderPass(); + + VkImageLayout old_layout = src_tex->GetLayout(); + src_tex->TransitionToLayout(g_command_buffer_mgr->GetCurrentCommandBuffer(), + VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL); // Issue the image->buffer copy, but delay it for now. VkBufferImageCopy image_copy = {}; - VkImageAspectFlags aspect = - Util::IsDepthFormat(src->GetFormat()) ? 
VK_IMAGE_ASPECT_DEPTH_BIT : VK_IMAGE_ASPECT_COLOR_BIT; + const VkImageAspectFlags aspect = VKTexture::GetImageAspectForFormat(src_tex->GetFormat()); image_copy.bufferOffset = static_cast(static_cast(dst_rect.top) * m_config.GetStride() + static_cast(dst_rect.left) * m_texel_size); @@ -448,58 +765,51 @@ void VKStagingTexture::CopyFromTexture(Texture2D* src, const MathUtil::Rectangle image_copy.imageOffset = {src_rect.left, src_rect.top, 0}; image_copy.imageExtent = {static_cast(src_rect.GetWidth()), static_cast(src_rect.GetHeight()), 1u}; - vkCmdCopyImageToBuffer(g_command_buffer_mgr->GetCurrentCommandBuffer(), src->GetImage(), + vkCmdCopyImageToBuffer(g_command_buffer_mgr->GetCurrentCommandBuffer(), src_tex->GetImage(), VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, m_staging_buffer->GetBuffer(), 1, &image_copy); // Restore old source texture layout. - src->TransitionToLayout(g_command_buffer_mgr->GetCurrentCommandBuffer(), old_layout); + src_tex->TransitionToLayout(g_command_buffer_mgr->GetCurrentCommandBuffer(), old_layout); m_needs_flush = true; - g_command_buffer_mgr->AddFencePointCallback(this, - [this](VkCommandBuffer buf, VkFence fence) { - ASSERT(m_needs_flush); - if (m_flush_fence != VK_NULL_HANDLE) - return; + m_flush_fence = g_command_buffer_mgr->GetCurrentCommandBufferFence(); + g_command_buffer_mgr->AddFenceSignaledCallback(this, [this](VkFence fence) { + if (m_flush_fence != fence) + return; - m_flush_fence = fence; - }, - [this](VkFence fence) { - if (m_flush_fence != fence) - return; - - m_flush_fence = VK_NULL_HANDLE; - m_needs_flush = false; - g_command_buffer_mgr->RemoveFencePointCallback( - this); - m_staging_buffer->InvalidateCPUCache(); - }); + m_flush_fence = VK_NULL_HANDLE; + m_needs_flush = false; + g_command_buffer_mgr->RemoveFenceSignaledCallback(this); + m_staging_buffer->InvalidateCPUCache(); + }); } void VKStagingTexture::CopyToTexture(const MathUtil::Rectangle& src_rect, AbstractTexture* dst, const MathUtil::Rectangle& dst_rect, u32 dst_layer, 
u32 dst_level) { + const VKTexture* dst_tex = static_cast(dst); ASSERT(m_type == StagingTextureType::Upload || m_type == StagingTextureType::Mutable); ASSERT(src_rect.GetWidth() == dst_rect.GetWidth() && src_rect.GetHeight() == dst_rect.GetHeight()); ASSERT(src_rect.left >= 0 && static_cast(src_rect.right) <= m_config.width && src_rect.top >= 0 && static_cast(src_rect.bottom) <= m_config.height); - ASSERT(dst_rect.left >= 0 && static_cast(dst_rect.right) <= dst->GetConfig().width && - dst_rect.top >= 0 && static_cast(dst_rect.bottom) <= dst->GetConfig().height); + ASSERT(dst_rect.left >= 0 && static_cast(dst_rect.right) <= dst_tex->GetWidth() && + dst_rect.top >= 0 && static_cast(dst_rect.bottom) <= dst_tex->GetHeight()); if (m_needs_flush) { // Drop copy before reusing it. - g_command_buffer_mgr->RemoveFencePointCallback(this); + g_command_buffer_mgr->RemoveFenceSignaledCallback(this); m_flush_fence = VK_NULL_HANDLE; m_needs_flush = false; } // Flush caches before copying. m_staging_buffer->FlushCPUCache(); + StateTracker::GetInstance()->EndRenderPass(); - Texture2D* dst_tex = static_cast(dst)->GetRawTexIdentifier(); VkImageLayout old_layout = dst_tex->GetLayout(); dst_tex->TransitionToLayout(g_command_buffer_mgr->GetCurrentCommandBuffer(), VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); @@ -523,23 +833,15 @@ void VKStagingTexture::CopyToTexture(const MathUtil::Rectangle& src_rect, A dst_tex->TransitionToLayout(g_command_buffer_mgr->GetCurrentCommandBuffer(), old_layout); m_needs_flush = true; - g_command_buffer_mgr->AddFencePointCallback(this, - [this](VkCommandBuffer buf, VkFence fence) { - ASSERT(m_needs_flush); - if (m_flush_fence != VK_NULL_HANDLE) - return; + m_flush_fence = g_command_buffer_mgr->GetCurrentCommandBufferFence(); + g_command_buffer_mgr->AddFenceSignaledCallback(this, [this](VkFence fence) { + if (m_flush_fence != fence) + return; - m_flush_fence = fence; - }, - [this](VkFence fence) { - if (m_flush_fence != fence) - return; - - m_flush_fence = 
VK_NULL_HANDLE; - m_needs_flush = false; - g_command_buffer_mgr->RemoveFencePointCallback( - this); - }); + m_flush_fence = VK_NULL_HANDLE; + m_needs_flush = false; + g_command_buffer_mgr->RemoveFenceSignaledCallback(this); + }); } bool VKStagingTexture::Map() @@ -559,19 +861,19 @@ void VKStagingTexture::Flush() return; // Either of the below two calls will cause the callback to fire. - g_command_buffer_mgr->RemoveFencePointCallback(this); - if (m_flush_fence != VK_NULL_HANDLE) + g_command_buffer_mgr->RemoveFenceSignaledCallback(this); + if (m_flush_fence == g_command_buffer_mgr->GetCurrentCommandBufferFence()) { - // WaitForFence should fire the callback. - g_command_buffer_mgr->WaitForFence(m_flush_fence); - m_flush_fence = VK_NULL_HANDLE; + // The readback is in the current command buffer, and we must execute it. + Renderer::GetInstance()->ExecuteCommandBuffer(false, true); } else { - // We don't have a fence, and are pending. That means the readback is in the current - // command buffer, and must execute it to populate the staging texture. - Util::ExecuteCurrentCommandsAndRestoreState(false, true); + // WaitForFence should fire the callback. + g_command_buffer_mgr->WaitForFence(m_flush_fence); } + + DEBUG_ASSERT(m_flush_fence == VK_NULL_HANDLE); m_needs_flush = false; // For readback textures, invalidate the CPU cache as there is new data there. @@ -579,16 +881,16 @@ void VKStagingTexture::Flush() m_staging_buffer->InvalidateCPUCache(); } -VKFramebuffer::VKFramebuffer(const VKTexture* color_attachment, const VKTexture* depth_attachment, - u32 width, u32 height, u32 layers, u32 samples, VkFramebuffer fb, +VKFramebuffer::VKFramebuffer(VKTexture* color_attachment, VKTexture* depth_attachment, u32 width, + u32 height, u32 layers, u32 samples, VkFramebuffer fb, VkRenderPass load_render_pass, VkRenderPass discard_render_pass, VkRenderPass clear_render_pass) : AbstractFramebuffer( + color_attachment, depth_attachment, color_attachment ? 
color_attachment->GetFormat() : AbstractTextureFormat::Undefined, depth_attachment ? depth_attachment->GetFormat() : AbstractTextureFormat::Undefined, width, height, layers, samples), - m_color_attachment(color_attachment), m_depth_attachment(depth_attachment), m_fb(fb), - m_load_render_pass(load_render_pass), m_discard_render_pass(discard_render_pass), + m_fb(fb), m_load_render_pass(load_render_pass), m_discard_render_pass(discard_render_pass), m_clear_render_pass(clear_render_pass) { } @@ -598,16 +900,16 @@ VKFramebuffer::~VKFramebuffer() g_command_buffer_mgr->DeferFramebufferDestruction(m_fb); } -std::unique_ptr VKFramebuffer::Create(const VKTexture* color_attachment, - const VKTexture* depth_attachment) +std::unique_ptr VKFramebuffer::Create(VKTexture* color_attachment, + VKTexture* depth_attachment) { if (!ValidateConfig(color_attachment, depth_attachment)) return nullptr; const VkFormat vk_color_format = - color_attachment ? color_attachment->GetRawTexIdentifier()->GetFormat() : VK_FORMAT_UNDEFINED; + color_attachment ? color_attachment->GetVkFormat() : VK_FORMAT_UNDEFINED; const VkFormat vk_depth_format = - depth_attachment ? depth_attachment->GetRawTexIdentifier()->GetFormat() : VK_FORMAT_UNDEFINED; + depth_attachment ? depth_attachment->GetVkFormat() : VK_FORMAT_UNDEFINED; const VKTexture* either_attachment = color_attachment ? 
color_attachment : depth_attachment; const u32 width = either_attachment->GetWidth(); const u32 height = either_attachment->GetHeight(); @@ -618,10 +920,10 @@ std::unique_ptr VKFramebuffer::Create(const VKTexture* color_atta u32 num_attachments = 0; if (color_attachment) - attachment_views[num_attachments++] = color_attachment->GetRawTexIdentifier()->GetView(); + attachment_views[num_attachments++] = color_attachment->GetView(); if (depth_attachment) - attachment_views[num_attachments++] = depth_attachment->GetRawTexIdentifier()->GetView(); + attachment_views[num_attachments++] = depth_attachment->GetView(); VkRenderPass load_render_pass = g_object_cache->GetRenderPass( vk_color_format, vk_depth_format, samples, VK_ATTACHMENT_LOAD_OP_LOAD); @@ -659,38 +961,20 @@ std::unique_ptr VKFramebuffer::Create(const VKTexture* color_atta clear_render_pass); } -void VKFramebuffer::TransitionForRender() const +void VKFramebuffer::TransitionForRender() { if (m_color_attachment) { - m_color_attachment->GetRawTexIdentifier()->TransitionToLayout( - g_command_buffer_mgr->GetCurrentCommandBuffer(), VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL); + static_cast(m_color_attachment) + ->TransitionToLayout(g_command_buffer_mgr->GetCurrentCommandBuffer(), + VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL); } if (m_depth_attachment) { - m_depth_attachment->GetRawTexIdentifier()->TransitionToLayout( - g_command_buffer_mgr->GetCurrentCommandBuffer(), - VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL); + static_cast(m_depth_attachment) + ->TransitionToLayout(g_command_buffer_mgr->GetCurrentCommandBuffer(), + VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL); } } - -void VKFramebuffer::TransitionForSample() const -{ - if (StateTracker::GetInstance()->GetFramebuffer() == m_fb) - StateTracker::GetInstance()->EndRenderPass(); - - if (m_color_attachment) - { - m_color_attachment->GetRawTexIdentifier()->TransitionToLayout( - g_command_buffer_mgr->GetCurrentCommandBuffer(), 
VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); - } - - if (m_depth_attachment) - { - m_depth_attachment->GetRawTexIdentifier()->TransitionToLayout( - g_command_buffer_mgr->GetCurrentCommandBuffer(), VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); - } -} - } // namespace Vulkan diff --git a/Source/Core/VideoBackends/Vulkan/VKTexture.h b/Source/Core/VideoBackends/Vulkan/VKTexture.h index 3a5c8cadc8..bab11ec108 100644 --- a/Source/Core/VideoBackends/Vulkan/VKTexture.h +++ b/Source/Core/VideoBackends/Vulkan/VKTexture.h @@ -5,8 +5,8 @@ #pragma once #include -#include +#include "VideoBackends/Vulkan/VulkanLoader.h" #include "VideoCommon/AbstractFramebuffer.h" #include "VideoCommon/AbstractStagingTexture.h" #include "VideoCommon/AbstractTexture.h" @@ -19,33 +19,64 @@ class Texture2D; class VKTexture final : public AbstractTexture { public: + // Custom image layouts, mainly used for switching to/from compute + enum class ComputeImageLayout + { + Undefined, + ReadOnly, + WriteOnly, + ReadWrite + }; + VKTexture() = delete; + VKTexture(const TextureConfig& tex_config, VkDeviceMemory device_memory, VkImage image, + VkImageLayout layout = VK_IMAGE_LAYOUT_UNDEFINED, + ComputeImageLayout compute_layout = ComputeImageLayout::Undefined); ~VKTexture(); + static VkFormat GetLinearFormat(VkFormat format); + static VkFormat GetVkFormatForHostTextureFormat(AbstractTextureFormat format); + static VkImageAspectFlags GetImageAspectForFormat(AbstractTextureFormat format); + void CopyRectangleFromTexture(const AbstractTexture* src, const MathUtil::Rectangle& src_rect, u32 src_layer, u32 src_level, const MathUtil::Rectangle& dst_rect, u32 dst_layer, u32 dst_level) override; - void ScaleRectangleFromTexture(const AbstractTexture* source, - const MathUtil::Rectangle& src_rect, - const MathUtil::Rectangle& dst_rect) override; void ResolveFromTexture(const AbstractTexture* src, const MathUtil::Rectangle& rect, u32 layer, u32 level) override; - void Load(u32 level, u32 width, u32 height, u32 row_length, 
const u8* buffer, size_t buffer_size) override; + void FinishedRendering() override; - Texture2D* GetRawTexIdentifier() const; - VkFramebuffer GetFramebuffer() const; + VkImage GetImage() const { return m_image; } + VkDeviceMemory GetDeviceMemory() const { return m_device_memory; } + VkImageView GetView() const { return m_view; } + VkImageLayout GetLayout() const { return m_layout; } + VkFormat GetVkFormat() const { return GetVkFormatForHostTextureFormat(m_config.format); } + bool IsAdopted() const { return m_device_memory != nullptr; } static std::unique_ptr Create(const TextureConfig& tex_config); + static std::unique_ptr + CreateAdopted(const TextureConfig& tex_config, VkImage image, + VkImageViewType view_type = VK_IMAGE_VIEW_TYPE_2D_ARRAY, + VkImageLayout layout = VK_IMAGE_LAYOUT_UNDEFINED); + + // Used when the render pass is changing the image layout, or to force it to + // VK_IMAGE_LAYOUT_UNDEFINED, if the existing contents of the image is + // irrelevant and will not be loaded. + void OverrideImageLayout(VkImageLayout new_layout); + + void TransitionToLayout(VkCommandBuffer command_buffer, VkImageLayout new_layout) const; + void TransitionToLayout(VkCommandBuffer command_buffer, ComputeImageLayout new_layout) const; private: - VKTexture(const TextureConfig& tex_config, std::unique_ptr texture, - VkFramebuffer framebuffer); + bool CreateView(VkImageViewType type); - std::unique_ptr m_texture; - VkFramebuffer m_framebuffer; + VkDeviceMemory m_device_memory; + VkImage m_image; + VkImageView m_view = VK_NULL_HANDLE; + mutable VkImageLayout m_layout = VK_IMAGE_LAYOUT_UNDEFINED; + mutable ComputeImageLayout m_compute_layout = ComputeImageLayout::Undefined; }; class VKStagingTexture final : public AbstractStagingTexture @@ -65,11 +96,6 @@ public: void Unmap() override; void Flush() override; - // This overload is provided for compatibility as we dropped StagingTexture2D. - // For now, FramebufferManager relies on them. 
But we can drop it once we move that to common. - void CopyFromTexture(Texture2D* src, const MathUtil::Rectangle& src_rect, u32 src_layer, - u32 src_level, const MathUtil::Rectangle& dst_rect); - static std::unique_ptr Create(StagingTextureType type, const TextureConfig& config); @@ -84,25 +110,23 @@ private: class VKFramebuffer final : public AbstractFramebuffer { public: - VKFramebuffer(const VKTexture* color_attachment, const VKTexture* depth_attachment, u32 width, - u32 height, u32 layers, u32 samples, VkFramebuffer fb, - VkRenderPass load_render_pass, VkRenderPass discard_render_pass, - VkRenderPass clear_render_pass); + VKFramebuffer(VKTexture* color_attachment, VKTexture* depth_attachment, u32 width, u32 height, + u32 layers, u32 samples, VkFramebuffer fb, VkRenderPass load_render_pass, + VkRenderPass discard_render_pass, VkRenderPass clear_render_pass); ~VKFramebuffer() override; VkFramebuffer GetFB() const { return m_fb; } + VkRect2D GetRect() const { return VkRect2D{{0, 0}, {m_width, m_height}}; } + VkRenderPass GetLoadRenderPass() const { return m_load_render_pass; } VkRenderPass GetDiscardRenderPass() const { return m_discard_render_pass; } VkRenderPass GetClearRenderPass() const { return m_clear_render_pass; } - void TransitionForRender() const; - void TransitionForSample() const; + void TransitionForRender(); - static std::unique_ptr Create(const VKTexture* color_attachments, - const VKTexture* depth_attachment); + static std::unique_ptr Create(VKTexture* color_attachments, + VKTexture* depth_attachment); protected: - const VKTexture* m_color_attachment; - const VKTexture* m_depth_attachment; VkFramebuffer m_fb; VkRenderPass m_load_render_pass; VkRenderPass m_discard_render_pass; diff --git a/Source/Core/VideoBackends/Vulkan/VertexFormat.cpp b/Source/Core/VideoBackends/Vulkan/VertexFormat.cpp index 3ea824e530..5d8006c2c0 100644 --- a/Source/Core/VideoBackends/Vulkan/VertexFormat.cpp +++ b/Source/Core/VideoBackends/Vulkan/VertexFormat.cpp @@ -46,9 
+46,8 @@ static VkFormat VarToVkFormat(VarType t, uint32_t components, bool integer) return integer ? integer_type_lookup[t][components - 1] : float_type_lookup[t][components - 1]; } -VertexFormat::VertexFormat(const PortableVertexDeclaration& in_vtx_decl) +VertexFormat::VertexFormat(const PortableVertexDeclaration& vtx_decl) : NativeVertexFormat(vtx_decl) { - vtx_decl = in_vtx_decl; MapAttributes(); SetupInputState(); } @@ -62,50 +61,49 @@ void VertexFormat::MapAttributes() { m_num_attributes = 0; - if (vtx_decl.position.enable) - AddAttribute(SHADER_POSITION_ATTRIB, 0, - VarToVkFormat(vtx_decl.position.type, vtx_decl.position.components, - vtx_decl.position.integer), - vtx_decl.position.offset); + if (m_decl.position.enable) + AddAttribute( + SHADER_POSITION_ATTRIB, 0, + VarToVkFormat(m_decl.position.type, m_decl.position.components, m_decl.position.integer), + m_decl.position.offset); for (uint32_t i = 0; i < 3; i++) { - if (vtx_decl.normals[i].enable) + if (m_decl.normals[i].enable) AddAttribute(SHADER_NORM0_ATTRIB + i, 0, - VarToVkFormat(vtx_decl.normals[i].type, vtx_decl.normals[i].components, - vtx_decl.normals[i].integer), - vtx_decl.normals[i].offset); + VarToVkFormat(m_decl.normals[i].type, m_decl.normals[i].components, + m_decl.normals[i].integer), + m_decl.normals[i].offset); } for (uint32_t i = 0; i < 2; i++) { - if (vtx_decl.colors[i].enable) + if (m_decl.colors[i].enable) AddAttribute(SHADER_COLOR0_ATTRIB + i, 0, - VarToVkFormat(vtx_decl.colors[i].type, vtx_decl.colors[i].components, - vtx_decl.colors[i].integer), - vtx_decl.colors[i].offset); + VarToVkFormat(m_decl.colors[i].type, m_decl.colors[i].components, + m_decl.colors[i].integer), + m_decl.colors[i].offset); } for (uint32_t i = 0; i < 8; i++) { - if (vtx_decl.texcoords[i].enable) + if (m_decl.texcoords[i].enable) AddAttribute(SHADER_TEXTURE0_ATTRIB + i, 0, - VarToVkFormat(vtx_decl.texcoords[i].type, vtx_decl.texcoords[i].components, - vtx_decl.texcoords[i].integer), - 
vtx_decl.texcoords[i].offset); + VarToVkFormat(m_decl.texcoords[i].type, m_decl.texcoords[i].components, + m_decl.texcoords[i].integer), + m_decl.texcoords[i].offset); } - if (vtx_decl.posmtx.enable) - AddAttribute( - SHADER_POSMTX_ATTRIB, 0, - VarToVkFormat(vtx_decl.posmtx.type, vtx_decl.posmtx.components, vtx_decl.posmtx.integer), - vtx_decl.posmtx.offset); + if (m_decl.posmtx.enable) + AddAttribute(SHADER_POSMTX_ATTRIB, 0, + VarToVkFormat(m_decl.posmtx.type, m_decl.posmtx.components, m_decl.posmtx.integer), + m_decl.posmtx.offset); } void VertexFormat::SetupInputState() { m_binding_description.binding = 0; - m_binding_description.stride = vtx_decl.stride; + m_binding_description.stride = m_decl.stride; m_binding_description.inputRate = VK_VERTEX_INPUT_RATE_VERTEX; m_input_state_info.sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO; diff --git a/Source/Core/VideoBackends/Vulkan/VertexFormat.h b/Source/Core/VideoBackends/Vulkan/VertexFormat.h index 9b5810ced7..617967202b 100644 --- a/Source/Core/VideoBackends/Vulkan/VertexFormat.h +++ b/Source/Core/VideoBackends/Vulkan/VertexFormat.h @@ -14,7 +14,7 @@ namespace Vulkan class VertexFormat : public ::NativeVertexFormat { public: - VertexFormat(const PortableVertexDeclaration& in_vtx_decl); + VertexFormat(const PortableVertexDeclaration& vtx_decl); // Passed to pipeline state creation const VkPipelineVertexInputStateCreateInfo& GetVertexInputStateInfo() const; @@ -35,4 +35,4 @@ private: uint32_t m_num_attributes = 0; }; -} +} // namespace Vulkan diff --git a/Source/Core/VideoBackends/Vulkan/VertexManager.cpp b/Source/Core/VideoBackends/Vulkan/VertexManager.cpp index bd0ab0b54d..7fcbf84013 100644 --- a/Source/Core/VideoBackends/Vulkan/VertexManager.cpp +++ b/Source/Core/VideoBackends/Vulkan/VertexManager.cpp @@ -4,87 +4,140 @@ #include "VideoBackends/Vulkan/VertexManager.h" +#include "Common/Align.h" #include "Common/CommonTypes.h" #include "Common/Logging/Log.h" #include "Common/MsgHandler.h" 
-#include "VideoBackends/Vulkan/BoundingBox.h" #include "VideoBackends/Vulkan/CommandBufferManager.h" -#include "VideoBackends/Vulkan/FramebufferManager.h" #include "VideoBackends/Vulkan/Renderer.h" #include "VideoBackends/Vulkan/StateTracker.h" #include "VideoBackends/Vulkan/StreamBuffer.h" -#include "VideoBackends/Vulkan/Util.h" #include "VideoBackends/Vulkan/VertexFormat.h" #include "VideoBackends/Vulkan/VulkanContext.h" -#include "VideoCommon/BoundingBox.h" +#include "VideoCommon/GeometryShaderManager.h" #include "VideoCommon/IndexGenerator.h" +#include "VideoCommon/PixelShaderManager.h" #include "VideoCommon/Statistics.h" #include "VideoCommon/VertexLoaderManager.h" +#include "VideoCommon/VertexShaderManager.h" #include "VideoCommon/VideoConfig.h" namespace Vulkan { -// TODO: Clean up this mess -constexpr size_t INITIAL_VERTEX_BUFFER_SIZE = VertexManager::MAXVBUFFERSIZE * 2; -constexpr size_t MAX_VERTEX_BUFFER_SIZE = VertexManager::MAXVBUFFERSIZE * 16; -constexpr size_t INITIAL_INDEX_BUFFER_SIZE = VertexManager::MAXIBUFFERSIZE * sizeof(u16) * 2; -constexpr size_t MAX_INDEX_BUFFER_SIZE = VertexManager::MAXIBUFFERSIZE * sizeof(u16) * 16; - -VertexManager::VertexManager() - : m_cpu_vertex_buffer(MAXVBUFFERSIZE), m_cpu_index_buffer(MAXIBUFFERSIZE) +static VkBufferView CreateTexelBufferView(VkBuffer buffer, VkFormat vk_format) { + // Create a view of the whole buffer, we'll offset our texel load into it + VkBufferViewCreateInfo view_info = { + VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO, // VkStructureType sType + nullptr, // const void* pNext + 0, // VkBufferViewCreateFlags flags + buffer, // VkBuffer buffer + vk_format, // VkFormat format + 0, // VkDeviceSize offset + VK_WHOLE_SIZE // VkDeviceSize range + }; + + VkBufferView view; + VkResult res = vkCreateBufferView(g_vulkan_context->GetDevice(), &view_info, nullptr, &view); + if (res != VK_SUCCESS) + { + LOG_VULKAN_ERROR(res, "vkCreateBufferView failed: "); + return VK_NULL_HANDLE; + } + + return view; } 
+VertexManager::VertexManager() = default; + VertexManager::~VertexManager() { -} - -VertexManager* VertexManager::GetInstance() -{ - return static_cast(g_vertex_manager.get()); + DestroyTexelBufferViews(); } bool VertexManager::Initialize() { - m_vertex_stream_buffer = StreamBuffer::Create(VK_BUFFER_USAGE_VERTEX_BUFFER_BIT, - INITIAL_VERTEX_BUFFER_SIZE, MAX_VERTEX_BUFFER_SIZE); - - m_index_stream_buffer = StreamBuffer::Create(VK_BUFFER_USAGE_INDEX_BUFFER_BIT, - INITIAL_INDEX_BUFFER_SIZE, MAX_INDEX_BUFFER_SIZE); - - if (!m_vertex_stream_buffer || !m_index_stream_buffer) + m_vertex_stream_buffer = + StreamBuffer::Create(VK_BUFFER_USAGE_VERTEX_BUFFER_BIT, VERTEX_STREAM_BUFFER_SIZE * 4); + m_index_stream_buffer = + StreamBuffer::Create(VK_BUFFER_USAGE_INDEX_BUFFER_BIT, INDEX_STREAM_BUFFER_SIZE * 4); + m_uniform_stream_buffer = + StreamBuffer::Create(VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, UNIFORM_STREAM_BUFFER_SIZE * 4); + if (!m_vertex_stream_buffer || !m_index_stream_buffer || !m_uniform_stream_buffer) { PanicAlert("Failed to allocate streaming buffers"); return false; } + // The validation layer complains if max(offsets) + max(ubo_ranges) >= ubo_size. + // To work around this we reserve the maximum buffer size at all times, but only commit + // as many bytes as we use. + m_uniform_buffer_reserve_size = sizeof(PixelShaderConstants); + m_uniform_buffer_reserve_size = Common::AlignUp(m_uniform_buffer_reserve_size, + g_vulkan_context->GetUniformBufferAlignment()) + + sizeof(VertexShaderConstants); + m_uniform_buffer_reserve_size = Common::AlignUp(m_uniform_buffer_reserve_size, + g_vulkan_context->GetUniformBufferAlignment()) + + sizeof(GeometryShaderConstants); + + // Prefer an 8MB buffer if possible, but use less if the device doesn't support this. + // This buffer is potentially going to be addressed as R8s in the future, so we assume + // that one element is one byte. This doesn't use min() because of a NDK compiler bug.. 
+ const u32 texel_buffer_size = + TEXEL_STREAM_BUFFER_SIZE > g_vulkan_context->GetDeviceLimits().maxTexelBufferElements ? + g_vulkan_context->GetDeviceLimits().maxTexelBufferElements : + TEXEL_STREAM_BUFFER_SIZE; + m_texel_stream_buffer = + StreamBuffer::Create(VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT, texel_buffer_size); + if (!m_texel_stream_buffer) + { + PanicAlert("Failed to allocate streaming texel buffer"); + return false; + } + + static constexpr std::array, NUM_TEXEL_BUFFER_FORMATS> + format_mapping = {{ + {TEXEL_BUFFER_FORMAT_R8_UINT, VK_FORMAT_R8_UINT}, + {TEXEL_BUFFER_FORMAT_R16_UINT, VK_FORMAT_R16_UINT}, + {TEXEL_BUFFER_FORMAT_RGBA8_UINT, VK_FORMAT_R8G8B8A8_UNORM}, + {TEXEL_BUFFER_FORMAT_R32G32_UINT, VK_FORMAT_R32G32_UINT}, + }}; + for (const auto& it : format_mapping) + { + if ((m_texel_buffer_views[it.first] = CreateTexelBufferView(m_texel_stream_buffer->GetBuffer(), + it.second)) == VK_NULL_HANDLE) + { + PanicAlert("Failed to create texel buffer view"); + return false; + } + } + + // Bind the buffers to all the known spots even if it's not used, to keep the driver happy. 
+ UploadAllConstants(); + StateTracker::GetInstance()->SetUtilityUniformBuffer(m_uniform_stream_buffer->GetBuffer(), 0, + sizeof(VertexShaderConstants)); + for (u32 i = 0; i < NUM_COMPUTE_TEXEL_BUFFERS; i++) + { + StateTracker::GetInstance()->SetTexelBuffer(i, + m_texel_buffer_views[TEXEL_BUFFER_FORMAT_R8_UINT]); + } + return true; } -std::unique_ptr -VertexManager::CreateNativeVertexFormat(const PortableVertexDeclaration& vtx_decl) +void VertexManager::DestroyTexelBufferViews() { - return std::make_unique(vtx_decl); -} - -void VertexManager::UploadUtilityUniforms(const void* uniforms, u32 uniforms_size) -{ - StateTracker::GetInstance()->UpdateConstants(uniforms, uniforms_size); -} - -void VertexManager::ResetBuffer(u32 vertex_stride, bool cull_all) -{ - if (cull_all) + for (VkBufferView view : m_texel_buffer_views) { - // Not drawing on the gpu, so store in a heap buffer instead - m_cur_buffer_pointer = m_base_buffer_pointer = m_cpu_vertex_buffer.data(); - m_end_buffer_pointer = m_base_buffer_pointer + m_cpu_vertex_buffer.size(); - IndexGenerator::Start(m_cpu_index_buffer.data()); - return; + if (view != VK_NULL_HANDLE) + vkDestroyBufferView(g_vulkan_context->GetDevice(), view, nullptr); } +} +void VertexManager::ResetBuffer(u32 vertex_stride) +{ // Attempt to allocate from buffers bool has_vbuffer_allocation = m_vertex_stream_buffer->ReserveMemory(MAXVBUFFERSIZE, vertex_stride); @@ -94,7 +147,7 @@ void VertexManager::ResetBuffer(u32 vertex_stride, bool cull_all) { // Flush any pending commands first, so that we can wait on the fences WARN_LOG(VIDEO, "Executing command list while waiting for space in vertex/index buffer"); - Util::ExecuteCurrentCommandsAndRestoreState(false); + Renderer::GetInstance()->ExecuteCommandBuffer(false); // Attempt to allocate again, this may cause a fence wait if (!has_vbuffer_allocation) @@ -122,10 +175,8 @@ void VertexManager::CommitBuffer(u32 num_vertices, u32 vertex_stride, u32 num_in const u32 index_data_size = num_indices * 
sizeof(u16); *out_base_vertex = - vertex_stride > 0 ? - static_cast(m_vertex_stream_buffer->GetCurrentOffset() / vertex_stride) : - 0; - *out_base_index = static_cast(m_index_stream_buffer->GetCurrentOffset() / sizeof(u16)); + vertex_stride > 0 ? (m_vertex_stream_buffer->GetCurrentOffset() / vertex_stride) : 0; + *out_base_index = m_index_stream_buffer->GetCurrentOffset() / sizeof(u16); m_vertex_stream_buffer->CommitMemory(vertex_data_size); m_index_stream_buffer->CommitMemory(index_data_size); @@ -138,43 +189,206 @@ void VertexManager::CommitBuffer(u32 num_vertices, u32 vertex_stride, u32 num_in VK_INDEX_TYPE_UINT16); } -void VertexManager::UploadConstants() +void VertexManager::UploadUniforms() { - StateTracker::GetInstance()->UpdateVertexShaderConstants(); - StateTracker::GetInstance()->UpdateGeometryShaderConstants(); - StateTracker::GetInstance()->UpdatePixelShaderConstants(); + UpdateVertexShaderConstants(); + UpdateGeometryShaderConstants(); + UpdatePixelShaderConstants(); } -void VertexManager::DrawCurrentBatch(u32 base_index, u32 num_indices, u32 base_vertex) +void VertexManager::UpdateVertexShaderConstants() { - // Flush all EFB pokes and invalidate the peek cache. - FramebufferManager::GetInstance()->InvalidatePeekCache(); - FramebufferManager::GetInstance()->FlushEFBPokes(); + if (!VertexShaderManager::dirty || !ReserveConstantStorage()) + return; - // If bounding box is enabled, we need to flush any changes first, then invalidate what we have. 
- if (g_vulkan_context->SupportsBoundingBox()) + StateTracker::GetInstance()->SetGXUniformBuffer( + UBO_DESCRIPTOR_SET_BINDING_VS, m_uniform_stream_buffer->GetBuffer(), + m_uniform_stream_buffer->GetCurrentOffset(), sizeof(VertexShaderConstants)); + std::memcpy(m_uniform_stream_buffer->GetCurrentHostPointer(), &VertexShaderManager::constants, + sizeof(VertexShaderConstants)); + m_uniform_stream_buffer->CommitMemory(sizeof(VertexShaderConstants)); + ADDSTAT(stats.thisFrame.bytesUniformStreamed, sizeof(VertexShaderConstants)); + VertexShaderManager::dirty = false; +} + +void VertexManager::UpdateGeometryShaderConstants() +{ + if (!GeometryShaderManager::dirty || !ReserveConstantStorage()) + return; + + StateTracker::GetInstance()->SetGXUniformBuffer( + UBO_DESCRIPTOR_SET_BINDING_GS, m_uniform_stream_buffer->GetBuffer(), + m_uniform_stream_buffer->GetCurrentOffset(), sizeof(GeometryShaderConstants)); + std::memcpy(m_uniform_stream_buffer->GetCurrentHostPointer(), &GeometryShaderManager::constants, + sizeof(GeometryShaderConstants)); + m_uniform_stream_buffer->CommitMemory(sizeof(GeometryShaderConstants)); + ADDSTAT(stats.thisFrame.bytesUniformStreamed, sizeof(GeometryShaderConstants)); + GeometryShaderManager::dirty = false; +} + +void VertexManager::UpdatePixelShaderConstants() +{ + if (!PixelShaderManager::dirty || !ReserveConstantStorage()) + return; + + StateTracker::GetInstance()->SetGXUniformBuffer( + UBO_DESCRIPTOR_SET_BINDING_PS, m_uniform_stream_buffer->GetBuffer(), + m_uniform_stream_buffer->GetCurrentOffset(), sizeof(PixelShaderConstants)); + std::memcpy(m_uniform_stream_buffer->GetCurrentHostPointer(), &PixelShaderManager::constants, + sizeof(PixelShaderConstants)); + m_uniform_stream_buffer->CommitMemory(sizeof(PixelShaderConstants)); + ADDSTAT(stats.thisFrame.bytesUniformStreamed, sizeof(PixelShaderConstants)); + PixelShaderManager::dirty = false; +} + +bool VertexManager::ReserveConstantStorage() +{ + if 
(m_uniform_stream_buffer->ReserveMemory(m_uniform_buffer_reserve_size, + g_vulkan_context->GetUniformBufferAlignment())) { - BoundingBox* bounding_box = Renderer::GetInstance()->GetBoundingBox(); - bool bounding_box_enabled = (::BoundingBox::active && g_ActiveConfig.bBBoxEnable); - if (bounding_box_enabled) + return true; + } + + // The only places that call constant updates are safe to have state restored. + WARN_LOG(VIDEO, "Executing command buffer while waiting for space in uniform buffer"); + Renderer::GetInstance()->ExecuteCommandBuffer(false); + + // Since we are on a new command buffer, all constants have been invalidated, and we need + // to reupload them. We may as well do this now, since we're issuing a draw anyway. + UploadAllConstants(); + return false; +} + +void VertexManager::UploadAllConstants() +{ + // We are free to re-use parts of the buffer now since we're uploading all constants. + const u32 ub_alignment = static_cast(g_vulkan_context->GetUniformBufferAlignment()); + const u32 pixel_constants_offset = 0; + const u32 vertex_constants_offset = + Common::AlignUp(pixel_constants_offset + sizeof(PixelShaderConstants), ub_alignment); + const u32 geometry_constants_offset = + Common::AlignUp(vertex_constants_offset + sizeof(VertexShaderConstants), ub_alignment); + const u32 allocation_size = geometry_constants_offset + sizeof(GeometryShaderConstants); + + // Allocate everything at once. + // We should only be here if the buffer was full and a command buffer was submitted anyway. 
+ if (!m_uniform_stream_buffer->ReserveMemory(allocation_size, ub_alignment)) + { + PanicAlert("Failed to allocate space for constants in streaming buffer"); + return; + } + + // Update bindings + StateTracker::GetInstance()->SetGXUniformBuffer( + UBO_DESCRIPTOR_SET_BINDING_PS, m_uniform_stream_buffer->GetBuffer(), + m_uniform_stream_buffer->GetCurrentOffset() + pixel_constants_offset, + sizeof(PixelShaderConstants)); + StateTracker::GetInstance()->SetGXUniformBuffer( + UBO_DESCRIPTOR_SET_BINDING_VS, m_uniform_stream_buffer->GetBuffer(), + m_uniform_stream_buffer->GetCurrentOffset() + vertex_constants_offset, + sizeof(VertexShaderConstants)); + StateTracker::GetInstance()->SetGXUniformBuffer( + UBO_DESCRIPTOR_SET_BINDING_GS, m_uniform_stream_buffer->GetBuffer(), + m_uniform_stream_buffer->GetCurrentOffset() + geometry_constants_offset, + sizeof(GeometryShaderConstants)); + + // Copy the actual data in + std::memcpy(m_uniform_stream_buffer->GetCurrentHostPointer() + pixel_constants_offset, + &PixelShaderManager::constants, sizeof(PixelShaderConstants)); + std::memcpy(m_uniform_stream_buffer->GetCurrentHostPointer() + vertex_constants_offset, + &VertexShaderManager::constants, sizeof(VertexShaderConstants)); + std::memcpy(m_uniform_stream_buffer->GetCurrentHostPointer() + geometry_constants_offset, + &GeometryShaderManager::constants, sizeof(GeometryShaderConstants)); + + // Finally, flush buffer memory after copying + m_uniform_stream_buffer->CommitMemory(allocation_size); + ADDSTAT(stats.thisFrame.bytesUniformStreamed, allocation_size); + + // Clear dirty flags + VertexShaderManager::dirty = false; + GeometryShaderManager::dirty = false; + PixelShaderManager::dirty = false; +} + +void VertexManager::UploadUtilityUniforms(const void* data, u32 data_size) +{ + InvalidateConstants(); + if (!m_uniform_stream_buffer->ReserveMemory(data_size, + g_vulkan_context->GetUniformBufferAlignment())) + { + WARN_LOG(VIDEO, "Executing command buffer while waiting for ext space in 
uniform buffer"); + Renderer::GetInstance()->ExecuteCommandBuffer(false); + } + + StateTracker::GetInstance()->SetUtilityUniformBuffer( + m_uniform_stream_buffer->GetBuffer(), m_uniform_stream_buffer->GetCurrentOffset(), data_size); + std::memcpy(m_uniform_stream_buffer->GetCurrentHostPointer(), data, data_size); + m_uniform_stream_buffer->CommitMemory(data_size); + ADDSTAT(stats.thisFrame.bytesUniformStreamed, data_size); +} + +bool VertexManager::UploadTexelBuffer(const void* data, u32 data_size, TexelBufferFormat format, + u32* out_offset) +{ + if (data_size > m_texel_stream_buffer->GetCurrentSize()) + return false; + + const u32 elem_size = GetTexelBufferElementSize(format); + if (!m_texel_stream_buffer->ReserveMemory(data_size, elem_size)) + { + // Try submitting cmdbuffer. + WARN_LOG(VIDEO, "Submitting command buffer while waiting for space in texel buffer"); + Renderer::GetInstance()->ExecuteCommandBuffer(false, false); + if (!m_texel_stream_buffer->ReserveMemory(data_size, elem_size)) { - bounding_box->Flush(); - bounding_box->Invalidate(); + PanicAlert("Failed to allocate %u bytes from texel buffer", data_size); + return false; } } - // Bind all pending state to the command buffer - if (StateTracker::GetInstance()->Bind()) - { - vkCmdDrawIndexed(g_command_buffer_mgr->GetCurrentCommandBuffer(), num_indices, 1, base_index, - base_vertex, 0); - } - else - { - WARN_LOG(VIDEO, "Skipped draw of %u indices", num_indices); - } - - StateTracker::GetInstance()->OnDraw(); + std::memcpy(m_texel_stream_buffer->GetCurrentHostPointer(), data, data_size); + *out_offset = static_cast(m_texel_stream_buffer->GetCurrentOffset()) / elem_size; + m_texel_stream_buffer->CommitMemory(data_size); + ADDSTAT(stats.thisFrame.bytesUniformStreamed, data_size); + StateTracker::GetInstance()->SetTexelBuffer(0, m_texel_buffer_views[format]); + return true; } +bool VertexManager::UploadTexelBuffer(const void* data, u32 data_size, TexelBufferFormat format, + u32* out_offset, const void* 
palette_data, u32 palette_size, + TexelBufferFormat palette_format, u32* out_palette_offset) +{ + const u32 elem_size = GetTexelBufferElementSize(format); + const u32 palette_elem_size = GetTexelBufferElementSize(palette_format); + const u32 reserve_size = data_size + palette_size + palette_elem_size; + if (reserve_size > m_texel_stream_buffer->GetCurrentSize()) + return false; + + if (!m_texel_stream_buffer->ReserveMemory(reserve_size, elem_size)) + { + // Try submitting cmdbuffer. + WARN_LOG(VIDEO, "Submitting command buffer while waiting for space in texel buffer"); + Renderer::GetInstance()->ExecuteCommandBuffer(false, false); + if (!m_texel_stream_buffer->ReserveMemory(reserve_size, elem_size)) + { + PanicAlert("Failed to allocate %u bytes from texel buffer", reserve_size); + return false; + } + } + + const u32 palette_byte_offset = Common::AlignUp(data_size, palette_elem_size); + std::memcpy(m_texel_stream_buffer->GetCurrentHostPointer(), data, data_size); + std::memcpy(m_texel_stream_buffer->GetCurrentHostPointer() + palette_byte_offset, palette_data, + palette_size); + *out_offset = static_cast(m_texel_stream_buffer->GetCurrentOffset()) / elem_size; + *out_palette_offset = + (static_cast(m_texel_stream_buffer->GetCurrentOffset()) + palette_byte_offset) / + palette_elem_size; + + m_texel_stream_buffer->CommitMemory(palette_byte_offset + palette_size); + ADDSTAT(stats.thisFrame.bytesUniformStreamed, palette_byte_offset + palette_size); + StateTracker::GetInstance()->SetTexelBuffer(0, m_texel_buffer_views[format]); + StateTracker::GetInstance()->SetTexelBuffer(1, m_texel_buffer_views[palette_format]); + return true; +} } // namespace Vulkan diff --git a/Source/Core/VideoBackends/Vulkan/VertexManager.h b/Source/Core/VideoBackends/Vulkan/VertexManager.h index 65c31e11f4..0a71903c83 100644 --- a/Source/Core/VideoBackends/Vulkan/VertexManager.h +++ b/Source/Core/VideoBackends/Vulkan/VertexManager.h @@ -8,6 +8,7 @@ #include #include "Common/CommonTypes.h" +#include 
"VideoBackends/Vulkan/VulkanLoader.h" #include "VideoCommon/VertexManagerBase.h" namespace Vulkan @@ -20,26 +21,38 @@ public: VertexManager(); ~VertexManager(); - static VertexManager* GetInstance(); - - bool Initialize(); - - std::unique_ptr - CreateNativeVertexFormat(const PortableVertexDeclaration& vtx_decl) override; + bool Initialize() override; void UploadUtilityUniforms(const void* uniforms, u32 uniforms_size) override; + bool UploadTexelBuffer(const void* data, u32 data_size, TexelBufferFormat format, + u32* out_offset) override; + bool UploadTexelBuffer(const void* data, u32 data_size, TexelBufferFormat format, u32* out_offset, + const void* palette_data, u32 palette_size, + TexelBufferFormat palette_format, u32* out_palette_offset) override; protected: - void ResetBuffer(u32 vertex_stride, bool cull_all) override; + void ResetBuffer(u32 vertex_stride) override; void CommitBuffer(u32 num_vertices, u32 vertex_stride, u32 num_indices, u32* out_base_vertex, u32* out_base_index) override; - void UploadConstants() override; - void DrawCurrentBatch(u32 base_index, u32 num_indices, u32 base_vertex) override; + void UploadUniforms() override; - std::vector m_cpu_vertex_buffer; - std::vector m_cpu_index_buffer; + void DestroyTexelBufferViews(); + + void UpdateVertexShaderConstants(); + void UpdateGeometryShaderConstants(); + void UpdatePixelShaderConstants(); + + // Allocates storage in the uniform buffer of the specified size. If this storage cannot be + // allocated immediately, the current command buffer will be submitted and all stage's + // constants will be re-uploaded. false will be returned in this case, otherwise true. 
+ bool ReserveConstantStorage(); + void UploadAllConstants(); std::unique_ptr m_vertex_stream_buffer; std::unique_ptr m_index_stream_buffer; + std::unique_ptr m_uniform_stream_buffer; + std::unique_ptr m_texel_stream_buffer; + std::array m_texel_buffer_views = {}; + u32 m_uniform_buffer_reserve_size = 0; }; -} +} // namespace Vulkan diff --git a/Source/Core/VideoBackends/Vulkan/Vulkan.vcxproj b/Source/Core/VideoBackends/Vulkan/Vulkan.vcxproj index 5b465ff733..fab11bab7e 100644 --- a/Source/Core/VideoBackends/Vulkan/Vulkan.vcxproj +++ b/Source/Core/VideoBackends/Vulkan/Vulkan.vcxproj @@ -38,14 +38,9 @@ - - - - - @@ -53,8 +48,6 @@ - - @@ -65,13 +58,8 @@ - - - - - @@ -80,8 +68,6 @@ - - diff --git a/Source/Core/VideoBackends/Vulkan/VulkanContext.cpp b/Source/Core/VideoBackends/Vulkan/VulkanContext.cpp index 4eaea78865..72f929af1c 100644 --- a/Source/Core/VideoBackends/Vulkan/VulkanContext.cpp +++ b/Source/Core/VideoBackends/Vulkan/VulkanContext.cpp @@ -255,6 +255,7 @@ void VulkanContext::PopulateBackendInfo(VideoConfig* config) config->backend_info.bSupportsDynamicSamplerIndexing = true; // Assumed support. config->backend_info.bSupportsPostProcessing = true; // Assumed support. config->backend_info.bSupportsBackgroundCompiling = true; // Assumed support. + config->backend_info.bSupportsCopyToVram = true; // Assumed support. config->backend_info.bSupportsDualSourceBlend = false; // Dependent on features. config->backend_info.bSupportsGeometryShaders = false; // Dependent on features. config->backend_info.bSupportsGSInstancing = false; // Dependent on features. @@ -264,10 +265,10 @@ void VulkanContext::PopulateBackendInfo(VideoConfig* config) config->backend_info.bSupportsDepthClamp = false; // Dependent on features. config->backend_info.bSupportsST3CTextures = false; // Dependent on features. config->backend_info.bSupportsBPTCTextures = false; // Dependent on features. + config->backend_info.bSupportsLogicOp = false; // Dependent on features. 
+ config->backend_info.bSupportsLargePoints = false; // Dependent on features. config->backend_info.bSupportsReversedDepthRange = false; // No support yet due to driver bugs. - config->backend_info.bSupportsLogicOp = false; // Dependent on features. - config->backend_info.bSupportsCopyToVram = true; // Assumed support. - config->backend_info.bSupportsFramebufferFetch = false; + config->backend_info.bSupportsFramebufferFetch = false; // No support. } void VulkanContext::PopulateBackendInfoAdapters(VideoConfig* config, const GPUList& gpu_list) @@ -286,6 +287,7 @@ void VulkanContext::PopulateBackendInfoFeatures(VideoConfig* config, VkPhysicalD const VkPhysicalDeviceFeatures& features) { config->backend_info.MaxTextureSize = properties.limits.maxImageDimension2D; + config->backend_info.bUsesLowerLeftOrigin = false; config->backend_info.bSupportsDualSourceBlend = (features.dualSrcBlend == VK_TRUE); config->backend_info.bSupportsGeometryShaders = (features.geometryShader == VK_TRUE); config->backend_info.bSupportsGSInstancing = (features.geometryShader == VK_TRUE); @@ -311,6 +313,13 @@ void VulkanContext::PopulateBackendInfoFeatures(VideoConfig* config, VkPhysicalD config->backend_info.bSupportsST3CTextures = supports_bc; config->backend_info.bSupportsBPTCTextures = supports_bc; + // Some devices don't support point sizes >1 (e.g. Adreno). + // If we can't use a point size above our maximum IR, use triangles instead for EFB pokes. + // This means a 6x increase in the size of the vertices, though. + config->backend_info.bSupportsLargePoints = features.largePoints && + properties.limits.pointSizeRange[0] <= 1.0f && + properties.limits.pointSizeRange[1] >= 16; + // Our usage of primitive restart appears to be broken on AMD's binary drivers. // Seems to be fine on GCN Gen 1-2, unconfirmed on GCN Gen 3, causes driver resets on GCN Gen 4. 
if (DriverDetails::HasBug(DriverDetails::BUG_PRIMITIVE_RESTART)) @@ -323,11 +332,11 @@ void VulkanContext::PopulateBackendInfoMultisampleModes( // Query image support for the EFB texture formats. VkImageFormatProperties efb_color_properties = {}; vkGetPhysicalDeviceImageFormatProperties( - gpu, EFB_COLOR_TEXTURE_FORMAT, VK_IMAGE_TYPE_2D, VK_IMAGE_TILING_OPTIMAL, + gpu, VK_FORMAT_R8G8B8A8_UNORM, VK_IMAGE_TYPE_2D, VK_IMAGE_TILING_OPTIMAL, VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT, 0, &efb_color_properties); VkImageFormatProperties efb_depth_properties = {}; vkGetPhysicalDeviceImageFormatProperties( - gpu, EFB_DEPTH_TEXTURE_FORMAT, VK_IMAGE_TYPE_2D, VK_IMAGE_TILING_OPTIMAL, + gpu, VK_FORMAT_D32_SFLOAT, VK_IMAGE_TYPE_2D, VK_IMAGE_TILING_OPTIMAL, VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT, 0, &efb_depth_properties); // We can only support MSAA if it's supported on our render target formats. @@ -456,15 +465,6 @@ bool VulkanContext::SelectDeviceFeatures() if (!available_features.occlusionQueryPrecise) WARN_LOG(VIDEO, "Vulkan: Missing precise occlusion queries. Perf queries will be inaccurate."); - // Check push constant size. - if (properties.limits.maxPushConstantsSize < static_cast(PUSH_CONSTANT_BUFFER_SIZE)) - { - PanicAlert("Vulkan: Push contant buffer size %u is below minimum %u.", - properties.limits.maxPushConstantsSize, static_cast(PUSH_CONSTANT_BUFFER_SIZE)); - - return false; - } - // Enable the features we use. 
m_device_features.dualSrcBlend = available_features.dualSrcBlend; m_device_features.geometryShader = available_features.geometryShader; diff --git a/Source/Core/VideoBackends/Vulkan/VulkanContext.h b/Source/Core/VideoBackends/Vulkan/VulkanContext.h index 6b254e24c0..3f4492bc4a 100644 --- a/Source/Core/VideoBackends/Vulkan/VulkanContext.h +++ b/Source/Core/VideoBackends/Vulkan/VulkanContext.h @@ -76,10 +76,6 @@ public: { return m_device_features.samplerAnisotropy == VK_TRUE; } - bool SupportsGeometryShaders() const { return m_device_features.geometryShader == VK_TRUE; } - bool SupportsDualSourceBlend() const { return m_device_features.dualSrcBlend == VK_TRUE; } - bool SupportsLogicOps() const { return m_device_features.logicOp == VK_TRUE; } - bool SupportsBoundingBox() const { return m_device_features.fragmentStoresAndAtomics == VK_TRUE; } bool SupportsPreciseOcclusionQueries() const { return m_device_features.occlusionQueryPrecise == VK_TRUE; diff --git a/Source/Core/VideoBackends/Vulkan/main.cpp b/Source/Core/VideoBackends/Vulkan/main.cpp index 8055aab178..911f8d1991 100644 --- a/Source/Core/VideoBackends/Vulkan/main.cpp +++ b/Source/Core/VideoBackends/Vulkan/main.cpp @@ -9,18 +9,17 @@ #include "VideoBackends/Vulkan/CommandBufferManager.h" #include "VideoBackends/Vulkan/Constants.h" -#include "VideoBackends/Vulkan/FramebufferManager.h" #include "VideoBackends/Vulkan/ObjectCache.h" #include "VideoBackends/Vulkan/PerfQuery.h" #include "VideoBackends/Vulkan/Renderer.h" #include "VideoBackends/Vulkan/StateTracker.h" #include "VideoBackends/Vulkan/SwapChain.h" -#include "VideoBackends/Vulkan/TextureCache.h" #include "VideoBackends/Vulkan/VertexManager.h" #include "VideoBackends/Vulkan/VideoBackend.h" #include "VideoBackends/Vulkan/VulkanContext.h" -#include "VideoCommon/OnScreenDisplay.h" +#include "VideoCommon/FramebufferManager.h" +#include "VideoCommon/TextureCacheBase.h" #include "VideoCommon/VideoBackendBase.h" #include "VideoCommon/VideoConfig.h" @@ -200,10 
+199,9 @@ bool VideoBackend::Initialize(const WindowSystemInfo& wsi) return false; } - // Remaining classes are also dependent on object/shader cache. + // Remaining classes are also dependent on object cache. g_object_cache = std::make_unique(); - g_shader_cache = std::make_unique(); - if (!g_object_cache->Initialize() || !g_shader_cache->Initialize()) + if (!g_object_cache->Initialize()) { PanicAlert("Failed to initialize Vulkan object cache."); Shutdown(); @@ -223,29 +221,31 @@ bool VideoBackend::Initialize(const WindowSystemInfo& wsi) } } - // Create main wrapper instances. - g_framebuffer_manager = std::make_unique(); - g_renderer = std::make_unique(std::move(swap_chain), wsi.render_surface_scale); - g_vertex_manager = std::make_unique(); - g_texture_cache = std::make_unique(); - ::g_shader_cache = std::make_unique(); - g_perf_query = std::make_unique(); - - // Invoke init methods on main wrapper classes. - // These have to be done before the others because the destructors - // for the remaining classes may call methods on these. - if (!StateTracker::CreateInstance() || !FramebufferManager::GetInstance()->Initialize() || - !Renderer::GetInstance()->Initialize() || !VertexManager::GetInstance()->Initialize() || - !TextureCache::GetInstance()->Initialize() || !PerfQuery::GetInstance()->Initialize() || - !::g_shader_cache->Initialize()) + if (!StateTracker::CreateInstance()) { - PanicAlert("Failed to initialize Vulkan classes."); + PanicAlert("Failed to create state tracker"); Shutdown(); return false; } - // Display the name so the user knows which device was actually created. - INFO_LOG(VIDEO, "Vulkan Device: %s", g_vulkan_context->GetDeviceProperties().deviceName); + // Create main wrapper instances. 
+ g_renderer = std::make_unique(std::move(swap_chain), wsi.render_surface_scale); + g_vertex_manager = std::make_unique(); + g_shader_cache = std::make_unique(); + g_framebuffer_manager = std::make_unique(); + g_texture_cache = std::make_unique(); + g_perf_query = std::make_unique(); + + if (!g_vertex_manager->Initialize() || !g_shader_cache->Initialize() || + !g_renderer->Initialize() || !g_framebuffer_manager->Initialize() || + !g_texture_cache->Initialize() || !PerfQuery::GetInstance()->Initialize()) + { + PanicAlert("Failed to initialize renderer classes"); + Shutdown(); + return false; + } + + g_shader_cache->InitializeShaderCache(); return true; } @@ -254,23 +254,23 @@ void VideoBackend::Shutdown() if (g_command_buffer_mgr) g_command_buffer_mgr->WaitForGPUIdle(); - if (::g_shader_cache) - ::g_shader_cache->Shutdown(); + if (g_shader_cache) + g_shader_cache->Shutdown(); + + if (g_object_cache) + g_object_cache->Shutdown(); if (g_renderer) g_renderer->Shutdown(); g_perf_query.reset(); - ::g_shader_cache.reset(); g_texture_cache.reset(); + g_framebuffer_manager.reset(); + g_shader_cache.reset(); g_vertex_manager.reset(); g_renderer.reset(); - g_framebuffer_manager.reset(); - StateTracker::DestroyInstance(); - if (g_shader_cache) - g_shader_cache->Shutdown(); - g_shader_cache.reset(); g_object_cache.reset(); + StateTracker::DestroyInstance(); g_command_buffer_mgr.reset(); g_vulkan_context.reset(); ShutdownShared(); diff --git a/Source/Core/VideoCommon/AbstractFramebuffer.cpp b/Source/Core/VideoCommon/AbstractFramebuffer.cpp index f8a9b07ba8..c6a1693788 100644 --- a/Source/Core/VideoCommon/AbstractFramebuffer.cpp +++ b/Source/Core/VideoCommon/AbstractFramebuffer.cpp @@ -5,10 +5,13 @@ #include "VideoCommon/AbstractFramebuffer.h" #include "VideoCommon/AbstractTexture.h" -AbstractFramebuffer::AbstractFramebuffer(AbstractTextureFormat color_format, +AbstractFramebuffer::AbstractFramebuffer(AbstractTexture* color_attachment, + AbstractTexture* depth_attachment, + 
AbstractTextureFormat color_format, AbstractTextureFormat depth_format, u32 width, u32 height, u32 layers, u32 samples) - : m_color_format(color_format), m_depth_format(depth_format), m_width(width), m_height(height), + : m_color_attachment(color_attachment), m_depth_attachment(depth_attachment), + m_color_format(color_format), m_depth_format(depth_format), m_width(width), m_height(height), m_layers(layers), m_samples(samples) { } @@ -26,7 +29,7 @@ bool AbstractFramebuffer::ValidateConfig(const AbstractTexture* color_attachment // MSAA textures are not supported with mip levels on most backends, and it simplifies our // handling of framebuffers. auto CheckAttachment = [](const AbstractTexture* tex) { - return tex->GetConfig().rendertarget && tex->GetConfig().levels == 1; + return tex->GetConfig().IsRenderTarget() && tex->GetConfig().levels == 1; }; if ((color_attachment && !CheckAttachment(color_attachment)) || (depth_attachment && !CheckAttachment(depth_attachment))) diff --git a/Source/Core/VideoCommon/AbstractFramebuffer.h b/Source/Core/VideoCommon/AbstractFramebuffer.h index 9d4b2d29cd..33b243e3b3 100644 --- a/Source/Core/VideoCommon/AbstractFramebuffer.h +++ b/Source/Core/VideoCommon/AbstractFramebuffer.h @@ -18,13 +18,16 @@ class AbstractTexture; class AbstractFramebuffer { public: - AbstractFramebuffer(AbstractTextureFormat color_format, AbstractTextureFormat depth_format, + AbstractFramebuffer(AbstractTexture* color_attachment, AbstractTexture* depth_attachment, + AbstractTextureFormat color_format, AbstractTextureFormat depth_format, u32 width, u32 height, u32 layers, u32 samples); virtual ~AbstractFramebuffer(); static bool ValidateConfig(const AbstractTexture* color_attachment, const AbstractTexture* depth_attachment); + AbstractTexture* GetColorAttachment() const { return m_color_attachment; } + AbstractTexture* GetDepthAttachment() const { return m_depth_attachment; } AbstractTextureFormat GetColorFormat() const { return m_color_format; } 
AbstractTextureFormat GetDepthFormat() const { return m_depth_format; } bool HasColorBuffer() const { return m_color_format != AbstractTextureFormat::Undefined; } @@ -36,6 +39,8 @@ public: MathUtil::Rectangle GetRect() const; protected: + AbstractTexture* m_color_attachment; + AbstractTexture* m_depth_attachment; AbstractTextureFormat m_color_format; AbstractTextureFormat m_depth_format; u32 m_width; diff --git a/Source/Core/VideoCommon/AbstractPipeline.h b/Source/Core/VideoCommon/AbstractPipeline.h index c0ae61af28..8c7d7482de 100644 --- a/Source/Core/VideoCommon/AbstractPipeline.h +++ b/Source/Core/VideoCommon/AbstractPipeline.h @@ -45,24 +45,7 @@ struct AbstractPipelineConfig RasterizationState rasterization_state; DepthState depth_state; BlendingState blending_state; - - union FramebufferState - { - BitField<0, 8, AbstractTextureFormat> color_texture_format; - BitField<8, 8, AbstractTextureFormat> depth_texture_format; - BitField<16, 8, u32> samples; - BitField<24, 1, u32> per_sample_shading; - - bool operator==(const FramebufferState& rhs) const { return hex == rhs.hex; } - bool operator!=(const FramebufferState& rhs) const { return hex != rhs.hex; } - FramebufferState& operator=(const FramebufferState& rhs) - { - hex = rhs.hex; - return *this; - } - - u32 hex; - } framebuffer_state; + FramebufferState framebuffer_state; AbstractPipelineUsage usage; diff --git a/Source/Core/VideoCommon/AbstractStagingTexture.h b/Source/Core/VideoCommon/AbstractStagingTexture.h index c87dfd70b0..759f1e79e8 100644 --- a/Source/Core/VideoCommon/AbstractStagingTexture.h +++ b/Source/Core/VideoCommon/AbstractStagingTexture.h @@ -20,8 +20,16 @@ public: virtual ~AbstractStagingTexture(); const TextureConfig& GetConfig() const { return m_config; } + u32 GetWidth() const { return m_config.width; } + u32 GetHeight() const { return m_config.height; } + u32 GetLevels() const { return m_config.levels; } + u32 GetLayers() const { return m_config.layers; } + u32 GetSamples() const { return 
m_config.samples; } + AbstractTextureFormat GetFormat() const { return m_config.format; } + MathUtil::Rectangle GetRect() const { return m_config.GetRect(); } StagingTextureType GetType() const { return m_type; } size_t GetTexelSize() const { return m_texel_size; } + bool IsMapped() const { return m_map_pointer != nullptr; } char* GetMappedPointer() const { return m_map_pointer; } size_t GetMappedStride() const { return m_map_stride; } diff --git a/Source/Core/VideoCommon/AbstractTexture.cpp b/Source/Core/VideoCommon/AbstractTexture.cpp index 6190f0e51b..d0c27617db 100644 --- a/Source/Core/VideoCommon/AbstractTexture.cpp +++ b/Source/Core/VideoCommon/AbstractTexture.cpp @@ -15,6 +15,10 @@ AbstractTexture::AbstractTexture(const TextureConfig& c) : m_config(c) { } +void AbstractTexture::FinishedRendering() +{ +} + bool AbstractTexture::Save(const std::string& filename, unsigned int level) { // We can't dump compressed textures currently (it would mean drawing them to a RGBA8 @@ -30,7 +34,7 @@ bool AbstractTexture::Save(const std::string& filename, unsigned int level) // Use a temporary staging texture for the download. Certainly not optimal, // but this is not a frequently-executed code path.. 
TextureConfig readback_texture_config(level_width, level_height, 1, 1, 1, - AbstractTextureFormat::RGBA8, false); + AbstractTextureFormat::RGBA8, 0); auto readback_texture = g_renderer->CreateStagingTexture(StagingTextureType::Readback, readback_texture_config); if (!readback_texture) @@ -84,7 +88,24 @@ bool AbstractTexture::IsStencilFormat(AbstractTextureFormat format) return format == AbstractTextureFormat::D24_S8 || format == AbstractTextureFormat::D32F_S8; } -size_t AbstractTexture::CalculateStrideForFormat(AbstractTextureFormat format, u32 row_length) +AbstractTextureFormat AbstractTexture::GetColorFormatForDepthFormat(AbstractTextureFormat format) +{ + switch (format) + { + case AbstractTextureFormat::D16: + return AbstractTextureFormat::R16; + + case AbstractTextureFormat::D24_S8: // TODO: Incorrect + case AbstractTextureFormat::D32F: + case AbstractTextureFormat::D32F_S8: + return AbstractTextureFormat::R32F; + + default: + return format; + } +} + +u32 AbstractTexture::CalculateStrideForFormat(AbstractTextureFormat format, u32 row_length) { switch (format) { @@ -111,7 +132,7 @@ size_t AbstractTexture::CalculateStrideForFormat(AbstractTextureFormat format, u } } -size_t AbstractTexture::GetTexelSizeForFormat(AbstractTextureFormat format) +u32 AbstractTexture::GetTexelSizeForFormat(AbstractTextureFormat format) { switch (format) { @@ -138,6 +159,21 @@ size_t AbstractTexture::GetTexelSizeForFormat(AbstractTextureFormat format) } } +u32 AbstractTexture::GetBlockSizeForFormat(AbstractTextureFormat format) +{ + switch (format) + { + case AbstractTextureFormat::DXT1: + case AbstractTextureFormat::DXT3: + case AbstractTextureFormat::DXT5: + case AbstractTextureFormat::BPTC: + return 4; + + default: + return 1; + } +} + const TextureConfig& AbstractTexture::GetConfig() const { return m_config; diff --git a/Source/Core/VideoCommon/AbstractTexture.h b/Source/Core/VideoCommon/AbstractTexture.h index b193e5459e..6ff80e9703 100644 --- 
a/Source/Core/VideoCommon/AbstractTexture.h +++ b/Source/Core/VideoCommon/AbstractTexture.h @@ -21,28 +21,33 @@ public: const MathUtil::Rectangle& src_rect, u32 src_layer, u32 src_level, const MathUtil::Rectangle& dst_rect, u32 dst_layer, u32 dst_level) = 0; - virtual void ScaleRectangleFromTexture(const AbstractTexture* source, - const MathUtil::Rectangle& srcrect, - const MathUtil::Rectangle& dstrect) = 0; virtual void ResolveFromTexture(const AbstractTexture* src, const MathUtil::Rectangle& rect, u32 layer, u32 level) = 0; virtual void Load(u32 level, u32 width, u32 height, u32 row_length, const u8* buffer, size_t buffer_size) = 0; + // Hints to the backend that we have finished rendering to this texture, and it will be used + // as a shader resource and sampled. For Vulkan, this transitions the image layout. + virtual void FinishedRendering(); + u32 GetWidth() const { return m_config.width; } u32 GetHeight() const { return m_config.height; } u32 GetLevels() const { return m_config.levels; } u32 GetLayers() const { return m_config.layers; } u32 GetSamples() const { return m_config.samples; } AbstractTextureFormat GetFormat() const { return m_config.format; } + MathUtil::Rectangle GetRect() const { return m_config.GetRect(); } + MathUtil::Rectangle GetMipRect(u32 level) const { return m_config.GetMipRect(level); } bool IsMultisampled() const { return m_config.IsMultisampled(); } bool Save(const std::string& filename, unsigned int level); static bool IsCompressedFormat(AbstractTextureFormat format); static bool IsDepthFormat(AbstractTextureFormat format); static bool IsStencilFormat(AbstractTextureFormat format); - static size_t CalculateStrideForFormat(AbstractTextureFormat format, u32 row_length); - static size_t GetTexelSizeForFormat(AbstractTextureFormat format); + static AbstractTextureFormat GetColorFormatForDepthFormat(AbstractTextureFormat format); + static u32 CalculateStrideForFormat(AbstractTextureFormat format, u32 row_length); + static u32 
GetTexelSizeForFormat(AbstractTextureFormat format); + static u32 GetBlockSizeForFormat(AbstractTextureFormat format); const TextureConfig& GetConfig() const; diff --git a/Source/Core/VideoCommon/BPFunctions.cpp b/Source/Core/VideoCommon/BPFunctions.cpp index 61e4c54b27..7db9d58cad 100644 --- a/Source/Core/VideoCommon/BPFunctions.cpp +++ b/Source/Core/VideoCommon/BPFunctions.cpp @@ -5,8 +5,10 @@ #include "Common/CommonTypes.h" #include "Common/Logging/Log.h" +#include "VideoCommon/AbstractFramebuffer.h" #include "VideoCommon/BPFunctions.h" #include "VideoCommon/BPMemory.h" +#include "VideoCommon/FramebufferManager.h" #include "VideoCommon/RenderBase.h" #include "VideoCommon/RenderState.h" #include "VideoCommon/VertexManagerBase.h" @@ -51,8 +53,10 @@ void SetScissor() bpmem.scissorBR.x - xoff + 1, bpmem.scissorBR.y - yoff + 1); native_rc.ClampUL(0, 0, EFB_WIDTH, EFB_HEIGHT); - TargetRectangle target_rc = g_renderer->ConvertEFBRectangle(native_rc); - g_renderer->SetScissorRect(target_rc); + auto target_rc = g_renderer->ConvertEFBRectangle(native_rc); + auto converted_rc = + g_renderer->ConvertFramebufferRectangle(target_rc, g_renderer->GetCurrentFramebuffer()); + g_renderer->SetScissorRect(converted_rc); } void SetViewport() @@ -122,6 +126,21 @@ void SetViewport() far_depth = 1.0f - min_depth; } + // Clamp to size if oversized not supported. Required for D3D. + if (!g_ActiveConfig.backend_info.bSupportsOversizedViewports) + { + const float max_width = static_cast(g_renderer->GetCurrentFramebuffer()->GetWidth()); + const float max_height = static_cast(g_renderer->GetCurrentFramebuffer()->GetHeight()); + x = MathUtil::Clamp(x, 0.0f, max_width - 1.0f); + y = MathUtil::Clamp(y, 0.0f, max_height - 1.0f); + width = MathUtil::Clamp(width, 1.0f, max_width - x); + height = MathUtil::Clamp(height, 1.0f, max_height - y); + } + + // Lower-left flip. 
+ if (g_ActiveConfig.backend_info.bUsesLowerLeftOrigin) + y = static_cast(g_renderer->GetCurrentFramebuffer()->GetHeight()) - y - height; + g_renderer->SetViewport(x, y, width, height, near_depth, far_depth); } @@ -188,8 +207,6 @@ void ClearScreen(const EFBRectangle& rc) void OnPixelFormatChange() { - int convtype = -1; - // TODO : Check for Z compression format change // When using 16bit Z, the game may enable a special compression format which we need to handle // If we don't, Z values will be completely screwed up, currently only Star Wars:RS2 uses that. @@ -205,58 +222,74 @@ void OnPixelFormatChange() auto old_format = g_renderer->GetPrevPixelFormat(); auto new_format = bpmem.zcontrol.pixel_format; + g_renderer->StorePixelFormat(new_format); + + DEBUG_LOG(VIDEO, "pixelfmt: pixel=%d, zc=%d", static_cast(new_format), + static_cast(bpmem.zcontrol.zformat)); // no need to reinterpret pixel data in these cases if (new_format == old_format || old_format == PEControl::INVALID_FMT) - goto skip; + return; // Check for pixel format changes switch (old_format) { case PEControl::RGB8_Z24: case PEControl::Z24: + { // Z24 and RGB8_Z24 are treated equal, so just return in this case if (new_format == PEControl::RGB8_Z24 || new_format == PEControl::Z24) - goto skip; + return; if (new_format == PEControl::RGBA6_Z24) - convtype = 0; + { + g_renderer->ReinterpretPixelData(EFBReinterpretType::RGB8ToRGBA6); + return; + } else if (new_format == PEControl::RGB565_Z16) - convtype = 1; - break; + { + g_renderer->ReinterpretPixelData(EFBReinterpretType::RGB8ToRGB565); + return; + } + } + break; case PEControl::RGBA6_Z24: + { if (new_format == PEControl::RGB8_Z24 || new_format == PEControl::Z24) - convtype = 2; + { + g_renderer->ReinterpretPixelData(EFBReinterpretType::RGBA6ToRGB8); + return; + } else if (new_format == PEControl::RGB565_Z16) - convtype = 3; - break; + { + g_renderer->ReinterpretPixelData(EFBReinterpretType::RGBA6ToRGB565); + return; + } + } + break; case 
PEControl::RGB565_Z16: + { if (new_format == PEControl::RGB8_Z24 || new_format == PEControl::Z24) - convtype = 4; + { + g_renderer->ReinterpretPixelData(EFBReinterpretType::RGB565ToRGB8); + return; + } else if (new_format == PEControl::RGBA6_Z24) - convtype = 5; - break; + { + g_renderer->ReinterpretPixelData(EFBReinterpretType::RGB565ToRGBA6); + return; + } + } + break; default: break; } - if (convtype == -1) - { - ERROR_LOG(VIDEO, "Unhandled EFB format change: %d to %d", static_cast(old_format), - static_cast(new_format)); - goto skip; - } - - g_renderer->ReinterpretPixelData(convtype); - -skip: - DEBUG_LOG(VIDEO, "pixelfmt: pixel=%d, zc=%d", static_cast(new_format), - static_cast(bpmem.zcontrol.zformat)); - - g_renderer->StorePixelFormat(new_format); + ERROR_LOG(VIDEO, "Unhandled EFB format change: %d to %d", static_cast(old_format), + static_cast(new_format)); } void SetInterlacingMode(const BPCmd& bp) @@ -286,4 +319,4 @@ void SetInterlacingMode(const BPCmd& bp) break; } } -}; +}; // namespace BPFunctions diff --git a/Source/Core/VideoCommon/CMakeLists.txt b/Source/Core/VideoCommon/CMakeLists.txt index b29492be50..b118080eb5 100644 --- a/Source/Core/VideoCommon/CMakeLists.txt +++ b/Source/Core/VideoCommon/CMakeLists.txt @@ -10,11 +10,11 @@ add_library(videocommon BPStructs.cpp CPMemory.cpp CommandProcessor.cpp - Debugger.cpp DriverDetails.cpp Fifo.cpp FPSCounter.cpp - FramebufferManagerBase.cpp + FramebufferManager.cpp + FramebufferShaderGen.cpp GeometryShaderGen.cpp GeometryShaderManager.cpp HiresTextures.cpp diff --git a/Source/Core/VideoCommon/Debugger.cpp b/Source/Core/VideoCommon/Debugger.cpp deleted file mode 100644 index 4dbaa1865a..0000000000 --- a/Source/Core/VideoCommon/Debugger.cpp +++ /dev/null @@ -1,163 +0,0 @@ -// Copyright 2010 Dolphin Emulator Project -// Licensed under GPLv2+ -// Refer to the license.txt file included. 
- -#include - -#include "Common/FileUtil.h" -#include "Common/StringUtil.h" -#include "Common/Thread.h" - -#include "VideoCommon/BPMemory.h" -#include "VideoCommon/Debugger.h" -#include "VideoCommon/VideoConfig.h" - -GFXDebuggerBase* g_pdebugger = nullptr; -volatile bool GFXDebuggerPauseFlag = - false; // if true, the GFX thread will be spin locked until it's false again -volatile PauseEvent GFXDebuggerToPauseAtNext = - NOT_PAUSE; // Event which will trigger spin locking the GFX thread -volatile int GFXDebuggerEventToPauseCount = - 0; // Number of events to wait for until GFX thread will be paused - -void GFXDebuggerUpdateScreen() -{ - // TODO: Implement this in a backend-independent way - /* // update screen - if (D3D::bFrameInProgress) - { - D3D::dev->SetRenderTarget(0, D3D::GetBackBufferSurface()); - D3D::dev->SetDepthStencilSurface(nullptr); - - D3D::dev->StretchRect(FramebufferManager::GetEFBColorRTSurface(), nullptr, - D3D::GetBackBufferSurface(), nullptr, - D3DTEXF_LINEAR); - - D3D::dev->EndScene(); - D3D::dev->Present(nullptr, nullptr, nullptr, nullptr); - - D3D::dev->SetRenderTarget(0, FramebufferManager::GetEFBColorRTSurface()); - D3D::dev->SetDepthStencilSurface(FramebufferManager::GetEFBDepthRTSurface()); - D3D::dev->BeginScene(); - } - else - { - D3D::dev->EndScene(); - D3D::dev->Present(nullptr, nullptr, nullptr, nullptr); - D3D::dev->BeginScene(); - }*/ -} - -// GFX thread -void GFXDebuggerCheckAndPause(bool update) -{ - if (GFXDebuggerPauseFlag) - { - g_pdebugger->OnPause(); - while (GFXDebuggerPauseFlag) - { - if (update) - GFXDebuggerUpdateScreen(); - Common::SleepCurrentThread(5); - } - g_pdebugger->OnContinue(); - } -} - -// GFX thread -void GFXDebuggerToPause(bool update) -{ - GFXDebuggerToPauseAtNext = NOT_PAUSE; - GFXDebuggerPauseFlag = true; - GFXDebuggerCheckAndPause(update); -} - -void ContinueGFXDebugger() -{ - GFXDebuggerPauseFlag = false; -} - -void GFXDebuggerBase::DumpPixelShader(const std::string& path) -{ - const std::string 
filename = StringFromFormat("%sdump_ps.txt", path.c_str()); - - std::string output; - bool useDstAlpha = bpmem.dstalpha.enable && bpmem.blendmode.alphaupdate && - bpmem.zcontrol.pixel_format == PEControl::RGBA6_Z24; - if (!useDstAlpha) - { - output = "Destination alpha disabled:\n"; - /// output += GeneratePixelShaderCode(DSTALPHA_NONE, g_ActiveConfig.backend_info.APIType, - /// g_nativeVertexFmt->m_components); - } - else - { - if (g_ActiveConfig.backend_info.bSupportsDualSourceBlend) - { - output = "Using dual source blending for destination alpha:\n"; - /// output += GeneratePixelShaderCode(DSTALPHA_DUAL_SOURCE_BLEND, - /// g_ActiveConfig.backend_info.APIType, g_nativeVertexFmt->m_components); - } - else - { - output = "Using two passes for emulating destination alpha:\n"; - /// output += GeneratePixelShaderCode(DSTALPHA_NONE, g_ActiveConfig.backend_info.APIType, - /// g_nativeVertexFmt->m_components); - output += "\n\nDestination alpha pass shader:\n"; - /// output += GeneratePixelShaderCode(DSTALPHA_ALPHA_PASS, - /// g_ActiveConfig.backend_info.APIType, g_nativeVertexFmt->m_components); - } - } - - File::CreateEmptyFile(filename); - File::WriteStringToFile(output, filename); -} - -void GFXDebuggerBase::DumpVertexShader(const std::string& path) -{ - const std::string filename = StringFromFormat("%sdump_vs.txt", path.c_str()); - - File::CreateEmptyFile(filename); - /// File::WriteStringToFile(GenerateVertexShaderCode(g_nativeVertexFmt->m_components, - /// g_ActiveConfig.backend_info.APIType), filename); -} - -void GFXDebuggerBase::DumpPixelShaderConstants(const std::string& path) -{ - // TODO -} - -void GFXDebuggerBase::DumpVertexShaderConstants(const std::string& path) -{ - // TODO -} - -void GFXDebuggerBase::DumpTextures(const std::string& path) -{ - // TODO -} - -void GFXDebuggerBase::DumpFrameBuffer(const std::string& path) -{ - // TODO -} - -void GFXDebuggerBase::DumpGeometry(const std::string& path) -{ - // TODO -} - -void 
GFXDebuggerBase::DumpVertexDecl(const std::string& path) -{ - // TODO -} - -void GFXDebuggerBase::DumpMatrices(const std::string& path) -{ - // TODO -} - -void GFXDebuggerBase::DumpStats(const std::string& path) -{ - // TODO -} diff --git a/Source/Core/VideoCommon/Debugger.h b/Source/Core/VideoCommon/Debugger.h deleted file mode 100644 index 1c2d293edc..0000000000 --- a/Source/Core/VideoCommon/Debugger.h +++ /dev/null @@ -1,83 +0,0 @@ -// Copyright 2010 Dolphin Emulator Project -// Licensed under GPLv2+ -// Refer to the license.txt file included. - -#pragma once - -#include - -class GFXDebuggerBase -{ -public: - virtual ~GFXDebuggerBase() {} - // if paused, debugging functions can be enabled - virtual void OnPause() {} - virtual void OnContinue() {} - void DumpPixelShader(const std::string& path); - void DumpVertexShader(const std::string& path); - void DumpPixelShaderConstants(const std::string& path); - void DumpVertexShaderConstants(const std::string& path); - void DumpTextures(const std::string& path); - void DumpFrameBuffer(const std::string& path); - void DumpGeometry(const std::string& path); - void DumpVertexDecl(const std::string& path); - void DumpMatrices(const std::string& path); - void DumpStats(const std::string& path); -}; - -enum PauseEvent -{ - NOT_PAUSE = 0, - NEXT_FRAME = 1 << 0, - NEXT_FLUSH = 1 << 1, - - NEXT_PIXEL_SHADER_CHANGE = 1 << 2, - NEXT_VERTEX_SHADER_CHANGE = 1 << 3, - NEXT_TEXTURE_CHANGE = 1 << 4, - NEXT_NEW_TEXTURE = 1 << 5, - - NEXT_XFB_CMD = 1 << 6, // TODO - NEXT_EFB_CMD = 1 << 7, // TODO - - NEXT_MATRIX_CMD = 1 << 8, // TODO - NEXT_VERTEX_CMD = 1 << 9, // TODO - NEXT_TEXTURE_CMD = 1 << 10, // TODO - NEXT_LIGHT_CMD = 1 << 11, // TODO - NEXT_FOG_CMD = 1 << 12, // TODO - - NEXT_SET_TLUT = 1 << 13, // TODO - - NEXT_ERROR = 1 << 14, // TODO -}; - -extern GFXDebuggerBase* g_pdebugger; -extern volatile bool GFXDebuggerPauseFlag; -extern volatile PauseEvent GFXDebuggerToPauseAtNext; -extern volatile int GFXDebuggerEventToPauseCount; 
-void ContinueGFXDebugger(); -void GFXDebuggerCheckAndPause(bool update); -void GFXDebuggerToPause(bool update); -void GFXDebuggerUpdateScreen(); - -#define GFX_DEBUGGER_PAUSE_AT(event, update) \ - { \ - if (((GFXDebuggerToPauseAtNext & event) && --GFXDebuggerEventToPauseCount <= 0) || \ - GFXDebuggerPauseFlag) \ - GFXDebuggerToPause(update); \ - } -#define GFX_DEBUGGER_PAUSE_LOG_AT(event, update, dumpfunc) \ - { \ - if (((GFXDebuggerToPauseAtNext & event) && --GFXDebuggerEventToPauseCount <= 0) || \ - GFXDebuggerPauseFlag) \ - { \ - {dumpfunc}; \ - GFXDebuggerToPause(update); \ - } \ - } -#define GFX_DEBUGGER_LOG_AT(event, dumpfunc) \ - { \ - if ((GFXDebuggerToPauseAtNext & event)) \ - { \ - {dumpfunc}; \ - } \ - } diff --git a/Source/Core/VideoCommon/FramebufferManager.cpp b/Source/Core/VideoCommon/FramebufferManager.cpp new file mode 100644 index 0000000000..0af19012ea --- /dev/null +++ b/Source/Core/VideoCommon/FramebufferManager.cpp @@ -0,0 +1,764 @@ +// Copyright 2010 Dolphin Emulator Project +// Licensed under GPLv2+ +// Refer to the license.txt file included. 
+ +#include "VideoCommon/FramebufferManager.h" +#include +#include "VideoCommon/FramebufferShaderGen.h" +#include "VideoCommon/VertexManagerBase.h" + +#include "Common/Logging/Log.h" +#include "Common/MsgHandler.h" +#include "VideoCommon/AbstractFramebuffer.h" +#include "VideoCommon/AbstractPipeline.h" +#include "VideoCommon/AbstractShader.h" +#include "VideoCommon/AbstractStagingTexture.h" +#include "VideoCommon/AbstractTexture.h" +#include "VideoCommon/DriverDetails.h" +#include "VideoCommon/RenderBase.h" +#include "VideoCommon/VideoConfig.h" + +// Maximum number of pixels poked in one batch * 6 +constexpr size_t MAX_POKE_VERTICES = 32768; + +std::unique_ptr g_framebuffer_manager; + +FramebufferManager::FramebufferManager() = default; + +FramebufferManager::~FramebufferManager() +{ + DestroyClearPipelines(); + DestroyPokePipelines(); + DestroyConversionPipelines(); + DestroyReadbackPipelines(); + DestroyReadbackFramebuffer(); + DestroyEFBFramebuffer(); +} + +bool FramebufferManager::Initialize() +{ + if (!CreateEFBFramebuffer()) + { + PanicAlert("Failed to create EFB framebuffer"); + return false; + } + + if (!CreateReadbackFramebuffer()) + { + PanicAlert("Failed to create EFB readback framebuffer"); + return false; + } + + if (!CompileReadbackPipelines()) + { + PanicAlert("Failed to compile EFB readback pipelines"); + return false; + } + + if (!CompileConversionPipelines()) + { + PanicAlert("Failed to compile EFB conversion pipelines"); + return false; + } + + if (!CompileClearPipelines()) + { + PanicAlert("Failed to compile EFB clear pipelines"); + return false; + } + + if (!CompilePokePipelines()) + { + PanicAlert("Failed to compile EFB poke pipelines"); + return false; + } + + return true; +} + +void FramebufferManager::RecreateEFBFramebuffer() +{ + FlushEFBPokes(); + InvalidatePeekCache(); + + DestroyReadbackFramebuffer(); + DestroyEFBFramebuffer(); + if (!CreateEFBFramebuffer() || !CreateReadbackFramebuffer()) + PanicAlert("Failed to recreate EFB 
framebuffer"); +} + +void FramebufferManager::RecompileShaders() +{ + DestroyPokePipelines(); + DestroyClearPipelines(); + DestroyConversionPipelines(); + DestroyReadbackPipelines(); + if (!CompileReadbackPipelines() || !CompileConversionPipelines() || !CompileClearPipelines() || + !CompilePokePipelines()) + { + PanicAlert("Failed to recompile EFB pipelines"); + } +} + +AbstractTextureFormat FramebufferManager::GetEFBColorFormat() +{ + // The EFB can be set to different pixel formats by the game through the + // BPMEM_ZCOMPARE register (which should probably have a different name). + // They are: + // - 24-bit RGB (8-bit components) with 24-bit Z + // - 24-bit RGBA (6-bit components) with 24-bit Z + // - Multisampled 16-bit RGB (5-6-5 format) with 16-bit Z + // We only use one EFB format here: 32-bit ARGB with 32-bit Z. + // Multisampling depends on user settings. + // The distinction becomes important for certain operations, i.e. the + // alpha channel should be ignored if the EFB does not have one. + return AbstractTextureFormat::RGBA8; +} + +AbstractTextureFormat FramebufferManager::GetEFBDepthFormat() +{ + // 32-bit depth clears are broken in the Adreno Vulkan driver, and have no effect. + // To work around this, we use a D24_S8 buffer instead, which results in a loss of accuracy. + // We still resolve this to a R32F texture, as there is no 24-bit format. + if (DriverDetails::HasBug(DriverDetails::BUG_BROKEN_D32F_CLEAR)) + return AbstractTextureFormat::D24_S8; + else + return AbstractTextureFormat::D32F; +} + +static u32 CalculateEFBLayers() +{ + return (g_ActiveConfig.stereo_mode != StereoMode::Off) ? 
2 : 1; +} + +TextureConfig FramebufferManager::GetEFBColorTextureConfig() +{ + return TextureConfig(g_renderer->GetTargetWidth(), g_renderer->GetTargetHeight(), 1, + CalculateEFBLayers(), g_ActiveConfig.iMultisamples, GetEFBColorFormat(), + AbstractTextureFlag_RenderTarget); +} + +TextureConfig FramebufferManager::GetEFBDepthTextureConfig() +{ + return TextureConfig(g_renderer->GetTargetWidth(), g_renderer->GetTargetHeight(), 1, + CalculateEFBLayers(), g_ActiveConfig.iMultisamples, GetEFBDepthFormat(), + AbstractTextureFlag_RenderTarget); +} + +FramebufferState FramebufferManager::GetEFBFramebufferState() const +{ + FramebufferState ret = {}; + ret.color_texture_format = m_efb_color_texture->GetFormat(); + ret.depth_texture_format = m_efb_depth_texture->GetFormat(); + ret.per_sample_shading = IsEFBMultisampled() && g_ActiveConfig.bSSAA; + ret.samples = m_efb_color_texture->GetSamples(); + return ret; +} + +bool FramebufferManager::CreateEFBFramebuffer() +{ + const TextureConfig efb_color_texture_config = GetEFBColorTextureConfig(); + const TextureConfig efb_depth_texture_config = GetEFBDepthTextureConfig(); + + // We need a second texture to swap with for changing pixel formats + m_efb_color_texture = g_renderer->CreateTexture(efb_color_texture_config); + m_efb_depth_texture = g_renderer->CreateTexture(efb_depth_texture_config); + m_efb_convert_color_texture = g_renderer->CreateTexture(efb_color_texture_config); + if (!m_efb_color_texture || !m_efb_depth_texture || !m_efb_convert_color_texture) + return false; + + m_efb_framebuffer = + g_renderer->CreateFramebuffer(m_efb_color_texture.get(), m_efb_depth_texture.get()); + m_efb_convert_framebuffer = + g_renderer->CreateFramebuffer(m_efb_convert_color_texture.get(), m_efb_depth_texture.get()); + if (!m_efb_framebuffer || !m_efb_convert_framebuffer) + return false; + + // Create resolved textures if MSAA is on + if (g_ActiveConfig.MultisamplingEnabled()) + { + m_efb_resolve_color_texture = g_renderer->CreateTexture( + 
TextureConfig(efb_color_texture_config.width, efb_color_texture_config.height, 1, + efb_color_texture_config.layers, 1, efb_color_texture_config.format, 0)); + m_efb_depth_resolve_texture = g_renderer->CreateTexture(TextureConfig( + efb_depth_texture_config.width, efb_depth_texture_config.height, 1, + efb_depth_texture_config.layers, 1, + AbstractTexture::GetColorFormatForDepthFormat(efb_depth_texture_config.format), + AbstractTextureFlag_RenderTarget)); + if (!m_efb_resolve_color_texture || !m_efb_depth_resolve_texture) + return false; + + m_efb_depth_resolve_framebuffer = + g_renderer->CreateFramebuffer(m_efb_depth_resolve_texture.get(), nullptr); + if (!m_efb_depth_resolve_framebuffer) + return false; + } + + // Clear the renderable textures out. + g_renderer->SetAndClearFramebuffer( + m_efb_framebuffer.get(), {{0.0f, 0.0f, 0.0f, 0.0f}}, + g_ActiveConfig.backend_info.bSupportsReversedDepthRange ? 1.0f : 0.0f); + return true; +} + +void FramebufferManager::DestroyEFBFramebuffer() +{ + m_efb_framebuffer.reset(); + m_efb_convert_framebuffer.reset(); + m_efb_color_texture.reset(); + m_efb_convert_color_texture.reset(); + m_efb_depth_texture.reset(); + m_efb_resolve_color_texture.reset(); + m_efb_depth_resolve_framebuffer.reset(); + m_efb_depth_resolve_texture.reset(); +} + +void FramebufferManager::BindEFBFramebuffer() +{ + g_renderer->SetFramebuffer(m_efb_framebuffer.get()); +} + +AbstractTexture* FramebufferManager::ResolveEFBColorTexture(const MathUtil::Rectangle& region) +{ + // Return the normal EFB texture if multisampling is off. + if (!IsEFBMultisampled()) + { + m_efb_color_texture->FinishedRendering(); + return m_efb_color_texture.get(); + } + + // It's not valid to resolve an out-of-range rectangle. + MathUtil::Rectangle clamped_region = region; + clamped_region.ClampUL(0, 0, GetEFBWidth(), GetEFBHeight()); + clamped_region = g_renderer->ConvertFramebufferRectangle(clamped_region, m_efb_framebuffer.get()); + + // Resolve to our already-created texture. 
+ for (u32 layer = 0; layer < GetEFBLayers(); layer++) + { + m_efb_resolve_color_texture->ResolveFromTexture(m_efb_color_texture.get(), clamped_region, + layer, 0); + } + + m_efb_resolve_color_texture->FinishedRendering(); + return m_efb_resolve_color_texture.get(); +} + +AbstractTexture* FramebufferManager::ResolveEFBDepthTexture(const MathUtil::Rectangle& region) +{ + if (!IsEFBMultisampled()) + { + m_efb_depth_texture->FinishedRendering(); + return m_efb_depth_texture.get(); + } + + // It's not valid to resolve an out-of-range rectangle. + MathUtil::Rectangle clamped_region = region; + clamped_region.ClampUL(0, 0, GetEFBWidth(), GetEFBHeight()); + clamped_region = g_renderer->ConvertFramebufferRectangle(clamped_region, m_efb_framebuffer.get()); + + m_efb_depth_texture->FinishedRendering(); + g_renderer->BeginUtilityDrawing(); + g_renderer->SetAndDiscardFramebuffer(m_efb_depth_resolve_framebuffer.get()); + g_renderer->SetPipeline(m_efb_depth_resolve_pipeline.get()); + g_renderer->SetTexture(0, m_efb_depth_texture.get()); + g_renderer->SetSamplerState(0, RenderState::GetPointSamplerState()); + g_renderer->SetViewportAndScissor(clamped_region); + g_renderer->Draw(0, 3); + m_efb_depth_resolve_texture->FinishedRendering(); + g_renderer->EndUtilityDrawing(); + + return m_efb_depth_resolve_texture.get(); +} + +bool FramebufferManager::ReinterpretPixelData(EFBReinterpretType convtype) +{ + if (!m_format_conversion_pipelines[static_cast(convtype)]) + return false; + + // Draw to the secondary framebuffer. 
+ m_efb_color_texture->FinishedRendering(); + g_renderer->BeginUtilityDrawing(); + g_renderer->SetAndDiscardFramebuffer(m_efb_convert_framebuffer.get()); + g_renderer->SetViewportAndScissor(m_efb_framebuffer->GetRect()); + g_renderer->SetPipeline(m_format_conversion_pipelines[static_cast(convtype)].get()); + g_renderer->SetTexture(0, m_efb_color_texture.get()); + g_renderer->Draw(0, 3); + + // And swap the framebuffers around, so we do new drawing to the converted framebuffer. + std::swap(m_efb_color_texture, m_efb_convert_color_texture); + std::swap(m_efb_framebuffer, m_efb_convert_framebuffer); + g_renderer->EndUtilityDrawing(); + return true; +} + +bool FramebufferManager::CompileConversionPipelines() +{ + for (u32 i = 0; i < NUM_EFB_REINTERPRET_TYPES; i++) + { + std::unique_ptr pixel_shader = g_renderer->CreateShaderFromSource( + ShaderStage::Pixel, FramebufferShaderGen::GenerateFormatConversionShader( + static_cast(i), GetEFBSamples())); + if (!pixel_shader) + return false; + + AbstractPipelineConfig config = {}; + config.vertex_shader = g_shader_cache->GetScreenQuadVertexShader(); + config.geometry_shader = IsEFBStereo() ? 
g_shader_cache->GetTexcoordGeometryShader() : nullptr; + config.pixel_shader = pixel_shader.get(); + config.rasterization_state = RenderState::GetNoCullRasterizationState(PrimitiveType::Triangles); + config.depth_state = RenderState::GetNoDepthTestingDepthState(); + config.blending_state = RenderState::GetNoBlendingBlendState(); + config.framebuffer_state = GetEFBFramebufferState(); + config.usage = AbstractPipelineUsage::Utility; + m_format_conversion_pipelines[i] = g_renderer->CreatePipeline(config); + if (!m_format_conversion_pipelines[i]) + return false; + } + + return true; +} + +void FramebufferManager::DestroyConversionPipelines() +{ + for (auto& pipeline : m_format_conversion_pipelines) + pipeline.reset(); +} + +bool FramebufferManager::PopulateColorReadbackTexture() +{ + g_vertex_manager->OnCPUEFBAccess(); + + // Issue a copy from framebuffer -> copy texture if we have >1xIR or MSAA on. + AbstractTexture* src_texture = + ResolveEFBColorTexture(MathUtil::Rectangle(0, 0, GetEFBWidth(), GetEFBHeight())); + if (g_renderer->GetEFBScale() != 1) + { + // Downsample from internal resolution to 1x. + // TODO: This won't produce correct results at IRs above 2x. + g_renderer->BeginUtilityDrawing(); + g_renderer->SetAndDiscardFramebuffer(m_color_copy_framebuffer.get()); + g_renderer->SetViewportAndScissor(m_color_copy_framebuffer->GetRect()); + g_renderer->SetPipeline(m_color_copy_pipeline.get()); + g_renderer->SetTexture(0, src_texture); + g_renderer->SetSamplerState(0, RenderState::GetLinearSamplerState()); + g_renderer->Draw(0, 3); + + // Copy from EFB or copy texture to staging texture. 
+ m_color_readback_texture->CopyFromTexture(m_color_copy_texture.get(), + m_color_readback_texture->GetRect(), 0, 0, + m_color_readback_texture->GetRect()); + + g_renderer->EndUtilityDrawing(); + } + else + { + m_color_readback_texture->CopyFromTexture(src_texture, m_color_readback_texture->GetRect(), 0, + 0, m_color_readback_texture->GetRect()); + } + + // Wait until the copy is complete. + m_color_readback_texture->Flush(); + m_color_readback_texture_valid = true; + return true; +} + +bool FramebufferManager::PopulateDepthReadbackTexture() +{ + g_vertex_manager->OnCPUEFBAccess(); + + // Issue a copy from framebuffer -> copy texture if we have >1xIR or MSAA on. + AbstractTexture* src_texture = + ResolveEFBDepthTexture(MathUtil::Rectangle(0, 0, GetEFBWidth(), GetEFBHeight())); + if (g_renderer->GetEFBScale() != 1) + { + // Downsample from internal resolution to 1x. + // TODO: This won't produce correct results at IRs above 2x. + g_renderer->BeginUtilityDrawing(); + g_renderer->SetAndDiscardFramebuffer(m_depth_copy_framebuffer.get()); + g_renderer->SetViewportAndScissor(m_depth_copy_framebuffer->GetRect()); + g_renderer->SetPipeline(m_depth_copy_pipeline.get()); + g_renderer->SetTexture(0, src_texture); + g_renderer->SetSamplerState(0, RenderState::GetLinearSamplerState()); + g_renderer->Draw(0, 3); + + // No need to call FinishedRendering() here because CopyFromTexture() transitions. + m_depth_readback_texture->CopyFromTexture(m_depth_copy_texture.get(), + m_depth_readback_texture->GetRect(), 0, 0, + m_depth_readback_texture->GetRect()); + + g_renderer->EndUtilityDrawing(); + } + else + { + m_depth_readback_texture->CopyFromTexture(src_texture, m_depth_readback_texture->GetRect(), 0, + 0, m_depth_readback_texture->GetRect()); + } + + // Wait until the copy is complete. 
+ m_depth_readback_texture->Flush(); + m_depth_readback_texture_valid = true; + return true; +} + +void FramebufferManager::InvalidatePeekCache() +{ + m_color_readback_texture_valid = false; + m_depth_readback_texture_valid = false; +} + +bool FramebufferManager::CompileReadbackPipelines() +{ + AbstractPipelineConfig config = {}; + config.vertex_shader = g_shader_cache->GetScreenQuadVertexShader(); + config.geometry_shader = IsEFBStereo() ? g_shader_cache->GetTexcoordGeometryShader() : nullptr; + config.pixel_shader = g_shader_cache->GetTextureCopyPixelShader(); + config.rasterization_state = RenderState::GetNoCullRasterizationState(PrimitiveType::Triangles); + config.depth_state = RenderState::GetNoDepthTestingDepthState(); + config.blending_state = RenderState::GetNoBlendingBlendState(); + config.framebuffer_state = RenderState::GetColorFramebufferState(GetEFBColorFormat()); + config.usage = AbstractPipelineUsage::Utility; + m_color_copy_pipeline = g_renderer->CreatePipeline(config); + if (!m_color_copy_pipeline) + return false; + + // same for depth, except different format + config.framebuffer_state.color_texture_format = + AbstractTexture::GetColorFormatForDepthFormat(GetEFBDepthFormat()); + m_depth_copy_pipeline = g_renderer->CreatePipeline(config); + if (!m_depth_copy_pipeline) + return false; + + if (IsEFBMultisampled()) + { + auto depth_resolve_shader = g_renderer->CreateShaderFromSource( + ShaderStage::Pixel, FramebufferShaderGen::GenerateResolveDepthPixelShader(GetEFBSamples())); + if (!depth_resolve_shader) + return false; + + config.pixel_shader = depth_resolve_shader.get(); + m_efb_depth_resolve_pipeline = g_renderer->CreatePipeline(config); + if (!m_efb_depth_resolve_pipeline) + return false; + } + + return true; +} + +void FramebufferManager::DestroyReadbackPipelines() +{ + m_efb_depth_resolve_pipeline.reset(); + m_depth_copy_pipeline.reset(); + m_color_copy_pipeline.reset(); +} + +bool FramebufferManager::CreateReadbackFramebuffer() +{ + const 
TextureConfig color_config(EFB_WIDTH, EFB_HEIGHT, 1, 1, 1, GetEFBColorFormat(), + AbstractTextureFlag_RenderTarget); + const TextureConfig depth_config( + EFB_WIDTH, EFB_HEIGHT, 1, 1, 1, + AbstractTexture::GetColorFormatForDepthFormat(GetEFBDepthFormat()), + AbstractTextureFlag_RenderTarget); + if (g_renderer->GetEFBScale() != 1) + { + m_color_copy_texture = g_renderer->CreateTexture(color_config); + m_depth_copy_texture = g_renderer->CreateTexture(depth_config); + if (!m_color_copy_texture || !m_depth_copy_texture) + return false; + + m_color_copy_framebuffer = g_renderer->CreateFramebuffer(m_color_copy_texture.get(), nullptr); + m_depth_copy_framebuffer = g_renderer->CreateFramebuffer(m_depth_copy_texture.get(), nullptr); + if (!m_color_copy_framebuffer || !m_depth_copy_framebuffer) + return false; + } + + m_color_readback_texture = + g_renderer->CreateStagingTexture(StagingTextureType::Mutable, color_config); + m_depth_readback_texture = + g_renderer->CreateStagingTexture(StagingTextureType::Mutable, depth_config); + if (!m_color_readback_texture || !m_depth_readback_texture) + return false; + + return true; +} + +void FramebufferManager::DestroyReadbackFramebuffer() +{ + m_depth_copy_framebuffer.reset(); + m_depth_copy_texture.reset(); + m_depth_readback_texture_valid = false; + m_color_copy_framebuffer.reset(); + m_color_copy_texture.reset(); + m_color_readback_texture_valid = false; +} + +void FramebufferManager::ClearEFB(const MathUtil::Rectangle& rc, bool clear_color, + bool clear_alpha, bool clear_z, u32 color, u32 z) +{ + FlushEFBPokes(); + InvalidatePeekCache(); + g_renderer->BeginUtilityDrawing(); + + // Set up uniforms. 
+ struct Uniforms + { + float clear_color[4]; + float clear_depth; + float padding1, padding2, padding3; + }; + static_assert(std::is_standard_layout::value); + Uniforms uniforms = {{static_cast((color >> 16) & 0xFF) / 255.0f, + static_cast((color >> 8) & 0xFF) / 255.0f, + static_cast((color >> 0) & 0xFF) / 255.0f, + static_cast((color >> 24) & 0xFF) / 255.0f}, + static_cast(z & 0xFFFFFF) / 16777216.0f}; + if (!g_ActiveConfig.backend_info.bSupportsReversedDepthRange) + uniforms.clear_depth = 1.0f - uniforms.clear_depth; + g_vertex_manager->UploadUtilityUniforms(&uniforms, sizeof(uniforms)); + + const auto target_rc = g_renderer->ConvertFramebufferRectangle( + g_renderer->ConvertEFBRectangle(rc), m_efb_framebuffer.get()); + g_renderer->SetPipeline(m_efb_clear_pipelines[clear_color][clear_alpha][clear_z].get()); + g_renderer->SetViewportAndScissor(target_rc); + g_renderer->Draw(0, 3); + g_renderer->EndUtilityDrawing(); +} + +bool FramebufferManager::CompileClearPipelines() +{ + auto vertex_shader = g_renderer->CreateShaderFromSource( + ShaderStage::Vertex, FramebufferShaderGen::GenerateClearVertexShader()); + if (!vertex_shader) + return false; + + AbstractPipelineConfig config; + config.vertex_format = nullptr; + config.vertex_shader = vertex_shader.get(); + config.geometry_shader = IsEFBStereo() ? 
g_shader_cache->GetColorGeometryShader() : nullptr; + config.pixel_shader = g_shader_cache->GetColorPixelShader(); + config.rasterization_state = RenderState::GetNoCullRasterizationState(PrimitiveType::Triangles); + config.depth_state = RenderState::GetAlwaysWriteDepthState(); + config.blending_state = RenderState::GetNoBlendingBlendState(); + config.framebuffer_state = GetEFBFramebufferState(); + config.usage = AbstractPipelineUsage::Utility; + + for (u32 color_enable = 0; color_enable < 2; color_enable++) + { + config.blending_state.colorupdate = color_enable != 0; + for (u32 alpha_enable = 0; alpha_enable < 2; alpha_enable++) + { + config.blending_state.alphaupdate = alpha_enable != 0; + for (u32 depth_enable = 0; depth_enable < 2; depth_enable++) + { + config.depth_state.testenable = depth_enable != 0; + config.depth_state.updateenable = depth_enable != 0; + + m_efb_clear_pipelines[color_enable][alpha_enable][depth_enable] = + g_renderer->CreatePipeline(config); + if (!m_efb_clear_pipelines[color_enable][alpha_enable][depth_enable]) + return false; + } + } + } + + return true; +} + +void FramebufferManager::DestroyClearPipelines() +{ + for (u32 color_enable = 0; color_enable < 2; color_enable++) + { + for (u32 alpha_enable = 0; alpha_enable < 2; alpha_enable++) + { + for (u32 depth_enable = 0; depth_enable < 2; depth_enable++) + { + m_efb_clear_pipelines[color_enable][alpha_enable][depth_enable].reset(); + } + } + } +} + +u32 FramebufferManager::PeekEFBColor(u32 x, u32 y) +{ + if (!m_color_readback_texture_valid && !PopulateColorReadbackTexture()) + return 0; + + // The y coordinate here assumes upper-left origin, but the readback texture is lower-left in GL. 
+ if (g_ActiveConfig.backend_info.bUsesLowerLeftOrigin) + y = EFB_HEIGHT - 1 - y; + + u32 value; + m_color_readback_texture->ReadTexel(x, y, &value); + return value; +} + +float FramebufferManager::PeekEFBDepth(u32 x, u32 y) +{ + if (!m_depth_readback_texture_valid && !PopulateDepthReadbackTexture()) + return 0.0f; + + // The y coordinate here assumes upper-left origin, but the readback texture is lower-left in GL. + if (g_ActiveConfig.backend_info.bUsesLowerLeftOrigin) + y = EFB_HEIGHT - 1 - y; + + float value; + m_depth_readback_texture->ReadTexel(x, y, &value); + return value; +} + +void FramebufferManager::PokeEFBColor(u32 x, u32 y, u32 color) +{ + // Flush if we exceeded the number of vertices per batch. + if ((m_color_poke_vertices.size() + 6) > MAX_POKE_VERTICES) + FlushEFBPokes(); + + CreatePokeVertices(&m_color_poke_vertices, x, y, 0.0f, color); + + // Update the peek cache if it's valid, since we know the color of the pixel now. + if (m_color_readback_texture_valid) + { + // See comment above for reasoning for lower-left coordinates. + if (g_ActiveConfig.backend_info.bUsesLowerLeftOrigin) + y = EFB_HEIGHT - 1 - y; + + m_color_readback_texture->WriteTexel(x, y, &color); + } +} + +void FramebufferManager::PokeEFBDepth(u32 x, u32 y, float depth) +{ + // Flush if the depth poke batch would exceed the number of vertices per batch. + if ((m_depth_poke_vertices.size() + 6) > MAX_POKE_VERTICES) + FlushEFBPokes(); + + CreatePokeVertices(&m_depth_poke_vertices, x, y, depth, 0); + + // Update the peek cache if it's valid, since we know the depth of the pixel now. + if (m_depth_readback_texture_valid) + { + // See comment above for reasoning for lower-left coordinates. 
+ if (g_ActiveConfig.backend_info.bUsesLowerLeftOrigin) + y = EFB_HEIGHT - 1 - y; + + m_depth_readback_texture->WriteTexel(x, y, &depth); + } +} + +void FramebufferManager::CreatePokeVertices(std::vector* destination_list, u32 x, + u32 y, float z, u32 color) +{ + const float cs_pixel_width = 1.0f / EFB_WIDTH * 2.0f; + const float cs_pixel_height = 1.0f / EFB_HEIGHT * 2.0f; + if (g_ActiveConfig.backend_info.bSupportsLargePoints) + { + // GPU will expand the point to a quad. + const float cs_x = (static_cast(x) + 0.5f) * cs_pixel_width - 1.0f; + const float cs_y = 1.0f - (static_cast(y) + 0.5f) * cs_pixel_height; + const float point_size = static_cast(g_renderer->GetEFBScale()); + destination_list->push_back({{cs_x, cs_y, z, point_size}, color}); + return; + } + + // Generate quad from the single point (clip-space coordinates). + const float x1 = static_cast(x) * cs_pixel_width - 1.0f; + const float y1 = 1.0f - static_cast(y) * cs_pixel_height; + const float x2 = x1 + cs_pixel_width; + const float y2 = y1 + cs_pixel_height; + destination_list->push_back({{x1, y1, z, 1.0f}, color}); + destination_list->push_back({{x2, y1, z, 1.0f}, color}); + destination_list->push_back({{x1, y2, z, 1.0f}, color}); + destination_list->push_back({{x1, y2, z, 1.0f}, color}); + destination_list->push_back({{x2, y1, z, 1.0f}, color}); + destination_list->push_back({{x2, y2, z, 1.0f}, color}); +} + +void FramebufferManager::FlushEFBPokes() +{ + if (!m_color_poke_vertices.empty()) + { + DrawPokeVertices(m_color_poke_vertices.data(), static_cast(m_color_poke_vertices.size()), + m_color_poke_pipeline.get()); + m_color_poke_vertices.clear(); + } + + if (!m_depth_poke_vertices.empty()) + { + DrawPokeVertices(m_depth_poke_vertices.data(), static_cast(m_depth_poke_vertices.size()), + m_depth_poke_pipeline.get()); + m_depth_poke_vertices.clear(); + } +} + +void FramebufferManager::DrawPokeVertices(const EFBPokeVertex* vertices, u32 vertex_count, + const AbstractPipeline* pipeline) +{ + // Copy to 
vertex buffer. + g_renderer->BeginUtilityDrawing(); + u32 base_vertex, base_index; + g_vertex_manager->UploadUtilityVertices(vertices, sizeof(EFBPokeVertex), + static_cast(vertex_count), nullptr, 0, &base_vertex, + &base_index); + + // Now we can draw. + g_renderer->SetViewportAndScissor(m_efb_framebuffer->GetRect()); + g_renderer->SetPipeline(pipeline); + g_renderer->Draw(base_vertex, vertex_count); + g_renderer->EndUtilityDrawing(); +} + +bool FramebufferManager::CompilePokePipelines() +{ + PortableVertexDeclaration vtx_decl = {}; + vtx_decl.position.enable = true; + vtx_decl.position.type = VAR_FLOAT; + vtx_decl.position.components = 4; + vtx_decl.position.integer = false; + vtx_decl.position.offset = offsetof(EFBPokeVertex, position); + vtx_decl.colors[0].enable = true; + vtx_decl.colors[0].type = VAR_UNSIGNED_BYTE; + vtx_decl.colors[0].components = 4; + vtx_decl.colors[0].integer = false; + vtx_decl.colors[0].offset = offsetof(EFBPokeVertex, color); + vtx_decl.stride = sizeof(EFBPokeVertex); + + m_poke_vertex_format = g_renderer->CreateNativeVertexFormat(vtx_decl); + if (!m_poke_vertex_format) + return false; + + auto poke_vertex_shader = g_renderer->CreateShaderFromSource( + ShaderStage::Vertex, FramebufferShaderGen::GenerateEFBPokeVertexShader()); + if (!poke_vertex_shader) + return false; + + AbstractPipelineConfig config = {}; + config.vertex_format = m_poke_vertex_format.get(); + config.vertex_shader = poke_vertex_shader.get(); + config.geometry_shader = IsEFBStereo() ? g_shader_cache->GetColorGeometryShader() : nullptr; + config.pixel_shader = g_shader_cache->GetColorPixelShader(); + config.rasterization_state = RenderState::GetNoCullRasterizationState( + g_ActiveConfig.backend_info.bSupportsLargePoints ? 
PrimitiveType::Points : + PrimitiveType::Triangles); + config.depth_state = RenderState::GetNoDepthTestingDepthState(); + config.blending_state = RenderState::GetNoBlendingBlendState(); + config.framebuffer_state = GetEFBFramebufferState(); + config.usage = AbstractPipelineUsage::Utility; + m_color_poke_pipeline = g_renderer->CreatePipeline(config); + if (!m_color_poke_pipeline) + return false; + + // Turn off color writes, depth writes on for depth pokes. + config.depth_state = RenderState::GetAlwaysWriteDepthState(); + config.blending_state = RenderState::GetNoColorWriteBlendState(); + m_depth_poke_pipeline = g_renderer->CreatePipeline(config); + if (!m_depth_poke_pipeline) + return false; + + return true; +} + +void FramebufferManager::DestroyPokePipelines() +{ + m_depth_poke_pipeline.reset(); + m_color_poke_pipeline.reset(); + m_poke_vertex_format.reset(); +} diff --git a/Source/Core/VideoCommon/FramebufferManager.h b/Source/Core/VideoCommon/FramebufferManager.h new file mode 100644 index 0000000000..d0de7b62b1 --- /dev/null +++ b/Source/Core/VideoCommon/FramebufferManager.h @@ -0,0 +1,171 @@ +// Copyright 2010 Dolphin Emulator Project +// Licensed under GPLv2+ +// Refer to the license.txt file included. 
+ +#pragma once + +#include +#include + +#include "Common/CommonTypes.h" +#include "VideoCommon/AbstractTexture.h" +#include "VideoCommon/RenderState.h" +#include "VideoCommon/TextureConfig.h" + +class AbstractFramebuffer; +class AbstractPipeline; +class AbstractStagingTexture; +class NativeVertexFormat; + +enum class EFBReinterpretType +{ + RGB8ToRGB565 = 0, + RGB8ToRGBA6 = 1, + RGBA6ToRGB8 = 2, + RGBA6ToRGB565 = 3, + RGB565ToRGB8 = 4, + RGB565ToRGBA6 = 5 +}; +constexpr u32 NUM_EFB_REINTERPRET_TYPES = 6; + +inline bool AddressRangesOverlap(u32 aLower, u32 aUpper, u32 bLower, u32 bUpper) +{ + return !((aLower >= bUpper) || (bLower >= aUpper)); +} + +class FramebufferManager final +{ +public: + FramebufferManager(); + virtual ~FramebufferManager(); + + // Does not require the framebuffer to be created. Slower than direct queries. + static AbstractTextureFormat GetEFBColorFormat(); + static AbstractTextureFormat GetEFBDepthFormat(); + static TextureConfig GetEFBColorTextureConfig(); + static TextureConfig GetEFBDepthTextureConfig(); + + // Accessors. + AbstractTexture* GetEFBColorTexture() const { return m_efb_color_texture.get(); } + AbstractTexture* GetEFBDepthTexture() const { return m_efb_depth_texture.get(); } + AbstractFramebuffer* GetEFBFramebuffer() const { return m_efb_framebuffer.get(); } + u32 GetEFBWidth() const { return m_efb_color_texture->GetWidth(); } + u32 GetEFBHeight() const { return m_efb_color_texture->GetHeight(); } + u32 GetEFBLayers() const { return m_efb_color_texture->GetLayers(); } + u32 GetEFBSamples() const { return m_efb_color_texture->GetSamples(); } + bool IsEFBMultisampled() const { return m_efb_color_texture->IsMultisampled(); } + bool IsEFBStereo() const { return m_efb_color_texture->GetLayers() > 1; } + FramebufferState GetEFBFramebufferState() const; + + // First-time setup. + bool Initialize(); + + // Recreate EFB framebuffers, call when the EFB size (IR) changes. 
+ void RecreateEFBFramebuffer(); + + // Recompile shaders, use when MSAA mode changes. + void RecompileShaders(); + + // Non-virtual (this class is final). Presumably binds the EFB as the current framebuffer. + void BindEFBFramebuffer(); + + // Resolve color/depth textures to a non-msaa texture, and return it. + AbstractTexture* ResolveEFBColorTexture(const MathUtil::Rectangle& region); + AbstractTexture* ResolveEFBDepthTexture(const MathUtil::Rectangle& region); + + // Reinterpret pixel format of EFB color texture. + // Assumes no render pass is currently in progress. + // Swaps EFB framebuffers, so re-bind afterwards. + bool ReinterpretPixelData(EFBReinterpretType convtype); + + // Clears the EFB using shaders. + void ClearEFB(const MathUtil::Rectangle& rc, bool clear_color, bool clear_alpha, + bool clear_z, u32 color, u32 z); + + // Reads a framebuffer value back from the GPU. This may block if the cache is not current. + u32 PeekEFBColor(u32 x, u32 y); + float PeekEFBDepth(u32 x, u32 y); + void InvalidatePeekCache(); + + // Writes a value to the framebuffer. This will never block, and writes will be batched.
+ void PokeEFBColor(u32 x, u32 y, u32 color); + void PokeEFBDepth(u32 x, u32 y, float depth); + void FlushEFBPokes(); + +protected: + struct EFBPokeVertex + { + float position[4]; + u32 color; + }; + static_assert(std::is_standard_layout::value, "EFBPokeVertex is standard-layout"); + + bool CreateEFBFramebuffer(); + void DestroyEFBFramebuffer(); + + bool CompileConversionPipelines(); + void DestroyConversionPipelines(); + + bool CompileReadbackPipelines(); + void DestroyReadbackPipelines(); + + bool CreateReadbackFramebuffer(); + void DestroyReadbackFramebuffer(); + + bool CompileClearPipelines(); + void DestroyClearPipelines(); + + bool CompilePokePipelines(); + void DestroyPokePipelines(); + + bool PopulateColorReadbackTexture(); + bool PopulateDepthReadbackTexture(); + + void CreatePokeVertices(std::vector* destination_list, u32 x, u32 y, float z, + u32 color); + + void DrawPokeVertices(const EFBPokeVertex* vertices, u32 vertex_count, + const AbstractPipeline* pipeline); + + std::unique_ptr m_efb_color_texture; + std::unique_ptr m_efb_convert_color_texture; + std::unique_ptr m_efb_depth_texture; + std::unique_ptr m_efb_resolve_color_texture; + std::unique_ptr m_efb_depth_resolve_texture; + + std::unique_ptr m_efb_framebuffer; + std::unique_ptr m_efb_convert_framebuffer; + std::unique_ptr m_efb_depth_resolve_framebuffer; + std::unique_ptr m_efb_depth_resolve_pipeline; + + // Format conversion shaders + std::array, 6> m_format_conversion_pipelines; + + // EFB readback texture + std::unique_ptr m_color_copy_texture; + std::unique_ptr m_depth_copy_texture; + std::unique_ptr m_color_copy_framebuffer; + std::unique_ptr m_depth_copy_framebuffer; + std::unique_ptr m_color_copy_pipeline; + std::unique_ptr m_depth_copy_pipeline; + + // CPU-side EFB readback texture + std::unique_ptr m_color_readback_texture; + std::unique_ptr m_depth_readback_texture; + bool m_color_readback_texture_valid = false; + bool m_depth_readback_texture_valid = false; + + // EFB clear pipelines + 
// Indexed by [color_write_enabled][alpha_write_enabled][depth_write_enabled] + std::array, 2>, 2>, 2> + m_efb_clear_pipelines; + + // EFB poke drawing setup + std::unique_ptr m_poke_vertex_format; + std::unique_ptr m_color_poke_pipeline; + std::unique_ptr m_depth_poke_pipeline; + std::vector m_color_poke_vertices; + std::vector m_depth_poke_vertices; +}; + +extern std::unique_ptr g_framebuffer_manager; diff --git a/Source/Core/VideoCommon/FramebufferManagerBase.cpp b/Source/Core/VideoCommon/FramebufferManagerBase.cpp deleted file mode 100644 index 530aac7584..0000000000 --- a/Source/Core/VideoCommon/FramebufferManagerBase.cpp +++ /dev/null @@ -1,28 +0,0 @@ -// Copyright 2010 Dolphin Emulator Project -// Licensed under GPLv2+ -// Refer to the license.txt file included. - -#include "VideoCommon/FramebufferManagerBase.h" - -#include - -#include "VideoCommon/AbstractTexture.h" -#include "VideoCommon/DriverDetails.h" -#include "VideoCommon/RenderBase.h" - -std::unique_ptr g_framebuffer_manager; - -unsigned int FramebufferManagerBase::m_EFBLayers = 1; - -FramebufferManagerBase::~FramebufferManagerBase() = default; - -AbstractTextureFormat FramebufferManagerBase::GetEFBDepthFormat() -{ - // 32-bit depth clears are broken in the Adreno Vulkan driver, and have no effect. - // To work around this, we use a D24_S8 buffer instead, which results in a loss of accuracy. - // We still resolve this to a R32F texture, as there is no 24-bit format. - if (DriverDetails::HasBug(DriverDetails::BUG_BROKEN_D32F_CLEAR)) - return AbstractTextureFormat::D24_S8; - else - return AbstractTextureFormat::D32F; -} diff --git a/Source/Core/VideoCommon/FramebufferManagerBase.h b/Source/Core/VideoCommon/FramebufferManagerBase.h deleted file mode 100644 index ff9ff0cc2e..0000000000 --- a/Source/Core/VideoCommon/FramebufferManagerBase.h +++ /dev/null @@ -1,30 +0,0 @@ -// Copyright 2010 Dolphin Emulator Project -// Licensed under GPLv2+ -// Refer to the license.txt file included. 
- -#pragma once - -#include - -#include "Common/CommonTypes.h" - -enum class AbstractTextureFormat : u32; - -inline bool AddressRangesOverlap(u32 aLower, u32 aUpper, u32 bLower, u32 bUpper) -{ - return !((aLower >= bUpper) || (bLower >= aUpper)); -} - -class FramebufferManagerBase -{ -public: - virtual ~FramebufferManagerBase(); - - static unsigned int GetEFBLayers() { return m_EFBLayers; } - static AbstractTextureFormat GetEFBDepthFormat(); - -protected: - static unsigned int m_EFBLayers; -}; - -extern std::unique_ptr g_framebuffer_manager; diff --git a/Source/Core/VideoCommon/FramebufferShaderGen.cpp b/Source/Core/VideoCommon/FramebufferShaderGen.cpp new file mode 100644 index 0000000000..de7bdf3ee8 --- /dev/null +++ b/Source/Core/VideoCommon/FramebufferShaderGen.cpp @@ -0,0 +1,464 @@ +#include "VideoCommon/FramebufferShaderGen.h" +#include +#include "VideoCommon/FramebufferManager.h" +#include "VideoCommon/VertexShaderGen.h" + +namespace FramebufferShaderGen +{ +static APIType GetAPIType() +{ + return g_ActiveConfig.backend_info.api_type; +} + +static void EmitUniformBufferDeclaration(std::stringstream& ss) +{ + if (GetAPIType() == APIType::D3D) + ss << "cbuffer UBO : register(b0)\n"; + else + ss << "UBO_BINDING(std140, 1) uniform UBO\n"; +} + +static void EmitSamplerDeclarations(std::stringstream& ss, u32 start = 0, u32 end = 1, + bool multisampled = false) +{ + switch (GetAPIType()) + { + case APIType::D3D: + { + for (u32 i = start; i < end; i++) + { + ss << (multisampled ? "Texture2DMSArray" : "Texture2DArray") << " tex" << i + << " : register(t" << i << ");\n"; + ss << "SamplerState" + << " samp" << i << " : register(s" << i << ");\n"; + } + } + break; + + case APIType::OpenGL: + case APIType::Vulkan: + { + for (u32 i = start; i < end; i++) + { + ss << "SAMPLER_BINDING(" << i << ") uniform " + << (multisampled ?
"sampler2DMSArray" : "sampler2DArray") << " samp" << i << ";\n"; + } + } + break; + default: + break; + } +} + +static void EmitSampleTexture(std::stringstream& ss, u32 n, const char* coords) +{ + switch (GetAPIType()) + { + case APIType::D3D: + ss << "tex" << n << ".Sample(samp" << n << ", " << coords << ")"; + break; + + case APIType::OpenGL: + case APIType::Vulkan: + ss << "texture(samp" << n << ", " << coords << ")"; + break; + + default: + break; + } +} + +static void EmitVertexMainDeclaration(std::stringstream& ss, u32 num_tex_inputs, + u32 num_color_inputs, bool position_input, + u32 num_tex_outputs, u32 num_color_outputs, + const char* extra_inputs = "") +{ + switch (GetAPIType()) + { + case APIType::D3D: + { + ss << "void main("; + for (u32 i = 0; i < num_tex_inputs; i++) + ss << "in float3 rawtex" << i << " : TEXCOORD" << i << ", "; + for (u32 i = 0; i < num_color_inputs; i++) + ss << "in float4 rawcolor" << i << " : COLOR" << i << ", "; + if (position_input) + ss << "in float4 rawpos : POSITION, "; + ss << extra_inputs; + for (u32 i = 0; i < num_tex_outputs; i++) + ss << "out float3 v_tex" << i << " : TEXCOORD" << i << ", "; + for (u32 i = 0; i < num_color_outputs; i++) + ss << "out float4 v_col" << i << " : COLOR" << i << ", "; + ss << "out float4 opos : SV_Position)\n"; + } + break; + + case APIType::OpenGL: + case APIType::Vulkan: + { + for (u32 i = 0; i < num_tex_inputs; i++) + ss << "ATTRIBUTE_LOCATION(" << (SHADER_TEXTURE0_ATTRIB + i) << ") in float3 rawtex" << i + << ";\n"; + for (u32 i = 0; i < num_color_inputs; i++) + ss << "ATTRIBUTE_LOCATION(" << (SHADER_COLOR0_ATTRIB + i) << ") in float4 rawcolor" << i + << ";\n"; + if (position_input) + ss << "ATTRIBUTE_LOCATION(" << SHADER_POSITION_ATTRIB << ") in float4 rawpos;\n"; + for (u32 i = 0; i < num_tex_outputs; i++) + ss << "VARYING_LOCATION(" << i << ") out float3 v_tex" << i << ";\n"; + for (u32 i = 0; i < num_color_outputs; i++) + ss << "VARYING_LOCATION(" << (num_tex_outputs + i) << ") out
float4 v_col" << i << ";\n"; + ss << "#define opos gl_Position\n"; + ss << extra_inputs << "\n"; + ss << "void main()\n"; + } + break; + default: + break; + } +} + +static void EmitPixelMainDeclaration(std::stringstream& ss, u32 num_tex_inputs, + u32 num_color_inputs, const char* output_type = "float4", + const char* extra_vars = "") +{ + switch (GetAPIType()) + { + case APIType::D3D: + { + ss << "void main("; + for (u32 i = 0; i < num_tex_inputs; i++) + ss << "in float3 v_tex" << i << " : TEXCOORD" << i << ", "; + for (u32 i = 0; i < num_color_inputs; i++) + ss << "in float4 v_col" << i << " : COLOR" << i << ", "; + ss << extra_vars << "out " << output_type << " ocol0 : SV_Target)\n"; + } + break; + + case APIType::OpenGL: + case APIType::Vulkan: + { + for (u32 i = 0; i < num_tex_inputs; i++) + ss << "VARYING_LOCATION(" << i << ") in float3 v_tex" << i << ";\n"; + for (u32 i = 0; i < num_color_inputs; i++) + ss << "VARYING_LOCATION(" << (num_tex_inputs + i) << ") in float4 v_col" << i << ";\n"; + ss << "FRAGMENT_OUTPUT_LOCATION(0) out " << output_type << " ocol0;\n"; + ss << extra_vars << "\n"; + ss << "void main()\n"; + } + break; + + default: + break; + } +} + +std::string GenerateScreenQuadVertexShader() +{ + std::stringstream ss; + EmitVertexMainDeclaration(ss, 0, 0, false, 1, 0, + GetAPIType() == APIType::D3D ? "in uint id : SV_VertexID, " : + "#define id gl_VertexID\n"); + ss << "{\n"; + ss << " v_tex0 = float3(float((id << 1) & 2), float(id & 2), 0.0f);\n"; + ss << " opos = float4(v_tex0.xy * float2(2.0f, -2.0f) + float2(-1.0f, 1.0f), 0.0f, 1.0f);\n"; + + // NDC space is flipped in Vulkan. We also flip in GL so that (0,0) is in the lower-left. 
+ if (GetAPIType() == APIType::Vulkan || GetAPIType() == APIType::OpenGL) + ss << " opos.y = -opos.y;\n"; + + ss << "}\n"; + + return ss.str(); +} + +std::string GeneratePassthroughGeometryShader(u32 num_tex, u32 num_colors) +{ + std::stringstream ss; + if (GetAPIType() == APIType::D3D) + { + ss << "struct VS_OUTPUT\n"; + ss << "{\n"; + for (u32 i = 0; i < num_tex; i++) + ss << " float3 tex" << i << " : TEXCOORD" << i << ";\n"; + for (u32 i = 0; i < num_colors; i++) + ss << " float4 color" << i << " : COLOR" << i << ";\n"; + ss << " float4 position : SV_Position;\n"; + ss << "};\n"; + ss << "struct GS_OUTPUT\n"; + ss << "{"; + for (u32 i = 0; i < num_tex; i++) + ss << " float3 tex" << i << " : TEXCOORD" << i << ";\n"; + for (u32 i = 0; i < num_colors; i++) + ss << " float4 color" << i << " : COLOR" << i << ";\n"; + ss << " float4 position : SV_Position;\n"; + ss << " uint slice : SV_RenderTargetArrayIndex;\n"; + ss << "};\n\n"; + ss << "[maxvertexcount(6)]\n"; + ss << "void main(triangle VS_OUTPUT vso[3], inout TriangleStream output)\n"; + ss << "{\n"; + ss << " for (uint slice = 0; slice < 2u; slice++)\n"; + ss << " {\n"; + ss << " for (int i = 0; i < 3; i++)\n"; + ss << " {\n"; + ss << " GS_OUTPUT gso;\n"; + ss << " gso.position = vso[i].position;\n"; + for (u32 i = 0; i < num_tex; i++) + ss << " gso.tex" << i << " = float3(vso[i].tex" << i << ".xy, float(slice));\n"; + for (u32 i = 0; i < num_colors; i++) + ss << " gso.color" << i << " = vso[i].color" << i << ";\n"; + ss << " gso.slice = slice;\n"; + ss << " output.Append(gso);\n"; + ss << " }\n"; + ss << " output.RestartStrip();\n"; + ss << " }\n"; + ss << "}\n"; + } + else if (GetAPIType() == APIType::OpenGL || GetAPIType() == APIType::Vulkan) + { + ss << "layout(triangles) in;\n"; + ss << "layout(triangle_strip, max_vertices = 6) out;\n"; + for (u32 i = 0; i < num_tex; i++) + { + ss << "layout(location = " << i << ") in float3 v_tex" << i << "[];\n"; + ss << "layout(location = " << i << ") out float3 out_tex" 
<< i << ";\n"; + } + for (u32 i = 0; i < num_colors; i++) + { + ss << "layout(location = " << (num_tex + i) << ") in float4 v_col" << i << "[];\n"; + ss << "layout(location = " << (num_tex + i) << ") out float4 out_col" << i << ";\n"; + } + ss << "\n"; + ss << "void main()\n"; + ss << "{\n"; + ss << " for (int j = 0; j < 2; j++)\n"; + ss << " {\n"; + ss << " gl_Layer = j;\n"; + + // We have to explicitly unroll this loop otherwise the GL compiler gets cranky. + for (u32 v = 0; v < 3; v++) + { + ss << " gl_Position = gl_in[" << v << "].gl_Position;\n"; + for (u32 i = 0; i < num_tex; i++) + ss << " out_tex" << i << " = float3(v_tex" << i << "[" << v << "].xy, float(j));\n"; + for (u32 i = 0; i < num_colors; i++) + ss << " out_col" << i << " = v_col" << i << "[" << v << "];\n"; + ss << " EmitVertex();\n\n"; + } + ss << " EndPrimitive();\n"; + ss << " }\n"; + ss << "}\n"; + } + + return ss.str(); +} + +std::string GenerateTextureCopyVertexShader() +{ + std::stringstream ss; + EmitUniformBufferDeclaration(ss); + ss << "{"; + ss << " float2 src_offset;\n"; + ss << " float2 src_size;\n"; + ss << "};\n\n"; + + EmitVertexMainDeclaration(ss, 0, 0, false, 1, 0, + GetAPIType() == APIType::D3D ? "in uint id : SV_VertexID, " : + "#define id gl_VertexID"); + ss << "{\n"; + ss << " v_tex0 = float3(float((id << 1) & 2), float(id & 2), 0.0f);\n"; + ss << " opos = float4(v_tex0.xy * float2(2.0f, -2.0f) + float2(-1.0f, 1.0f), 0.0f, 1.0f);\n"; + ss << " v_tex0 = float3(src_offset + (src_size * v_tex0.xy), 0.0f);\n"; + + // NDC space is flipped in Vulkan. We also flip in GL so that (0,0) is in the lower-left. 
+ if (GetAPIType() == APIType::Vulkan || GetAPIType() == APIType::OpenGL) + ss << " opos.y = -opos.y;\n"; + + ss << "}\n"; + + return ss.str(); +} + +std::string GenerateTextureCopyPixelShader() +{ + std::stringstream ss; + EmitSamplerDeclarations(ss, 0, 1, false); + EmitPixelMainDeclaration(ss, 1, 0); + ss << "{\n"; + ss << " ocol0 = "; + EmitSampleTexture(ss, 0, "v_tex0"); + ss << ";\n"; + ss << "}\n"; + return ss.str(); +} + +std::string GenerateColorPixelShader() +{ + std::stringstream ss; + EmitPixelMainDeclaration(ss, 0, 1); + ss << "{\n"; + ss << " ocol0 = v_col0;\n"; + ss << "}\n"; + return ss.str(); +} + +std::string GenerateResolveDepthPixelShader(u32 samples) +{ + std::stringstream ss; + EmitSamplerDeclarations(ss, 0, 1, true); + EmitPixelMainDeclaration(ss, 1, 0, "float", + GetAPIType() == APIType::D3D ? "in float4 ipos : SV_Position, " : ""); + ss << "{\n"; + ss << " int layer = int(v_tex0.z);\n"; + if (GetAPIType() == APIType::D3D) + ss << " int3 coords = int3(int2(ipos.xy), layer);\n"; + else + ss << " int3 coords = int3(int2(gl_FragCoord.xy), layer);\n"; + + // Take the minimum of all depth samples. + if (GetAPIType() == APIType::D3D) + ss << " ocol0 = tex0.Load(coords, 0).r;\n"; + else + ss << " ocol0 = texelFetch(samp0, coords, 0).r;\n"; + ss << " for (int i = 1; i < " << samples << "; i++)\n"; + if (GetAPIType() == APIType::D3D) + ss << " ocol0 = min(ocol0, tex0.Load(coords, i).r);\n"; + else + ss << " ocol0 = min(ocol0, texelFetch(samp0, coords, i).r);\n"; + + ss << "}\n"; + return ss.str(); +} + +std::string GenerateClearVertexShader() +{ + std::stringstream ss; + EmitUniformBufferDeclaration(ss); + ss << "{\n"; + ss << " float4 clear_color;\n"; + ss << " float clear_depth;\n"; + ss << "};\n"; + + EmitVertexMainDeclaration(ss, 0, 0, false, 0, 1, + GetAPIType() == APIType::D3D ? 
"in uint id : SV_VertexID, " : + "#define id gl_VertexID\n"); + ss << "{\n"; + ss << " float2 coord = float2(float((id << 1) & 2), float(id & 2));\n"; + ss << " opos = float4(coord * float2(2.0f, -2.0f) + float2(-1.0f, 1.0f), clear_depth, 1.0f);\n"; + ss << " v_col0 = clear_color;\n"; + + // NDC space is flipped in Vulkan + if (GetAPIType() == APIType::Vulkan) + ss << " opos.y = -opos.y;\n"; + + ss << "}\n"; + + return ss.str(); +} + +std::string GenerateEFBPokeVertexShader() +{ + std::stringstream ss; + EmitVertexMainDeclaration(ss, 0, 1, true, 0, 1); + ss << "{\n"; + ss << " v_col0 = rawcolor0;\n"; + ss << " opos = float4(rawpos.xyz, 1.0f);\n"; + if (g_ActiveConfig.backend_info.bSupportsLargePoints) + ss << " gl_PointSize = rawpos.w;\n"; + + // NDC space is flipped in Vulkan. + if (GetAPIType() == APIType::Vulkan) + ss << " opos.y = -opos.y;\n"; + + ss << "}\n"; + return ss.str(); +} + +std::string GenerateFormatConversionShader(EFBReinterpretType convtype, u32 samples) +{ + std::stringstream ss; + EmitSamplerDeclarations(ss, 0, 1, samples > 1); + EmitPixelMainDeclaration(ss, 1, 0, "float4", + GetAPIType() == APIType::D3D ? + "in float4 ipos : SV_Position, in uint isample : SV_SampleIndex, " : + ""); + ss << "{\n"; + ss << " int layer = int(v_tex0.z);\n"; + if (GetAPIType() == APIType::D3D) + ss << " int3 coords = int3(int2(ipos.xy), layer);\n"; + else + ss << " int3 coords = int3(int2(gl_FragCoord.xy), layer);\n"; + + if (samples == 1) + { + // No MSAA at all. + if (GetAPIType() == APIType::D3D) + ss << " float4 val = tex0.Load(int4(coords, 0));\n"; + else + ss << " float4 val = texelFetch(samp0, coords, 0);\n"; + } + else if (g_ActiveConfig.bSSAA) + { + // Sample shading, shader runs once per sample + if (GetAPIType() == APIType::D3D) + ss << " float4 val = tex0.Load(coords, isample);"; + else + ss << " float4 val = texelFetch(samp0, coords, gl_SampleID);"; + } + else + { + // MSAA without sample shading, average out all samples. 
+ ss << " float4 val = float4(0.0f, 0.0f, 0.0f, 0.0f);\n"; + ss << " for (int i = 0; i < " << samples << "; i++)\n"; + if (GetAPIType() == APIType::D3D) + ss << " val += tex0.Load(coords, i);\n"; + else + ss << " val += texelFetch(samp0, coords, i);\n"; + ss << " val /= float(" << samples << ");\n"; + } + + switch (convtype) + { + case EFBReinterpretType::RGB8ToRGBA6: + ss << " int4 src8 = int4(round(val * 255.f));\n"; + ss << " int4 dst6;\n"; + ss << " dst6.r = src8.r >> 2;\n"; + ss << " dst6.g = ((src8.r & 0x3) << 4) | (src8.g >> 4);\n"; + ss << " dst6.b = ((src8.g & 0xF) << 2) | (src8.b >> 6);\n"; + ss << " dst6.a = src8.b & 0x3F;\n"; + ss << " ocol0 = float4(dst6) / 63.f;\n"; + break; + + case EFBReinterpretType::RGB8ToRGB565: + ss << " ocol0 = val;\n"; + break; + + case EFBReinterpretType::RGBA6ToRGB8: + ss << " int4 src6 = int4(round(val * 63.f));\n"; + ss << " int4 dst8;\n"; + ss << " dst8.r = (src6.r << 2) | (src6.g >> 4);\n"; + ss << " dst8.g = ((src6.g & 0xF) << 4) | (src6.b >> 2);\n"; + ss << " dst8.b = ((src6.b & 0x3) << 6) | src6.a;\n"; + ss << " dst8.a = 255;\n"; + ss << " ocol0 = float4(dst8) / 255.f;\n"; + break; + + case EFBReinterpretType::RGBA6ToRGB565: + ss << " ocol0 = val;\n"; + break; + + case EFBReinterpretType::RGB565ToRGB8: + ss << " ocol0 = val;\n"; + break; + + case EFBReinterpretType::RGB565ToRGBA6: + // + ss << " ocol0 = val;\n"; + break; + } + + ss << "}\n"; + return ss.str(); +} + +} // namespace FramebufferShaderGen diff --git a/Source/Core/VideoCommon/FramebufferShaderGen.h b/Source/Core/VideoCommon/FramebufferShaderGen.h new file mode 100644 index 0000000000..0e065521cf --- /dev/null +++ b/Source/Core/VideoCommon/FramebufferShaderGen.h @@ -0,0 +1,32 @@ +#pragma once +#include +#include "VideoCommon/VideoCommon.h" + +enum class EFBReinterpretType; + +namespace FramebufferShaderGen +{ +struct Config +{ + Config(APIType api_type_, u32 efb_samples_, u32 efb_layers_, bool ssaa_) + : api_type(api_type_), efb_samples(efb_samples_), 
efb_layers(efb_layers_), ssaa(ssaa_) + { + } + + APIType api_type; + u32 efb_samples; + u32 efb_layers; + bool ssaa; +}; + +std::string GenerateScreenQuadVertexShader(); +std::string GeneratePassthroughGeometryShader(u32 num_tex, u32 num_colors); +std::string GenerateTextureCopyVertexShader(); +std::string GenerateTextureCopyPixelShader(); +std::string GenerateResolveDepthPixelShader(u32 samples); +std::string GenerateClearVertexShader(); +std::string GenerateEFBPokeVertexShader(); +std::string GenerateColorPixelShader(); +std::string GenerateFormatConversionShader(EFBReinterpretType convtype, u32 samples); + +} // namespace FramebufferShaderGen diff --git a/Source/Core/VideoCommon/NativeVertexFormat.h b/Source/Core/VideoCommon/NativeVertexFormat.h index 75cf841a32..b7c3477a60 100644 --- a/Source/Core/VideoCommon/NativeVertexFormat.h +++ b/Source/Core/VideoCommon/NativeVertexFormat.h @@ -104,17 +104,17 @@ struct hash class NativeVertexFormat { public: + NativeVertexFormat(const PortableVertexDeclaration& vtx_decl) : m_decl(vtx_decl) {} virtual ~NativeVertexFormat() {} + NativeVertexFormat(const NativeVertexFormat&) = delete; NativeVertexFormat& operator=(const NativeVertexFormat&) = delete; NativeVertexFormat(NativeVertexFormat&&) = default; NativeVertexFormat& operator=(NativeVertexFormat&&) = default; - u32 GetVertexStride() const { return vtx_decl.stride; } - const PortableVertexDeclaration& GetVertexDeclaration() const { return vtx_decl; } + u32 GetVertexStride() const { return m_decl.stride; } + const PortableVertexDeclaration& GetVertexDeclaration() const { return m_decl; } protected: - // Let subclasses construct. 
- NativeVertexFormat() {} - PortableVertexDeclaration vtx_decl; + PortableVertexDeclaration m_decl; }; diff --git a/Source/Core/VideoCommon/PixelShaderGen.cpp b/Source/Core/VideoCommon/PixelShaderGen.cpp index 9fbbb2cda4..9936bcb85c 100644 --- a/Source/Core/VideoCommon/PixelShaderGen.cpp +++ b/Source/Core/VideoCommon/PixelShaderGen.cpp @@ -171,8 +171,7 @@ PixelShaderUid GetPixelShaderUid() uid_data->genMode_numindstages = bpmem.genMode.numindstages; uid_data->genMode_numtevstages = bpmem.genMode.numtevstages; uid_data->genMode_numtexgens = bpmem.genMode.numtexgens; - uid_data->bounding_box = g_ActiveConfig.BBoxUseFragmentShaderImplementation() && - g_ActiveConfig.bBBoxEnable && BoundingBox::active; + uid_data->bounding_box = g_ActiveConfig.bBBoxEnable && BoundingBox::active; uid_data->rgba6_format = bpmem.zcontrol.pixel_format == PEControl::RGBA6_Z24 && !g_ActiveConfig.bForceTrueColor; uid_data->dither = bpmem.blendmode.dither && uid_data->rgba6_format; @@ -456,10 +455,6 @@ void WritePixelShaderCommonHeader(ShaderCode& out, APIType ApiType, u32 num_texg out.Write("globallycoherent RWBuffer bbox_data : register(u2);\n"); } } - - out.Write("struct VS_OUTPUT {\n"); - GenerateVSOutputMembers(out, ApiType, num_texgens, host_config, ""); - out.Write("};\n"); } static void WriteStage(ShaderCode& out, const pixel_shader_uid_data* uid_data, int n, @@ -804,7 +799,7 @@ ShaderCode GeneratePixelShaderCode(APIType ApiType, const ShaderHostConfig& host } else { - if (ApiType == APIType::D3D || ApiType == APIType::Vulkan) + if (!host_config.backend_reversed_depth_range) out.Write("\tint zCoord = int((1.0 - rawpos.z) * 16777216.0);\n"); else out.Write("\tint zCoord = int(rawpos.z * 16777216.0);\n"); @@ -818,7 +813,7 @@ ShaderCode GeneratePixelShaderCode(APIType ApiType, const ShaderHostConfig& host // Note: z-textures are not written to depth buffer if early depth test is used if (uid_data->per_pixel_depth && uid_data->early_ztest) { - if (ApiType == APIType::D3D || ApiType == 
APIType::Vulkan) + if (!host_config.backend_reversed_depth_range) out.Write("\tdepth = 1.0 - float(zCoord) / 16777216.0;\n"); else out.Write("\tdepth = float(zCoord) / 16777216.0;\n"); @@ -839,7 +834,7 @@ ShaderCode GeneratePixelShaderCode(APIType ApiType, const ShaderHostConfig& host if (uid_data->per_pixel_depth && uid_data->late_ztest) { - if (ApiType == APIType::D3D || ApiType == APIType::Vulkan) + if (!host_config.backend_reversed_depth_range) out.Write("\tdepth = 1.0 - float(zCoord) / 16777216.0;\n"); else out.Write("\tdepth = float(zCoord) / 16777216.0;\n"); @@ -1316,7 +1311,7 @@ static void WriteAlphaTest(ShaderCode& out, const pixel_shader_uid_data* uid_dat if (per_pixel_depth) { out.Write("\t\tdepth = %s;\n", - (ApiType == APIType::D3D || ApiType == APIType::Vulkan) ? "0.0" : "1.0"); + !g_ActiveConfig.backend_info.bSupportsReversedDepthRange ? "0.0" : "1.0"); } // ZCOMPLOC HACK: diff --git a/Source/Core/VideoCommon/PixelShaderManager.cpp b/Source/Core/VideoCommon/PixelShaderManager.cpp index 3ff6f9f073..7799a0abcb 100644 --- a/Source/Core/VideoCommon/PixelShaderManager.cpp +++ b/Source/Core/VideoCommon/PixelShaderManager.cpp @@ -522,9 +522,7 @@ void PixelShaderManager::SetBlendModeChanged() void PixelShaderManager::SetBoundingBoxActive(bool active) { - const bool enable = - active && g_ActiveConfig.bBBoxEnable && g_ActiveConfig.BBoxUseFragmentShaderImplementation(); - + const bool enable = active && g_ActiveConfig.bBBoxEnable; if (enable == (constants.bounding_box != 0)) return; diff --git a/Source/Core/VideoCommon/PostProcessing.cpp b/Source/Core/VideoCommon/PostProcessing.cpp index a83469e451..b1c9c42056 100644 --- a/Source/Core/VideoCommon/PostProcessing.cpp +++ b/Source/Core/VideoCommon/PostProcessing.cpp @@ -5,74 +5,44 @@ #include #include +#include "Common/Assert.h" #include "Common/CommonPaths.h" #include "Common/CommonTypes.h" #include "Common/FileSearch.h" #include "Common/FileUtil.h" #include "Common/IniFile.h" #include "Common/Logging/Log.h" 
+#include "Common/MsgHandler.h" #include "Common/StringUtil.h" +#include "VideoCommon/AbstractFramebuffer.h" +#include "VideoCommon/AbstractPipeline.h" +#include "VideoCommon/AbstractShader.h" +#include "VideoCommon/AbstractTexture.h" +#include "VideoCommon/FramebufferManager.h" #include "VideoCommon/PostProcessing.h" +#include "VideoCommon/RenderBase.h" +#include "VideoCommon/ShaderCache.h" +#include "VideoCommon/VertexManagerBase.h" #include "VideoCommon/VideoConfig.h" +namespace VideoCommon +{ static const char s_default_shader[] = "void main() { SetOutput(Sample()); }\n"; -PostProcessingShaderImplementation::PostProcessingShaderImplementation() -{ - m_timer.Start(); -} +PostProcessingConfiguration::PostProcessingConfiguration() = default; -PostProcessingShaderImplementation::~PostProcessingShaderImplementation() -{ - m_timer.Stop(); -} +PostProcessingConfiguration::~PostProcessingConfiguration() = default; -static std::vector GetShaders(const std::string& sub_dir = "") -{ - std::vector paths = - Common::DoFileSearch({File::GetUserPath(D_SHADERS_IDX) + sub_dir, - File::GetSysDirectory() + SHADERS_DIR DIR_SEP + sub_dir}, - {".glsl"}); - std::vector result; - for (std::string path : paths) - { - std::string name; - SplitPath(path, nullptr, &name, nullptr); - result.push_back(name); - } - return result; -} - -std::vector PostProcessingShaderImplementation::GetShaderList(APIType api_type) -{ - // Currently there is no differentiation between API types and shader languages. - // This could change in the future, hence the api_type parameter, but ideally, - // shaders should be compatible across backends. 
- if (api_type == APIType::OpenGL || api_type == APIType::Vulkan) - return GetShaders(); - - return {}; -} - -std::vector PostProcessingShaderImplementation::GetAnaglyphShaderList(APIType api_type) -{ - if (api_type == APIType::OpenGL || api_type == APIType::Vulkan) - return GetShaders(ANAGLYPH_DIR DIR_SEP); - - return {}; -} - -PostProcessingShaderConfiguration::PostProcessingShaderConfiguration() = default; - -PostProcessingShaderConfiguration::~PostProcessingShaderConfiguration() = default; - -std::string PostProcessingShaderConfiguration::LoadShader(std::string shader) +void PostProcessingConfiguration::LoadShader(const std::string& shader) { // Load the shader from the configuration if there isn't one sent to us. - if (shader.empty()) - shader = g_ActiveConfig.sPostProcessingShader; m_current_shader = shader; + if (shader.empty()) + { + LoadDefaultShader(); + return; + } const std::string sub_dir = (g_Config.stereo_mode == StereoMode::Anaglyph) ? ANAGLYPH_DIR DIR_SEP : ""; @@ -81,32 +51,32 @@ std::string PostProcessingShaderConfiguration::LoadShader(std::string shader) std::string code; std::string path = File::GetUserPath(D_SHADERS_IDX) + sub_dir + shader + ".glsl"; - if (shader.empty()) + if (!File::Exists(path)) { - code = s_default_shader; + // Fallback to shared user dir + path = File::GetSysDirectory() + SHADERS_DIR DIR_SEP + sub_dir + shader + ".glsl"; } - else - { - if (!File::Exists(path)) - { - // Fallback to shared user dir - path = File::GetSysDirectory() + SHADERS_DIR DIR_SEP + sub_dir + shader + ".glsl"; - } - if (!File::ReadFileToString(path, code)) - { - ERROR_LOG(VIDEO, "Post-processing shader not found: %s", path.c_str()); - code = s_default_shader; - } + if (!File::ReadFileToString(path, code)) + { + ERROR_LOG(VIDEO, "Post-processing shader not found: %s", path.c_str()); + LoadDefaultShader(); + return; } LoadOptions(code); LoadOptionsConfiguration(); - - return code; + m_current_shader_code = code; } -void 
PostProcessingShaderConfiguration::LoadOptions(const std::string& code) +void PostProcessingConfiguration::LoadDefaultShader() +{ + m_options.clear(); + m_any_options_dirty = false; + m_current_shader_code = s_default_shader; +} + +void PostProcessingConfiguration::LoadOptions(const std::string& code) { const std::string config_start_delimiter = "[configuration]"; const std::string config_end_delimiter = "[/configuration]"; @@ -254,7 +224,7 @@ void PostProcessingShaderConfiguration::LoadOptions(const std::string& code) } } -void PostProcessingShaderConfiguration::LoadOptionsConfiguration() +void PostProcessingConfiguration::LoadOptionsConfiguration() { IniFile ini; ini.Load(File::GetUserPath(F_DOLPHINCONFIG_IDX)); @@ -288,7 +258,7 @@ void PostProcessingShaderConfiguration::LoadOptionsConfiguration() } } -void PostProcessingShaderConfiguration::SaveOptionsConfiguration() +void PostProcessingConfiguration::SaveOptionsConfiguration() { IniFile ini; ini.Load(File::GetUserPath(F_DOLPHINCONFIG_IDX)); @@ -331,13 +301,7 @@ void PostProcessingShaderConfiguration::SaveOptionsConfiguration() ini.Save(File::GetUserPath(F_DOLPHINCONFIG_IDX)); } -void PostProcessingShaderConfiguration::ReloadShader() -{ - m_current_shader = ""; -} - -void PostProcessingShaderConfiguration::SetOptionf(const std::string& option, int index, - float value) +void PostProcessingConfiguration::SetOptionf(const std::string& option, int index, float value) { auto it = m_options.find(option); @@ -346,7 +310,7 @@ void PostProcessingShaderConfiguration::SetOptionf(const std::string& option, in m_any_options_dirty = true; } -void PostProcessingShaderConfiguration::SetOptioni(const std::string& option, int index, s32 value) +void PostProcessingConfiguration::SetOptioni(const std::string& option, int index, s32 value) { auto it = m_options.find(option); @@ -355,7 +319,7 @@ void PostProcessingShaderConfiguration::SetOptioni(const std::string& option, in m_any_options_dirty = true; } -void 
PostProcessingShaderConfiguration::SetOptionb(const std::string& option, bool value) +void PostProcessingConfiguration::SetOptionb(const std::string& option, bool value) { auto it = m_options.find(option); @@ -363,3 +327,384 @@ void PostProcessingShaderConfiguration::SetOptionb(const std::string& option, bo it->second.m_dirty = true; m_any_options_dirty = true; } + +PostProcessing::PostProcessing() +{ + m_timer.Start(); +} + +PostProcessing::~PostProcessing() +{ + m_timer.Stop(); +} + +static std::vector GetShaders(const std::string& sub_dir = "") +{ + std::vector paths = + Common::DoFileSearch({File::GetUserPath(D_SHADERS_IDX) + sub_dir, + File::GetSysDirectory() + SHADERS_DIR DIR_SEP + sub_dir}, + {".glsl"}); + std::vector result; + for (std::string path : paths) + { + std::string name; + SplitPath(path, nullptr, &name, nullptr); + result.push_back(name); + } + return result; +} + +std::vector PostProcessing::GetShaderList() +{ + return GetShaders(); +} + +std::vector PostProcessing::GetAnaglyphShaderList() +{ + return GetShaders(ANAGLYPH_DIR DIR_SEP); +} + +bool PostProcessing::Initialize(AbstractTextureFormat format) +{ + m_framebuffer_format = format; + if (!CompileVertexShader() || !CompilePixelShader() || !CompilePipeline()) + return false; + + return true; +} + +void PostProcessing::RecompileShader() +{ + m_pipeline.reset(); + m_pixel_shader.reset(); + if (!CompilePixelShader()) + return; + + CompilePipeline(); +} + +void PostProcessing::RecompilePipeline() +{ + m_pipeline.reset(); + CompilePipeline(); +} + +void PostProcessing::BlitFromTexture(const MathUtil::Rectangle& dst, + const MathUtil::Rectangle& src, + const AbstractTexture* src_tex, int src_layer) +{ + if (g_renderer->GetCurrentFramebuffer()->GetColorFormat() != m_framebuffer_format) + { + m_framebuffer_format = g_renderer->GetCurrentFramebuffer()->GetColorFormat(); + RecompilePipeline(); + } + + if (!m_pipeline) + return; + + FillUniformBuffer(src, src_tex, src_layer); + 
g_vertex_manager->UploadUtilityUniforms(m_uniform_staging_buffer.data(), + static_cast(m_uniform_staging_buffer.size())); + + g_renderer->SetViewportAndScissor( + g_renderer->ConvertFramebufferRectangle(dst, g_renderer->GetCurrentFramebuffer())); + g_renderer->SetPipeline(m_pipeline.get()); + g_renderer->SetTexture(0, src_tex); + g_renderer->SetSamplerState(0, RenderState::GetLinearSamplerState()); + g_renderer->Draw(0, 3); +} + +std::string PostProcessing::GetUniformBufferHeader() const +{ + std::stringstream ss; + u32 unused_counter = 1; + if (g_ActiveConfig.backend_info.api_type == APIType::D3D) + ss << "cbuffer PSBlock : register(b0) {\n"; + else + ss << "UBO_BINDING(std140, 1) uniform PSBlock {\n"; + + // Builtin uniforms + ss << " float4 resolution;\n"; + ss << " float4 src_rect;\n"; + ss << " uint time;\n"; + ss << " int layer;\n"; + for (u32 i = 0; i < 2; i++) + ss << " uint ubo_align_" << unused_counter++ << "_;\n"; + ss << "\n"; + + // Custom options/uniforms + for (const auto& it : m_config.GetOptions()) + { + if (it.second.m_type == + PostProcessingConfiguration::ConfigurationOption::OptionType::OPTION_BOOL) + { + ss << StringFromFormat(" int %s;\n", it.first.c_str()); + for (u32 i = 0; i < 3; i++) + ss << " int ubo_align_" << unused_counter++ << "_;\n"; + } + else if (it.second.m_type == + PostProcessingConfiguration::ConfigurationOption::OptionType::OPTION_INTEGER) + { + u32 count = static_cast(it.second.m_integer_values.size()); + if (count == 1) + ss << StringFromFormat(" int %s;\n", it.first.c_str()); + else + ss << StringFromFormat(" int%u %s;\n", count, it.first.c_str()); + + for (u32 i = count; i < 4; i++) + ss << " int ubo_align_" << unused_counter++ << "_;\n"; + } + else if (it.second.m_type == + PostProcessingConfiguration::ConfigurationOption::OptionType::OPTION_FLOAT) + { + u32 count = static_cast(it.second.m_float_values.size()); + if (count == 1) + ss << StringFromFormat(" float %s;\n", it.first.c_str()); + else + ss << StringFromFormat(" 
float%u %s;\n", count, it.first.c_str()); + + for (u32 i = count; i < 4; i++) + ss << " float ubo_align_" << unused_counter++ << "_;\n"; + } + } + + ss << "};\n\n"; + return ss.str(); +} + +std::string PostProcessing::GetHeader() const +{ + std::stringstream ss; + ss << GetUniformBufferHeader(); + if (g_ActiveConfig.backend_info.api_type == APIType::D3D) + { + ss << "Texture2DArray samp0 : register(t0);\n"; + ss << "SamplerState samp0_ss : register(s0);\n"; + } + else + { + ss << "SAMPLER_BINDING(0) uniform sampler2DArray samp0;\n"; + ss << "VARYING_LOCATION(0) in float3 v_tex0;\n"; + ss << "FRAGMENT_OUTPUT_LOCATION(0) out float4 ocol0;\n"; + } + + // Rename main, since we need to set up globals + if (g_ActiveConfig.backend_info.api_type == APIType::D3D) + { + ss << R"( +#define main real_main +static float3 v_tex0; +static float4 ocol0; + +// Wrappers for sampling functions. +#define texture(sampler, coords) sampler.Sample(sampler##_ss, coords) +#define textureOffset(sampler, coords, offset) sampler.Sample(sampler##_ss, coords, offset) +)"; + } + + ss << R"( +float4 Sample() { return texture(samp0, float3(v_tex0.xy, float(layer))); } +float4 SampleLocation(float2 location) { return texture(samp0, float3(location, float(layer))); } +float4 SampleLayer(int layer) { return texture(samp0, float3(v_tex0.xy, float(layer))); } +#define SampleOffset(offset) textureOffset(samp0, float3(v_tex0.xy, float(layer)), offset) + +float2 GetResolution() +{ + return resolution.xy; +} + +float2 GetInvResolution() +{ + return resolution.zw; +} + +float2 GetCoordinates() +{ + return v_tex0.xy; +} + +uint GetTime() +{ + return time; +} + +void SetOutput(float4 color) +{ + ocol0 = color; +} + +#define GetOption(x) (x) +#define OptionEnabled(x) ((x) != 0) + +)"; + return ss.str(); +} + +std::string PostProcessing::GetFooter() const +{ + if (g_ActiveConfig.backend_info.api_type == APIType::D3D) + { + return R"( + +#undef main +void main(in float3 v_tex0_ : TEXCOORD0, out float4 ocol0_ : 
SV_Target) +{ + v_tex0 = v_tex0_; + real_main(); + ocol0_ = ocol0; +})"; + } + else + { + return {}; + } +} + +bool PostProcessing::CompileVertexShader() +{ + std::stringstream ss; + ss << GetUniformBufferHeader(); + + if (g_ActiveConfig.backend_info.api_type == APIType::D3D) + { + ss << "void main(in uint id : SV_VertexID, out float3 v_tex0 : TEXCOORD0,\n"; + ss << " out float4 opos : SV_Position) {\n"; + } + else + { + ss << "VARYING_LOCATION(0) out float3 v_tex0;\n"; + ss << "#define id gl_VertexID\n"; + ss << "#define opos gl_Position\n"; + ss << "void main() {\n"; + } + ss << " v_tex0 = float3(float((id << 1) & 2), float(id & 2), 0.0f);\n"; + ss << " opos = float4(v_tex0.xy * float2(2.0f, -2.0f) + float2(-1.0f, 1.0f), 0.0f, 1.0f);\n"; + ss << " v_tex0 = float3(src_rect.xy + (src_rect.zw * v_tex0.xy), 0.0f);\n"; + + if (g_ActiveConfig.backend_info.api_type == APIType::Vulkan) + ss << " opos.y = -opos.y;\n"; + + ss << "}\n"; + + m_vertex_shader = g_renderer->CreateShaderFromSource(ShaderStage::Vertex, ss.str()); + if (!m_vertex_shader) + { + PanicAlert("Failed to compile post-processing vertex shader"); + return false; + } + + return true; +} + +struct BuiltinUniforms +{ + float resolution[4]; + float src_rect[4]; + s32 time; + u32 layer; + u32 padding[2]; +}; + +size_t PostProcessing::CalculateUniformsSize() const +{ + // Allocate a vec4 for each uniform to simplify allocation. 
+ return sizeof(BuiltinUniforms) + m_config.GetOptions().size() * sizeof(float) * 4; +} + +void PostProcessing::FillUniformBuffer(const MathUtil::Rectangle& src, + const AbstractTexture* src_tex, int src_layer) +{ + const float rcp_src_width = 1.0f / src_tex->GetWidth(); + const float rcp_src_height = 1.0f / src_tex->GetHeight(); + BuiltinUniforms builtin_uniforms = { + {static_cast(src_tex->GetWidth()), static_cast(src_tex->GetHeight()), + rcp_src_width, rcp_src_height}, + {static_cast(src.left) * rcp_src_width, static_cast(src.top) * rcp_src_height, + static_cast(src.GetWidth()) * rcp_src_width, + static_cast(src.GetHeight()) * rcp_src_height}, + static_cast(m_timer.GetTimeElapsed()), + static_cast(src_layer), + }; + + u8* buf = m_uniform_staging_buffer.data(); + std::memcpy(buf, &builtin_uniforms, sizeof(builtin_uniforms)); + buf += sizeof(builtin_uniforms); + + for (const auto& it : m_config.GetOptions()) + { + union + { + u32 as_bool[4]; + s32 as_int[4]; + float as_float[4]; + } value = {}; + + switch (it.second.m_type) + { + case PostProcessingConfiguration::ConfigurationOption::OptionType::OPTION_BOOL: + value.as_bool[0] = it.second.m_bool_value ? 1 : 0; + break; + + case PostProcessingConfiguration::ConfigurationOption::OptionType::OPTION_INTEGER: + ASSERT(it.second.m_integer_values.size() < 4); + std::copy_n(it.second.m_integer_values.begin(), it.second.m_integer_values.size(), + value.as_int); + break; + + case PostProcessingConfiguration::ConfigurationOption::OptionType::OPTION_FLOAT: + ASSERT(it.second.m_float_values.size() < 4); + std::copy_n(it.second.m_float_values.begin(), it.second.m_float_values.size(), + value.as_float); + break; + } + + std::memcpy(buf, &value, sizeof(value)); + buf += sizeof(value); + } +} + +bool PostProcessing::CompilePixelShader() +{ + m_pipeline.reset(); + m_pixel_shader.reset(); + + // Generate GLSL and compile the new shader. 
+ m_config.LoadShader(g_ActiveConfig.sPostProcessingShader); + m_pixel_shader = g_renderer->CreateShaderFromSource( + ShaderStage::Pixel, GetHeader() + m_config.GetShaderCode() + GetFooter()); + if (!m_pixel_shader) + { + PanicAlert("Failed to compile post-processing shader %s", m_config.GetShader().c_str()); + + // Use default shader. + m_config.LoadDefaultShader(); + m_pixel_shader = g_renderer->CreateShaderFromSource( + ShaderStage::Pixel, GetHeader() + m_config.GetShaderCode() + GetFooter()); + if (!m_pixel_shader) + return false; + } + + m_uniform_staging_buffer.resize(CalculateUniformsSize()); + return true; +} + +bool PostProcessing::CompilePipeline() +{ + AbstractPipelineConfig config = {}; + config.vertex_shader = m_vertex_shader.get(); + config.geometry_shader = g_ActiveConfig.stereo_mode == StereoMode::QuadBuffer ? + g_shader_cache->GetTexcoordGeometryShader() : + nullptr; + config.pixel_shader = m_pixel_shader.get(); + config.rasterization_state = RenderState::GetNoCullRasterizationState(PrimitiveType::Triangles); + config.depth_state = RenderState::GetNoDepthTestingDepthState(); + config.blending_state = RenderState::GetNoBlendingBlendState(); + config.framebuffer_state = RenderState::GetColorFramebufferState(m_framebuffer_format); + config.usage = AbstractPipelineUsage::Utility; + m_pipeline = g_renderer->CreatePipeline(config); + if (!m_pipeline) + return false; + + return true; +} +} // namespace VideoCommon diff --git a/Source/Core/VideoCommon/PostProcessing.h b/Source/Core/VideoCommon/PostProcessing.h index e610b7fc38..ede8adaf6e 100644 --- a/Source/Core/VideoCommon/PostProcessing.h +++ b/Source/Core/VideoCommon/PostProcessing.h @@ -10,9 +10,16 @@ #include "Common/CommonTypes.h" #include "Common/Timer.h" +#include "VideoCommon/TextureConfig.h" #include "VideoCommon/VideoCommon.h" -class PostProcessingShaderConfiguration +class AbstractTexture; +class AbstractPipeline; +class AbstractShader; + +namespace VideoCommon +{ +class 
PostProcessingConfiguration { public: struct ConfigurationOption @@ -48,16 +55,17 @@ public: using ConfigMap = std::map; - PostProcessingShaderConfiguration(); - virtual ~PostProcessingShaderConfiguration(); + PostProcessingConfiguration(); + virtual ~PostProcessingConfiguration(); // Loads the configuration with a shader // If the argument is "" the class will load the shader from the g_activeConfig option. // Returns the loaded shader source from file - std::string LoadShader(std::string shader = ""); + void LoadShader(const std::string& shader); + void LoadDefaultShader(); void SaveOptionsConfiguration(); - void ReloadShader(); const std::string& GetShader() const { return m_current_shader; } + const std::string& GetShaderCode() const { return m_current_shader_code; } bool IsDirty() const { return m_any_options_dirty; } void SetDirty(bool dirty) { m_any_options_dirty = dirty; } bool HasOptions() const { return m_options.size() > 0; } @@ -72,26 +80,53 @@ public: private: bool m_any_options_dirty = false; std::string m_current_shader; + std::string m_current_shader_code; ConfigMap m_options; void LoadOptions(const std::string& code); void LoadOptionsConfiguration(); }; -class PostProcessingShaderImplementation +class PostProcessing { public: - PostProcessingShaderImplementation(); - virtual ~PostProcessingShaderImplementation(); + PostProcessing(); + virtual ~PostProcessing(); - static std::vector GetShaderList(APIType api_type); - static std::vector GetAnaglyphShaderList(APIType api_type); + static std::vector GetShaderList(); + static std::vector GetAnaglyphShaderList(); - PostProcessingShaderConfiguration* GetConfig() { return &m_config; } + PostProcessingConfiguration* GetConfig() { return &m_config; } + + bool Initialize(AbstractTextureFormat format); + + void RecompileShader(); + void RecompilePipeline(); + + void BlitFromTexture(const MathUtil::Rectangle& dst, const MathUtil::Rectangle& src, + const AbstractTexture* src_tex, int src_layer); protected: + 
std::string GetUniformBufferHeader() const; + std::string GetHeader() const; + std::string GetFooter() const; + + bool CompileVertexShader(); + bool CompilePixelShader(); + bool CompilePipeline(); + + size_t CalculateUniformsSize() const; + void FillUniformBuffer(const MathUtil::Rectangle& src, const AbstractTexture* src_tex, + int src_layer); + // Timer for determining our time value Common::Timer m_timer; + PostProcessingConfiguration m_config; - PostProcessingShaderConfiguration m_config; + std::unique_ptr m_vertex_shader; + std::unique_ptr m_pixel_shader; + std::unique_ptr m_pipeline; + AbstractTextureFormat m_framebuffer_format = AbstractTextureFormat::Undefined; + std::vector m_uniform_staging_buffer; }; +} // namespace VideoCommon diff --git a/Source/Core/VideoCommon/RenderBase.cpp b/Source/Core/VideoCommon/RenderBase.cpp index 5a5b1c5aba..132ce4bd6c 100644 --- a/Source/Core/VideoCommon/RenderBase.cpp +++ b/Source/Core/VideoCommon/RenderBase.cpp @@ -50,14 +50,15 @@ #include "VideoCommon/AbstractFramebuffer.h" #include "VideoCommon/AbstractStagingTexture.h" #include "VideoCommon/AbstractTexture.h" +#include "VideoCommon/BPFunctions.h" #include "VideoCommon/BPMemory.h" #include "VideoCommon/CPMemory.h" #include "VideoCommon/CommandProcessor.h" -#include "VideoCommon/Debugger.h" #include "VideoCommon/FPSCounter.h" -#include "VideoCommon/FramebufferManagerBase.h" +#include "VideoCommon/FramebufferManager.h" #include "VideoCommon/ImageWrite.h" #include "VideoCommon/OnScreenDisplay.h" +#include "VideoCommon/PixelEngine.h" #include "VideoCommon/PixelShaderManager.h" #include "VideoCommon/PostProcessing.h" #include "VideoCommon/ShaderCache.h" @@ -68,12 +69,10 @@ #include "VideoCommon/VertexLoaderManager.h" #include "VideoCommon/VertexManagerBase.h" #include "VideoCommon/VertexShaderManager.h" +#include "VideoCommon/VideoBackendBase.h" #include "VideoCommon/VideoConfig.h" #include "VideoCommon/XFMemory.h" -// TODO: Move these out of here. 
-int frameCount; - std::unique_ptr g_renderer; static float AspectToWidescreen(float aspect) @@ -97,7 +96,14 @@ Renderer::~Renderer() = default; bool Renderer::Initialize() { - return InitializeImGui(); + if (!InitializeImGui()) + return false; + + m_post_processor = std::make_unique(); + if (!m_post_processor->Initialize(m_backbuffer_format)) + return false; + + return true; } void Renderer::Shutdown() @@ -106,6 +112,142 @@ void Renderer::Shutdown() // can require additional graphics sub-systems so it needs to be done first ShutdownFrameDumping(); ShutdownImGui(); + m_post_processor.reset(); +} + +void Renderer::BeginUtilityDrawing() +{ + g_vertex_manager->Flush(); +} + +void Renderer::EndUtilityDrawing() +{ + // Reset framebuffer/scissor/viewport. Pipeline will be reset at next draw. + g_framebuffer_manager->BindEFBFramebuffer(); + BPFunctions::SetScissor(); + BPFunctions::SetViewport(); +} + +void Renderer::SetFramebuffer(AbstractFramebuffer* framebuffer) +{ + m_current_framebuffer = framebuffer; +} + +void Renderer::SetAndDiscardFramebuffer(AbstractFramebuffer* framebuffer) +{ + m_current_framebuffer = framebuffer; +} + +void Renderer::SetAndClearFramebuffer(AbstractFramebuffer* framebuffer, + const ClearColor& color_value, float depth_value) +{ + m_current_framebuffer = framebuffer; +} + +std::unique_ptr Renderer::CreateShaderFromSource(ShaderStage stage, + const std::string& source) +{ + return CreateShaderFromSource(stage, source.c_str(), source.size()); +} + +void Renderer::ClearScreen(const EFBRectangle& rc, bool colorEnable, bool alphaEnable, bool zEnable, + u32 color, u32 z) +{ + g_framebuffer_manager->ClearEFB(rc, colorEnable, alphaEnable, zEnable, color, z); +} + +void Renderer::ReinterpretPixelData(EFBReinterpretType convtype) +{ + g_framebuffer_manager->ReinterpretPixelData(convtype); +} + +u32 Renderer::AccessEFB(EFBAccessType type, u32 x, u32 y, u32 poke_data) +{ + if (type == EFBAccessType::PeekColor) + { + u32 color = 
g_framebuffer_manager->PeekEFBColor(x, y); + + // a little-endian value is expected to be returned + color = ((color & 0xFF00FF00) | ((color >> 16) & 0xFF) | ((color << 16) & 0xFF0000)); + + // check what to do with the alpha channel (GX_PokeAlphaRead) + PixelEngine::UPEAlphaReadReg alpha_read_mode = PixelEngine::GetAlphaReadMode(); + + if (bpmem.zcontrol.pixel_format == PEControl::RGBA6_Z24) + { + color = RGBA8ToRGBA6ToRGBA8(color); + } + else if (bpmem.zcontrol.pixel_format == PEControl::RGB565_Z16) + { + color = RGBA8ToRGB565ToRGBA8(color); + } + if (bpmem.zcontrol.pixel_format != PEControl::RGBA6_Z24) + { + color |= 0xFF000000; + } + + if (alpha_read_mode.ReadMode == 2) + { + return color; // GX_READ_NONE + } + else if (alpha_read_mode.ReadMode == 1) + { + return color | 0xFF000000; // GX_READ_FF + } + else /*if(alpha_read_mode.ReadMode == 0)*/ + { + return color & 0x00FFFFFF; // GX_READ_00 + } + } + else // if (type == EFBAccessType::PeekZ) + { + // Depth buffer is inverted for improved precision near far plane + float depth = g_framebuffer_manager->PeekEFBDepth(x, y); + if (!g_ActiveConfig.backend_info.bSupportsReversedDepthRange) + depth = 1.0f - depth; + + u32 ret = 0; + if (bpmem.zcontrol.pixel_format == PEControl::RGB565_Z16) + { + // if Z is in 16 bit format you must return a 16 bit integer + ret = MathUtil::Clamp(static_cast(depth * 65536.0f), 0, 0xFFFF); + } + else + { + ret = MathUtil::Clamp(static_cast(depth * 16777216.0f), 0, 0xFFFFFF); + } + + return ret; + } +} + +void Renderer::PokeEFB(EFBAccessType type, const EfbPokeData* points, size_t num_points) +{ + if (type == EFBAccessType::PokeColor) + { + for (size_t i = 0; i < num_points; i++) + { + // Convert to expected format (BGRA->RGBA) + // TODO: Check alpha, depending on mode? 
+ const EfbPokeData& point = points[i]; + u32 color = ((point.data & 0xFF00FF00) | ((point.data >> 16) & 0xFF) | + ((point.data << 16) & 0xFF0000)); + g_framebuffer_manager->PokeEFBColor(point.x, point.y, color); + } + } + else // if (type == EFBAccessType::PokeZ) + { + for (size_t i = 0; i < num_points; i++) + { + // Convert to floating-point depth. + const EfbPokeData& point = points[i]; + float depth = float(point.data & 0xFFFFFF) / 16777216.0f; + if (!g_ActiveConfig.backend_info.bSupportsReversedDepthRange) + depth = 1.0f - depth; + + g_framebuffer_manager->PokeEFBDepth(point.x, point.y, depth); + } + } } void Renderer::RenderToXFB(u32 xfbAddr, const EFBRectangle& sourceRc, u32 fbStride, u32 fbHeight, @@ -169,6 +311,8 @@ bool Renderer::CalculateTargetSize() int new_efb_width = 0; int new_efb_height = 0; std::tie(new_efb_width, new_efb_height) = CalculateTargetScale(EFB_WIDTH, EFB_HEIGHT); + new_efb_width = std::max(new_efb_width, 1); + new_efb_height = std::max(new_efb_height, 1); if (new_efb_width != m_target_width || new_efb_height != m_target_height) { @@ -251,6 +395,11 @@ void Renderer::CheckForConfigChanges() // Update texture cache settings with any changed options. g_texture_cache->OnConfigChanged(g_ActiveConfig); + // Check for post-processing shader changes. Done up here as it doesn't affect anything outside + // the post-processor. Note that options are applied every frame, so no need to check those. + if (m_post_processor->GetConfig()->GetShader() != g_ActiveConfig.sPostProcessingShader) + m_post_processor->RecompileShader(); + // Determine which (if any) settings have changed. ShaderHostConfig new_host_config = ShaderHostConfig::GetCurrent(); u32 changed_bits = 0; @@ -278,13 +427,30 @@ void Renderer::CheckForConfigChanges() // Notify the backend of the changes, if any. OnConfigChanged(changed_bits); + // Framebuffer changed? 
+ if (changed_bits & (CONFIG_CHANGE_BIT_MULTISAMPLES | CONFIG_CHANGE_BIT_STEREO_MODE | + CONFIG_CHANGE_BIT_TARGET_SIZE)) + { + g_framebuffer_manager->RecreateEFBFramebuffer(); + } + // Reload shaders if host config has changed. if (changed_bits & (CONFIG_CHANGE_BIT_HOST_CONFIG | CONFIG_CHANGE_BIT_MULTISAMPLES)) { OSD::AddMessage("Video config changed, reloading shaders.", OSD::Duration::NORMAL); + WaitForGPUIdle(); SetPipeline(nullptr); g_vertex_manager->InvalidatePipelineObject(); - g_shader_cache->SetHostConfig(new_host_config, g_ActiveConfig.iMultisamples); + g_shader_cache->SetHostConfig(new_host_config); + g_shader_cache->Reload(); + g_framebuffer_manager->RecompileShaders(); + } + + // Viewport and scissor rect have to be reset since they will be scaled differently. + if (changed_bits & CONFIG_CHANGE_BIT_TARGET_SIZE) + { + BPFunctions::SetViewport(); + BPFunctions::SetScissor(); } } @@ -389,6 +555,86 @@ void Renderer::ResizeSurface() m_surface_resized.Set(); } +void Renderer::SetViewportAndScissor(const MathUtil::Rectangle& rect, float min_depth, + float max_depth) +{ + SetViewport(static_cast(rect.left), static_cast(rect.top), + static_cast(rect.GetWidth()), static_cast(rect.GetHeight()), min_depth, + max_depth); + SetScissorRect(rect); +} + +void Renderer::ScaleTexture(AbstractFramebuffer* dst_framebuffer, + const MathUtil::Rectangle& dst_rect, + const AbstractTexture* src_texture, + const MathUtil::Rectangle& src_rect) +{ + ASSERT(dst_framebuffer->GetColorFormat() == AbstractTextureFormat::RGBA8); + + BeginUtilityDrawing(); + + // The shader needs to know the source rectangle. 
+ const auto converted_src_rect = g_renderer->ConvertFramebufferRectangle( + src_rect, src_texture->GetWidth(), src_texture->GetHeight()); + const float rcp_src_width = 1.0f / src_texture->GetWidth(); + const float rcp_src_height = 1.0f / src_texture->GetHeight(); + const std::array uniforms = {{converted_src_rect.left * rcp_src_width, + converted_src_rect.top * rcp_src_height, + converted_src_rect.GetWidth() * rcp_src_width, + converted_src_rect.GetHeight() * rcp_src_height}}; + g_vertex_manager->UploadUtilityUniforms(&uniforms, sizeof(uniforms)); + + // Discard if we're overwriting the whole thing. + if (static_cast(dst_rect.GetWidth()) == dst_framebuffer->GetWidth() && + static_cast(dst_rect.GetHeight()) == dst_framebuffer->GetHeight()) + { + SetAndDiscardFramebuffer(dst_framebuffer); + } + else + { + SetFramebuffer(dst_framebuffer); + } + + SetViewportAndScissor(ConvertFramebufferRectangle(dst_rect, dst_framebuffer)); + SetPipeline(dst_framebuffer->GetLayers() > 1 ? g_shader_cache->GetRGBA8StereoCopyPipeline() : + g_shader_cache->GetRGBA8CopyPipeline()); + SetTexture(0, src_texture); + SetSamplerState(0, RenderState::GetLinearSamplerState()); + Draw(0, 3); + EndUtilityDrawing(); + if (dst_framebuffer->GetColorAttachment()) + dst_framebuffer->GetColorAttachment()->FinishedRendering(); +} + +MathUtil::Rectangle +Renderer::ConvertFramebufferRectangle(const MathUtil::Rectangle& rect, + const AbstractFramebuffer* framebuffer) +{ + return ConvertFramebufferRectangle(rect, framebuffer->GetWidth(), framebuffer->GetHeight()); +} + +MathUtil::Rectangle Renderer::ConvertFramebufferRectangle(const MathUtil::Rectangle& rect, + u32 fb_width, u32 fb_height) +{ + MathUtil::Rectangle ret = rect; + if (g_ActiveConfig.backend_info.bUsesLowerLeftOrigin) + { + ret.top = fb_height - rect.bottom; + ret.bottom = fb_height - rect.top; + } + return ret; +} + +TargetRectangle Renderer::ConvertEFBRectangle(const EFBRectangle& rc) +{ + TargetRectangle result; + result.left = 
EFBToScaledX(rc.left); + result.top = EFBToScaledY(rc.top); + result.right = EFBToScaledX(rc.right); + result.bottom = EFBToScaledY(rc.bottom); + return result; +} + std::tuple Renderer::ScaleToDisplayAspectRatio(const int width, const int height) const { @@ -700,7 +946,7 @@ bool Renderer::InitializeImGui() vdecl.texcoords[0] = {VAR_FLOAT, 2, offsetof(ImDrawVert, uv), true, false}; vdecl.colors[0] = {VAR_UNSIGNED_BYTE, 4, offsetof(ImDrawVert, col), true, false}; vdecl.stride = sizeof(ImDrawVert); - m_imgui_vertex_format = g_vertex_manager->CreateNativeVertexFormat(vdecl); + m_imgui_vertex_format = CreateNativeVertexFormat(vdecl); if (!m_imgui_vertex_format) { PanicAlert("Failed to create imgui vertex format"); @@ -723,10 +969,10 @@ bool Renderer::InitializeImGui() pconfig.vertex_format = m_imgui_vertex_format.get(); pconfig.vertex_shader = vertex_shader.get(); pconfig.pixel_shader = pixel_shader.get(); - pconfig.rasterization_state.hex = RenderState::GetNoCullRasterizationState().hex; - pconfig.rasterization_state.primitive = PrimitiveType::Triangles; - pconfig.depth_state.hex = RenderState::GetNoDepthTestingDepthStencilState().hex; - pconfig.blending_state.hex = RenderState::GetNoBlendingBlendState().hex; + pconfig.rasterization_state = + RenderState::GetCullBackFaceRasterizationState(PrimitiveType::Triangles); + pconfig.depth_state = RenderState::GetNoDepthTestingDepthState(); + pconfig.blending_state = RenderState::GetNoBlendingBlendState(); pconfig.blending_state.blendenable = true; pconfig.blending_state.srcfactor = BlendMode::SRCALPHA; pconfig.blending_state.dstfactor = BlendMode::INVSRCALPHA; @@ -752,7 +998,7 @@ bool Renderer::InitializeImGui() io.Fonts->GetTexDataAsRGBA32(&font_tex_pixels, &font_tex_width, &font_tex_height); TextureConfig font_tex_config(font_tex_width, font_tex_height, 1, 1, 1, - AbstractTextureFormat::RGBA8, false); + AbstractTextureFormat::RGBA8, 0); std::unique_ptr font_tex = CreateTexture(font_tex_config); if (!font_tex) { @@ -799,10 
+1045,8 @@ void Renderer::BeginImGuiFrame() ImGui::NewFrame(); } -void Renderer::RenderImGui() +void Renderer::DrawImGui() { - ImGui::Render(); - ImDrawData* draw_data = ImGui::GetDrawData(); if (!draw_data) return; @@ -842,9 +1086,11 @@ void Renderer::RenderImGui() continue; } - SetScissorRect(MathUtil::Rectangle( - static_cast(cmd.ClipRect.x), static_cast(cmd.ClipRect.y), - static_cast(cmd.ClipRect.z), static_cast(cmd.ClipRect.w))); + SetScissorRect(ConvertFramebufferRectangle( + MathUtil::Rectangle( + static_cast(cmd.ClipRect.x), static_cast(cmd.ClipRect.y), + static_cast(cmd.ClipRect.z), static_cast(cmd.ClipRect.w)), + m_current_framebuffer)); SetTexture(0, reinterpret_cast(cmd.TextureId)); DrawIndexed(base_index, cmd.ElemCount, base_vertex); base_index += cmd.ElemCount; @@ -859,7 +1105,10 @@ std::unique_lock Renderer::GetImGuiLock() void Renderer::BeginUIFrame() { - ResetAPIState(); + if (IsHeadless()) + return; + + BeginUtilityDrawing(); BindBackbuffer({0.0f, 0.0f, 0.0f, 1.0f}); } @@ -867,16 +1116,19 @@ void Renderer::EndUIFrame() { { auto lock = GetImGuiLock(); - RenderImGui(); + ImGui::Render(); } + if (!IsHeadless()) { + DrawImGui(); + std::lock_guard guard(m_swap_mutex); PresentBackbuffer(); + EndUtilityDrawing(); } BeginImGuiFrame(); - RestoreAPIState(); } void Renderer::Swap(u32 xfbAddr, u32 fbWidth, u32 fbStride, u32 fbHeight, const EFBRectangle& rc, @@ -946,32 +1198,34 @@ void Renderer::Swap(u32 xfbAddr, u32 fbWidth, u32 fbStride, u32 fbHeight, const // with the loader, and it has not been unmapped yet. Force a pipeline flush to avoid this. g_vertex_manager->Flush(); - // Render the XFB to the screen. - ResetAPIState(); - BindBackbuffer({0.0f, 0.0f, 0.0f, 1.0f}); - UpdateDrawRectangle(); - RenderXFBToScreen(xfb_entry->texture.get(), xfb_rect); - - // Hold the imgui lock while we're presenting. - // It's only to prevent races on inputs anyway, at this point. + // Render any UI elements to the draw list. 
{ auto lock = GetImGuiLock(); DrawDebugText(); OSD::DrawMessages(); - - RenderImGui(); + ImGui::Render(); } - // Present to the window system. + // Render the XFB to the screen. + BeginUtilityDrawing(); + if (!IsHeadless()) { - std::lock_guard guard(m_swap_mutex); - PresentBackbuffer(); - } + BindBackbuffer({{0.0f, 0.0f, 0.0f, 1.0f}}); + UpdateDrawRectangle(); + RenderXFBToScreen(xfb_entry->texture.get(), xfb_rect); + DrawImGui(); - // Update the window size based on the frame that was just rendered. - // Due to depending on guest state, we need to call this every frame. - SetWindowSize(texture_config.width, texture_config.height); + // Present to the window system. + { + std::lock_guard guard(m_swap_mutex); + PresentBackbuffer(); + } + + // Update the window size based on the frame that was just rendered. + // Due to depending on guest state, we need to call this every frame. + SetWindowSize(texture_config.width, texture_config.height); + } m_fps_counter.Update(); @@ -984,12 +1238,11 @@ void Renderer::Swap(u32 xfbAddr, u32 fbWidth, u32 fbStride, u32 fbHeight, const if (IsFrameDumping()) DumpCurrentFrame(); - frameCount++; - GFX_DEBUGGER_PAUSE_AT(NEXT_FRAME, true); - // Begin new frame + m_frame_count++; stats.ResetFrame(); g_shader_cache->RetrieveAsyncShaders(); + g_vertex_manager->OnEndFrame(); BeginImGuiFrame(); // We invalidate the pipeline object at the start of the frame. @@ -1003,13 +1256,13 @@ void Renderer::Swap(u32 xfbAddr, u32 fbWidth, u32 fbStride, u32 fbHeight, const g_texture_cache->FlushEFBCopies(); // Remove stale EFB/XFB copies. - g_texture_cache->Cleanup(frameCount); + g_texture_cache->Cleanup(m_frame_count); // Handle any config changes, this gets propogated to the backend. 
CheckForConfigChanges(); g_Config.iSaveTargetId = 0; - RestoreAPIState(); + EndUtilityDrawing(); Core::Callback_VideoCopiedToXFB(true); } @@ -1028,6 +1281,24 @@ void Renderer::Swap(u32 xfbAddr, u32 fbWidth, u32 fbStride, u32 fbHeight, const } } +void Renderer::RenderXFBToScreen(const AbstractTexture* texture, const EFBRectangle& rc) +{ + const auto target_rc = GetTargetRectangle(); + if (g_ActiveConfig.stereo_mode == StereoMode::SBS || + g_ActiveConfig.stereo_mode == StereoMode::TAB) + { + TargetRectangle left_rc, right_rc; + std::tie(left_rc, right_rc) = ConvertStereoRectangle(target_rc); + + m_post_processor->BlitFromTexture(left_rc, rc, texture, 0); + m_post_processor->BlitFromTexture(right_rc, rc, texture, 1); + } + else + { + m_post_processor->BlitFromTexture(target_rc, rc, texture, 0); + } +} + bool Renderer::IsFrameDumping() { if (m_screenshot_request.IsSet()) @@ -1040,15 +1311,6 @@ bool Renderer::IsFrameDumping() } void Renderer::DumpCurrentFrame() -{ - // Scale/render to frame dump texture. - RenderFrameDump(); - - // Queue a readback for the next frame. - QueueFrameDumpReadback(); -} - -void Renderer::RenderFrameDump() { int target_width, target_height; if (!g_ActiveConfig.bInternalResolutionFrameDumps && !IsHeadless()) @@ -1063,50 +1325,76 @@ void Renderer::RenderFrameDump() m_last_xfb_texture->GetConfig().width, m_last_xfb_texture->GetConfig().height); } - // Ensure framebuffer exists (we lazily allocate it in case frame dumping isn't used). - // Or, resize texture if it isn't large enough to accommodate the current frame. - if (!m_frame_dump_render_texture || - m_frame_dump_render_texture->GetConfig().width != static_cast(target_width) || - m_frame_dump_render_texture->GetConfig().height != static_cast(target_height)) + // We only need to render a copy if we need to stretch/scale the XFB copy. 
+ const AbstractTexture* source_tex = m_last_xfb_texture; + MathUtil::Rectangle source_rect = m_last_xfb_region; + if (source_rect.GetWidth() != target_width || source_rect.GetHeight() != target_height) { - // Recreate texture objects. Release before creating so we don't temporarily use twice the RAM. - TextureConfig config(target_width, target_height, 1, 1, 1, AbstractTextureFormat::RGBA8, true); - m_frame_dump_render_texture.reset(); - m_frame_dump_render_texture = CreateTexture(config); - ASSERT(m_frame_dump_render_texture); + if (!CheckFrameDumpRenderTexture(target_width, target_height)) + return; + + source_tex = m_frame_dump_render_texture.get(); + source_rect = MathUtil::Rectangle(0, 0, target_width, target_height); + ScaleTexture(m_frame_dump_render_framebuffer.get(), source_rect, m_last_xfb_texture, + m_last_xfb_region); } - // Scaling is likely to occur here, but if possible, do a bit-for-bit copy. - if (m_last_xfb_region.GetWidth() != target_width || - m_last_xfb_region.GetHeight() != target_height) - { - m_frame_dump_render_texture->ScaleRectangleFromTexture( - m_last_xfb_texture, m_last_xfb_region, EFBRectangle{0, 0, target_width, target_height}); - } - else - { - m_frame_dump_render_texture->CopyRectangleFromTexture( - m_last_xfb_texture, m_last_xfb_region, 0, 0, - EFBRectangle{0, 0, target_width, target_height}, 0, 0); - } -} - -void Renderer::QueueFrameDumpReadback() -{ // Index 0 was just sent to AVI dump. Swap with the second texture. 
if (m_frame_dump_readback_textures[0]) std::swap(m_frame_dump_readback_textures[0], m_frame_dump_readback_textures[1]); - std::unique_ptr& rbtex = m_frame_dump_readback_textures[0]; - if (!rbtex || rbtex->GetConfig() != m_frame_dump_render_texture->GetConfig()) - { - rbtex = CreateStagingTexture(StagingTextureType::Readback, - m_frame_dump_render_texture->GetConfig()); - } + if (!CheckFrameDumpReadbackTexture(target_width, target_height)) + return; + const auto converted_region = + ConvertFramebufferRectangle(source_rect, source_tex->GetWidth(), source_tex->GetHeight()); + m_frame_dump_readback_textures[0]->CopyFromTexture( + source_tex, converted_region, 0, 0, + MathUtil::Rectangle(0, 0, target_width, target_height)); m_last_frame_state = AVIDump::FetchState(m_last_xfb_ticks); m_last_frame_exported = true; - rbtex->CopyFromTexture(m_frame_dump_render_texture.get(), 0, 0); +} + +bool Renderer::CheckFrameDumpRenderTexture(u32 target_width, u32 target_height) +{ + // Ensure framebuffer exists (we lazily allocate it in case frame dumping isn't used). + // Or, resize texture if it isn't large enough to accommodate the current frame. + if (m_frame_dump_render_texture && m_frame_dump_render_texture->GetWidth() == target_width && + m_frame_dump_render_texture->GetHeight() == target_height) + { + return true; + } + + // Recreate texture, but release before creating so we don't temporarily use twice the RAM. 
+ m_frame_dump_render_framebuffer.reset(); + m_frame_dump_render_texture.reset(); + m_frame_dump_render_texture = + CreateTexture(TextureConfig(target_width, target_height, 1, 1, 1, + AbstractTextureFormat::RGBA8, AbstractTextureFlag_RenderTarget)); + if (!m_frame_dump_render_texture) + { + PanicAlert("Failed to allocate frame dump render texture"); + return false; + } + m_frame_dump_render_framebuffer = CreateFramebuffer(m_frame_dump_render_texture.get(), nullptr); + ASSERT(m_frame_dump_render_framebuffer); + return true; +} + +bool Renderer::CheckFrameDumpReadbackTexture(u32 target_width, u32 target_height) +{ + std::unique_ptr& rbtex = m_frame_dump_readback_textures[0]; + if (rbtex && rbtex->GetWidth() == target_width && rbtex->GetHeight() == target_height) + return true; + + rbtex.reset(); + rbtex = CreateStagingTexture( + StagingTextureType::Readback, + TextureConfig(target_width, target_height, 1, 1, 1, AbstractTextureFormat::RGBA8, 0)); + if (!rbtex) + return false; + + return true; } void Renderer::FlushFrameDump() @@ -1151,6 +1439,7 @@ void Renderer::ShutdownFrameDumping() m_frame_dump_start.Set(); if (m_frame_dump_thread.joinable()) m_frame_dump_thread.join(); + m_frame_dump_render_framebuffer.reset(); m_frame_dump_render_texture.reset(); for (auto& tex : m_frame_dump_readback_textures) tex.reset(); diff --git a/Source/Core/VideoCommon/RenderBase.h b/Source/Core/VideoCommon/RenderBase.h index 45a82c1968..deaaa89aa9 100644 --- a/Source/Core/VideoCommon/RenderBase.h +++ b/Source/Core/VideoCommon/RenderBase.h @@ -41,22 +41,26 @@ class AbstractShader; class AbstractTexture; class AbstractStagingTexture; class NativeVertexFormat; -class PostProcessingShaderImplementation; struct TextureConfig; struct ComputePipelineConfig; struct AbstractPipelineConfig; +struct PortableVertexDeclaration; enum class ShaderStage; enum class EFBAccessType; +enum class EFBReinterpretType; enum class StagingTextureType; +namespace VideoCommon +{ +class PostProcessing; +} + struct 
EfbPokeData { u16 x, y; u32 data; }; -extern int frameCount; - // Renderer really isn't a very good name for this class - it's more like "Misc". // The long term goal is to get rid of this class and replace it with others that make // more sense. @@ -78,37 +82,38 @@ public: virtual void SetScissorRect(const MathUtil::Rectangle& rc) {} virtual void SetTexture(u32 index, const AbstractTexture* texture) {} virtual void SetSamplerState(u32 index, const SamplerState& state) {} + virtual void SetComputeImageTexture(AbstractTexture* texture, bool read, bool write) {} virtual void UnbindTexture(const AbstractTexture* texture) {} - virtual void SetInterlacingMode() {} virtual void SetViewport(float x, float y, float width, float height, float near_depth, float far_depth) { } virtual void SetFullscreen(bool enable_fullscreen) {} virtual bool IsFullscreen() const { return false; } - virtual void ApplyState() {} - virtual void RestoreState() {} - virtual void ResetAPIState() {} - virtual void RestoreAPIState() {} + virtual void BeginUtilityDrawing(); + virtual void EndUtilityDrawing(); virtual std::unique_ptr CreateTexture(const TextureConfig& config) = 0; virtual std::unique_ptr CreateStagingTexture(StagingTextureType type, const TextureConfig& config) = 0; virtual std::unique_ptr - CreateFramebuffer(const AbstractTexture* color_attachment, - const AbstractTexture* depth_attachment) = 0; + CreateFramebuffer(AbstractTexture* color_attachment, AbstractTexture* depth_attachment) = 0; // Framebuffer operations. 
- virtual void SetFramebuffer(const AbstractFramebuffer* framebuffer) {} - virtual void SetAndDiscardFramebuffer(const AbstractFramebuffer* framebuffer) {} - virtual void SetAndClearFramebuffer(const AbstractFramebuffer* framebuffer, - const ClearColor& color_value = {}, float depth_value = 0.0f) - { - } + virtual void SetFramebuffer(AbstractFramebuffer* framebuffer); + virtual void SetAndDiscardFramebuffer(AbstractFramebuffer* framebuffer); + virtual void SetAndClearFramebuffer(AbstractFramebuffer* framebuffer, + const ClearColor& color_value = {}, float depth_value = 0.0f); // Drawing with currently-bound pipeline state. virtual void Draw(u32 base_vertex, u32 num_vertices) {} virtual void DrawIndexed(u32 base_index, u32 num_indices, u32 base_vertex) {} + // Dispatching compute shaders with currently-bound state. + virtual void DispatchComputeShader(const AbstractShader* shader, u32 groups_x, u32 groups_y, + u32 groups_z) + { + } + // Binds the backbuffer for rendering. The buffer will be cleared immediately after binding. // This is where any window size changes are detected, therefore m_backbuffer_width and/or // m_backbuffer_height may change after this function returns. 
@@ -122,12 +127,15 @@ public: CreateShaderFromSource(ShaderStage stage, const char* source, size_t length) = 0; virtual std::unique_ptr CreateShaderFromBinary(ShaderStage stage, const void* data, size_t length) = 0; + virtual std::unique_ptr + CreateNativeVertexFormat(const PortableVertexDeclaration& vtx_decl) = 0; virtual std::unique_ptr CreatePipeline(const AbstractPipelineConfig& config) = 0; + std::unique_ptr CreateShaderFromSource(ShaderStage stage, + const std::string& source); + + AbstractFramebuffer* GetCurrentFramebuffer() const { return m_current_framebuffer; } - const AbstractFramebuffer* GetCurrentFramebuffer() const { return m_current_framebuffer; } - u32 GetCurrentFramebufferWidth() const { return m_current_framebuffer_width; } - u32 GetCurrentFramebufferHeight() const { return m_current_framebuffer_height; } // Ideal internal resolution - multiple of the native EFB resolution int GetTargetWidth() const { return m_target_width; } int GetTargetHeight() const { return m_target_height; } @@ -137,10 +145,27 @@ public: float GetBackbufferScale() const { return m_backbuffer_scale; } void SetWindowSize(int width, int height); - // EFB coordinate conversion functions + // Sets viewport and scissor to the specified rectangle. rect is assumed to be in framebuffer + // coordinates, i.e. lower-left origin in OpenGL. + void SetViewportAndScissor(const MathUtil::Rectangle& rect, float min_depth = 0.0f, + float max_depth = 1.0f); + // Scales a GPU texture using a copy shader. + virtual void ScaleTexture(AbstractFramebuffer* dst_framebuffer, + const MathUtil::Rectangle& dst_rect, + const AbstractTexture* src_texture, + const MathUtil::Rectangle& src_rect); + + // Converts an upper-left to lower-left if required by the backend, optionally + // clamping to the framebuffer size. 
+ MathUtil::Rectangle ConvertFramebufferRectangle(const MathUtil::Rectangle& rect, + u32 fb_width, u32 fb_height); + MathUtil::Rectangle ConvertFramebufferRectangle(const MathUtil::Rectangle& rect, + const AbstractFramebuffer* framebuffer); + + // EFB coordinate conversion functions // Use this to convert a whole native EFB rect to backbuffer coordinates - virtual TargetRectangle ConvertEFBRectangle(const EFBRectangle& rc) = 0; + TargetRectangle ConvertEFBRectangle(const EFBRectangle& rc); const TargetRectangle& GetTargetRectangle() const { return m_target_rectangle; } float CalculateDrawAspectRatio() const; @@ -170,18 +195,20 @@ public: bool InitializeImGui(); virtual void ClearScreen(const EFBRectangle& rc, bool colorEnable, bool alphaEnable, bool zEnable, - u32 color, u32 z) = 0; - virtual void ReinterpretPixelData(unsigned int convtype) = 0; + u32 color, u32 z); + virtual void ReinterpretPixelData(EFBReinterpretType convtype); void RenderToXFB(u32 xfbAddr, const EFBRectangle& sourceRc, u32 fbStride, u32 fbHeight, float Gamma = 1.0f); - virtual u32 AccessEFB(EFBAccessType type, u32 x, u32 y, u32 poke_data) = 0; - virtual void PokeEFB(EFBAccessType type, const EfbPokeData* points, size_t num_points) = 0; + virtual u32 AccessEFB(EFBAccessType type, u32 x, u32 y, u32 poke_data); + virtual void PokeEFB(EFBAccessType type, const EfbPokeData* points, size_t num_points); virtual u16 BBoxRead(int index) = 0; virtual void BBoxWrite(int index, u16 value) = 0; + virtual void BBoxFlush() {} virtual void Flush() {} + virtual void WaitForGPUIdle() {} // Finish up the current frame, print some stats void Swap(u32 xfbAddr, u32 fbWidth, u32 fbStride, u32 fbHeight, const EFBRectangle& rc, @@ -189,14 +216,14 @@ public: // Draws the specified XFB buffer to the screen, performing any post-processing. // Assumes that the backbuffer has already been bound and cleared. 
- virtual void RenderXFBToScreen(const AbstractTexture* texture, const EFBRectangle& rc) {} + virtual void RenderXFBToScreen(const AbstractTexture* texture, const EFBRectangle& rc); // Called when the configuration changes, and backend structures need to be updated. virtual void OnConfigChanged(u32 bits) {} PEControl::PixelFormat GetPrevPixelFormat() const { return m_prev_efb_format; } void StorePixelFormat(PEControl::PixelFormat new_format) { m_prev_efb_format = new_format; } - PostProcessingShaderImplementation* GetPostProcessor() const { return m_post_processor.get(); } + VideoCommon::PostProcessing* GetPostProcessor() const { return m_post_processor.get(); } // Final surface changing // This is called when the surface is resized (WX) or the window changes (Android). void ChangeSurface(void* new_surface_handle); @@ -246,12 +273,10 @@ protected: // Renders ImGui windows to the currently-bound framebuffer. // Should be called with the ImGui lock held. - void RenderImGui(); + void DrawImGui(); - // TODO: Remove the width/height parameters once we make the EFB an abstract framebuffer. 
- const AbstractFramebuffer* m_current_framebuffer = nullptr; - u32 m_current_framebuffer_width = 1; - u32 m_current_framebuffer_height = 1; + AbstractFramebuffer* m_current_framebuffer = nullptr; + const AbstractPipeline* m_current_pipeline = nullptr; Common::Flag m_screenshot_request; Common::Event m_screenshot_completed; @@ -260,8 +285,8 @@ protected: bool m_aspect_wide = false; // The framebuffer size - int m_target_width = 0; - int m_target_height = 0; + int m_target_width = 1; + int m_target_height = 1; // Backbuffer (window) size and render area int m_backbuffer_width = 0; @@ -269,10 +294,11 @@ protected: float m_backbuffer_scale = 1.0f; AbstractTextureFormat m_backbuffer_format = AbstractTextureFormat::Undefined; TargetRectangle m_target_rectangle = {}; + int m_frame_count = 0; FPSCounter m_fps_counter; - std::unique_ptr m_post_processor; + std::unique_ptr m_post_processor; void* m_new_surface_handle = nullptr; Common::Flag m_surface_changed; @@ -315,6 +341,7 @@ private: // Texture used for screenshot/frame dumping std::unique_ptr m_frame_dump_render_texture; + std::unique_ptr m_frame_dump_render_framebuffer; std::array, 2> m_frame_dump_readback_textures; AVIDump::Frame m_last_frame_state; bool m_last_frame_exported = false; @@ -340,15 +367,15 @@ private: bool IsFrameDumping(); - // Asynchronously encodes the current staging texture to the frame dump. + // Checks that the frame dump render texture exists and is the correct size. + bool CheckFrameDumpRenderTexture(u32 target_width, u32 target_height); + + // Checks that the frame dump readback texture exists and is the correct size. + bool CheckFrameDumpReadbackTexture(u32 target_width, u32 target_height); + + // Fills the frame dump staging texture with the current XFB texture. void DumpCurrentFrame(); - // Fills the frame dump render texture with the current XFB texture. - void RenderFrameDump(); - - // Queues the current frame for readback, which will be written to AVI next frame. 
- void QueueFrameDumpReadback(); - // Asynchronously encodes the specified pointer of frame data to the frame dump. void DumpFrameData(const u8* data, int w, int h, int stride, const AVIDump::Frame& state); diff --git a/Source/Core/VideoCommon/RenderState.cpp b/Source/Core/VideoCommon/RenderState.cpp index b61b031b04..04d8804f0c 100644 --- a/Source/Core/VideoCommon/RenderState.cpp +++ b/Source/Core/VideoCommon/RenderState.cpp @@ -6,6 +6,7 @@ #include #include #include "VideoCommon/SamplerCommon.h" +#include "VideoCommon/TextureConfig.h" void RasterizationState::Generate(const BPMemory& bp, PrimitiveType primitive_type) { @@ -23,6 +24,12 @@ RasterizationState& RasterizationState::operator=(const RasterizationState& rhs) return *this; } +FramebufferState& FramebufferState::operator=(const FramebufferState& rhs) +{ + hex = rhs.hex; + return *this; +} + void DepthState::Generate(const BPMemory& bp) { testenable = bp.zmode.testenable.Value(); @@ -206,10 +213,19 @@ RasterizationState GetInvalidRasterizationState() return state; } -RasterizationState GetNoCullRasterizationState() +RasterizationState GetNoCullRasterizationState(PrimitiveType primitive) { RasterizationState state = {}; state.cullmode = GenMode::CULL_NONE; + state.primitive = primitive; + return state; +} + +RasterizationState GetCullBackFaceRasterizationState(PrimitiveType primitive) +{ + RasterizationState state = {}; + state.cullmode = GenMode::CULL_BACK; + state.primitive = primitive; return state; } @@ -220,7 +236,7 @@ DepthState GetInvalidDepthState() return state; } -DepthState GetNoDepthTestingDepthStencilState() +DepthState GetNoDepthTestingDepthState() { DepthState state = {}; state.testenable = false; @@ -229,6 +245,15 @@ DepthState GetNoDepthTestingDepthStencilState() return state; } +DepthState GetAlwaysWriteDepthState() +{ + DepthState state = {}; + state.testenable = true; + state.updateenable = true; + state.func = ZMode::ALWAYS; + return state; +} + BlendingState GetInvalidBlendingState() { 
BlendingState state; @@ -251,6 +276,21 @@ BlendingState GetNoBlendingBlendState() return state; } +BlendingState GetNoColorWriteBlendState() +{ + BlendingState state = {}; + state.usedualsrc = false; + state.blendenable = false; + state.srcfactor = BlendMode::ONE; + state.srcfactoralpha = BlendMode::ONE; + state.dstfactor = BlendMode::ZERO; + state.dstfactoralpha = BlendMode::ZERO; + state.logicopenable = false; + state.colorupdate = false; + state.alphaupdate = false; + return state; +} + SamplerState GetInvalidSamplerState() { SamplerState state; @@ -287,4 +327,20 @@ SamplerState GetLinearSamplerState() state.anisotropic_filtering = false; return state; } + +FramebufferState GetColorFramebufferState(AbstractTextureFormat format) +{ + FramebufferState state = {}; + state.color_texture_format = format; + state.depth_texture_format = AbstractTextureFormat::Undefined; + state.per_sample_shading = false; + state.samples = 1; + return state; } + +FramebufferState GetRGBA8FramebufferState() +{ + return GetColorFramebufferState(AbstractTextureFormat::RGBA8); +} + +} // namespace RenderState diff --git a/Source/Core/VideoCommon/RenderState.h b/Source/Core/VideoCommon/RenderState.h index 5cd0ba7470..dc0676ccc4 100644 --- a/Source/Core/VideoCommon/RenderState.h +++ b/Source/Core/VideoCommon/RenderState.h @@ -9,6 +9,8 @@ #include "VideoCommon/BPMemory.h" #include "VideoCommon/BPStructs.h" +enum class AbstractTextureFormat : u32; + enum class PrimitiveType : u32 { Points, @@ -32,6 +34,20 @@ union RasterizationState u32 hex; }; +union FramebufferState +{ + BitField<0, 8, AbstractTextureFormat> color_texture_format; + BitField<8, 8, AbstractTextureFormat> depth_texture_format; + BitField<16, 8, u32> samples; + BitField<24, 1, u32> per_sample_shading; + + bool operator==(const FramebufferState& rhs) const { return hex == rhs.hex; } + bool operator!=(const FramebufferState& rhs) const { return hex != rhs.hex; } + FramebufferState& operator=(const FramebufferState& rhs); + + u32 
hex; +}; + union DepthState { void Generate(const BPMemory& bp); @@ -114,12 +130,17 @@ union SamplerState namespace RenderState { RasterizationState GetInvalidRasterizationState(); -RasterizationState GetNoCullRasterizationState(); +RasterizationState GetNoCullRasterizationState(PrimitiveType primitive); +RasterizationState GetCullBackFaceRasterizationState(PrimitiveType primitive); DepthState GetInvalidDepthState(); -DepthState GetNoDepthTestingDepthStencilState(); +DepthState GetNoDepthTestingDepthState(); +DepthState GetAlwaysWriteDepthState(); BlendingState GetInvalidBlendingState(); BlendingState GetNoBlendingBlendState(); +BlendingState GetNoColorWriteBlendState(); SamplerState GetInvalidSamplerState(); SamplerState GetPointSamplerState(); SamplerState GetLinearSamplerState(); -} +FramebufferState GetColorFramebufferState(AbstractTextureFormat format); +FramebufferState GetRGBA8FramebufferState(); +} // namespace RenderState diff --git a/Source/Core/VideoCommon/ShaderCache.cpp b/Source/Core/VideoCommon/ShaderCache.cpp index 3b0c110f96..b42587a2d4 100644 --- a/Source/Core/VideoCommon/ShaderCache.cpp +++ b/Source/Core/VideoCommon/ShaderCache.cpp @@ -9,7 +9,8 @@ #include "Common/MsgHandler.h" #include "Core/ConfigManager.h" -#include "VideoCommon/FramebufferManagerBase.h" +#include "VideoCommon/FramebufferManager.h" +#include "VideoCommon/FramebufferShaderGen.h" #include "VideoCommon/RenderBase.h" #include "VideoCommon/Statistics.h" #include "VideoCommon/VertexLoaderManager.h" @@ -22,17 +23,26 @@ std::unique_ptr g_shader_cache; namespace VideoCommon { ShaderCache::ShaderCache() = default; -ShaderCache::~ShaderCache() = default; +ShaderCache::~ShaderCache() +{ + ClearShaderCaches(); + ClearPipelineCaches(); +} bool ShaderCache::Initialize() { m_api_type = g_ActiveConfig.backend_info.api_type; m_host_config = ShaderHostConfig::GetCurrent(); - m_efb_depth_format = FramebufferManagerBase::GetEFBDepthFormat(); - m_efb_multisamples = g_ActiveConfig.iMultisamples; - // 
Create the async compiler, and start the worker threads. + if (!CompileSharedPipelines()) + return false; + m_async_shader_compiler = g_renderer->CreateAsyncShaderCompiler(); + return true; +} + +void ShaderCache::InitializeShaderCache() +{ m_async_shader_compiler->ResizeWorkerThreads(g_ActiveConfig.GetShaderPrecompilerThreads()); // Load shader and UID caches. @@ -53,17 +63,6 @@ bool ShaderCache::Initialize() // Switch to the runtime shader compiler thread configuration. m_async_shader_compiler->ResizeWorkerThreads(g_ActiveConfig.GetShaderCompilerThreads()); - return true; -} - -void ShaderCache::SetHostConfig(const ShaderHostConfig& host_config, u32 efb_multisamples) -{ - if (m_host_config.bits == host_config.bits && m_efb_multisamples == efb_multisamples) - return; - - m_host_config = host_config; - m_efb_multisamples = efb_multisamples; - Reload(); } void ShaderCache::Reload() @@ -99,8 +98,6 @@ void ShaderCache::Shutdown() // until everything has finished compiling. m_async_shader_compiler->StopWorkerThreads(); ClosePipelineUIDCache(); - ClearShaderCaches(); - ClearPipelineCaches(); } const AbstractPipeline* ShaderCache::GetPipelineForUid(const GXPipelineUid& uid) @@ -445,6 +442,11 @@ bool ShaderCache::NeedsGeometryShader(const GeometryShaderUid& uid) const return m_host_config.backend_geometry_shaders && !uid.GetUidData()->IsPassthrough(); } +bool ShaderCache::UseGeometryShaderForEFBCopies() const +{ + return m_host_config.backend_geometry_shaders && m_host_config.stereo; +} + AbstractPipelineConfig ShaderCache::GetGXPipelineConfig( const NativeVertexFormat* vertex_format, const AbstractShader* vertex_shader, const AbstractShader* geometry_shader, const AbstractShader* pixel_shader, @@ -460,10 +462,7 @@ AbstractPipelineConfig ShaderCache::GetGXPipelineConfig( config.rasterization_state = rasterization_state; config.depth_state = depth_state; config.blending_state = blending_state; - config.framebuffer_state.color_texture_format = AbstractTextureFormat::RGBA8; 
- config.framebuffer_state.depth_texture_format = m_efb_depth_format; - config.framebuffer_state.per_sample_shading = m_host_config.ssaa; - config.framebuffer_state.samples = m_efb_multisamples; + config.framebuffer_state = g_framebuffer_manager->GetEFBFramebufferState(); return config; } @@ -967,8 +966,9 @@ void ShaderCache::QueueUberShaderPipelines() config.vs_uid = vs_uid; config.gs_uid = gs_uid; config.ps_uid = ps_uid; - config.rasterization_state = RenderState::GetNoCullRasterizationState(); - config.depth_state = RenderState::GetNoDepthTestingDepthStencilState(); + config.rasterization_state = + RenderState::GetCullBackFaceRasterizationState(PrimitiveType::TriangleStrip); + config.depth_state = RenderState::GetNoDepthTestingDepthState(); config.blending_state = RenderState::GetNoBlendingBlendState(); auto iter = m_gx_uber_pipeline_cache.find(config); @@ -998,24 +998,172 @@ void ShaderCache::QueueUberShaderPipelines() }); } -std::string ShaderCache::GetUtilityShaderHeader() const +const AbstractPipeline* +ShaderCache::GetEFBCopyToVRAMPipeline(const TextureConversionShaderGen::TCShaderUid& uid) { - std::stringstream ss; + auto iter = m_efb_copy_to_vram_pipelines.find(uid); + if (iter != m_efb_copy_to_vram_pipelines.end()) + return iter->second.get(); - ss << "#define API_D3D " << (m_api_type == APIType::D3D ? 1 : 0) << "\n"; - ss << "#define API_OPENGL " << (m_api_type == APIType::OpenGL ? 1 : 0) << "\n"; - ss << "#define API_VULKAN " << (m_api_type == APIType::Vulkan ? 
1 : 0) << "\n"; - - if (m_efb_multisamples > 1) + auto shader_code = TextureConversionShaderGen::GeneratePixelShader(m_api_type, uid.GetUidData()); + auto shader = g_renderer->CreateShaderFromSource(ShaderStage::Pixel, shader_code.GetBuffer()); + if (!shader) { - ss << "#define MSAA_ENABLED 1" << std::endl; - ss << "#define MSAA_SAMPLES " << m_efb_multisamples << std::endl; - if (m_host_config.ssaa) - ss << "#define SSAA_ENABLED 1" << std::endl; + m_efb_copy_to_vram_pipelines.emplace(uid, nullptr); + return nullptr; } - ss << "#define EFB_LAYERS " << (m_host_config.stereo ? 2 : 1) << std::endl; - - return ss.str(); + AbstractPipelineConfig config = {}; + config.vertex_format = nullptr; + config.vertex_shader = m_efb_copy_vertex_shader.get(); + config.geometry_shader = + UseGeometryShaderForEFBCopies() ? m_texcoord_geometry_shader.get() : nullptr; + config.pixel_shader = shader.get(); + config.rasterization_state = RenderState::GetNoCullRasterizationState(PrimitiveType::Triangles); + config.depth_state = RenderState::GetNoDepthTestingDepthState(); + config.blending_state = RenderState::GetNoBlendingBlendState(); + config.framebuffer_state = RenderState::GetRGBA8FramebufferState(); + config.usage = AbstractPipelineUsage::Utility; + auto iiter = m_efb_copy_to_vram_pipelines.emplace(uid, g_renderer->CreatePipeline(config)); + return iiter.first->second.get(); } + +const AbstractPipeline* ShaderCache::GetEFBCopyToRAMPipeline(const EFBCopyParams& uid) +{ + auto iter = m_efb_copy_to_ram_pipelines.find(uid); + if (iter != m_efb_copy_to_ram_pipelines.end()) + return iter->second.get(); + + auto shader_code = TextureConversionShaderTiled::GenerateEncodingShader(uid, m_api_type); + auto shader = + g_renderer->CreateShaderFromSource(ShaderStage::Pixel, shader_code, std::strlen(shader_code)); + if (!shader) + { + m_efb_copy_to_ram_pipelines.emplace(uid, nullptr); + return nullptr; + } + + AbstractPipelineConfig config = {}; + config.vertex_format = nullptr; + 
config.vertex_shader = m_screen_quad_vertex_shader.get(); + config.geometry_shader = + UseGeometryShaderForEFBCopies() ? m_texcoord_geometry_shader.get() : nullptr; + config.pixel_shader = shader.get(); + config.rasterization_state = RenderState::GetNoCullRasterizationState(PrimitiveType::Triangles); + config.depth_state = RenderState::GetNoDepthTestingDepthState(); + config.blending_state = RenderState::GetNoBlendingBlendState(); + config.framebuffer_state = RenderState::GetColorFramebufferState(AbstractTextureFormat::BGRA8); + config.usage = AbstractPipelineUsage::Utility; + auto iiter = m_efb_copy_to_ram_pipelines.emplace(uid, g_renderer->CreatePipeline(config)); + return iiter.first->second.get(); +} + +bool ShaderCache::CompileSharedPipelines() +{ + m_screen_quad_vertex_shader = g_renderer->CreateShaderFromSource( + ShaderStage::Vertex, FramebufferShaderGen::GenerateScreenQuadVertexShader()); + m_texture_copy_vertex_shader = g_renderer->CreateShaderFromSource( + ShaderStage::Vertex, FramebufferShaderGen::GenerateTextureCopyVertexShader()); + m_efb_copy_vertex_shader = g_renderer->CreateShaderFromSource( + ShaderStage::Vertex, + TextureConversionShaderGen::GenerateVertexShader(m_api_type).GetBuffer()); + if (!m_screen_quad_vertex_shader || !m_texture_copy_vertex_shader || !m_efb_copy_vertex_shader) + return false; + + if (UseGeometryShaderForEFBCopies()) + { + m_texcoord_geometry_shader = g_renderer->CreateShaderFromSource( + ShaderStage::Geometry, FramebufferShaderGen::GeneratePassthroughGeometryShader(1, 0)); + m_color_geometry_shader = g_renderer->CreateShaderFromSource( + ShaderStage::Geometry, FramebufferShaderGen::GeneratePassthroughGeometryShader(0, 1)); + if (!m_texcoord_geometry_shader || !m_color_geometry_shader) + return false; + } + + m_texture_copy_pixel_shader = g_renderer->CreateShaderFromSource( + ShaderStage::Pixel, FramebufferShaderGen::GenerateTextureCopyPixelShader()); + m_color_pixel_shader = g_renderer->CreateShaderFromSource( + 
ShaderStage::Pixel, FramebufferShaderGen::GenerateColorPixelShader()); + if (!m_texture_copy_pixel_shader || !m_color_pixel_shader) + return false; + + AbstractPipelineConfig config; + config.vertex_format = nullptr; + config.vertex_shader = m_texture_copy_vertex_shader.get(); + config.geometry_shader = nullptr; + config.pixel_shader = m_texture_copy_pixel_shader.get(); + config.rasterization_state = RenderState::GetNoCullRasterizationState(PrimitiveType::Triangles); + config.depth_state = RenderState::GetNoDepthTestingDepthState(); + config.blending_state = RenderState::GetNoBlendingBlendState(); + config.framebuffer_state = RenderState::GetRGBA8FramebufferState(); + config.usage = AbstractPipelineUsage::Utility; + m_copy_rgba8_pipeline = g_renderer->CreatePipeline(config); + if (!m_copy_rgba8_pipeline) + return false; + + if (UseGeometryShaderForEFBCopies()) + { + config.geometry_shader = m_texcoord_geometry_shader.get(); + m_rgba8_stereo_copy_pipeline = g_renderer->CreatePipeline(config); + if (!m_rgba8_stereo_copy_pipeline) + return false; + } + + if (m_host_config.backend_palette_conversion) + { + config.vertex_shader = m_screen_quad_vertex_shader.get(); + config.geometry_shader = nullptr; + + for (size_t i = 0; i < NUM_PALETTE_CONVERSION_SHADERS; i++) + { + auto shader = g_renderer->CreateShaderFromSource( + ShaderStage::Pixel, TextureConversionShaderTiled::GeneratePaletteConversionShader( + static_cast(i), m_api_type)); + if (!shader) + return false; + + config.pixel_shader = shader.get(); + m_palette_conversion_pipelines[i] = g_renderer->CreatePipeline(config); + if (!m_palette_conversion_pipelines[i]) + return false; + } + } + + return true; +} + +const AbstractPipeline* ShaderCache::GetPaletteConversionPipeline(TLUTFormat format) +{ + ASSERT(static_cast(format) < NUM_PALETTE_CONVERSION_SHADERS); + return m_palette_conversion_pipelines[static_cast(format)].get(); +} + +const AbstractShader* ShaderCache::GetTextureDecodingShader(TextureFormat format, + 
TLUTFormat palette_format) +{ + const auto key = std::make_pair(static_cast(format), static_cast(palette_format)); + auto iter = m_texture_decoding_shaders.find(key); + if (iter != m_texture_decoding_shaders.end()) + return iter->second.get(); + + std::string shader_source = + TextureConversionShaderTiled::GenerateDecodingShader(format, palette_format, APIType::OpenGL); + if (shader_source.empty()) + { + m_texture_decoding_shaders.emplace(key, nullptr); + return nullptr; + } + + std::unique_ptr shader = + g_renderer->CreateShaderFromSource(ShaderStage::Compute, shader_source); + if (!shader) + { + m_texture_decoding_shaders.emplace(key, nullptr); + return nullptr; + } + + auto iiter = m_texture_decoding_shaders.emplace(key, std::move(shader)); + return iiter.first->second.get(); +} + } // namespace VideoCommon diff --git a/Source/Core/VideoCommon/ShaderCache.h b/Source/Core/VideoCommon/ShaderCache.h index a91559c965..66caad93ac 100644 --- a/Source/Core/VideoCommon/ShaderCache.h +++ b/Source/Core/VideoCommon/ShaderCache.h @@ -25,12 +25,16 @@ #include "VideoCommon/GeometryShaderGen.h" #include "VideoCommon/PixelShaderGen.h" #include "VideoCommon/RenderState.h" +#include "VideoCommon/TextureCacheBase.h" +#include "VideoCommon/TextureConversionShader.h" +#include "VideoCommon/TextureConverterShaderGen.h" #include "VideoCommon/UberShaderPixel.h" #include "VideoCommon/UberShaderVertex.h" #include "VideoCommon/VertexShaderGen.h" class NativeVertexFormat; enum class AbstractTextureFormat : u32; +enum class TLUTFormat; namespace VideoCommon { @@ -44,8 +48,11 @@ public: bool Initialize(); void Shutdown(); - // Changes the shader host config. Shaders will be reloaded if there are changes. - void SetHostConfig(const ShaderHostConfig& host_config, u32 efb_multisamples); + // Compiles/loads cached shaders. + void InitializeShaderCache(); + + // Changes the shader host config. Shaders should be reloaded afterwards. 
+ void SetHostConfig(const ShaderHostConfig& host_config) { m_host_config = host_config; } // Reloads/recreates all shaders and pipelines. void Reload(); @@ -53,9 +60,6 @@ public: // Retrieves all pending shaders/pipelines from the async compiler. void RetrieveAsyncShaders(); - // Get utility shader header based on current config. - std::string GetUtilityShaderHeader() const; - // Accesses ShaderGen shader caches const AbstractPipeline* GetPipelineForUid(const GXPipelineUid& uid); const AbstractPipeline* GetUberPipelineForUid(const GXUberPipelineUid& uid); @@ -64,7 +68,48 @@ public: // The optional will be empty if this pipeline is now background compiling. std::optional GetPipelineForUidAsync(const GXPipelineUid& uid); + // Shared shaders + const AbstractShader* GetScreenQuadVertexShader() const + { + return m_screen_quad_vertex_shader.get(); + } + const AbstractShader* GetTextureCopyVertexShader() const + { + return m_texture_copy_vertex_shader.get(); + } + const AbstractShader* GetEFBCopyVertexShader() const { return m_efb_copy_vertex_shader.get(); } + const AbstractShader* GetTexcoordGeometryShader() const + { + return m_texcoord_geometry_shader.get(); + } + const AbstractShader* GetTextureCopyPixelShader() const + { + return m_texture_copy_pixel_shader.get(); + } + const AbstractShader* GetColorGeometryShader() const { return m_color_geometry_shader.get(); } + const AbstractShader* GetColorPixelShader() const { return m_color_pixel_shader.get(); } + + // EFB copy to RAM/VRAM pipelines + const AbstractPipeline* + GetEFBCopyToVRAMPipeline(const TextureConversionShaderGen::TCShaderUid& uid); + const AbstractPipeline* GetEFBCopyToRAMPipeline(const EFBCopyParams& uid); + + // RGBA8 framebuffer copy pipelines + const AbstractPipeline* GetRGBA8CopyPipeline() const { return m_copy_rgba8_pipeline.get(); } + const AbstractPipeline* GetRGBA8StereoCopyPipeline() const + { + return m_rgba8_stereo_copy_pipeline.get(); + } + + // Palette texture conversion pipelines + const 
AbstractPipeline* GetPaletteConversionPipeline(TLUTFormat format); + + // Texture decoding compute shaders + const AbstractShader* GetTextureDecodingShader(TextureFormat format, TLUTFormat palette_format); + private: + static constexpr size_t NUM_PALETTE_CONVERSION_SHADERS = 3; + void WaitForAsyncCompiler(); void LoadShaderCaches(); void ClearShaderCaches(); @@ -74,6 +119,7 @@ private: void InvalidateCachedPipelines(); void ClearPipelineCaches(); void QueueUberShaderPipelines(); + bool CompileSharedPipelines(); // GX shader compiler methods std::unique_ptr CompileVertexShader(const VertexShaderUid& uid) const; @@ -93,6 +139,9 @@ private: const AbstractShader* CreateGeometryShader(const GeometryShaderUid& uid); bool NeedsGeometryShader(const GeometryShaderUid& uid) const; + // Should we use geometry shaders for EFB copies? + bool UseGeometryShaderForEFBCopies() const; + // GX pipeline compiler methods AbstractPipelineConfig GetGXPipelineConfig(const NativeVertexFormat* vertex_format, const AbstractShader* vertex_shader, @@ -130,10 +179,17 @@ private: // Configuration bits. 
APIType m_api_type = APIType::Nothing; ShaderHostConfig m_host_config = {}; - AbstractTextureFormat m_efb_depth_format; - u32 m_efb_multisamples = 1; std::unique_ptr m_async_shader_compiler; + // Shared shaders + std::unique_ptr m_screen_quad_vertex_shader; + std::unique_ptr m_texture_copy_vertex_shader; + std::unique_ptr m_efb_copy_vertex_shader; + std::unique_ptr m_texcoord_geometry_shader; + std::unique_ptr m_color_geometry_shader; + std::unique_ptr m_texture_copy_pixel_shader; + std::unique_ptr m_color_pixel_shader; + // GX Shader Caches template struct ShaderModuleCache @@ -157,6 +213,22 @@ private: std::map, bool>> m_gx_uber_pipeline_cache; File::IOFile m_gx_pipeline_uid_cache_file; + + // EFB copy to VRAM/RAM pipelines + std::map> + m_efb_copy_to_vram_pipelines; + std::map> m_efb_copy_to_ram_pipelines; + + // Copy pipeline for RGBA8 textures + std::unique_ptr m_copy_rgba8_pipeline; + std::unique_ptr m_rgba8_stereo_copy_pipeline; + + // Palette conversion pipelines + std::array, NUM_PALETTE_CONVERSION_SHADERS> + m_palette_conversion_pipelines; + + // Texture decoding shaders + std::map, std::unique_ptr> m_texture_decoding_shaders; }; } // namespace VideoCommon diff --git a/Source/Core/VideoCommon/ShaderGenCommon.cpp b/Source/Core/VideoCommon/ShaderGenCommon.cpp index 54d87691de..ee4c3d32f8 100644 --- a/Source/Core/VideoCommon/ShaderGenCommon.cpp +++ b/Source/Core/VideoCommon/ShaderGenCommon.cpp @@ -34,6 +34,7 @@ ShaderHostConfig ShaderHostConfig::GetCurrent() g_ActiveConfig.backend_info.bSupportsDynamicSamplerIndexing; bits.backend_shader_framebuffer_fetch = g_ActiveConfig.backend_info.bSupportsFramebufferFetch; bits.backend_logic_op = g_ActiveConfig.backend_info.bSupportsLogicOp; + bits.backend_palette_conversion = g_ActiveConfig.backend_info.bSupportsPaletteConversion; return bits; } diff --git a/Source/Core/VideoCommon/ShaderGenCommon.h b/Source/Core/VideoCommon/ShaderGenCommon.h index 83416d06d4..216f791df5 100644 --- 
a/Source/Core/VideoCommon/ShaderGenCommon.h +++ b/Source/Core/VideoCommon/ShaderGenCommon.h @@ -181,7 +181,8 @@ union ShaderHostConfig u32 backend_dynamic_sampler_indexing : 1; u32 backend_shader_framebuffer_fetch : 1; u32 backend_logic_op : 1; - u32 pad : 10; + u32 backend_palette_conversion : 1; + u32 pad : 9; }; static ShaderHostConfig GetCurrent(); @@ -216,7 +217,7 @@ template inline void GenerateVSOutputMembers(T& object, APIType api_type, u32 texgens, const ShaderHostConfig& host_config, const char* qualifier) { - DefineOutputMember(object, api_type, qualifier, "float4", "pos", -1, "POSITION"); + DefineOutputMember(object, api_type, qualifier, "float4", "pos", -1, "SV_Position"); DefineOutputMember(object, api_type, qualifier, "float4", "colors_", 0, "COLOR", 0); DefineOutputMember(object, api_type, qualifier, "float4", "colors_", 1, "COLOR", 1); diff --git a/Source/Core/VideoCommon/TextureCacheBase.cpp b/Source/Core/VideoCommon/TextureCacheBase.cpp index 90abc01c65..3beaeedbf8 100644 --- a/Source/Core/VideoCommon/TextureCacheBase.cpp +++ b/Source/Core/VideoCommon/TextureCacheBase.cpp @@ -28,17 +28,21 @@ #include "Core/FifoPlayer/FifoRecorder.h" #include "Core/HW/Memmap.h" +#include "VideoCommon/AbstractFramebuffer.h" #include "VideoCommon/AbstractStagingTexture.h" #include "VideoCommon/BPMemory.h" -#include "VideoCommon/Debugger.h" -#include "VideoCommon/FramebufferManagerBase.h" +#include "VideoCommon/FramebufferManager.h" #include "VideoCommon/HiresTextures.h" #include "VideoCommon/PixelShaderManager.h" #include "VideoCommon/RenderBase.h" #include "VideoCommon/SamplerCommon.h" +#include "VideoCommon/ShaderCache.h" #include "VideoCommon/Statistics.h" #include "VideoCommon/TextureCacheBase.h" +#include "VideoCommon/TextureConversionShader.h" +#include "VideoCommon/TextureConverterShaderGen.h" #include "VideoCommon/TextureDecoder.h" +#include "VideoCommon/VertexManagerBase.h" #include "VideoCommon/VideoCommon.h" #include "VideoCommon/VideoConfig.h" @@ -51,8 
+55,9 @@ std::unique_ptr g_texture_cache; std::bitset<8> TextureCacheBase::valid_bind_points; -TextureCacheBase::TCacheEntry::TCacheEntry(std::unique_ptr tex) - : texture(std::move(tex)) +TextureCacheBase::TCacheEntry::TCacheEntry(std::unique_ptr tex, + std::unique_ptr fb) + : texture(std::move(tex)), framebuffer(std::move(fb)) { } @@ -89,6 +94,25 @@ TextureCacheBase::TextureCacheBase() InvalidateAllBindPoints(); } +TextureCacheBase::~TextureCacheBase() +{ + HiresTexture::Shutdown(); + Invalidate(); + Common::FreeAlignedMemory(temp); + temp = nullptr; +} + +bool TextureCacheBase::Initialize() +{ + if (!CreateUtilityTextures()) + { + PanicAlert("Failed to create utility textures."); + return false; + } + + return true; +} + void TextureCacheBase::Invalidate() { FlushEFBCopies(); @@ -108,14 +132,6 @@ void TextureCacheBase::Invalidate() texture_pool.clear(); } -TextureCacheBase::~TextureCacheBase() -{ - HiresTexture::Shutdown(); - Invalidate(); - Common::FreeAlignedMemory(temp); - temp = nullptr; -} - void TextureCacheBase::OnConfigChanged(VideoConfig& config) { if (config.bHiresTextures != backup_config.hires_textures || @@ -139,14 +155,6 @@ void TextureCacheBase::OnConfigChanged(VideoConfig& config) g_ActiveConfig.bTexFmtOverlayCenter); } - if ((config.stereo_mode != StereoMode::Off) != backup_config.stereo_3d || - config.bStereoEFBMonoDepth != backup_config.efb_mono_depth) - { - g_texture_cache->DeleteShaders(); - if (!g_texture_cache->CompileShaders()) - PanicAlert("Failed to recompile one or more texture conversion shaders."); - } - SetBackupConfig(config); } @@ -243,7 +251,7 @@ TextureCacheBase::ApplyPaletteToEntry(TCacheEntry* entry, u8* palette, TLUTForma { TextureConfig new_config = entry->texture->GetConfig(); new_config.levels = 1; - new_config.rendertarget = true; + new_config.flags |= AbstractTextureFlag_RenderTarget; TCacheEntry* decoded_entry = AllocateCacheEntry(new_config); if (!decoded_entry) @@ -279,29 +287,27 @@ void 
TextureCacheBase::ScaleTextureCacheEntryTo(TextureCacheBase::TCacheEntry* e return; } - TextureConfig newconfig; - newconfig.width = new_width; - newconfig.height = new_height; - newconfig.layers = entry->GetNumLayers(); - newconfig.rendertarget = true; - - std::unique_ptr new_texture = AllocateTexture(newconfig); - if (new_texture) + const TextureConfig newconfig(new_width, new_height, 1, entry->GetNumLayers(), 1, + AbstractTextureFormat::RGBA8, AbstractTextureFlag_RenderTarget); + std::optional new_texture = AllocateTexture(newconfig); + if (!new_texture) { - new_texture->ScaleRectangleFromTexture(entry->texture.get(), - entry->texture->GetConfig().GetRect(), - new_texture->GetConfig().GetRect()); - entry->texture.swap(new_texture); + ERROR_LOG(VIDEO, "Scaling failed due to texture allocation failure"); + return; + } - auto config = new_texture->GetConfig(); - // At this point new_texture has the old texture in it, - // we can potentially reuse this, so let's move it back to the pool - texture_pool.emplace(config, TexPoolEntry(std::move(new_texture))); - } - else - { - ERROR_LOG(VIDEO, "Scaling failed"); - } + // No need to convert the coordinates here since they'll be the same. 
+ g_renderer->ScaleTexture(new_texture->framebuffer.get(), + new_texture->texture->GetConfig().GetRect(), entry->texture.get(), + entry->texture->GetConfig().GetRect()); + entry->texture.swap(new_texture->texture); + entry->framebuffer.swap(new_texture->framebuffer); + + // At this point new_texture has the old texture in it, + // we can potentially reuse this, so let's move it back to the pool + auto config = new_texture->texture->GetConfig(); + texture_pool.emplace( + config, TexPoolEntry(std::move(new_texture->texture), std::move(new_texture->framebuffer))); } TextureCacheBase::TCacheEntry* @@ -747,8 +753,6 @@ TextureCacheBase::TCacheEntry* TextureCacheBase::Load(const u32 stage) entry->frameCount = FRAMECOUNT_INVALID; bound_textures[stage] = entry; - GFX_DEBUGGER_PAUSE_AT(NEXT_TEXTURE_CHANGE, true); - // We need to keep track of invalided textures until they have actually been replaced or // re-loaded valid_bind_points.set(stage); @@ -1036,25 +1040,17 @@ TextureCacheBase::GetTexture(u32 address, u32 width, u32 height, const TextureFo // banks, and if we're doing an copy we may as well just do the whole thing on the CPU, since // there's no conversion between formats. In the future this could be extended with a separate // shader, however. - bool decode_on_gpu = !hires_tex && g_ActiveConfig.UseGPUTextureDecoding() && - g_texture_cache->SupportsGPUTextureDecode(texformat, tlutfmt) && - !(from_tmem && texformat == TextureFormat::RGBA8); + const bool decode_on_gpu = !hires_tex && g_ActiveConfig.UseGPUTextureDecoding() && + !(from_tmem && texformat == TextureFormat::RGBA8); // create the entry/texture - TextureConfig config; - config.width = width; - config.height = height; - config.levels = texLevels; - config.format = hires_tex ? hires_tex->GetFormat() : AbstractTextureFormat::RGBA8; - - ArbitraryMipmapDetector arbitrary_mip_detector; - + const TextureConfig config(width, height, texLevels, 1, 1, + hires_tex ? 
hires_tex->GetFormat() : AbstractTextureFormat::RGBA8, 0); TCacheEntry* entry = AllocateCacheEntry(config); - GFX_DEBUGGER_PAUSE_AT(NEXT_NEW_TEXTURE, true); - if (!entry) return nullptr; + ArbitraryMipmapDetector arbitrary_mip_detector; const u8* tlut = &texMem[tlutaddr]; if (hires_tex) { @@ -1068,14 +1064,10 @@ TextureCacheBase::GetTexture(u32 address, u32 width, u32 height, const TextureFo if (!hires_tex) { - if (decode_on_gpu) - { - u32 row_stride = bytes_per_block * (expandedWidth / bsw); - g_texture_cache->DecodeTextureOnGPU(entry, 0, src_data, texture_size, texformat, width, - height, expandedWidth, expandedHeight, row_stride, tlut, - tlutfmt); - } - else + if (!decode_on_gpu || + !DecodeTextureOnGPU(entry, 0, src_data, texture_size, texformat, width, height, + expandedWidth, expandedHeight, bytes_per_block * (expandedWidth / bsw), + tlut, tlutfmt)) { size_t decoded_texture_size = expandedWidth * sizeof(u32) * expandedHeight; @@ -1168,20 +1160,16 @@ TextureCacheBase::GetTexture(u32 address, u32 width, u32 height, const TextureFo const u32 expanded_mip_height = Common::AlignUp(mip_height, bsh); const u8*& mip_src_data = from_tmem ? ((level % 2) ? 
ptr_odd : ptr_even) : src_data; - size_t mip_size = + const u32 mip_size = TexDecoder_GetTextureSizeInBytes(expanded_mip_width, expanded_mip_height, texformat); - if (decode_on_gpu) - { - u32 row_stride = bytes_per_block * (expanded_mip_width / bsw); - g_texture_cache->DecodeTextureOnGPU(entry, level, mip_src_data, mip_size, texformat, - mip_width, mip_height, expanded_mip_width, - expanded_mip_height, row_stride, tlut, tlutfmt); - } - else + if (!decode_on_gpu || + !DecodeTextureOnGPU(entry, level, mip_src_data, mip_size, texformat, mip_width, + mip_height, expanded_mip_width, expanded_mip_height, + bytes_per_block * (expanded_mip_width / bsw), tlut, tlutfmt)) { // No need to call CheckTempSize here, as the whole buffer is preallocated at the beginning - size_t decoded_mip_size = expanded_mip_width * sizeof(u32) * expanded_mip_height; + const u32 decoded_mip_size = expanded_mip_width * sizeof(u32) * expanded_mip_height; TexDecoder_Decode(dst_buffer, mip_src_data, expanded_mip_width, expanded_mip_height, texformat, tlut, tlutfmt); entry->texture->Load(level, mip_width, mip_height, expanded_mip_width, dst_buffer, @@ -1212,6 +1200,8 @@ TextureCacheBase::GetTexture(u32 address, u32 width, u32 height, const TextureFo entry = DoPartialTextureUpdates(iter->second, &texMem[tlutaddr], tlutfmt); + // This should only be needed if the texture was updated, or used GPU decoding. + entry->texture->FinishedRendering(); return entry; } @@ -1379,7 +1369,7 @@ TextureCacheBase::GetTextureFromOverlappingTextures(const TextureLookupInformati // or as a container for overlapping textures, never need to be combined // with other textures TCacheEntry* stitched_entry = - CreateNormalTexture(tex_info, FramebufferManagerBase::GetEFBLayers()); + CreateNormalTexture(tex_info, g_framebuffer_manager->GetEFBLayers()); stitched_entry->may_have_overlapping_textures = false; // It is possible that some of the overlapping textures overlap each other. 
@@ -1540,6 +1530,7 @@ TextureCacheBase::GetTextureFromOverlappingTextures(const TextureLookupInformati return nullptr; } + stitched_entry->texture->FinishedRendering(); return stitched_entry; } @@ -1547,17 +1538,10 @@ TextureCacheBase::TCacheEntry* TextureCacheBase::CreateNormalTexture(const TextureLookupInformation& tex_info, u32 layers) { // create the entry/texture - TextureConfig config; - config.width = tex_info.native_width; - config.height = tex_info.native_height; - config.levels = tex_info.computed_levels; - config.format = AbstractTextureFormat::RGBA8; - config.rendertarget = true; - config.layers = layers; - + const TextureConfig config(tex_info.native_width, tex_info.native_height, + tex_info.computed_levels, layers, 1, AbstractTextureFormat::RGBA8, + AbstractTextureFlag_RenderTarget); TCacheEntry* entry = AllocateCacheEntry(config); - GFX_DEBUGGER_PAUSE_AT(NEXT_NEW_TEXTURE, true); - if (!entry) return nullptr; @@ -1590,15 +1574,15 @@ TextureCacheBase::GetTextureFromMemory(const TextureLookupInformation& tex_info) // banks, and if we're doing an copy we may as well just do the whole thing on the CPU, since // there's no conversion between formats. In the future this could be extended with a separate // shader, however. - bool decode_on_gpu = g_ActiveConfig.UseGPUTextureDecoding() && - g_texture_cache->SupportsGPUTextureDecode(tex_info.full_format.texfmt, - tex_info.full_format.tlutfmt) && - !(tex_info.from_tmem && tex_info.full_format.texfmt == TextureFormat::RGBA8); + const bool decode_on_gpu = + g_ActiveConfig.UseGPUTextureDecoding() && + !(tex_info.from_tmem && tex_info.full_format.texfmt == TextureFormat::RGBA8); // Since it's coming from RAM, it can only have one layer (no stereo). 
TCacheEntry* entry = CreateNormalTexture(tex_info, 1); entry->may_have_overlapping_textures = false; LoadTextureLevelZeroFromMemory(entry, tex_info, decode_on_gpu); + entry->texture->FinishedRendering(); return entry; } @@ -1608,15 +1592,13 @@ void TextureCacheBase::LoadTextureLevelZeroFromMemory(TCacheEntry* entry_to_upda { const u8* tlut = &texMem[tex_info.tlut_address]; - if (decode_on_gpu) - { - u32 row_stride = tex_info.bytes_per_block * (tex_info.expanded_width / tex_info.block_width); - g_texture_cache->DecodeTextureOnGPU( - entry_to_update, 0, tex_info.src_data, tex_info.total_bytes, tex_info.full_format.texfmt, - tex_info.native_width, tex_info.native_height, tex_info.expanded_width, - tex_info.expanded_height, row_stride, tlut, tex_info.full_format.tlutfmt); - } - else + if (!decode_on_gpu || + !DecodeTextureOnGPU(entry_to_update, 0, tex_info.src_data, tex_info.total_bytes, + tex_info.full_format.texfmt, tex_info.native_width, tex_info.native_height, + tex_info.expanded_width, tex_info.expanded_height, + tex_info.bytes_per_block * + (tex_info.expanded_width / tex_info.block_width), + tlut, tex_info.full_format.tlutfmt)) { size_t decoded_texture_size = tex_info.expanded_width * sizeof(u32) * tex_info.expanded_height; CheckTempSize(decoded_texture_size); @@ -1637,12 +1619,12 @@ void TextureCacheBase::LoadTextureLevelZeroFromMemory(TCacheEntry* entry_to_upda } } -TextureCacheBase::CopyFilterCoefficientArray TextureCacheBase::GetRAMCopyFilterCoefficients( - const CopyFilterCoefficients::Values& coefficients) const +EFBCopyFilterCoefficients +TextureCacheBase::GetRAMCopyFilterCoefficients(const CopyFilterCoefficients::Values& coefficients) { // To simplify the backend, we precalculate the three coefficients in common. Coefficients 0, 1 // are for the row above, 2, 3, 4 are for the current pixel, and 5, 6 are for the row below.
- return {{ + return EFBCopyFilterCoefficients{ static_cast(static_cast(coefficients[0]) + static_cast(coefficients[1])) / 64.0f, static_cast(static_cast(coefficients[2]) + static_cast(coefficients[3]) + @@ -1650,31 +1632,31 @@ TextureCacheBase::CopyFilterCoefficientArray TextureCacheBase::GetRAMCopyFilterC 64.0f, static_cast(static_cast(coefficients[5]) + static_cast(coefficients[6])) / 64.0f, - }}; + }; } -TextureCacheBase::CopyFilterCoefficientArray TextureCacheBase::GetVRAMCopyFilterCoefficients( - const CopyFilterCoefficients::Values& coefficients) const +EFBCopyFilterCoefficients +TextureCacheBase::GetVRAMCopyFilterCoefficients(const CopyFilterCoefficients::Values& coefficients) { // If the user disables the copy filter, only apply it to the VRAM copy. // This way games which are sensitive to changes to the RAM copy of the XFB will be unaffected. - CopyFilterCoefficientArray res = GetRAMCopyFilterCoefficients(coefficients); + EFBCopyFilterCoefficients res = GetRAMCopyFilterCoefficients(coefficients); if (!g_ActiveConfig.bDisableCopyFilter) return res; // Disabling the copy filter in options should not ignore the values the game sets completely, // as some games use the filter coefficients to control the brightness of the screen. Instead, // add all coefficients to the middle sample, so the deflicker/vertical filter has no effect. - res[1] += res[0] + res[2]; - res[0] = 0; - res[2] = 0; + res.middle = res.upper + res.middle + res.lower; + res.upper = 0.0f; + res.lower = 0.0f; return res; } -bool TextureCacheBase::NeedsCopyFilterInShader(const CopyFilterCoefficientArray& coefficients) const +bool TextureCacheBase::NeedsCopyFilterInShader(const EFBCopyFilterCoefficients& coefficients) { // If the top/bottom coefficients are zero, no point sampling/blending from these rows. 
- return coefficients[0] != 0 || coefficients[2] != 0; + return coefficients.upper != 0 || coefficients.lower != 0; } void TextureCacheBase::CopyRenderTargetToTexture( @@ -1816,12 +1798,8 @@ void TextureCacheBase::CopyRenderTargetToTexture( if (copy_to_vram) { // create the texture - TextureConfig config; - config.rendertarget = true; - config.width = scaled_tex_w; - config.height = scaled_tex_h; - config.layers = FramebufferManagerBase::GetEFBLayers(); - + const TextureConfig config(scaled_tex_w, scaled_tex_h, 1, g_framebuffer_manager->GetEFBLayers(), + 1, AbstractTextureFormat::RGBA8, AbstractTextureFlag_RenderTarget); entry = AllocateCacheEntry(config); if (entry) { @@ -1866,7 +1844,7 @@ void TextureCacheBase::CopyRenderTargetToTexture( if (copy_to_ram) { - CopyFilterCoefficientArray coefficients = GetRAMCopyFilterCoefficients(filter_coefficients); + EFBCopyFilterCoefficients coefficients = GetRAMCopyFilterCoefficients(filter_coefficients); PEControl::PixelFormat srcFormat = bpmem.zcontrol.pixel_format; EFBCopyParams format(srcFormat, dstFormat, is_depth_copy, isIntensity, NeedsCopyFilterInShader(coefficients)); @@ -2006,11 +1984,6 @@ void TextureCacheBase::FlushEFBCopies() m_pending_efb_copies.clear(); } -TextureConfig TextureCacheBase::GetEncodingTextureConfig() -{ - return TextureConfig(EFB_WIDTH * 4, 1024, 1, 1, 1, AbstractTextureFormat::BGRA8, true); -} - void TextureCacheBase::WriteEFBCopyToRAM(u8* dst_ptr, u32 width, u32 height, u32 stride, std::unique_ptr staging_texture) { @@ -2069,8 +2042,8 @@ std::unique_ptr TextureCacheBase::GetEFBCopyStagingTextu return ptr; } - std::unique_ptr tex = - g_renderer->CreateStagingTexture(StagingTextureType::Readback, GetEncodingTextureConfig()); + std::unique_ptr tex = g_renderer->CreateStagingTexture( + StagingTextureType::Readback, m_efb_encoding_texture->GetConfig()); if (!tex) WARN_LOG(VIDEO, "Failed to create EFB copy staging texture"); @@ -2127,37 +2100,50 @@ void TextureCacheBase::UninitializeXFBMemory(u8* dst, 
u32 stride, u32 bytes_per_ TextureCacheBase::TCacheEntry* TextureCacheBase::AllocateCacheEntry(const TextureConfig& config) { - std::unique_ptr texture = AllocateTexture(config); - - if (!texture) - { + std::optional alloc = AllocateTexture(config); + if (!alloc) return nullptr; - } - TCacheEntry* cacheEntry = new TCacheEntry(std::move(texture)); + + TCacheEntry* cacheEntry = + new TCacheEntry(std::move(alloc->texture), std::move(alloc->framebuffer)); cacheEntry->textures_by_hash_iter = textures_by_hash.end(); cacheEntry->id = last_entry_id++; return cacheEntry; } -std::unique_ptr TextureCacheBase::AllocateTexture(const TextureConfig& config) +std::optional +TextureCacheBase::AllocateTexture(const TextureConfig& config) { TexPool::iterator iter = FindMatchingTextureFromPool(config); - std::unique_ptr entry; if (iter != texture_pool.end()) { - entry = std::move(iter->second.texture); + auto entry = std::move(iter->second); texture_pool.erase(iter); + return std::move(entry); } - else + + std::unique_ptr texture = g_renderer->CreateTexture(config); + if (!texture) { - entry = g_renderer->CreateTexture(config); - if (!entry) - return nullptr; - - INCSTAT(stats.numTexturesCreated); + WARN_LOG(VIDEO, "Failed to allocate a %ux%ux%u texture", config.width, config.height, + config.layers); + return {}; } - return entry; + std::unique_ptr framebuffer; + if (config.IsRenderTarget()) + { + framebuffer = g_renderer->CreateFramebuffer(texture.get(), nullptr); + if (!framebuffer) + { + WARN_LOG(VIDEO, "Failed to allocate a %ux%ux%u framebuffer", config.width, config.height, + config.layers); + return {}; + } + } + + INCSTAT(stats.numTexturesCreated); + return TexPoolEntry(std::move(texture), std::move(framebuffer)); } TextureCacheBase::TexPool::iterator @@ -2170,7 +2156,7 @@ TextureCacheBase::FindMatchingTextureFromPool(const TextureConfig& config) // As non-render-target textures are usually static, this should not matter much. 
auto range = texture_pool.equal_range(config); auto matching_iter = std::find_if(range.first, range.second, [](const auto& iter) { - return iter.first.rendertarget || iter.second.frameCount != FRAMECOUNT_INVALID; + return iter.first.IsRenderTarget() || iter.second.frameCount != FRAMECOUNT_INVALID; }); return matching_iter != range.second ? matching_iter : texture_pool.end(); } @@ -2261,7 +2247,8 @@ TextureCacheBase::InvalidateTexture(TexAddrCache::iterator iter, bool discard_pe } auto config = entry->texture->GetConfig(); - texture_pool.emplace(config, TexPoolEntry(std::move(entry->texture))); + texture_pool.emplace(config, + TexPoolEntry(std::move(entry->texture), std::move(entry->framebuffer))); // Don't delete if there's a pending EFB copy, as we need the TCacheEntry alive. if (!entry->pending_efb_copy) @@ -2270,6 +2257,283 @@ TextureCacheBase::InvalidateTexture(TexAddrCache::iterator iter, bool discard_pe return textures_by_address.erase(iter); } +bool TextureCacheBase::CreateUtilityTextures() +{ + constexpr TextureConfig encoding_texture_config( + EFB_WIDTH * 4, 1024, 1, 1, 1, AbstractTextureFormat::BGRA8, AbstractTextureFlag_RenderTarget); + m_efb_encoding_texture = g_renderer->CreateTexture(encoding_texture_config); + if (!m_efb_encoding_texture) + return false; + + m_efb_encoding_framebuffer = g_renderer->CreateFramebuffer(m_efb_encoding_texture.get(), nullptr); + if (!m_efb_encoding_framebuffer) + return false; + + if (g_ActiveConfig.backend_info.bSupportsGPUTextureDecoding) + { + constexpr TextureConfig decoding_texture_config( + 1024, 1024, 1, 1, 1, AbstractTextureFormat::RGBA8, AbstractTextureFlag_ComputeImage); + m_decoding_texture = g_renderer->CreateTexture(decoding_texture_config); + if (!m_decoding_texture) + return false; + } + + return true; +} + +void TextureCacheBase::CopyEFBToCacheEntry(TCacheEntry* entry, bool is_depth_copy, + const EFBRectangle& src_rect, bool scale_by_half, + EFBCopyFormat dst_format, bool is_intensity, float gamma, + bool 
clamp_top, bool clamp_bottom, + const EFBCopyFilterCoefficients& filter_coefficients) +{ + // Flush EFB pokes first, as they're expected to be included. + g_framebuffer_manager->FlushEFBPokes(); + + // Get the pipeline which we will be using. If the compilation failed, this will be null. + const AbstractPipeline* copy_pipeline = + g_shader_cache->GetEFBCopyToVRAMPipeline(TextureConversionShaderGen::GetShaderUid( + dst_format, is_depth_copy, is_intensity, scale_by_half, + NeedsCopyFilterInShader(filter_coefficients))); + if (!copy_pipeline) + { + WARN_LOG(VIDEO, "Skipping EFB copy to VRAM due to missing pipeline."); + return; + } + + const auto scaled_src_rect = g_renderer->ConvertEFBRectangle(src_rect); + AbstractTexture* src_texture = + is_depth_copy ? g_framebuffer_manager->ResolveEFBDepthTexture(scaled_src_rect) : + g_framebuffer_manager->ResolveEFBColorTexture(scaled_src_rect); + + g_renderer->BeginUtilityDrawing(); + + // Fill uniform buffer. + struct Uniforms + { + float src_left, src_top, src_width, src_height; + float filter_coefficients[3]; + float gamma_rcp; + float clamp_top; + float clamp_bottom; + float pixel_height; + u32 padding; + }; + Uniforms uniforms; + const auto framebuffer_rect = g_renderer->ConvertFramebufferRectangle( + scaled_src_rect, g_framebuffer_manager->GetEFBFramebuffer()); + const float rcp_efb_width = 1.0f / static_cast(g_framebuffer_manager->GetEFBWidth()); + const float rcp_efb_height = 1.0f / static_cast(g_framebuffer_manager->GetEFBHeight()); + uniforms.src_left = framebuffer_rect.left * rcp_efb_width; + uniforms.src_top = framebuffer_rect.top * rcp_efb_height; + uniforms.src_width = framebuffer_rect.GetWidth() * rcp_efb_width; + uniforms.src_height = framebuffer_rect.GetHeight() * rcp_efb_height; + uniforms.filter_coefficients[0] = filter_coefficients.upper; + uniforms.filter_coefficients[1] = filter_coefficients.middle; + uniforms.filter_coefficients[2] = filter_coefficients.lower; + uniforms.gamma_rcp = 1.0f / gamma; + 
uniforms.clamp_top = clamp_top ? framebuffer_rect.top * rcp_efb_height : 0.0f; + uniforms.clamp_bottom = clamp_bottom ? framebuffer_rect.bottom * rcp_efb_height : 1.0f; + uniforms.pixel_height = g_ActiveConfig.bCopyEFBScaled ? rcp_efb_height : 1.0f / EFB_HEIGHT; + uniforms.padding = 0; + g_vertex_manager->UploadUtilityUniforms(&uniforms, sizeof(uniforms)); + + // Use the copy pipeline to render the VRAM copy. + g_renderer->SetAndDiscardFramebuffer(entry->framebuffer.get()); + g_renderer->SetViewportAndScissor(entry->framebuffer->GetRect()); + g_renderer->SetPipeline(copy_pipeline); + g_renderer->SetTexture(0, src_texture); + g_renderer->SetSamplerState(0, scale_by_half ? RenderState::GetLinearSamplerState() : + RenderState::GetPointSamplerState()); + g_renderer->Draw(0, 3); + g_renderer->EndUtilityDrawing(); + entry->texture->FinishedRendering(); +} + +void TextureCacheBase::CopyEFB(AbstractStagingTexture* dst, const EFBCopyParams& params, + u32 native_width, u32 bytes_per_row, u32 num_blocks_y, + u32 memory_stride, const EFBRectangle& src_rect, bool scale_by_half, + float y_scale, float gamma, bool clamp_top, bool clamp_bottom, + const EFBCopyFilterCoefficients& filter_coefficients) +{ + // Flush EFB pokes first, as they're expected to be included. + g_framebuffer_manager->FlushEFBPokes(); + + // Get the pipeline which we will be using. If the compilation failed, this will be null. + const AbstractPipeline* copy_pipeline = g_shader_cache->GetEFBCopyToRAMPipeline(params); + if (!copy_pipeline) + { + WARN_LOG(VIDEO, "Skipping EFB copy to RAM due to missing pipeline."); + return; + } + + const auto scaled_src_rect = g_renderer->ConvertEFBRectangle(src_rect); + AbstractTexture* src_texture = + params.depth ? g_framebuffer_manager->ResolveEFBDepthTexture(scaled_src_rect) : + g_framebuffer_manager->ResolveEFBColorTexture(scaled_src_rect); + + g_renderer->BeginUtilityDrawing(); + + // Fill uniform buffer.
+ struct Uniforms + { + std::array position_uniform; + float y_scale; + float gamma_rcp; + float clamp_top; + float clamp_bottom; + float filter_coefficients[3]; + u32 padding; + }; + Uniforms encoder_params = {}; + const auto framebuffer_rect = g_renderer->ConvertFramebufferRectangle( + scaled_src_rect, g_framebuffer_manager->GetEFBFramebuffer()); + const float rcp_efb_height = 1.0f / static_cast(g_framebuffer_manager->GetEFBHeight()); + encoder_params.position_uniform[0] = scaled_src_rect.left; + encoder_params.position_uniform[1] = scaled_src_rect.top; + encoder_params.position_uniform[2] = static_cast(native_width); + encoder_params.position_uniform[3] = scale_by_half ? 2 : 1; + encoder_params.y_scale = y_scale; + encoder_params.gamma_rcp = 1.0f / gamma; + encoder_params.clamp_top = clamp_top ? framebuffer_rect.top * rcp_efb_height : 0.0f; + encoder_params.clamp_bottom = clamp_bottom ? framebuffer_rect.bottom * rcp_efb_height : 1.0f; + encoder_params.filter_coefficients[0] = filter_coefficients.upper; + encoder_params.filter_coefficients[1] = filter_coefficients.middle; + encoder_params.filter_coefficients[2] = filter_coefficients.lower; + g_vertex_manager->UploadUtilityUniforms(&encoder_params, sizeof(encoder_params)); + + // We also use linear filtering for both box filtering and downsampling higher resolutions to 1x + // TODO: This only produces perfect downsampling for 2x IR, other resolutions will need more + // complex down filtering to average all pixels and produce the correct result. + const bool linear_filter = + (scale_by_half && !params.depth) || g_renderer->GetEFBScale() != 1 || y_scale > 1.0f; + + // Because the shader uses gl_FragCoord and we read it back, we must render to the lower-left. + const u32 render_width = bytes_per_row / sizeof(u32); + const u32 render_height = num_blocks_y; + const auto encode_rect = MathUtil::Rectangle(0, 0, render_width, render_height); + + // Render to GPU texture, and then copy to CPU-accessible texture.
+ g_renderer->SetAndDiscardFramebuffer(m_efb_encoding_framebuffer.get()); + g_renderer->SetViewportAndScissor(encode_rect); + g_renderer->SetPipeline(copy_pipeline); + g_renderer->SetTexture(0, src_texture); + g_renderer->SetSamplerState(0, linear_filter ? RenderState::GetLinearSamplerState() : + RenderState::GetPointSamplerState()); + g_renderer->Draw(0, 3); + dst->CopyFromTexture(m_efb_encoding_texture.get(), encode_rect, 0, 0, encode_rect); + g_renderer->EndUtilityDrawing(); + + // Flush if there's sufficient draws between this copy and the last. + g_vertex_manager->OnEFBCopyToRAM(); +} + +bool TextureCacheBase::ConvertTexture(TCacheEntry* entry, TCacheEntry* unconverted, + const void* palette, TLUTFormat format) +{ + DEBUG_ASSERT(entry->texture->GetConfig().IsRenderTarget() && entry->framebuffer); + if (!g_ActiveConfig.backend_info.bSupportsPaletteConversion) + { + ERROR_LOG(VIDEO, "Backend does not support palette conversion!"); + return false; + } + + g_renderer->BeginUtilityDrawing(); + + const u32 palette_size = unconverted->format == TextureFormat::I4 ? 32 : 512; + u32 texel_buffer_offset; + if (!g_vertex_manager->UploadTexelBuffer(palette, palette_size, + TexelBufferFormat::TEXEL_BUFFER_FORMAT_R16_UINT, + &texel_buffer_offset)) + { + ERROR_LOG(VIDEO, "Texel buffer upload failed"); + return false; + } + + struct Uniforms + { + float multiplier; + u32 texel_buffer_offset; + u32 pad[2]; + }; + static_assert(std::is_standard_layout::value); + Uniforms uniforms = {}; + uniforms.multiplier = unconverted->format == TextureFormat::I4 ? 
15.0f : 255.0f; + uniforms.texel_buffer_offset = texel_buffer_offset; + g_vertex_manager->UploadUtilityUniforms(&uniforms, sizeof(uniforms)); + + g_renderer->SetAndDiscardFramebuffer(entry->framebuffer.get()); + g_renderer->SetViewportAndScissor(entry->texture->GetRect()); + g_renderer->SetPipeline(g_shader_cache->GetPaletteConversionPipeline(format)); + g_renderer->SetTexture(1, unconverted->texture.get()); + g_renderer->SetSamplerState(1, RenderState::GetPointSamplerState()); + g_renderer->Draw(0, 3); + g_renderer->EndUtilityDrawing(); + entry->texture->FinishedRendering(); + return true; +} + +bool TextureCacheBase::DecodeTextureOnGPU(TCacheEntry* entry, u32 dst_level, const u8* data, + u32 data_size, TextureFormat format, u32 width, + u32 height, u32 aligned_width, u32 aligned_height, + u32 row_stride, const u8* palette, + TLUTFormat palette_format) +{ + const auto* info = TextureConversionShaderTiled::GetDecodingShaderInfo(format); + if (!info) + return false; + + const AbstractShader* shader = g_shader_cache->GetTextureDecodingShader(format, palette_format); + if (!shader) + return false; + + // Copy to GPU-visible buffer, aligned to the data type. + const u32 bytes_per_buffer_elem = + VertexManagerBase::GetTexelBufferElementSize(info->buffer_format); + + // Allocate space in stream buffer, and copy texture + palette across. + u32 src_offset = 0, palette_offset = 0; + if (info->palette_size > 0) + { + if (!g_vertex_manager->UploadTexelBuffer(data, data_size, info->buffer_format, &src_offset, + palette, info->palette_size, + TEXEL_BUFFER_FORMAT_R16_UINT, &palette_offset)) + { + return false; + } + } + else + { + if (!g_vertex_manager->UploadTexelBuffer(data, data_size, info->buffer_format, &src_offset)) + return false; + } + + // Set up uniforms. 
+ struct Uniforms + { + u32 dst_width, dst_height; + u32 src_width, src_height; + u32 src_offset, src_row_stride; + u32 palette_offset, unused; + } uniforms = {width, height, aligned_width, + aligned_height, src_offset, row_stride / bytes_per_buffer_elem, + palette_offset}; + g_vertex_manager->UploadUtilityUniforms(&uniforms, sizeof(uniforms)); + g_renderer->SetComputeImageTexture(m_decoding_texture.get(), false, true); + + auto dispatch_groups = + TextureConversionShaderTiled::GetDispatchCount(info, aligned_width, aligned_height); + g_renderer->DispatchComputeShader(shader, dispatch_groups.first, dispatch_groups.second, 1); + + // Copy from decoding texture -> final texture + // This is because we don't want to have to create compute view for every layer + const auto copy_rect = entry->texture->GetConfig().GetMipRect(dst_level); + entry->texture->CopyRectangleFromTexture(m_decoding_texture.get(), copy_rect, 0, 0, copy_rect, 0, + dst_level); + entry->texture->FinishedRendering(); + return true; +} + u32 TextureCacheBase::TCacheEntry::BytesPerRow() const { const u32 blockW = TexDecoder_GetBlockWidthInTexels(format.texfmt); @@ -2362,3 +2626,9 @@ u64 TextureCacheBase::TCacheEntry::CalculateHash() const return temp_hash; } } + +TextureCacheBase::TexPoolEntry::TexPoolEntry(std::unique_ptr tex, + std::unique_ptr fb) + : texture(std::move(tex)), framebuffer(std::move(fb)) +{ +} diff --git a/Source/Core/VideoCommon/TextureCacheBase.h b/Source/Core/VideoCommon/TextureCacheBase.h index fc9c49311b..bd3ceab046 100644 --- a/Source/Core/VideoCommon/TextureCacheBase.h +++ b/Source/Core/VideoCommon/TextureCacheBase.h @@ -23,6 +23,7 @@ #include "VideoCommon/VideoCommon.h" struct VideoConfig; +class AbstractFramebuffer; class AbstractStagingTexture; struct TextureAndTLUTFormat @@ -68,6 +69,14 @@ struct EFBCopyParams bool copy_filter; }; +// Reduced version of the full coefficient array, with a single value for each row. 
+struct EFBCopyFilterCoefficients +{ + float upper; + float middle; + float lower; +}; + struct TextureLookupInformation { u32 address; @@ -110,13 +119,11 @@ private: static const int FRAMECOUNT_INVALID = 0; public: - // Reduced version of the full coefficient array, reduced to a single value for each row. - using CopyFilterCoefficientArray = std::array; - struct TCacheEntry { // common members std::unique_ptr texture; + std::unique_ptr framebuffer; u32 addr; u32 size_in_bytes; u64 base_hash; @@ -157,7 +164,8 @@ public: u32 pending_efb_copy_height = 0; bool pending_efb_copy_invalidated = false; - explicit TCacheEntry(std::unique_ptr tex); + explicit TCacheEntry(std::unique_ptr tex, + std::unique_ptr fb); ~TCacheEntry(); @@ -214,7 +222,10 @@ public: AbstractTextureFormat GetFormat() const { return texture->GetConfig().format; } }; - virtual ~TextureCacheBase(); // needs virtual for DX11 dtor + TextureCacheBase(); + virtual ~TextureCacheBase(); + + bool Initialize(); void OnConfigChanged(VideoConfig& config); @@ -224,15 +235,6 @@ public: void Invalidate(); - virtual void CopyEFB(AbstractStagingTexture* dst, const EFBCopyParams& params, u32 native_width, - u32 bytes_per_row, u32 num_blocks_y, u32 memory_stride, - const EFBRectangle& src_rect, bool scale_by_half, float y_scale, float gamma, - bool clamp_top, bool clamp_bottom, - const CopyFilterCoefficientArray& filter_coefficients) = 0; - - virtual bool CompileShaders() = 0; - virtual void DeleteShaders() = 0; - TCacheEntry* Load(const u32 stage); static void InvalidateAllBindPoints() { valid_bind_points.reset(); } static bool IsValidBindPoint(u32 i) { return valid_bind_points.test(i); } @@ -262,39 +264,39 @@ public: bool clamp_top, bool clamp_bottom, const CopyFilterCoefficients::Values& filter_coefficients); - virtual void ConvertTexture(TCacheEntry* entry, TCacheEntry* unconverted, const void* palette, - TLUTFormat format) = 0; - - // Returns true if the texture data and palette formats are supported by the GPU 
decoder. - virtual bool SupportsGPUTextureDecode(TextureFormat format, TLUTFormat palette_format) - { - return false; - } - - // Decodes the specified data to the GPU texture specified by entry. - // width, height are the size of the image in pixels. - // aligned_width, aligned_height are the size of the image in pixels, aligned to the block size. - // row_stride is the number of bytes for a row of blocks, not pixels. - virtual void DecodeTextureOnGPU(TCacheEntry* entry, u32 dst_level, const u8* data, - size_t data_size, TextureFormat format, u32 width, u32 height, - u32 aligned_width, u32 aligned_height, u32 row_stride, - const u8* palette, TLUTFormat palette_format) - { - } - void ScaleTextureCacheEntryTo(TCacheEntry* entry, u32 new_width, u32 new_height); // Flushes all pending EFB copies to emulated RAM. void FlushEFBCopies(); - // Returns a texture config suitable for drawing a RAM EFB copy into. - static TextureConfig GetEncodingTextureConfig(); + // Returns false if the top/bottom row coefficients are zero. + static bool NeedsCopyFilterInShader(const EFBCopyFilterCoefficients& coefficients); protected: - TextureCacheBase(); + // Applies a palette to an EFB copy/texture. + bool ConvertTexture(TCacheEntry* entry, TCacheEntry* unconverted, const void* palette, + TLUTFormat format); - // Returns false if the top/bottom row coefficients are zero. - bool NeedsCopyFilterInShader(const CopyFilterCoefficientArray& coefficients) const; + // Decodes the specified data to the GPU texture specified by entry. + // Returns false if the configuration is not supported. + // width, height are the size of the image in pixels. + // aligned_width, aligned_height are the size of the image in pixels, aligned to the block size. + // row_stride is the number of bytes for a row of blocks, not pixels. 
+ bool DecodeTextureOnGPU(TCacheEntry* entry, u32 dst_level, const u8* data, u32 data_size, + TextureFormat format, u32 width, u32 height, u32 aligned_width, + u32 aligned_height, u32 row_stride, const u8* palette, + TLUTFormat palette_format); + + virtual void CopyEFB(AbstractStagingTexture* dst, const EFBCopyParams& params, u32 native_width, + u32 bytes_per_row, u32 num_blocks_y, u32 memory_stride, + const EFBRectangle& src_rect, bool scale_by_half, float y_scale, float gamma, + bool clamp_top, bool clamp_bottom, + const EFBCopyFilterCoefficients& filter_coefficients); + virtual void CopyEFBToCacheEntry(TCacheEntry* entry, bool is_depth_copy, + const EFBRectangle& src_rect, bool scale_by_half, + EFBCopyFormat dst_format, bool is_intensity, float gamma, + bool clamp_top, bool clamp_bottom, + const EFBCopyFilterCoefficients& filter_coefficients); alignas(16) u8* temp = nullptr; size_t temp_size = 0; @@ -307,13 +309,17 @@ private: struct TexPoolEntry { std::unique_ptr texture; + std::unique_ptr framebuffer; int frameCount = FRAMECOUNT_INVALID; - TexPoolEntry(std::unique_ptr tex) : texture(std::move(tex)) {} + + TexPoolEntry(std::unique_ptr tex, std::unique_ptr fb); }; using TexAddrCache = std::multimap; using TexHashCache = std::multimap; using TexPool = std::unordered_multimap; + bool CreateUtilityTextures(); + void SetBackupConfig(const VideoConfig& config); TCacheEntry* ApplyPaletteToEntry(TCacheEntry* entry, u8* palette, TLUTFormat tlutfmt); @@ -325,7 +331,7 @@ private: void CheckTempSize(size_t required_size); TCacheEntry* AllocateCacheEntry(const TextureConfig& config); - std::unique_ptr AllocateTexture(const TextureConfig& config); + std::optional AllocateTexture(const TextureConfig& config); TexPool::iterator FindMatchingTextureFromPool(const TextureConfig& config); TexAddrCache::iterator GetTexCacheIter(TCacheEntry* entry); @@ -334,12 +340,6 @@ private: std::pair FindOverlappingTextures(u32 addr, u32 size_in_bytes); - virtual void 
CopyEFBToCacheEntry(TCacheEntry* entry, bool is_depth_copy, - const EFBRectangle& src_rect, bool scale_by_half, - EFBCopyFormat dst_format, bool is_intensity, float gamma, - bool clamp_top, bool clamp_bottom, - const CopyFilterCoefficientArray& filter_coefficients) = 0; - // Removes and unlinks texture from texture cache and returns it to the pool TexAddrCache::iterator InvalidateTexture(TexAddrCache::iterator t_iter, bool discard_pending_efb_copy = false); @@ -347,10 +347,10 @@ private: void UninitializeXFBMemory(u8* dst, u32 stride, u32 bytes_per_row, u32 num_blocks_y); // Precomputing the coefficients for the previous, current, and next lines for the copy filter. - CopyFilterCoefficientArray - GetRAMCopyFilterCoefficients(const CopyFilterCoefficients::Values& coefficients) const; - CopyFilterCoefficientArray - GetVRAMCopyFilterCoefficients(const CopyFilterCoefficients::Values& coefficients) const; + static EFBCopyFilterCoefficients + GetRAMCopyFilterCoefficients(const CopyFilterCoefficients::Values& coefficients); + static EFBCopyFilterCoefficients + GetVRAMCopyFilterCoefficients(const CopyFilterCoefficients::Values& coefficients); // Flushes a pending EFB copy to RAM from the host to the guest RAM. void WriteEFBCopyToRAM(u8* dst_ptr, u32 width, u32 height, u32 stride, @@ -385,6 +385,13 @@ private: }; BackupConfig backup_config = {}; + // Encoding texture used for EFB copies to RAM. + std::unique_ptr m_efb_encoding_texture; + std::unique_ptr m_efb_encoding_framebuffer; + + // Decoding texture used for GPU texture decoding. + std::unique_ptr m_decoding_texture; + // Pool of readback textures used for deferred EFB copies. 
std::vector> m_efb_copy_staging_texture_pool; diff --git a/Source/Core/VideoCommon/TextureConfig.cpp b/Source/Core/VideoCommon/TextureConfig.cpp index 0407576af6..98a9dbc4f3 100644 --- a/Source/Core/VideoCommon/TextureConfig.cpp +++ b/Source/Core/VideoCommon/TextureConfig.cpp @@ -9,8 +9,8 @@ bool TextureConfig::operator==(const TextureConfig& o) const { - return std::tie(width, height, levels, layers, samples, format, rendertarget) == - std::tie(o.width, o.height, o.levels, o.layers, o.samples, o.format, o.rendertarget); + return std::tie(width, height, levels, layers, samples, format, flags) == + std::tie(o.width, o.height, o.levels, o.layers, o.samples, o.format, o.flags); } bool TextureConfig::operator!=(const TextureConfig& o) const @@ -38,8 +38,3 @@ size_t TextureConfig::GetMipStride(u32 level) const { return AbstractTexture::CalculateStrideForFormat(format, std::max(width >> level, 1u)); } - -bool TextureConfig::IsMultisampled() const -{ - return samples > 1; -} diff --git a/Source/Core/VideoCommon/TextureConfig.h b/Source/Core/VideoCommon/TextureConfig.h index a2358ff3b0..ee52537285 100644 --- a/Source/Core/VideoCommon/TextureConfig.h +++ b/Source/Core/VideoCommon/TextureConfig.h @@ -34,13 +34,19 @@ enum class StagingTextureType Mutable // Optimize for CPU reads, GPU writes, allow slow CPU reads }; +enum AbstractTextureFlag : u32 +{ + AbstractTextureFlag_RenderTarget = (1 << 0), // Texture is used as a framebuffer. + AbstractTextureFlag_ComputeImage = (1 << 1), // Texture is used as a compute image. 
+}; + struct TextureConfig { constexpr TextureConfig() = default; constexpr TextureConfig(u32 width_, u32 height_, u32 levels_, u32 layers_, u32 samples_, - AbstractTextureFormat format_, bool rendertarget_) + AbstractTextureFormat format_, u32 flags_) : width(width_), height(height_), levels(levels_), layers(layers_), samples(samples_), - format(format_), rendertarget(rendertarget_) + format(format_), flags(flags_) { } @@ -50,7 +56,10 @@ struct TextureConfig MathUtil::Rectangle GetMipRect(u32 level) const; size_t GetStride() const; size_t GetMipStride(u32 level) const; - bool IsMultisampled() const; + + bool IsMultisampled() const { return samples > 1; } + bool IsRenderTarget() const { return (flags & AbstractTextureFlag_RenderTarget) != 0; } + bool IsComputeImage() const { return (flags & AbstractTextureFlag_ComputeImage) != 0; } u32 width = 0; u32 height = 0; @@ -58,7 +67,7 @@ struct TextureConfig u32 layers = 1; u32 samples = 1; AbstractTextureFormat format = AbstractTextureFormat::RGBA8; - bool rendertarget = false; + u32 flags = 0; }; namespace std @@ -71,7 +80,7 @@ struct hash result_type operator()(const argument_type& c) const noexcept { - const u64 id = static_cast(c.rendertarget) << 63 | static_cast(c.format) << 50 | + const u64 id = static_cast(c.flags) << 58 | static_cast(c.format) << 50 | static_cast(c.layers) << 48 | static_cast(c.levels) << 32 | static_cast(c.height) << 16 | static_cast(c.width); return std::hash{}(id); diff --git a/Source/Core/VideoCommon/TextureConversionShader.cpp b/Source/Core/VideoCommon/TextureConversionShader.cpp index e57cbdf603..7359c2aea3 100644 --- a/Source/Core/VideoCommon/TextureConversionShader.cpp +++ b/Source/Core/VideoCommon/TextureConversionShader.cpp @@ -15,7 +15,9 @@ #include "VideoCommon/RenderBase.h" #include "VideoCommon/TextureCacheBase.h" #include "VideoCommon/TextureConversionShader.h" +#include "VideoCommon/VertexManagerBase.h" #include "VideoCommon/VideoCommon.h" +#include "VideoCommon/VideoConfig.h" 
#define WRITE p += sprintf @@ -59,21 +61,10 @@ u16 GetEncodedSampleCount(EFBCopyFormat format) static void WriteHeader(char*& p, APIType ApiType) { - if (ApiType == APIType::OpenGL) + if (ApiType == APIType::OpenGL || ApiType == APIType::Vulkan) { // left, top, of source rectangle within source texture // width of the destination rectangle, scale_factor (1 or 2) - WRITE(p, "uniform int4 position;\n"); - WRITE(p, "uniform float y_scale;\n"); - WRITE(p, "uniform float gamma_rcp;\n"); - WRITE(p, "uniform float2 clamp_tb;\n"); - WRITE(p, "uniform float3 filter_coefficients;\n"); - WRITE(p, "#define samp0 samp9\n"); - WRITE(p, "SAMPLER_BINDING(9) uniform sampler2DArray samp0;\n"); - WRITE(p, "FRAGMENT_OUTPUT_LOCATION(0) out vec4 ocol0;\n"); - } - else if (ApiType == APIType::Vulkan) - { WRITE(p, "UBO_BINDING(std140, 1) uniform PSBlock {\n"); WRITE(p, " int4 position;\n"); WRITE(p, " float y_scale;\n"); @@ -81,8 +72,9 @@ static void WriteHeader(char*& p, APIType ApiType) WRITE(p, " float2 clamp_tb;\n"); WRITE(p, " float3 filter_coefficients;\n"); WRITE(p, "};\n"); + WRITE(p, "VARYING_LOCATION(0) in float3 v_tex0;\n"); WRITE(p, "SAMPLER_BINDING(0) uniform sampler2DArray samp0;\n"); - WRITE(p, "FRAGMENT_OUTPUT_LOCATION(0) out vec4 ocol0;\n"); + WRITE(p, "FRAGMENT_OUTPUT_LOCATION(0) out float4 ocol0;\n"); } else // D3D { @@ -147,7 +139,7 @@ static void WriteSampleFunction(char*& p, const EFBCopyParams& params, APIType A else { // Handle D3D depth inversion. 
- if (ApiType == APIType::D3D || ApiType == APIType::Vulkan) + if (!g_ActiveConfig.backend_info.bSupportsReversedDepthRange) WRITE(p, "1.0 - ("); else WRITE(p, "("); @@ -225,7 +217,9 @@ static void WriteSwizzler(char*& p, const EFBCopyParams& params, EFBCopyFormat f else // D3D { WRITE(p, "void main(\n"); - WRITE(p, " out float4 ocol0 : SV_Target, in float4 rawpos : SV_Position)\n"); + WRITE(p, " in float3 v_tex0 : TEXCOORD0,\n"); + WRITE(p, " in float4 rawpos : SV_Position,\n"); + WRITE(p, " out float4 ocol0 : SV_Target)\n"); WRITE(p, "{\n" " int2 sampleUv;\n" " int2 uv1 = int2(rawpos.xy);\n"); @@ -846,38 +840,65 @@ const char* GenerateEncodingShader(const EFBCopyParams& params, APIType api_type // NOTE: In these uniforms, a row refers to a row of blocks, not texels. static const char decoding_shader_header[] = R"( -#ifdef VULKAN +#if defined(PALETTE_FORMAT_IA8) || defined(PALETTE_FORMAT_RGB565) || defined(PALETTE_FORMAT_RGB5A3) +#define HAS_PALETTE 1 +#endif -layout(std140, push_constant) uniform PushConstants { - uvec2 dst_size; - uvec2 src_size; - uint src_offset; - uint src_row_stride; - uint palette_offset; -} push_constants; -#define u_dst_size (push_constants.dst_size) -#define u_src_size (push_constants.src_size) -#define u_src_offset (push_constants.src_offset) -#define u_src_row_stride (push_constants.src_row_stride) -#define u_palette_offset (push_constants.palette_offset) +#ifdef API_D3D +cbuffer UBO : register(b0) { +#else +UBO_BINDING(std140, 1) uniform UBO { +#endif + uint2 u_dst_size; + uint2 u_src_size; + uint u_src_offset; + uint u_src_row_stride; + uint u_palette_offset; +}; -TEXEL_BUFFER_BINDING(0) uniform usamplerBuffer s_input_buffer; -TEXEL_BUFFER_BINDING(1) uniform usamplerBuffer s_palette_buffer; +#ifdef API_D3D -IMAGE_BINDING(rgba8, 0) uniform writeonly image2DArray output_image; +Buffer s_input_buffer : register(t0); +#ifdef HAS_PALETTE +Buffer s_palette_buffer : register(t1); +#endif + +RWTexture2DArray output_image : register(u0); + 
+// Helpers for reading/writing. +#define texelFetch(buffer, pos) buffer.Load(pos) +#define imageStore(image, coords, value) image[coords] = value +#define GROUP_MEMORY_BARRIER_WITH_SYNC GroupMemoryBarrierWithGroupSync(); +#define GROUP_SHARED groupshared + +#define DEFINE_MAIN(lx, ly) \ + [numthreads(lx, ly, 1)] \ + void main(uint3 gl_WorkGroupID : SV_GroupId, \ + uint3 gl_LocalInvocationID : SV_GroupThreadID, \ + uint3 gl_GlobalInvocationID : SV_DispatchThreadID) + +uint bitfieldExtract(uint val, int off, int size) +{ + // This built-in function is only support in OpenGL 4.0+ and ES 3.1+\n" + // Microsoft's HLSL compiler automatically optimises this to a bitfield extract instruction. + uint mask = uint((1 << size) - 1); + return uint(val >> off) & mask; +} #else -uniform uvec2 u_dst_size; -uniform uvec2 u_src_size; -uniform uint u_src_offset; -uniform uint u_src_row_stride; -uniform uint u_palette_offset; +TEXEL_BUFFER_BINDING(0) uniform usamplerBuffer s_input_buffer; +#ifdef HAS_PALETTE +TEXEL_BUFFER_BINDING(1) uniform usamplerBuffer s_palette_buffer; +#endif +IMAGE_BINDING(rgba8, 0) uniform writeonly image2DArray output_image; -SAMPLER_BINDING(9) uniform usamplerBuffer s_input_buffer; -SAMPLER_BINDING(10) uniform usamplerBuffer s_palette_buffer; +#define GROUP_MEMORY_BARRIER_WITH_SYNC memoryBarrierShared(); barrier(); +#define GROUP_SHARED shared -layout(rgba8, binding = 0) uniform writeonly image2DArray output_image; +#define DEFINE_MAIN(lx, ly) \ + layout(local_size_x = lx, local_size_y = ly) in; \ + void main() #endif @@ -908,10 +929,10 @@ uint Convert6To8(uint v) return (v << 2) | (v >> 4); } -uint GetTiledTexelOffset(uvec2 block_size, uvec2 coords) +uint GetTiledTexelOffset(uint2 block_size, uint2 coords) { - uvec2 block = coords / block_size; - uvec2 offset = coords % block_size; + uint2 block = coords / block_size; + uint2 offset = coords % block_size; uint buffer_pos = u_src_offset; buffer_pos += block.y * u_src_row_stride; buffer_pos += block.x * 
(block_size.x * block_size.y); @@ -920,16 +941,16 @@ uint GetTiledTexelOffset(uvec2 block_size, uvec2 coords) return buffer_pos; } -uvec4 GetPaletteColor(uint index) +uint4 GetPaletteColor(uint index) { // Fetch and swap BE to LE. uint val = Swap16(texelFetch(s_palette_buffer, int(u_palette_offset + index)).x); - uvec4 color; + uint4 color; #if defined(PALETTE_FORMAT_IA8) uint a = bitfieldExtract(val, 8, 8); uint i = bitfieldExtract(val, 0, 8); - color = uvec4(i, i, i, a); + color = uint4(i, i, i, a); #elif defined(PALETTE_FORMAT_RGB565) color.x = Convert5To8(bitfieldExtract(val, 11, 5)); color.y = Convert6To8(bitfieldExtract(val, 5, 6)); @@ -953,29 +974,27 @@ uvec4 GetPaletteColor(uint index) } #else // Not used. - color = uvec4(0, 0, 0, 0); + color = uint4(0, 0, 0, 0); #endif return color; } -vec4 GetPaletteColorNormalized(uint index) +float4 GetPaletteColorNormalized(uint index) { - uvec4 color = GetPaletteColor(index); - return vec4(color) / 255.0; + uint4 color = GetPaletteColor(index); + return float4(color) / 255.0; } )"; static const std::map s_decoding_shader_info{ {TextureFormat::I4, - {BUFFER_FORMAT_R8_UINT, 0, 8, 8, false, + {TEXEL_BUFFER_FORMAT_R8_UINT, 0, 8, 8, false, R"( - layout(local_size_x = 8, local_size_y = 8) in; - - void main() + DEFINE_MAIN(8, 8) { - uvec2 coords = gl_GlobalInvocationID.xy; + uint2 coords = gl_GlobalInvocationID.xy; // Tiled in 8x8 blocks, 4 bits per pixel // We need to do the tiling manually here because the texel size is smaller than @@ -996,108 +1015,98 @@ static const std::map s_decoding_shader_info{ else i = Convert4To8((val & 0x0Fu)); - uvec4 color = uvec4(i, i, i, i); - vec4 norm_color = vec4(color) / 255.0; + uint4 color = uint4(i, i, i, i); + float4 norm_color = float4(color) / 255.0; - imageStore(output_image, ivec3(ivec2(coords), 0), norm_color); + imageStore(output_image, int3(int2(coords), 0), norm_color); } )"}}, {TextureFormat::IA4, - {BUFFER_FORMAT_R8_UINT, 0, 8, 8, false, + {TEXEL_BUFFER_FORMAT_R8_UINT, 0, 8, 
8, false, R"( - layout(local_size_x = 8, local_size_y = 8) in; - - void main() + DEFINE_MAIN(8, 8) { - uvec2 coords = gl_GlobalInvocationID.xy; + uint2 coords = gl_GlobalInvocationID.xy; // Tiled in 8x4 blocks, 8 bits per pixel - uint buffer_pos = GetTiledTexelOffset(uvec2(8u, 4u), coords); + uint buffer_pos = GetTiledTexelOffset(uint2(8u, 4u), coords); uint val = texelFetch(s_input_buffer, int(buffer_pos)).x; uint i = Convert4To8((val & 0x0Fu)); uint a = Convert4To8((val >> 4)); - uvec4 color = uvec4(i, i, i, a); - vec4 norm_color = vec4(color) / 255.0; + uint4 color = uint4(i, i, i, a); + float4 norm_color = float4(color) / 255.0; - imageStore(output_image, ivec3(ivec2(coords), 0), norm_color); + imageStore(output_image, int3(int2(coords), 0), norm_color); } )"}}, {TextureFormat::I8, - {BUFFER_FORMAT_R8_UINT, 0, 8, 8, false, + {TEXEL_BUFFER_FORMAT_R8_UINT, 0, 8, 8, false, R"( - layout(local_size_x = 8, local_size_y = 8) in; - - void main() + DEFINE_MAIN(8, 8) { - uvec2 coords = gl_GlobalInvocationID.xy; + uint2 coords = gl_GlobalInvocationID.xy; // Tiled in 8x4 blocks, 8 bits per pixel - uint buffer_pos = GetTiledTexelOffset(uvec2(8u, 4u), coords); + uint buffer_pos = GetTiledTexelOffset(uint2(8u, 4u), coords); uint i = texelFetch(s_input_buffer, int(buffer_pos)).x; - uvec4 color = uvec4(i, i, i, i); - vec4 norm_color = vec4(color) / 255.0; + uint4 color = uint4(i, i, i, i); + float4 norm_color = float4(color) / 255.0; - imageStore(output_image, ivec3(ivec2(coords), 0), norm_color); + imageStore(output_image, int3(int2(coords), 0), norm_color); } )"}}, {TextureFormat::IA8, - {BUFFER_FORMAT_R16_UINT, 0, 8, 8, false, + {TEXEL_BUFFER_FORMAT_R16_UINT, 0, 8, 8, false, R"( - layout(local_size_x = 8, local_size_y = 8) in; - - void main() + DEFINE_MAIN(8, 8) { - uvec2 coords = gl_GlobalInvocationID.xy; + uint2 coords = gl_GlobalInvocationID.xy; // Tiled in 4x4 blocks, 16 bits per pixel - uint buffer_pos = GetTiledTexelOffset(uvec2(4u, 4u), coords); + uint buffer_pos = 
GetTiledTexelOffset(uint2(4u, 4u), coords); uint val = texelFetch(s_input_buffer, int(buffer_pos)).x; uint a = (val & 0xFFu); uint i = (val >> 8); - uvec4 color = uvec4(i, i, i, a); - vec4 norm_color = vec4(color) / 255.0; - imageStore(output_image, ivec3(ivec2(coords), 0), norm_color); + uint4 color = uint4(i, i, i, a); + float4 norm_color = float4(color) / 255.0; + imageStore(output_image, int3(int2(coords), 0), norm_color); } )"}}, {TextureFormat::RGB565, - {BUFFER_FORMAT_R16_UINT, 0, 8, 8, false, + {TEXEL_BUFFER_FORMAT_R16_UINT, 0, 8, 8, false, R"( - layout(local_size_x = 8, local_size_y = 8) in; - - void main() + DEFINE_MAIN(8, 8) { - uvec2 coords = gl_GlobalInvocationID.xy; + uint2 coords = gl_GlobalInvocationID.xy; // Tiled in 4x4 blocks - uint buffer_pos = GetTiledTexelOffset(uvec2(4u, 4u), coords); + uint buffer_pos = GetTiledTexelOffset(uint2(4u, 4u), coords); uint val = Swap16(texelFetch(s_input_buffer, int(buffer_pos)).x); - uvec4 color; + uint4 color; color.x = Convert5To8(bitfieldExtract(val, 11, 5)); color.y = Convert6To8(bitfieldExtract(val, 5, 6)); color.z = Convert5To8(bitfieldExtract(val, 0, 5)); color.a = 255u; - vec4 norm_color = vec4(color) / 255.0; - imageStore(output_image, ivec3(ivec2(coords), 0), norm_color); + float4 norm_color = float4(color) / 255.0; + imageStore(output_image, int3(int2(coords), 0), norm_color); } )"}}, {TextureFormat::RGB5A3, - {BUFFER_FORMAT_R16_UINT, 0, 8, 8, false, + {TEXEL_BUFFER_FORMAT_R16_UINT, 0, 8, 8, false, R"( - layout(local_size_x = 8, local_size_y = 8) in; - - void main() + DEFINE_MAIN(8, 8) { - uvec2 coords = gl_GlobalInvocationID.xy; + uint2 coords = gl_GlobalInvocationID.xy; // Tiled in 4x4 blocks - uint buffer_pos = GetTiledTexelOffset(uvec2(4u, 4u), coords); + uint buffer_pos = GetTiledTexelOffset(uint2(4u, 4u), coords); uint val = Swap16(texelFetch(s_input_buffer, int(buffer_pos)).x); - uvec4 color; + uint4 color; if ((val & 0x8000u) != 0u) { color.x = Convert5To8(bitfieldExtract(val, 10, 5)); @@ 
-1113,19 +1122,17 @@ static const std::map s_decoding_shader_info{ color.b = Convert4To8(bitfieldExtract(val, 0, 4)); } - vec4 norm_color = vec4(color) / 255.0; - imageStore(output_image, ivec3(ivec2(coords), 0), norm_color); + float4 norm_color = float4(color) / 255.0; + imageStore(output_image, int3(int2(coords), 0), norm_color); } )"}}, {TextureFormat::RGBA8, - {BUFFER_FORMAT_R16_UINT, 0, 8, 8, false, + {TEXEL_BUFFER_FORMAT_R16_UINT, 0, 8, 8, false, R"( - layout(local_size_x = 8, local_size_y = 8) in; - - void main() + DEFINE_MAIN(8, 8) { - uvec2 coords = gl_GlobalInvocationID.xy; + uint2 coords = gl_GlobalInvocationID.xy; // Tiled in 4x4 blocks // We can't use the normal calculation function, as these are packed as the AR channels @@ -1144,18 +1151,18 @@ static const std::map s_decoding_shader_info{ uint val1 = texelFetch(s_input_buffer, int(buffer_pos + 0u)).x; uint val2 = texelFetch(s_input_buffer, int(buffer_pos + 16u)).x; - uvec4 color; + uint4 color; color.a = (val1 & 0xFFu); color.r = (val1 >> 8); color.g = (val2 & 0xFFu); color.b = (val2 >> 8); - vec4 norm_color = vec4(color) / 255.0; - imageStore(output_image, ivec3(ivec2(coords), 0), norm_color); + float4 norm_color = float4(color) / 255.0; + imageStore(output_image, int3(int2(coords), 0), norm_color); } )"}}, {TextureFormat::CMPR, - {BUFFER_FORMAT_R32G32_UINT, 0, 64, 1, true, + {TEXEL_BUFFER_FORMAT_R32G32_UINT, 0, 64, 1, true, R"( // In the compute version of this decoder, we flatten the blocks to a one-dimension array. // Each group is subdivided into 16, and the first thread in each group fetches the DXT data. 
@@ -1167,17 +1174,15 @@ static const std::map s_decoding_shader_info{ #define BLOCK_SIZE (BLOCK_SIZE_X * BLOCK_SIZE_Y) #define BLOCKS_PER_GROUP (GROUP_SIZE / BLOCK_SIZE) - layout(local_size_x = GROUP_SIZE, local_size_y = 1) in; - - shared uvec2 shared_temp[BLOCKS_PER_GROUP]; - uint DXTBlend(uint v1, uint v2) { // 3/8 blend, which is close to 1/3 return ((v1 * 3u + v2 * 5u) >> 3); } - void main() + GROUP_SHARED uint2 shared_temp[BLOCKS_PER_GROUP]; + + DEFINE_MAIN(GROUP_SIZE, 8) { uint local_thread_id = gl_LocalInvocationID.x; uint block_in_group = local_thread_id / BLOCK_SIZE; @@ -1188,7 +1193,7 @@ static const std::map s_decoding_shader_info{ // from the block size of the overall texture (4 vs 8). We can however use a multiply and // subtraction to avoid the modulo for calculating the block's X coordinate. uint blocks_wide = u_src_size.x / BLOCK_SIZE_X; - uvec2 block_coords; + uint2 block_coords; block_coords.y = block_index / blocks_wide; block_coords.x = block_index - (block_coords.y * blocks_wide); @@ -1196,8 +1201,8 @@ static const std::map s_decoding_shader_info{ if (thread_in_block == 0u) { // Calculate tiled block coordinates. - uvec2 tile_block_coords = block_coords / 2u; - uvec2 subtile_block_coords = block_coords % 2u; + uint2 tile_block_coords = block_coords / 2u; + uint2 subtile_block_coords = block_coords % 2u; uint buffer_pos = u_src_offset; buffer_pos += tile_block_coords.y * u_src_row_stride; buffer_pos += tile_block_coords.x * 4u; @@ -1205,16 +1210,15 @@ static const std::map s_decoding_shader_info{ buffer_pos += subtile_block_coords.x; // Read the entire DXT block to shared memory. - uvec2 raw_data = texelFetch(s_input_buffer, int(buffer_pos)).xy; + uint2 raw_data = texelFetch(s_input_buffer, int(buffer_pos)).xy; shared_temp[block_in_group] = raw_data; } // Ensure store is completed before the remaining threads in the block continue. - memoryBarrierShared(); - barrier(); + GROUP_MEMORY_BARRIER_WITH_SYNC; // Unpack colors and swap BE to LE. 
- uvec2 raw_data = shared_temp[block_in_group]; + uint2 raw_data = shared_temp[block_in_group]; uint swapped = ((raw_data.x & 0xFF00FF00u) >> 8) | ((raw_data.x & 0x00FF00FFu) << 8); uint c1 = swapped & 0xFFFFu; uint c2 = swapped >> 16; @@ -1230,18 +1234,18 @@ static const std::map s_decoding_shader_info{ // Determine the four colors the block can use. // It's quicker to just precalculate all four colors rather than branching on the index. // NOTE: These must be masked with 0xFF. This is done at the normalization stage below. - uvec4 color0, color1, color2, color3; - color0 = uvec4(red1, green1, blue1, 255u); - color1 = uvec4(red2, green2, blue2, 255u); + uint4 color0, color1, color2, color3; + color0 = uint4(red1, green1, blue1, 255u); + color1 = uint4(red2, green2, blue2, 255u); if (c1 > c2) { - color2 = uvec4(DXTBlend(red2, red1), DXTBlend(green2, green1), DXTBlend(blue2, blue1), 255u); - color3 = uvec4(DXTBlend(red1, red2), DXTBlend(green1, green2), DXTBlend(blue1, blue2), 255u); + color2 = uint4(DXTBlend(red2, red1), DXTBlend(green2, green1), DXTBlend(blue2, blue1), 255u); + color3 = uint4(DXTBlend(red1, red2), DXTBlend(green1, green2), DXTBlend(blue1, blue2), 255u); } else { - color2 = uvec4((red1 + red2) / 2u, (green1 + green2) / 2u, (blue1 + blue2) / 2u, 255u); - color3 = uvec4((red1 + red2) / 2u, (green1 + green2) / 2u, (blue1 + blue2) / 2u, 0u); + color2 = uint4((red1 + red2) / 2u, (green1 + green2) / 2u, (blue1 + blue2) / 2u, 255u); + color3 = uint4((red1 + red2) / 2u, (green1 + green2) / 2u, (blue1 + blue2) / 2u, 0u); } // Calculate the texel coordinates that we will write to. @@ -1257,7 +1261,7 @@ static const std::map s_decoding_shader_info{ // Select the un-normalized color from the precalculated color array. // Using a switch statement here removes the need for dynamic indexing of an array. 
- uvec4 color; + uint4 color; switch (index) { case 0u: color = color0; break; @@ -1268,19 +1272,17 @@ static const std::map s_decoding_shader_info{ } // Normalize and write to the output image. - vec4 norm_color = vec4(color & 0xFFu) / 255.0; - imageStore(output_image, ivec3(ivec2(uvec2(global_x, global_y)), 0), norm_color); + float4 norm_color = float4(color & 0xFFu) / 255.0; + imageStore(output_image, int3(int2(uint2(global_x, global_y)), 0), norm_color); } )"}}, {TextureFormat::C4, - {BUFFER_FORMAT_R8_UINT, static_cast(TexDecoder_GetPaletteSize(TextureFormat::C4)), 8, 8, - false, + {TEXEL_BUFFER_FORMAT_R8_UINT, static_cast(TexDecoder_GetPaletteSize(TextureFormat::C4)), + 8, 8, false, R"( - layout(local_size_x = 8, local_size_y = 8) in; - - void main() + DEFINE_MAIN(8, 8) { - uvec2 coords = gl_GlobalInvocationID.xy; + uint2 coords = gl_GlobalInvocationID.xy; // Tiled in 8x8 blocks, 4 bits per pixel // We need to do the tiling manually here because the texel size is smaller than @@ -1296,58 +1298,52 @@ static const std::map s_decoding_shader_info{ // Select high nibble for odd texels, low for even. uint val = texelFetch(s_input_buffer, int(buffer_pos)).x; uint index = ((coords.x & 1u) == 0u) ? 
(val >> 4) : (val & 0x0Fu); - vec4 norm_color = GetPaletteColorNormalized(index); - imageStore(output_image, ivec3(ivec2(coords), 0), norm_color); + float4 norm_color = GetPaletteColorNormalized(index); + imageStore(output_image, int3(int2(coords), 0), norm_color); } )"}}, {TextureFormat::C8, - {BUFFER_FORMAT_R8_UINT, static_cast(TexDecoder_GetPaletteSize(TextureFormat::C8)), 8, 8, - false, + {TEXEL_BUFFER_FORMAT_R8_UINT, static_cast(TexDecoder_GetPaletteSize(TextureFormat::C8)), + 8, 8, false, R"( - layout(local_size_x = 8, local_size_y = 8) in; - - void main() + DEFINE_MAIN(8, 8) { - uvec2 coords = gl_GlobalInvocationID.xy; + uint2 coords = gl_GlobalInvocationID.xy; // Tiled in 8x4 blocks, 8 bits per pixel - uint buffer_pos = GetTiledTexelOffset(uvec2(8u, 4u), coords); + uint buffer_pos = GetTiledTexelOffset(uint2(8u, 4u), coords); uint index = texelFetch(s_input_buffer, int(buffer_pos)).x; - vec4 norm_color = GetPaletteColorNormalized(index); - imageStore(output_image, ivec3(ivec2(coords), 0), norm_color); + float4 norm_color = GetPaletteColorNormalized(index); + imageStore(output_image, int3(int2(coords), 0), norm_color); } )"}}, {TextureFormat::C14X2, - {BUFFER_FORMAT_R16_UINT, static_cast(TexDecoder_GetPaletteSize(TextureFormat::C14X2)), 8, - 8, false, + {TEXEL_BUFFER_FORMAT_R16_UINT, + static_cast(TexDecoder_GetPaletteSize(TextureFormat::C14X2)), 8, 8, false, R"( - layout(local_size_x = 8, local_size_y = 8) in; - - void main() + DEFINE_MAIN(8, 8) { - uvec2 coords = gl_GlobalInvocationID.xy; + uint2 coords = gl_GlobalInvocationID.xy; // Tiled in 4x4 blocks, 16 bits per pixel - uint buffer_pos = GetTiledTexelOffset(uvec2(4u, 4u), coords); + uint buffer_pos = GetTiledTexelOffset(uint2(4u, 4u), coords); uint index = Swap16(texelFetch(s_input_buffer, int(buffer_pos)).x) & 0x3FFFu; - vec4 norm_color = GetPaletteColorNormalized(index); - imageStore(output_image, ivec3(ivec2(coords), 0), norm_color); + float4 norm_color = GetPaletteColorNormalized(index); + 
imageStore(output_image, int3(int2(coords), 0), norm_color); } )"}}, // We do the inverse BT.601 conversion for YCbCr to RGB // http://www.equasys.de/colorconversion.html#YCbCr-RGBColorFormatConversion {TextureFormat::XFB, - {BUFFER_FORMAT_RGBA8_UINT, 0, 8, 8, false, + {TEXEL_BUFFER_FORMAT_RGBA8_UINT, 0, 8, 8, false, R"( - layout(local_size_x = 8, local_size_y = 8) in; - - void main() + DEFINE_MAIN(8, 8) { - uvec2 uv = gl_GlobalInvocationID.xy; + uint2 uv = gl_GlobalInvocationID.xy; int buffer_pos = int(u_src_offset + (uv.y * u_src_row_stride) + (uv.x / 2u)); - vec4 yuyv = vec4(texelFetch(s_input_buffer, buffer_pos)); + float4 yuyv = float4(texelFetch(s_input_buffer, buffer_pos)); float y = mix(yuyv.r, yuyv.b, (uv.x & 1u) == 1u); @@ -1355,33 +1351,21 @@ static const std::map s_decoding_shader_info{ float uComp = yuyv.g - 128.0; float vComp = yuyv.a - 128.0; - vec4 rgb = vec4(yComp + (1.596 * vComp), + float4 rgb = float4(yComp + (1.596 * vComp), yComp - (0.813 * vComp) - (0.391 * uComp), yComp + (2.018 * uComp), 255.0); - vec4 rgba_norm = rgb / 255.0; - imageStore(output_image, ivec3(ivec2(uv), 0), rgba_norm); + float4 rgba_norm = rgb / 255.0; + imageStore(output_image, int3(int2(uv), 0), rgba_norm); } )"}}}; -static const std::array s_buffer_bytes_per_texel = {{ - 1, // BUFFER_FORMAT_R8_UINT - 2, // BUFFER_FORMAT_R16_UINT - 8, // BUFFER_FORMAT_R32G32_UINT - 4, // BUFFER_FORMAT_RGBA8_UINT -}}; - const DecodingShaderInfo* GetDecodingShaderInfo(TextureFormat format) { auto iter = s_decoding_shader_info.find(format); return iter != s_decoding_shader_info.end() ? &iter->second : nullptr; } -u32 GetBytesPerBufferElement(BufferFormat buffer_format) -{ - return s_buffer_bytes_per_texel[buffer_format]; -} - std::pair GetDispatchCount(const DecodingShaderInfo* info, u32 width, u32 height) { // Flatten to a single dimension? 
@@ -1419,4 +1403,126 @@ std::string GenerateDecodingShader(TextureFormat format, TLUTFormat palette_form return ss.str(); } +std::string GeneratePaletteConversionShader(TLUTFormat palette_format, APIType api_type) +{ + std::stringstream ss; + + ss << R"( +int Convert3To8(int v) +{ + // Swizzle bits: 00000123 -> 12312312 + return (v << 5) | (v << 2) | (v >> 1); +} +int Convert4To8(int v) +{ + // Swizzle bits: 00001234 -> 12341234 + return (v << 4) | v; +} +int Convert5To8(int v) +{ + // Swizzle bits: 00012345 -> 12345123 + return (v << 3) | (v >> 2); +} +int Convert6To8(int v) +{ + // Swizzle bits: 00123456 -> 12345612 + return (v << 2) | (v >> 4); +})"; + + switch (palette_format) + { + case TLUTFormat::IA8: + ss << R"( +float4 DecodePixel(int val) +{ + int i = val & 0xFF; + int a = val >> 8; + return float4(i, i, i, a) / 255.0; +})"; + break; + + case TLUTFormat::RGB565: + ss << R"( +float4 DecodePixel(int val) +{ + int r, g, b, a; + r = Convert5To8((val >> 11) & 0x1f); + g = Convert6To8((val >> 5) & 0x3f); + b = Convert5To8((val) & 0x1f); + a = 0xFF; + return float4(r, g, b, a) / 255.0; +})"; + break; + + case TLUTFormat::RGB5A3: + ss << R"( +float4 DecodePixel(int val) +{ + int r,g,b,a; + if ((val&0x8000) > 0) + { + r=Convert5To8((val>>10) & 0x1f); + g=Convert5To8((val>>5 ) & 0x1f); + b=Convert5To8((val ) & 0x1f); + a=0xFF; + } + else + { + a=Convert3To8((val>>12) & 0x7); + r=Convert4To8((val>>8 ) & 0xf); + g=Convert4To8((val>>4 ) & 0xf); + b=Convert4To8((val ) & 0xf); + } + return float4(r, g, b, a) / 255.0; +})"; + break; + + default: + PanicAlert("Unknown format"); + break; + } + + ss << "\n"; + + if (api_type == APIType::D3D) + { + ss << "Buffer tex0 : register(t0);\n"; + ss << "Texture2DArray tex1 : register(t1);\n"; + ss << "SamplerState samp1 : register(s1);\n"; + ss << "cbuffer PSBlock : register(b0) {\n"; + } + else + { + ss << "TEXEL_BUFFER_BINDING(0) uniform usamplerBuffer samp0;\n"; + ss << "SAMPLER_BINDING(1) uniform sampler2DArray samp1;\n"; + ss 
<< "UBO_BINDING(std140, 1) uniform PSBlock {\n"; + } + + ss << " float multiplier;\n"; + ss << " int texel_buffer_offset;\n"; + ss << "};\n"; + + if (api_type == APIType::D3D) + { + ss << "void main(in float3 v_tex0 : TEXCOORD0, out float4 ocol0 : SV_Target) {\n"; + ss << " int src = int(round(tex1.Sample(samp1, v_tex0).r * multiplier));\n"; + ss << " src = int(tex0.Load(src + texel_buffer_offset).r);\n"; + } + else + { + ss << "VARYING_LOCATION(0) in float3 v_tex0;\n"; + ss << "FRAGMENT_OUTPUT_LOCATION(0) out float4 ocol0;\n"; + ss << "void main() {\n"; + ss << " float3 coords = v_tex0;\n"; + ss << " int src = int(round(texture(samp1, coords).r * multiplier));\n"; + ss << " src = int(texelFetch(samp0, src + texel_buffer_offset).r);\n"; + } + + ss << " src = ((src << 8) & 0xFF00) | (src >> 8);\n"; + ss << " ocol0 = DecodePixel(src);\n"; + ss << "}\n"; + + return ss.str(); +} + } // namespace TextureConversionShaderTiled diff --git a/Source/Core/VideoCommon/TextureConversionShader.h b/Source/Core/VideoCommon/TextureConversionShader.h index 804d59b193..f6c266bd63 100644 --- a/Source/Core/VideoCommon/TextureConversionShader.h +++ b/Source/Core/VideoCommon/TextureConversionShader.h @@ -13,6 +13,7 @@ enum class APIType; enum class TextureFormat; enum class EFBCopyFormat; enum class TLUTFormat; +enum TexelBufferFormat : u32; struct EFBCopyParams; namespace TextureConversionShaderTiled @@ -21,20 +22,10 @@ u16 GetEncodedSampleCount(EFBCopyFormat format); const char* GenerateEncodingShader(const EFBCopyParams& params, APIType ApiType); -// View format of the input data to the texture decoding shader. -enum BufferFormat -{ - BUFFER_FORMAT_R8_UINT, - BUFFER_FORMAT_R16_UINT, - BUFFER_FORMAT_R32G32_UINT, - BUFFER_FORMAT_RGBA8_UINT, - BUFFER_FORMAT_COUNT -}; - // Information required to compile and dispatch a texture decoding shader. 
struct DecodingShaderInfo { - BufferFormat buffer_format; + TexelBufferFormat buffer_format; u32 palette_size; u32 group_size_x; u32 group_size_y; @@ -46,10 +37,6 @@ struct DecodingShaderInfo // If this format does not have a shader written for it, returns nullptr. const DecodingShaderInfo* GetDecodingShaderInfo(TextureFormat format); -// Determine how many bytes there are in each element of the texel buffer. -// Needed for alignment and stride calculations. -u32 GetBytesPerBufferElement(BufferFormat buffer_format); - // Determine how many thread groups should be dispatched for an image of the specified width/height. // First is the number of X groups, second is the number of Y groups, Z is always one. std::pair GetDispatchCount(const DecodingShaderInfo* info, u32 width, u32 height); @@ -58,4 +45,7 @@ std::pair GetDispatchCount(const DecodingShaderInfo* info, u32 width, std::string GenerateDecodingShader(TextureFormat format, TLUTFormat palette_format, APIType api_type); +// Returns the GLSL string containing the palette conversion shader for the specified format. 
+std::string GeneratePaletteConversionShader(TLUTFormat palette_format, APIType api_type); + } // namespace TextureConversionShaderTiled diff --git a/Source/Core/VideoCommon/TextureConverterShaderGen.cpp b/Source/Core/VideoCommon/TextureConverterShaderGen.cpp index ccb01c20ed..15de7ba9a7 100644 --- a/Source/Core/VideoCommon/TextureConverterShaderGen.cpp +++ b/Source/Core/VideoCommon/TextureConverterShaderGen.cpp @@ -31,72 +31,99 @@ TCShaderUid GetShaderUid(EFBCopyFormat dst_format, bool is_depth_copy, bool is_i return out; } -ShaderCode GenerateShader(APIType api_type, const UidData* uid_data) +static void WriteHeader(APIType api_type, ShaderCode& out) { - const bool mono_depth = uid_data->is_depth_copy && g_ActiveConfig.bStereoEFBMonoDepth; - - ShaderCode out; - if (api_type == APIType::OpenGL) + if (api_type == APIType::D3D) { - out.Write("SAMPLER_BINDING(9) uniform sampler2DArray samp9;\n" - "uniform float3 filter_coefficients;\n" - "uniform float gamma_rcp;\n" - "uniform float2 clamp_tb;\n" - "uniform float pixel_height;\n"); - out.Write("float4 SampleEFB(float3 uv, float y_offset) {\n" - " return texture(samp9, float3(uv.x, clamp(uv.y - (y_offset * pixel_height), " - "clamp_tb.x, clamp_tb.y), %s));\n" - "}\n", - mono_depth ? 
"0.0" : "uv.z"); - out.Write("#define uv0 f_uv0\n" - "in vec3 uv0;\n" - "out vec4 ocol0;\n" - "void main(){\n"); + out.Write("cbuffer PSBlock : register(b0) {\n" + " float2 src_offset, src_size;\n" + " float3 filter_coefficients;\n" + " float gamma_rcp;\n" + " float2 clamp_tb;\n" + " float pixel_height;\n" + "};\n\n"); } - else if (api_type == APIType::Vulkan) + else if (api_type == APIType::OpenGL || api_type == APIType::Vulkan) { out.Write("UBO_BINDING(std140, 1) uniform PSBlock {\n" + " float2 src_offset, src_size;\n" " float3 filter_coefficients;\n" " float gamma_rcp;\n" " float2 clamp_tb;\n" " float pixel_height;\n" "};\n"); + } +} + +ShaderCode GenerateVertexShader(APIType api_type) +{ + ShaderCode out; + WriteHeader(api_type, out); + + if (api_type == APIType::D3D) + { + out.Write("void main(in uint id : SV_VertexID, out float3 v_tex0 : TEXCOORD0,\n" + " out float4 opos : SV_Position) {\n"); + } + else if (api_type == APIType::OpenGL || api_type == APIType::Vulkan) + { + out.Write("VARYING_LOCATION(0) out float3 v_tex0;\n" + "#define id gl_VertexID\n" + "#define opos gl_Position\n" + "void main() {\n"); + } + out.Write(" v_tex0 = float3(float((id << 1) & 2), float(id & 2), 0.0f);\n"); + out.Write( + " opos = float4(v_tex0.xy * float2(2.0f, -2.0f) + float2(-1.0f, 1.0f), 0.0f, 1.0f);\n"); + out.Write(" v_tex0 = float3(src_offset + (src_size * v_tex0.xy), 0.0f);\n"); + + // NDC space is flipped in Vulkan + if (api_type == APIType::Vulkan) + out.Write(" opos.y = -opos.y;\n"); + + out.Write("}\n"); + + return out; +} + +ShaderCode GeneratePixelShader(APIType api_type, const UidData* uid_data) +{ + const bool mono_depth = uid_data->is_depth_copy && g_ActiveConfig.bStereoEFBMonoDepth; + + ShaderCode out; + WriteHeader(api_type, out); + + if (api_type == APIType::D3D) + { + out.Write("Texture2DArray tex0 : register(t0);\n" + "SamplerState samp0 : register(s0);\n" + "float4 SampleEFB(float3 uv, float y_offset) {\n" + " return tex0.Sample(samp0, float3(uv.x, 
clamp(uv.y + (y_offset * pixel_height), " + "clamp_tb.x, clamp_tb.y), %s));\n" + "}\n\n", + mono_depth ? "0.0" : "uv.z"); + out.Write("void main(in float3 v_tex0 : TEXCOORD0, out float4 ocol0 : SV_Target)\n{\n"); + } + else if (api_type == APIType::OpenGL || api_type == APIType::Vulkan) + { out.Write("SAMPLER_BINDING(0) uniform sampler2DArray samp0;\n"); out.Write("float4 SampleEFB(float3 uv, float y_offset) {\n" " return texture(samp0, float3(uv.x, clamp(uv.y + (y_offset * pixel_height), " "clamp_tb.x, clamp_tb.y), %s));\n" "}\n", mono_depth ? "0.0" : "uv.z"); - out.Write("layout(location = 0) in vec3 uv0;\n" - "layout(location = 1) in vec4 col0;\n" - "layout(location = 0) out vec4 ocol0;" - "void main(){\n"); - } - else if (api_type == APIType::D3D) - { - out.Write("Texture2DArray tex0 : register(t0);\n" - "SamplerState samp0 : register(s0);\n" - "uniform float3 filter_coefficients;\n" - "uniform float gamma_rcp;\n" - "uniform float2 clamp_tb;\n" - "uniform float pixel_height;\n\n"); - out.Write("float4 SampleEFB(float3 uv, float y_offset) {\n" - " return tex0.Sample(samp0, float3(uv.x, clamp(uv.y + (y_offset * pixel_height), " - "clamp_tb.x, clamp_tb.y), %s));\n" - "}\n", - mono_depth ? "0.0" : "uv.z"); - out.Write("void main(out float4 ocol0 : SV_Target,\n" - " in float4 pos : SV_Position,\n" - " in float3 uv0 : TEXCOORD0) {\n"); + out.Write("VARYING_LOCATION(0) in vec3 v_tex0;\n" + "FRAGMENT_OUTPUT_LOCATION(0) out vec4 ocol0;" + "void main()\n{\n"); } // The copy filter applies to both color and depth copies. This has been verified on hardware. // The filter is only applied to the RGB channels, the alpha channel is left intact. 
if (uid_data->copy_filter) { - out.Write(" float4 prev_row = SampleEFB(uv0, -1.0f);\n" - " float4 current_row = SampleEFB(uv0, 0.0f);\n" - " float4 next_row = SampleEFB(uv0, 1.0f);\n" + out.Write(" float4 prev_row = SampleEFB(v_tex0, -1.0f);\n" + " float4 current_row = SampleEFB(v_tex0, 0.0f);\n" + " float4 next_row = SampleEFB(v_tex0, 1.0f);\n" " float4 texcol = float4(min(prev_row.rgb * filter_coefficients[0] +\n" " current_row.rgb * filter_coefficients[1] +\n" " next_row.rgb * filter_coefficients[2], \n" @@ -105,14 +132,14 @@ ShaderCode GenerateShader(APIType api_type, const UidData* uid_data) else { out.Write( - " float4 current_row = SampleEFB(uv0, 0.0f);\n" + " float4 current_row = SampleEFB(v_tex0, 0.0f);\n" " float4 texcol = float4(min(current_row.rgb * filter_coefficients[1], float3(1, 1, 1)),\n" " current_row.a);\n"); } if (uid_data->is_depth_copy) { - if (api_type == APIType::D3D || api_type == APIType::Vulkan) + if (!g_ActiveConfig.backend_info.bSupportsReversedDepthRange) out.Write("texcol.x = 1.0 - texcol.x;\n"); out.Write(" int depth = int(texcol.x * 16777216.0);\n" diff --git a/Source/Core/VideoCommon/TextureConverterShaderGen.h b/Source/Core/VideoCommon/TextureConverterShaderGen.h index 1f231f53c3..af8e7af0bc 100644 --- a/Source/Core/VideoCommon/TextureConverterShaderGen.h +++ b/Source/Core/VideoCommon/TextureConverterShaderGen.h @@ -28,7 +28,8 @@ struct UidData using TCShaderUid = ShaderUid; -ShaderCode GenerateShader(APIType api_type, const UidData* uid_data); +ShaderCode GenerateVertexShader(APIType api_type); +ShaderCode GeneratePixelShader(APIType api_type, const UidData* uid_data); TCShaderUid GetShaderUid(EFBCopyFormat dst_format, bool is_depth_copy, bool is_intensity, bool scale_by_half, bool copy_filter); diff --git a/Source/Core/VideoCommon/UberShaderPixel.cpp b/Source/Core/VideoCommon/UberShaderPixel.cpp index 27e69808dc..1b9695f5ce 100644 --- a/Source/Core/VideoCommon/UberShaderPixel.cpp +++ b/Source/Core/VideoCommon/UberShaderPixel.cpp 
@@ -52,8 +52,7 @@ ShaderCode GenPixelShader(APIType ApiType, const ShaderHostConfig& host_config, const bool use_shader_blend = !use_dual_source && host_config.backend_shader_framebuffer_fetch; const bool early_depth = uid_data->early_depth != 0; const bool per_pixel_depth = uid_data->per_pixel_depth != 0; - const bool bounding_box = - host_config.bounding_box && g_ActiveConfig.BBoxUseFragmentShaderImplementation(); + const bool bounding_box = host_config.bounding_box; const u32 numTexgen = uid_data->num_texgens; ShaderCode out; @@ -1058,7 +1057,7 @@ ShaderCode GenPixelShader(APIType ApiType, const ShaderHostConfig& host_config, if (host_config.fast_depth_calc) { - if (ApiType == APIType::D3D || ApiType == APIType::Vulkan) + if (!host_config.backend_reversed_depth_range) out.Write(" int zCoord = int((1.0 - rawpos.z) * 16777216.0);\n"); else out.Write(" int zCoord = int(rawpos.z * 16777216.0);\n"); @@ -1113,7 +1112,7 @@ ShaderCode GenPixelShader(APIType ApiType, const ShaderHostConfig& host_config, out.Write(" // If early depth is enabled, write to zbuffer before depth textures\n"); out.Write(" // If early depth isn't enabled, we write to the zbuffer here\n"); out.Write(" int zbuffer_zCoord = bpmem_late_ztest ? 
zCoord : early_zCoord;\n"); - if (ApiType == APIType::D3D || ApiType == APIType::Vulkan) + if (!host_config.backend_reversed_depth_range) out.Write(" depth = 1.0 - float(zbuffer_zCoord) / 16777216.0;\n"); else out.Write(" depth = float(zbuffer_zCoord) / 16777216.0;\n"); diff --git a/Source/Core/VideoCommon/VertexLoaderManager.cpp b/Source/Core/VideoCommon/VertexLoaderManager.cpp index 9cb7d23089..673569f4d9 100644 --- a/Source/Core/VideoCommon/VertexLoaderManager.cpp +++ b/Source/Core/VideoCommon/VertexLoaderManager.cpp @@ -19,6 +19,7 @@ #include "VideoCommon/DataReader.h" #include "VideoCommon/IndexGenerator.h" #include "VideoCommon/NativeVertexFormat.h" +#include "VideoCommon/RenderBase.h" #include "VideoCommon/Statistics.h" #include "VideoCommon/VertexLoaderBase.h" #include "VideoCommon/VertexLoaderManager.h" @@ -131,7 +132,7 @@ NativeVertexFormat* GetOrCreateMatchingFormat(const PortableVertexDeclaration& d auto iter = s_native_vertex_map.find(decl); if (iter == s_native_vertex_map.end()) { - std::unique_ptr fmt = g_vertex_manager->CreateNativeVertexFormat(decl); + std::unique_ptr fmt = g_renderer->CreateNativeVertexFormat(decl); auto ipair = s_native_vertex_map.emplace(decl, std::move(fmt)); iter = ipair.first; } @@ -228,9 +229,7 @@ static VertexLoaderBase* RefreshLoader(int vtx_attr_group, bool preprocess = fal const PortableVertexDeclaration& format = loader->m_native_vtx_decl; std::unique_ptr& native = s_native_vertex_map[format]; if (!native) - { - native = g_vertex_manager->CreateNativeVertexFormat(format); - } + native = g_renderer->CreateNativeVertexFormat(format); loader->m_native_vertex_format = native.get(); } state->vertex_loaders[vtx_attr_group] = loader; diff --git a/Source/Core/VideoCommon/VertexManagerBase.cpp b/Source/Core/VideoCommon/VertexManagerBase.cpp index e50cc1ceed..bbc355a98c 100644 --- a/Source/Core/VideoCommon/VertexManagerBase.cpp +++ b/Source/Core/VideoCommon/VertexManagerBase.cpp @@ -17,8 +17,9 @@ #include "Core/ConfigManager.h" 
#include "VideoCommon/BPMemory.h" +#include "VideoCommon/BoundingBox.h" #include "VideoCommon/DataReader.h" -#include "VideoCommon/Debugger.h" +#include "VideoCommon/FramebufferManager.h" #include "VideoCommon/GeometryShaderManager.h" #include "VideoCommon/IndexGenerator.h" #include "VideoCommon/NativeVertexFormat.h" @@ -79,11 +80,15 @@ static bool AspectIs16_9(float width, float height) } VertexManagerBase::VertexManagerBase() + : m_cpu_vertex_buffer(MAXVBUFFERSIZE), m_cpu_index_buffer(MAXIBUFFERSIZE) { } -VertexManagerBase::~VertexManagerBase() +VertexManagerBase::~VertexManagerBase() = default; + +bool VertexManagerBase::Initialize() { + return true; } u32 VertexManagerBase::GetRemainingSize() const @@ -94,6 +99,10 @@ u32 VertexManagerBase::GetRemainingSize() const DataReader VertexManagerBase::PrepareForAdditionalData(int primitive, u32 count, u32 stride, bool cullall) { + // Flush all EFB pokes and invalidate the peek cache. + g_framebuffer_manager->InvalidatePeekCache(); + g_framebuffer_manager->FlushEFBPokes(); + // The SSE vertex loader can write up to 4 bytes past the end u32 const needed_vertex_bytes = count * stride + 4; @@ -132,7 +141,18 @@ DataReader VertexManagerBase::PrepareForAdditionalData(int primitive, u32 count, // need to alloc new buffer if (m_is_flushed) { - g_vertex_manager->ResetBuffer(stride, cullall); + if (cullall) + { + // This buffer isn't getting sent to the GPU. Just allocate it on the cpu. 
+ m_cur_buffer_pointer = m_base_buffer_pointer = m_cpu_vertex_buffer.data(); + m_end_buffer_pointer = m_base_buffer_pointer + m_cpu_vertex_buffer.size(); + IndexGenerator::Start(m_cpu_index_buffer.data()); + } + else + { + ResetBuffer(stride); + } + m_is_flushed = false; } @@ -210,6 +230,48 @@ std::pair VertexManagerBase::ResetFlushAspectRatioCount() return val; } +void VertexManagerBase::ResetBuffer(u32 vertex_stride) +{ + m_base_buffer_pointer = m_cpu_vertex_buffer.data(); + m_cur_buffer_pointer = m_cpu_vertex_buffer.data(); + m_end_buffer_pointer = m_base_buffer_pointer + m_cpu_vertex_buffer.size(); + IndexGenerator::Start(m_cpu_index_buffer.data()); +} + +void VertexManagerBase::CommitBuffer(u32 num_vertices, u32 vertex_stride, u32 num_indices, + u32* out_base_vertex, u32* out_base_index) +{ + *out_base_vertex = 0; + *out_base_index = 0; +} + +void VertexManagerBase::DrawCurrentBatch(u32 base_index, u32 num_indices, u32 base_vertex) +{ + // If bounding box is enabled, we need to flush any changes first, then invalidate what we have. + if (::BoundingBox::active && g_ActiveConfig.bBBoxEnable && + g_ActiveConfig.backend_info.bSupportsBBox) + { + g_renderer->BBoxFlush(); + } + + g_renderer->DrawIndexed(base_index, num_indices, base_vertex); +} + +void VertexManagerBase::UploadUniforms() +{ +} + +void VertexManagerBase::InvalidateConstants() +{ + VertexShaderManager::dirty = true; + GeometryShaderManager::dirty = true; + PixelShaderManager::dirty = true; +} + +void VertexManagerBase::UploadUtilityUniforms(const void* uniforms, u32 uniforms_size) +{ +} + void VertexManagerBase::UploadUtilityVertices(const void* vertices, u32 vertex_stride, u32 num_vertices, const u16* indices, u32 num_indices, u32* out_base_vertex, u32* out_base_index) @@ -218,7 +280,7 @@ void VertexManagerBase::UploadUtilityVertices(const void* vertices, u32 vertex_s ASSERT(m_is_flushed); // Copy into the buffers usually used for GX drawing. 
- ResetBuffer(std::max(vertex_stride, 1u), false); + ResetBuffer(std::max(vertex_stride, 1u)); if (vertices) { const u32 copy_size = vertex_stride * num_vertices; @@ -232,11 +294,51 @@ void VertexManagerBase::UploadUtilityVertices(const void* vertices, u32 vertex_s CommitBuffer(num_vertices, vertex_stride, num_indices, out_base_vertex, out_base_index); } +u32 VertexManagerBase::GetTexelBufferElementSize(TexelBufferFormat buffer_format) +{ + // R8 - 1, R16 - 2, RGBA8 - 4, R32G32 - 8 + return 1u << static_cast(buffer_format); +} + +bool VertexManagerBase::UploadTexelBuffer(const void* data, u32 data_size, TexelBufferFormat format, + u32* out_offset) +{ + return false; +} + +bool VertexManagerBase::UploadTexelBuffer(const void* data, u32 data_size, TexelBufferFormat format, + u32* out_offset, const void* palette_data, + u32 palette_size, TexelBufferFormat palette_format, + u32* palette_offset) +{ + return false; +} + +void VertexManagerBase::LoadTextures() +{ + BitSet32 usedtextures; + for (u32 i = 0; i < bpmem.genMode.numtevstages + 1u; ++i) + if (bpmem.tevorders[i / 2].getEnable(i & 1)) + usedtextures[bpmem.tevorders[i / 2].getTexMap(i & 1)] = true; + + if (bpmem.genMode.numindstages > 0) + for (unsigned int i = 0; i < bpmem.genMode.numtevstages + 1u; ++i) + if (bpmem.tevind[i].IsActive() && bpmem.tevind[i].bt < bpmem.genMode.numindstages) + usedtextures[bpmem.tevindref.getTexMap(bpmem.tevind[i].bt)] = true; + + for (unsigned int i : usedtextures) + g_texture_cache->Load(i); + + g_texture_cache->BindTextures(); +} + void VertexManagerBase::Flush() { if (m_is_flushed) return; + m_is_flushed = true; + // loading a state will invalidate BP, so check for it g_video_backend->CheckInvalidState(); @@ -280,29 +382,6 @@ void VertexManagerBase::Flush() (bpmem.alpha_test.hex >> 16) & 0xff); #endif - // If the primitave is marked CullAll. 
All we need to do is update the vertex constants and - // calculate the zfreeze refrence slope - if (!m_cull_all) - { - BitSet32 usedtextures; - for (u32 i = 0; i < bpmem.genMode.numtevstages + 1u; ++i) - if (bpmem.tevorders[i / 2].getEnable(i & 1)) - usedtextures[bpmem.tevorders[i / 2].getTexMap(i & 1)] = true; - - if (bpmem.genMode.numindstages > 0) - for (unsigned int i = 0; i < bpmem.genMode.numtevstages + 1u; ++i) - if (bpmem.tevind[i].IsActive() && bpmem.tevind[i].bt < bpmem.genMode.numindstages) - usedtextures[bpmem.tevindref.getTexMap(bpmem.tevind[i].bt)] = true; - - for (unsigned int i : usedtextures) - g_texture_cache->Load(i); - - g_texture_cache->BindTextures(); - } - - // set global vertex constants - VertexShaderManager::SetConstants(); - // Track some stats used elsewhere by the anamorphic widescreen heuristic. if (!SConfig::GetInstance().bWii) { @@ -322,6 +401,7 @@ void VertexManagerBase::Flush() } // Calculate ZSlope for zfreeze + VertexShaderManager::SetConstants(); if (!bpmem.genMode.zfreeze) { // Must be done after VertexShaderManager::SetConstants() @@ -335,20 +415,24 @@ void VertexManagerBase::Flush() if (!m_cull_all) { - // Update and upload constants. Note for the Vulkan backend, this must occur before the - // vertex/index buffer is committed, otherwise the data will be associated with the - // previous command buffer, instead of the one with the draw if there is an overflow. - GeometryShaderManager::SetConstants(); - PixelShaderManager::SetConstants(); - UploadConstants(); - - // Now the vertices can be flushed to the GPU. + // Now the vertices can be flushed to the GPU. Everything following the CommitBuffer() call + // must be careful to not upload any utility vertices, as the binding will be lost otherwise. 
const u32 num_indices = IndexGenerator::GetIndexLen(); u32 base_vertex, base_index; CommitBuffer(IndexGenerator::GetNumVerts(), VertexLoaderManager::GetCurrentVertexFormat()->GetVertexStride(), num_indices, &base_vertex, &base_index); + // Texture loading can cause palettes to be applied (-> uniforms -> draws). + // Palette application does not use vertices, only a full-screen quad, so this is okay. + // Same with GPU texture decoding, which uses compute shaders. + LoadTextures(); + + // Now we can upload uniforms, as nothing else will override them. + GeometryShaderManager::SetConstants(); + PixelShaderManager::SetConstants(); + UploadUniforms(); + // Update the pipeline, or compile one if needed. UpdatePipelineConfig(); UpdatePipelineObject(); @@ -363,18 +447,17 @@ void VertexManagerBase::Flush() if (PerfQueryBase::ShouldEmulate()) g_perf_query->DisableQuery(bpmem.zcontrol.early_ztest ? PQG_ZCOMP_ZCOMPLOC : PQG_ZCOMP); + + OnDraw(); } } - GFX_DEBUGGER_PAUSE_AT(NEXT_FLUSH, true); - if (xfmem.numTexGen.numTexGens != bpmem.genMode.numtexgens) + { ERROR_LOG(VIDEO, "xf.numtexgens (%d) does not match bp.numtexgens (%d). Error in command stream.", xfmem.numTexGen.numTexGens, bpmem.genMode.numtexgens.Value()); - - m_is_flushed = true; - m_cull_all = false; + } } void VertexManagerBase::DoState(PointerWrap& p) @@ -577,3 +660,109 @@ void VertexManagerBase::UpdatePipelineObject() break; } } + +void VertexManagerBase::OnDraw() +{ + m_draw_counter++; + + // If we didn't have any CPU access last frame, do nothing. + if (m_scheduled_command_buffer_kicks.empty() || !m_allow_background_execution) + return; + + // Check if this draw is scheduled to kick a command buffer. + // The draw counters will always be sorted so a binary search is possible here. + if (std::binary_search(m_scheduled_command_buffer_kicks.begin(), + m_scheduled_command_buffer_kicks.end(), m_draw_counter)) + { + // Kick a command buffer on the background thread. 
+ g_renderer->Flush(); + } +} + +void VertexManagerBase::OnCPUEFBAccess() +{ + // Check this isn't another access without any draws in between. + if (!m_cpu_accesses_this_frame.empty() && m_cpu_accesses_this_frame.back() == m_draw_counter) + return; + + // Store the current draw counter for scheduling in OnEndFrame. + m_cpu_accesses_this_frame.emplace_back(m_draw_counter); +} + +void VertexManagerBase::OnEFBCopyToRAM() +{ + // If we're not deferring, try to preempt it next frame. + if (!g_ActiveConfig.bDeferEFBCopies) + { + OnCPUEFBAccess(); + return; + } + + // Otherwise, only execute if we have at least 10 objects between us and the last copy. + const u32 diff = m_draw_counter - m_last_efb_copy_draw_counter; + m_last_efb_copy_draw_counter = m_draw_counter; + if (diff < MINIMUM_DRAW_CALLS_PER_COMMAND_BUFFER_FOR_READBACK) + return; + + g_renderer->Flush(); +} + +void VertexManagerBase::OnEndFrame() +{ + m_draw_counter = 0; + m_last_efb_copy_draw_counter = 0; + m_scheduled_command_buffer_kicks.clear(); + + // If we have no CPU access at all, leave everything in the one command buffer for maximum + // parallelism between CPU/GPU, at the cost of slightly higher latency. + if (m_cpu_accesses_this_frame.empty()) + return; + + // In order to reduce CPU readback latency, we want to kick a command buffer roughly halfway + // between the draw counters that invoked the readback, or every 250 draws, whichever is smaller. + if (g_ActiveConfig.iCommandBufferExecuteInterval > 0) + { + u32 last_draw_counter = 0; + u32 interval = static_cast(g_ActiveConfig.iCommandBufferExecuteInterval); + for (u32 draw_counter : m_cpu_accesses_this_frame) + { + // We don't want to waste executing command buffers for only a few draws, so set a minimum. + // Leave last_draw_counter as-is, so we get the correct number of draws between submissions.
+ u32 draw_count = draw_counter - last_draw_counter; + if (draw_count < MINIMUM_DRAW_CALLS_PER_COMMAND_BUFFER_FOR_READBACK) + continue; + + if (draw_count <= interval) + { + u32 mid_point = draw_count / 2; + m_scheduled_command_buffer_kicks.emplace_back(last_draw_counter + mid_point); + } + else + { + u32 counter = interval; + while (counter < draw_count) + { + m_scheduled_command_buffer_kicks.emplace_back(last_draw_counter + counter); + counter += interval; + } + } + + last_draw_counter = draw_counter; + } + } + +#if 0 + { + std::stringstream ss; + std::for_each(m_cpu_accesses_this_frame.begin(), m_cpu_accesses_this_frame.end(), [&ss](u32 idx) { ss << idx << ","; }); + WARN_LOG(VIDEO, "CPU EFB accesses in last frame: %s", ss.str().c_str()); + } + { + std::stringstream ss; + std::for_each(m_scheduled_command_buffer_kicks.begin(), m_scheduled_command_buffer_kicks.end(), [&ss](u32 idx) { ss << idx << ","; }); + WARN_LOG(VIDEO, "Scheduled command buffer kicks: %s", ss.str().c_str()); + } +#endif + + m_cpu_accesses_this_frame.clear(); +} diff --git a/Source/Core/VideoCommon/VertexManagerBase.h b/Source/Core/VideoCommon/VertexManagerBase.h index cd3e4ed552..9a657bd7f3 100644 --- a/Source/Core/VideoCommon/VertexManagerBase.h +++ b/Source/Core/VideoCommon/VertexManagerBase.h @@ -25,6 +25,16 @@ struct Slope bool dirty; }; +// View format of the input data to the texture decoding shader. +enum TexelBufferFormat : u32 +{ + TEXEL_BUFFER_FORMAT_R8_UINT, + TEXEL_BUFFER_FORMAT_R16_UINT, + TEXEL_BUFFER_FORMAT_RGBA8_UINT, + TEXEL_BUFFER_FORMAT_R32G32_UINT, + NUM_TEXEL_BUFFER_FORMATS +}; + class VertexManagerBase { private: @@ -42,19 +52,24 @@ public: // We may convert triangle-fans to triangle-lists, almost 3x as many indices. static constexpr u32 MAXIBUFFERSIZE = MathUtil::NextPowerOf2(MAX_PRIMITIVES_PER_COMMAND * 3); + // Streaming buffer sizes. + // Texel buffer will fit the maximum size of an encoded GX texture. 1024x1024, RGBA8 = 4MB. 
+ static constexpr u32 VERTEX_STREAM_BUFFER_SIZE = 40 * 1024 * 1024; + static constexpr u32 INDEX_STREAM_BUFFER_SIZE = 4 * 1024 * 1024; + static constexpr u32 UNIFORM_STREAM_BUFFER_SIZE = 16 * 1024 * 1024; + static constexpr u32 TEXEL_STREAM_BUFFER_SIZE = 16 * 1024 * 1024; + VertexManagerBase(); - // needs to be virtual for DX11's dtor virtual ~VertexManagerBase(); + virtual bool Initialize(); + PrimitiveType GetCurrentPrimitiveType() const { return m_current_primitive_type; } DataReader PrepareForAdditionalData(int primitive, u32 count, u32 stride, bool cullall); void FlushData(u32 count, u32 stride); void Flush(); - virtual std::unique_ptr - CreateNativeVertexFormat(const PortableVertexDeclaration& vtx_decl) = 0; - void DoState(PointerWrap& p); std::pair ResetFlushAspectRatioCount(); @@ -70,38 +85,69 @@ public: } // Utility pipeline drawing (e.g. EFB copies, post-processing, UI). - virtual void UploadUtilityUniforms(const void* uniforms, u32 uniforms_size) = 0; + virtual void UploadUtilityUniforms(const void* uniforms, u32 uniforms_size); void UploadUtilityVertices(const void* vertices, u32 vertex_stride, u32 num_vertices, const u16* indices, u32 num_indices, u32* out_base_vertex, u32* out_base_index); + // Determine how many bytes there are in each element of the texel buffer. + // Needed for alignment and stride calculations. + static u32 GetTexelBufferElementSize(TexelBufferFormat buffer_format); + + // Texel buffer, used for palette conversion. + virtual bool UploadTexelBuffer(const void* data, u32 data_size, TexelBufferFormat format, + u32* out_offset); + + // The second set of parameters uploads a second blob in the same buffer, used for GPU texture + // decoding for palette textures, as both the texture data and palette must be uploaded. 
+ virtual bool UploadTexelBuffer(const void* data, u32 data_size, TexelBufferFormat format, + u32* out_offset, const void* palette_data, u32 palette_size, + TexelBufferFormat palette_format, u32* out_palette_offset); + + // CPU access tracking - call after a draw call is made. + void OnDraw(); + + // Call after CPU access is requested. + void OnCPUEFBAccess(); + + // Call after an EFB copy to RAM. May flush the current command buffer. + void OnEFBCopyToRAM(); + + // Call at the end of a frame. + void OnEndFrame(); + protected: - // Vertex buffers/index buffer creation. - virtual void CreateDeviceObjects() {} - virtual void DestroyDeviceObjects() {} + // When utility uniforms are used, the GX uniforms need to be re-written afterwards. + static void InvalidateConstants(); // Prepares the buffer for the next batch of vertices. - virtual void ResetBuffer(u32 vertex_stride, bool cull_all) = 0; + virtual void ResetBuffer(u32 vertex_stride); // Commits/uploads the current batch of vertices. virtual void CommitBuffer(u32 num_vertices, u32 vertex_stride, u32 num_indices, - u32* out_base_vertex, u32* out_base_index) = 0; + u32* out_base_vertex, u32* out_base_index); // Uploads uniform buffers for GX draws. - virtual void UploadConstants() = 0; + virtual void UploadUniforms(); // Issues the draw call for the current batch in the backend. - virtual void DrawCurrentBatch(u32 base_index, u32 num_indices, u32 base_vertex) = 0; + virtual void DrawCurrentBatch(u32 base_index, u32 num_indices, u32 base_vertex); + + u32 GetRemainingSize() const; + static u32 GetRemainingIndices(int primitive); + + void CalculateZSlope(NativeVertexFormat* format); + void LoadTextures(); u8* m_cur_buffer_pointer = nullptr; u8* m_base_buffer_pointer = nullptr; u8* m_end_buffer_pointer = nullptr; - u32 GetRemainingSize() const; - static u32 GetRemainingIndices(int primitive); + // Alternative buffers in CPU memory for primitives we are going to discard.
+ std::vector m_cpu_vertex_buffer; + std::vector m_cpu_index_buffer; Slope m_zslope = {}; - void CalculateZSlope(NativeVertexFormat* format); VideoCommon::GXPipelineUid m_current_pipeline_config; VideoCommon::GXUberPipelineUid m_current_uber_pipeline_config; @@ -114,12 +160,22 @@ protected: bool m_cull_all = false; private: + // Minimum number of draws per command buffer when attempting to preempt a readback operation. + static constexpr u32 MINIMUM_DRAW_CALLS_PER_COMMAND_BUFFER_FOR_READBACK = 10; + + void UpdatePipelineConfig(); + void UpdatePipelineObject(); + bool m_is_flushed = true; size_t m_flush_count_4_3 = 0; size_t m_flush_count_anamorphic = 0; - void UpdatePipelineConfig(); - void UpdatePipelineObject(); + // CPU access tracking + u32 m_draw_counter = 0; + u32 m_last_efb_copy_draw_counter = 0; + std::vector m_cpu_accesses_this_frame; + std::vector m_scheduled_command_buffer_kicks; + bool m_allow_background_execution = true; }; extern std::unique_ptr g_vertex_manager; diff --git a/Source/Core/VideoCommon/VideoBackendBase.cpp b/Source/Core/VideoCommon/VideoBackendBase.cpp index aa8b3d110f..268a3b70b7 100644 --- a/Source/Core/VideoCommon/VideoBackendBase.cpp +++ b/Source/Core/VideoCommon/VideoBackendBase.cpp @@ -282,7 +282,6 @@ void VideoBackendBase::InitializeShared() m_initialized = true; m_invalid = false; - frameCount = 0; CommandProcessor::Init(); Fifo::Init(); diff --git a/Source/Core/VideoCommon/VideoCommon.vcxproj b/Source/Core/VideoCommon/VideoCommon.vcxproj index e2a8d60b1b..f4ab78c032 100644 --- a/Source/Core/VideoCommon/VideoCommon.vcxproj +++ b/Source/Core/VideoCommon/VideoCommon.vcxproj @@ -48,11 +48,11 @@ - - + + @@ -114,11 +114,11 @@ - - + + diff --git a/Source/Core/VideoCommon/VideoCommon.vcxproj.filters b/Source/Core/VideoCommon/VideoCommon.vcxproj.filters index 3ce3543380..f804839f54 100644 --- a/Source/Core/VideoCommon/VideoCommon.vcxproj.filters +++ b/Source/Core/VideoCommon/VideoCommon.vcxproj.filters @@ -29,12 +29,6 @@ - - Base - - - 
Base - Base @@ -197,6 +191,12 @@ Shader Generators + + Shader Generators + + + Base + @@ -206,12 +206,6 @@ - - Base - - - Base - Base @@ -374,7 +368,6 @@ Base - Base @@ -384,6 +377,15 @@ Shader Generators + + Shader Generators + + + Base + + + Shader Generators + diff --git a/Source/Core/VideoCommon/VideoConfig.cpp b/Source/Core/VideoCommon/VideoConfig.cpp index b7b8b3203f..6207521769 100644 --- a/Source/Core/VideoCommon/VideoConfig.cpp +++ b/Source/Core/VideoCommon/VideoConfig.cpp @@ -149,8 +149,6 @@ void VideoConfig::Refresh() bEFBAccessEnable = Config::Get(Config::GFX_HACK_EFB_ACCESS_ENABLE); bBBoxEnable = Config::Get(Config::GFX_HACK_BBOX_ENABLE); - bBBoxPreferStencilImplementation = - Config::Get(Config::GFX_HACK_BBOX_PREFER_STENCIL_IMPLEMENTATION); bForceProgressive = Config::Get(Config::GFX_HACK_FORCE_PROGRESSIVE); bSkipEFBCopyToRam = Config::Get(Config::GFX_HACK_SKIP_EFB_COPY_TO_RAM); bSkipXFBCopyToRam = Config::Get(Config::GFX_HACK_SKIP_XFB_COPY_TO_RAM); diff --git a/Source/Core/VideoCommon/VideoConfig.h b/Source/Core/VideoCommon/VideoConfig.h index eace03b4a2..9271762015 100644 --- a/Source/Core/VideoCommon/VideoConfig.h +++ b/Source/Core/VideoCommon/VideoConfig.h @@ -114,7 +114,6 @@ struct VideoConfig final bool bEFBAccessEnable; bool bPerfQueriesEnable; bool bBBoxEnable; - bool bBBoxPreferStencilImplementation; // OpenGL-only, to see how slow it is compared to SSBOs bool bForceProgressive; bool bEFBEmulateFormatChanges; @@ -186,6 +185,7 @@ struct VideoConfig final std::string AdapterName; // for OpenGL u32 MaxTextureSize; + bool bUsesLowerLeftOrigin; bool bSupportsExclusiveFullscreen; bool bSupportsDualSourceBlend; @@ -215,6 +215,7 @@ struct VideoConfig final bool bSupportsBPTCTextures; bool bSupportsFramebufferFetch; // Used as an alternative to dual-source blend on GLES bool bSupportsBackgroundCompiling; + bool bSupportsLargePoints; } backend_info; // Utility @@ -223,12 +224,6 @@ struct VideoConfig final { return 
backend_info.bSupportsExclusiveFullscreen && !bBorderlessFullscreen; } - bool BBoxUseFragmentShaderImplementation() const - { - if (backend_info.api_type == APIType::OpenGL && bBBoxPreferStencilImplementation) - return false; - return backend_info.bSupportsBBox && backend_info.bSupportsFragmentStoresAndAtomics; - } bool UseGPUTextureDecoding() const { return backend_info.bSupportsGPUTextureDecoding && bEnableGPUTextureDecoding;