From 4c35f3dcd41903878323b9c835eea1943abfa773 Mon Sep 17 00:00:00 2001 From: Stenzek Date: Mon, 7 Aug 2023 22:19:46 +1000 Subject: [PATCH] Reimplement adaptive downsampling --- src/core/gpu/d3d11_device.cpp | 7 +- src/core/gpu/gpu_device.h | 2 + src/core/gpu_hw.cpp | 307 +++++++++++++++------------------- src/core/gpu_hw.h | 30 ++-- src/core/gpu_hw_shadergen.cpp | 45 +++-- src/core/gpu_hw_shadergen.h | 2 + src/core/shadergen.cpp | 2 +- 7 files changed, 186 insertions(+), 209 deletions(-) diff --git a/src/core/gpu/d3d11_device.cpp b/src/core/gpu/d3d11_device.cpp index e4b790b02..b93dd0e6a 100644 --- a/src/core/gpu/d3d11_device.cpp +++ b/src/core/gpu/d3d11_device.cpp @@ -43,8 +43,7 @@ static void SetD3DDebugObjectName(ID3D11DeviceChild* obj, const std::string_view if (SUCCEEDED(hr) && existing_data_size > 0) return; - const std::wstring wname = StringUtil::UTF8StringToWideString(name); - obj->SetPrivateData(guid, static_cast(wname.length()) * 2u, wname.c_str()); + obj->SetPrivateData(guid, static_cast(name.length()), name.data()); #endif } @@ -478,10 +477,6 @@ bool D3D11Device::CreateDevice(const std::string_view& adapter) if (m_window_info.type != WindowInfo::Type::Surfaceless && !CreateSwapChain()) return false; - // Render a frame as soon as possible to clear out whatever was previously being displayed. - m_context->ClearRenderTargetView(m_swap_chain_rtv.Get(), s_clear_color.data()); - m_swap_chain->Present(0, m_using_allow_tearing ? 
DXGI_PRESENT_ALLOW_TEARING : 0); - if (!CreateBuffers()) return false; diff --git a/src/core/gpu/gpu_device.h b/src/core/gpu/gpu_device.h index 8f1149176..ff9357e7d 100644 --- a/src/core/gpu/gpu_device.h +++ b/src/core/gpu/gpu_device.h @@ -74,6 +74,8 @@ public: union Config { + static constexpr u8 LOD_MAX = 15; + BitField min_filter; BitField mag_filter; BitField mip_filter; diff --git a/src/core/gpu_hw.cpp b/src/core/gpu_hw.cpp index 3e1845a69..2208e30f2 100644 --- a/src/core/gpu_hw.cpp +++ b/src/core/gpu_hw.cpp @@ -116,7 +116,6 @@ bool GPU_HW::Initialize() m_max_resolution_scale = g_gpu_device->GetMaxTextureSize() / VRAM_WIDTH; m_supports_dual_source_blend = features.dual_source_blend; m_supports_per_sample_shading = features.per_sample_shading; - m_supports_adaptive_downsampling = features.mipmapped_render_targets; m_supports_disable_color_perspective = features.noperspective_interpolation; m_resolution_scale = CalculateResolutionScale(); @@ -148,14 +147,6 @@ bool GPU_HW::Initialize() Settings::GetTextureFilterDisplayName(m_texture_filtering)); m_texture_filtering = GPUTextureFilter::Nearest; } - if (!m_supports_adaptive_downsampling && g_settings.gpu_resolution_scale > 1 && - g_settings.gpu_downsample_mode == GPUDownsampleMode::Adaptive) - { - Host::AddOSDMessage( - Host::TranslateStdString( - "OSDMessage", "Adaptive downsampling is not supported with the current renderer, using box filter instead."), - 20.0f); - } if (!m_supports_disable_color_perspective && !ShouldDisableColorPerspective()) Log_WarningPrint("Disable color perspective not supported, but should be used."); @@ -382,8 +373,7 @@ u32 GPU_HW::CalculateResolutionScale() const scale = static_cast(std::clamp(preferred_scale, 1, m_max_resolution_scale)); } - if (g_settings.gpu_downsample_mode == GPUDownsampleMode::Adaptive && m_supports_adaptive_downsampling && scale > 1 && - !Common::IsPow2(scale)) + if (g_settings.gpu_downsample_mode == GPUDownsampleMode::Adaptive && scale > 1 && 
!Common::IsPow2(scale)) { const u32 new_scale = Common::PreviousPow2(scale); Log_InfoPrintf("Resolution scale %ux not supported for adaptive smoothing, using %ux", scale, new_scale); @@ -412,13 +402,7 @@ void GPU_HW::UpdateResolutionScale() GPUDownsampleMode GPU_HW::GetDownsampleMode(u32 resolution_scale) const { - if (resolution_scale == 1) - return GPUDownsampleMode::Disabled; - - if (g_settings.gpu_downsample_mode == GPUDownsampleMode::Adaptive) - return m_supports_adaptive_downsampling ? GPUDownsampleMode::Adaptive : GPUDownsampleMode::Box; - - return g_settings.gpu_downsample_mode; + return (resolution_scale == 1) ? GPUDownsampleMode::Disabled : g_settings.gpu_downsample_mode; } std::tuple GPU_HW::GetEffectiveDisplayResolution(bool scaled /* = true */) @@ -499,46 +483,28 @@ bool GPU_HW::CreateBuffers() Log_InfoPrintf("Created HW framebuffer of %ux%u", texture_width, texture_height); -#if 0 if (m_downsample_mode == GPUDownsampleMode::Adaptive) { const u32 levels = GetAdaptiveDownsamplingMipLevels(); - if (!m_downsample_texture.Create(m_device.Get(), texture_width, texture_height, 1, static_cast(levels), 1, - GPUTexture::Type::RenderTarget, texture_format) || - !m_downsample_weight_texture.Create(m_device.Get(), texture_width >> (levels - 1), - texture_height >> (levels - 1), 1, 1, 1, GPUTexture::Type::RenderTarget, - GPUTexture::Format::R8)) + if (!(m_downsample_texture = g_gpu_device->CreateTexture(texture_width, texture_height, 1, levels, 1, + GPUTexture::Type::Texture, VRAM_RT_FORMAT)) || + !(m_downsample_render_texture = g_gpu_device->CreateTexture(texture_width, texture_height, 1, 1, 1, + GPUTexture::Type::RenderTarget, VRAM_RT_FORMAT)) || + !(m_downsample_framebuffer = g_gpu_device->CreateFramebuffer(m_downsample_render_texture.get())) || + !(m_downsample_weight_texture = + g_gpu_device->CreateTexture(texture_width >> (levels - 1), texture_height >> (levels - 1), 1, 1, 1, + GPUTexture::Type::RenderTarget, GPUTexture::Format::R8)) || + 
!(m_downsample_weight_framebuffer = g_gpu_device->CreateFramebuffer(m_downsample_weight_texture.get()))) { return false; } - - m_downsample_mip_views.resize(levels); - for (u32 i = 0; i < levels; i++) - { - const CD3D11_SHADER_RESOURCE_VIEW_DESC srv_desc(m_downsample_texture, D3D11_SRV_DIMENSION_TEXTURE2D, - m_downsample_texture.GetDXGIFormat(), i, 1); - const CD3D11_RENDER_TARGET_VIEW_DESC rtv_desc(m_downsample_texture, D3D11_RTV_DIMENSION_TEXTURE2D, - m_downsample_texture.GetDXGIFormat(), i, 1); - - HRESULT hr = m_device->CreateShaderResourceView(m_downsample_texture, &srv_desc, - m_downsample_mip_views[i].first.GetAddressOf()); - if (FAILED(hr)) - return false; - - hr = m_device->CreateRenderTargetView(m_downsample_texture, &rtv_desc, - m_downsample_mip_views[i].second.GetAddressOf()); - if (FAILED(hr)) - return false; - } } - else -#endif - if (m_downsample_mode == GPUDownsampleMode::Box) + else if (m_downsample_mode == GPUDownsampleMode::Box) { - if (!(m_downsample_texture = g_gpu_device->CreateTexture(VRAM_WIDTH, VRAM_HEIGHT, 1, 1, 1, - GPUTexture::Type::RenderTarget, VRAM_RT_FORMAT)) || - !(m_downsample_framebuffer = g_gpu_device->CreateFramebuffer(m_downsample_texture.get()))) + if (!(m_downsample_render_texture = g_gpu_device->CreateTexture(VRAM_WIDTH, VRAM_HEIGHT, 1, 1, 1, + GPUTexture::Type::RenderTarget, VRAM_RT_FORMAT)) || + !(m_downsample_framebuffer = g_gpu_device->CreateFramebuffer(m_downsample_render_texture.get()))) { return false; } @@ -561,7 +527,10 @@ void GPU_HW::ClearFramebuffer() void GPU_HW::DestroyBuffers() { m_vram_upload_buffer.reset(); + m_downsample_weight_framebuffer.reset(); + m_downsample_weight_texture.reset(); m_downsample_framebuffer.reset(); + m_downsample_render_texture.reset(); m_downsample_texture.reset(); m_display_framebuffer.reset(); m_vram_readback_framebuffer.reset(); @@ -759,6 +728,8 @@ bool GPU_HW::CompilePipelines() progress.Increment(); // common state + plconfig.input_layout.vertex_attributes = {}; + 
plconfig.input_layout.vertex_stride = 0; plconfig.layout = GPUPipeline::Layout::SingleTexturePushConstants; plconfig.per_sample_shading = false; plconfig.blend = GPUPipeline::BlendState::GetNoBlendingState(); @@ -909,77 +880,65 @@ bool GPU_HW::CompilePipelines() return false; } -#if 0 if (m_downsample_mode == GPUDownsampleMode::Adaptive) { - gpbuilder.Clear(); - gpbuilder.SetRenderPass(m_downsample_render_pass, 0); - gpbuilder.SetPipelineLayout(m_downsample_pipeline_layout); - gpbuilder.SetVertexShader(uv_quad_vertex_shader); - gpbuilder.SetNoCullRasterizationState(); - gpbuilder.SetNoDepthTestState(); - gpbuilder.SetNoBlendingState(); - gpbuilder.SetDynamicViewportAndScissorState(); - - std::unique_ptr fs = g_host_display->CreateShaderFromSource( - GPUShader::Stage::Pixel, shadergen.GenerateAdaptiveDownsampleMipFragmentShader(true)); - if (fs == VK_NULL_HANDLE) + std::unique_ptr vs = + g_gpu_device->CreateShader(GPUShaderStage::Vertex, shadergen.GenerateAdaptiveDownsampleVertexShader()); + std::unique_ptr fs = + g_gpu_device->CreateShader(GPUShaderStage::Fragment, shadergen.GenerateAdaptiveDownsampleMipFragmentShader(true)); + if (!vs || !fs) return false; - - gpbuilder.SetFragmentShader(fs); - m_downsample_first_pass_pipeline = gpbuilder.Create(device, pipeline_cache, false); - vkDestroyShaderModule(g_vulkan_context->GetDevice(), fs, nullptr); - if (m_downsample_first_pass_pipeline == VK_NULL_HANDLE) + GL_OBJECT_NAME(vs, "Downsample Vertex Shader"); + GL_OBJECT_NAME(fs, "Downsample First Pass Fragment Shader"); + plconfig.vertex_shader = vs.get(); + plconfig.fragment_shader = fs.get(); + if (!(m_downsample_first_pass_pipeline = g_gpu_device->CreatePipeline(plconfig))) return false; - Vulkan::Util::SetObjectName(g_vulkan_context->GetDevice(), m_downsample_first_pass_pipeline, - "Downsample First Pass Pipeline"); + GL_OBJECT_NAME(m_downsample_first_pass_pipeline, "Downsample First Pass Pipeline"); - fs = 
g_host_display->CreateShaderFromSource(GPUShader::Stage::Pixel, - shadergen.GenerateAdaptiveDownsampleMipFragmentShader(false)); - if (fs == VK_NULL_HANDLE) + fs = g_gpu_device->CreateShader(GPUShaderStage::Fragment, + shadergen.GenerateAdaptiveDownsampleMipFragmentShader(false)); + if (!fs) return false; - - gpbuilder.SetFragmentShader(fs); - m_downsample_mid_pass_pipeline = gpbuilder.Create(device, pipeline_cache, false); - vkDestroyShaderModule(g_vulkan_context->GetDevice(), fs, nullptr); - if (m_downsample_mid_pass_pipeline == VK_NULL_HANDLE) + GL_OBJECT_NAME(fs, "Downsample Mid Pass Fragment Shader"); + plconfig.fragment_shader = fs.get(); + if (!(m_downsample_mid_pass_pipeline = g_gpu_device->CreatePipeline(plconfig))) return false; - Vulkan::Util::SetObjectName(g_vulkan_context->GetDevice(), m_downsample_mid_pass_pipeline, - "Downsample Mid Pass Pipeline"); + GL_OBJECT_NAME(m_downsample_mid_pass_pipeline, "Downsample Mid Pass Pipeline"); - fs = g_host_display->CreateShaderFromSource(GPUShader::Stage::Pixel, - shadergen.GenerateAdaptiveDownsampleBlurFragmentShader()); - if (fs == VK_NULL_HANDLE) + fs = g_gpu_device->CreateShader(GPUShaderStage::Fragment, shadergen.GenerateAdaptiveDownsampleBlurFragmentShader()); + if (!fs) return false; - - gpbuilder.SetFragmentShader(fs); - gpbuilder.SetRenderPass(m_downsample_weight_render_pass, 0); - m_downsample_blur_pass_pipeline = gpbuilder.Create(device, pipeline_cache, false); - vkDestroyShaderModule(g_vulkan_context->GetDevice(), fs, nullptr); - if (m_downsample_blur_pass_pipeline == VK_NULL_HANDLE) + GL_OBJECT_NAME(fs, "Downsample Blur Pass Fragment Shader"); + plconfig.fragment_shader = fs.get(); + plconfig.color_format = GPUTexture::Format::R8; + if (!(m_downsample_blur_pass_pipeline = g_gpu_device->CreatePipeline(plconfig))) return false; - Vulkan::Util::SetObjectName(g_vulkan_context->GetDevice(), m_downsample_blur_pass_pipeline, - "Downsample Blur Pass Pipeline"); + 
GL_OBJECT_NAME(m_downsample_blur_pass_pipeline, "Downsample Blur Pass Pipeline"); - fs = g_host_display->CreateShaderFromSource(GPUShader::Stage::Pixel, - shadergen.GenerateAdaptiveDownsampleCompositeFragmentShader()); - if (fs == VK_NULL_HANDLE) + fs = g_gpu_device->CreateShader(GPUShaderStage::Fragment, + shadergen.GenerateAdaptiveDownsampleCompositeFragmentShader()); + if (!fs) return false; - - gpbuilder.SetFragmentShader(fs); - gpbuilder.SetPipelineLayout(m_downsample_composite_pipeline_layout); - gpbuilder.SetRenderPass(m_display_load_render_pass, 0); - m_downsample_composite_pass_pipeline = gpbuilder.Create(device, pipeline_cache, false); - vkDestroyShaderModule(g_vulkan_context->GetDevice(), fs, nullptr); - if (m_downsample_composite_pass_pipeline == VK_NULL_HANDLE) + GL_OBJECT_NAME(fs, "Downsample Composite Pass Fragment Shader"); + plconfig.fragment_shader = fs.get(); + plconfig.color_format = VRAM_RT_FORMAT; + if (!(m_downsample_composite_pass_pipeline = g_gpu_device->CreatePipeline(plconfig))) return false; + GL_OBJECT_NAME(m_downsample_composite_pass_pipeline, "Downsample Composite Pass Pipeline"); - Vulkan::Util::SetObjectName(g_vulkan_context->GetDevice(), m_downsample_composite_pass_pipeline, - "Downsample Composite Pass Pipeline"); + GPUSampler::Config config = GPUSampler::GetLinearConfig(); + config.min_lod = 0; + config.max_lod = GPUSampler::Config::LOD_MAX; + if (!(m_downsample_lod_sampler = g_gpu_device->CreateSampler(config))) + return false; + GL_OBJECT_NAME(m_downsample_lod_sampler, "Downsample LOD Sampler"); + config.mip_filter = GPUSampler::Filter::Linear; + if (!(m_downsample_composite_sampler = g_gpu_device->CreateSampler(config))) + return false; + GL_OBJECT_NAME(m_downsample_composite_sampler, "Downsample Trilinear Sampler"); } - else -#endif - if (m_downsample_mode == GPUDownsampleMode::Box) + else if (m_downsample_mode == GPUDownsampleMode::Box) { std::unique_ptr fs = g_gpu_device->CreateShader(GPUShaderStage::Fragment, 
shadergen.GenerateBoxSampleDownsampleFragmentShader()); @@ -1023,6 +982,7 @@ void GPU_HW::DestroyPipelines() destroy(m_downsample_mid_pass_pipeline); destroy(m_downsample_blur_pass_pipeline); destroy(m_downsample_composite_pass_pipeline); + m_downsample_composite_sampler.reset(); m_display_pipelines.enumerate(destroy); } @@ -1283,23 +1243,6 @@ u32 GPU_HW::GetAdaptiveDownsamplingMipLevels() const return levels; } -GPU_HW::SmoothingUBOData GPU_HW::GetSmoothingUBO(u32 level, u32 left, u32 top, u32 width, u32 height, u32 tex_width, - u32 tex_height) const -{ - const float rcp_width = 1.0f / static_cast(tex_width >> level); - const float rcp_height = 1.0f / static_cast(tex_height >> level); - - SmoothingUBOData data; - data.min_uv[0] = static_cast(left >> level) * rcp_width; - data.min_uv[1] = static_cast(top >> level) * rcp_height; - data.max_uv[0] = static_cast((left + width) >> level) * rcp_width; - data.max_uv[1] = static_cast((top + height) >> level) * rcp_height; - data.rcp_size[0] = rcp_width; - data.rcp_size[1] = rcp_height; - - return data; -} - void GPU_HW::DrawLine(float x0, float y0, u32 col0, float x1, float y1, u32 col1, float depth) { const float dx = x1 - x0; @@ -2476,75 +2419,97 @@ void GPU_HW::DownsampleFramebuffer(GPUTexture* source, u32 left, u32 top, u32 wi void GPU_HW::DownsampleFramebufferAdaptive(GPUTexture* source, u32 left, u32 top, u32 width, u32 height) { -#if 0 - CD3D11_BOX src_box(left, top, 0, left + width, top + height, 1); - m_context->OMSetDepthStencilState(m_depth_disabled_state.Get(), 0); - m_context->OMSetBlendState(m_blend_disabled_state.Get(), nullptr, 0xFFFFFFFFu); - m_context->CopySubresourceRegion(m_downsample_texture, 0, left, top, 0, source->GetD3DTexture(), 0, &src_box); - m_context->PSSetSamplers(0, 1, m_point_sampler_state.GetAddressOf()); - m_context->VSSetShader(m_uv_quad_vertex_shader.Get(), nullptr, 0); + GL_PUSH("DownsampleFramebufferAdaptive (%u,%u => %u,%u)", left, top, left + width, top + height); + + struct 
SmoothingUBOData + { + float min_uv[2]; + float max_uv[2]; + float rcp_size[2]; + float lod; + }; + + g_gpu_device->CopyTextureRegion(m_downsample_texture.get(), 0, 0, 0, 0, source, left, top, 0, 0, width, height); + g_gpu_device->SetTextureSampler(0, m_downsample_texture.get(), m_downsample_lod_sampler.get()); + + const u32 levels = m_downsample_texture->GetLevels(); + SmoothingUBOData uniforms; // create mip chain - const u32 levels = m_downsample_texture.GetLevels(); for (u32 level = 1; level < levels; level++) { - static constexpr float clear_color[4] = {}; + GL_SCOPE("Create miplevel %u", level); - SetViewportAndScissor(left >> level, top >> level, width >> level, height >> level); - m_context->ClearRenderTargetView(m_downsample_mip_views[level].second.Get(), clear_color); - m_context->OMSetRenderTargets(1, m_downsample_mip_views[level].second.GetAddressOf(), nullptr); - m_context->PSSetShaderResources(0, 1, m_downsample_mip_views[level - 1].first.GetAddressOf()); + const u32 level_width = width >> level; + const u32 level_height = height >> level; + const float rcp_width = 1.0f / static_cast(m_downsample_texture->GetMipWidth(level)); + const float rcp_height = 1.0f / static_cast(m_downsample_texture->GetMipHeight(level)); + uniforms.min_uv[0] = 0.0f; + uniforms.min_uv[1] = 0.0f; + uniforms.max_uv[0] = static_cast(level_width) * rcp_width; + uniforms.max_uv[1] = static_cast(level_height) * rcp_height; + uniforms.rcp_size[0] = rcp_width; + uniforms.rcp_size[1] = rcp_height; + uniforms.lod = static_cast(level - 1); - const SmoothingUBOData ubo = GetSmoothingUBO(level, left, top, width, height, m_downsample_texture.GetWidth(), - m_downsample_texture.GetHeight()); - m_context->PSSetShader( - (level == 1) ? 
m_downsample_first_pass_pixel_shader.Get() : m_downsample_mid_pass_pixel_shader.Get(), nullptr, 0); - UploadUniformBuffer(&ubo, sizeof(ubo)); - m_context->Draw(3, 0); + g_gpu_device->InvalidateRenderTarget(m_downsample_render_texture.get()); + g_gpu_device->SetFramebuffer(m_downsample_framebuffer.get()); + g_gpu_device->SetViewportAndScissor(0, 0, level_width, level_height); + g_gpu_device->SetPipeline((level == 1) ? m_downsample_first_pass_pipeline.get() : + m_downsample_mid_pass_pipeline.get()); + g_gpu_device->PushUniformBuffer(&uniforms, sizeof(uniforms)); + g_gpu_device->Draw(3, 0); + g_gpu_device->CopyTextureRegion(m_downsample_texture.get(), 0, 0, 0, level, m_downsample_render_texture.get(), 0, 0, + 0, 0, level_width, level_height); } // blur pass at lowest level { + GL_SCOPE("Blur"); + const u32 last_level = levels - 1; - static constexpr float clear_color[4] = {}; + const u32 last_width = width >> last_level; + const u32 last_height = height >> last_level; + const float rcp_width = 1.0f / static_cast(m_downsample_render_texture->GetWidth()); + const float rcp_height = 1.0f / static_cast(m_downsample_render_texture->GetHeight()); + uniforms.min_uv[0] = 0.0f; + uniforms.min_uv[1] = 0.0f; + uniforms.max_uv[0] = static_cast(last_width) * rcp_width; + uniforms.max_uv[1] = static_cast(last_height) * rcp_height; + uniforms.rcp_size[0] = rcp_width; + uniforms.rcp_size[1] = rcp_height; + uniforms.lod = 0.0f; - SetViewportAndScissor(left >> last_level, top >> last_level, width >> last_level, height >> last_level); - m_context->ClearRenderTargetView(m_downsample_weight_texture.GetD3DRTV(), clear_color); - m_context->OMSetRenderTargets(1, m_downsample_weight_texture.GetD3DRTVArray(), nullptr); - m_context->PSSetShaderResources(0, 1, m_downsample_mip_views.back().first.GetAddressOf()); - m_context->PSSetShader(m_downsample_blur_pass_pixel_shader.Get(), nullptr, 0); - - const SmoothingUBOData ubo = GetSmoothingUBO(last_level, left, top, width, height, 
m_downsample_texture.GetWidth(), - m_downsample_texture.GetHeight()); - m_context->PSSetShader(m_downsample_blur_pass_pixel_shader.Get(), nullptr, 0); - UploadUniformBuffer(&ubo, sizeof(ubo)); - m_context->Draw(3, 0); + m_downsample_render_texture->MakeReadyForSampling(); + g_gpu_device->InvalidateRenderTarget(m_downsample_weight_texture.get()); + g_gpu_device->SetFramebuffer(m_downsample_weight_framebuffer.get()); + g_gpu_device->SetTextureSampler(0, m_downsample_render_texture.get(), g_gpu_device->GetNearestSampler()); + g_gpu_device->SetViewportAndScissor(0, 0, last_width, last_height); + g_gpu_device->SetPipeline(m_downsample_blur_pass_pipeline.get()); + g_gpu_device->PushUniformBuffer(&uniforms, sizeof(uniforms)); + g_gpu_device->Draw(3, 0); + m_downsample_weight_texture->MakeReadyForSampling(); } // composite downsampled and upsampled images together { - SetViewportAndScissor(left, top, width, height); - m_context->OMSetRenderTargets(1, GetDisplayTexture()->GetD3DRTVArray(), nullptr); + GL_SCOPE("Composite"); - ID3D11ShaderResourceView* const srvs[2] = {m_downsample_texture.GetD3DSRV(), - m_downsample_weight_texture.GetD3DSRV()}; - ID3D11SamplerState* const samplers[2] = {m_trilinear_sampler_state.Get(), m_linear_sampler_state.Get()}; - m_context->PSSetShaderResources(0, countof(srvs), srvs); - m_context->PSSetSamplers(0, countof(samplers), samplers); - m_context->PSSetShader(m_downsample_composite_pixel_shader.Get(), nullptr, 0); - m_context->Draw(3, 0); + g_gpu_device->InvalidateRenderTarget(m_downsample_render_texture.get()); + g_gpu_device->SetFramebuffer(m_downsample_framebuffer.get()); + g_gpu_device->SetTextureSampler(0, m_downsample_texture.get(), m_downsample_composite_sampler.get()); + g_gpu_device->SetTextureSampler(1, m_downsample_weight_texture.get(), m_downsample_lod_sampler.get()); + g_gpu_device->SetViewportAndScissor(0, 0, width, height); + g_gpu_device->SetPipeline(m_downsample_composite_pass_pipeline.get()); + g_gpu_device->Draw(3, 0); + 
m_downsample_render_texture->MakeReadyForSampling(); } - ID3D11ShaderResourceView* const null_srvs[2] = {}; - m_context->PSSetShaderResources(0, countof(null_srvs), null_srvs); - m_batch_ubo_dirty = true; + GL_POP(); RestoreGraphicsAPIState(); - g_host_display->SetDisplayTexture(m_display_texture.get(), left, top, width, height); -#else - Panic("Not implemented"); -#endif + g_gpu_device->SetDisplayTexture(m_downsample_render_texture.get(), 0, 0, width, height); } void GPU_HW::DownsampleFramebufferBoxFilter(GPUTexture* source, u32 left, u32 top, u32 width, u32 height) @@ -2556,7 +2521,7 @@ void GPU_HW::DownsampleFramebufferBoxFilter(GPUTexture* source, u32 left, u32 to source->MakeReadyForSampling(); - g_gpu_device->ClearRenderTarget(m_downsample_texture.get(), 0); + g_gpu_device->ClearRenderTarget(m_downsample_render_texture.get(), 0); g_gpu_device->SetFramebuffer(m_downsample_framebuffer.get()); g_gpu_device->SetPipeline(m_downsample_first_pass_pipeline.get()); g_gpu_device->SetTextureSampler(0, source, g_gpu_device->GetNearestSampler()); @@ -2565,7 +2530,7 @@ void GPU_HW::DownsampleFramebufferBoxFilter(GPUTexture* source, u32 left, u32 to RestoreGraphicsAPIState(); - g_gpu_device->SetDisplayTexture(m_downsample_texture.get(), ds_left, ds_top, ds_width, ds_height); + g_gpu_device->SetDisplayTexture(m_downsample_render_texture.get(), ds_left, ds_top, ds_width, ds_height); } void GPU_HW::DrawRendererStats(bool is_idle_frame) diff --git a/src/core/gpu_hw.h b/src/core/gpu_hw.h index b25b2dbe7..3a5ea09ab 100644 --- a/src/core/gpu_hw.h +++ b/src/core/gpu_hw.h @@ -341,21 +341,9 @@ protected: void SetBatchDepthBuffer(bool enabled); void CheckForDepthClear(const BatchVertex* vertices, u32 num_vertices); - /// UBO data for adaptive smoothing. - struct SmoothingUBOData - { - float min_uv[2]; - float max_uv[2]; - float rcp_size[2]; - }; - /// Returns the number of mipmap levels used for adaptive smoothing. 
u32 GetAdaptiveDownsamplingMipLevels() const; - /// Returns the UBO data for an adaptive smoothing pass. - SmoothingUBOData GetSmoothingUBO(u32 level, u32 left, u32 top, u32 width, u32 height, u32 tex_width, - u32 tex_height) const; - void DownsampleFramebuffer(GPUTexture* source, u32 left, u32 top, u32 width, u32 height); void DownsampleFramebufferAdaptive(GPUTexture* source, u32 left, u32 top, u32 width, u32 height); void DownsampleFramebufferBoxFilter(GPUTexture* source, u32 left, u32 top, u32 width, u32 height); @@ -397,12 +385,11 @@ protected: { BitField m_supports_per_sample_shading; BitField m_supports_dual_source_blend; - BitField m_supports_adaptive_downsampling; - BitField m_supports_disable_color_perspective; - BitField m_per_sample_shading; - BitField m_scaled_dithering; - BitField m_chroma_smoothing; - BitField m_disable_color_perspective; + BitField m_supports_disable_color_perspective; + BitField m_per_sample_shading; + BitField m_scaled_dithering; + BitField m_chroma_smoothing; + BitField m_disable_color_perspective; u8 bits = 0; }; @@ -441,13 +428,16 @@ protected: std::unique_ptr m_copy_pipeline; std::unique_ptr m_downsample_texture; + std::unique_ptr m_downsample_render_texture; std::unique_ptr m_downsample_framebuffer; - // std::unique_ptr m_downsample_weight_texture; - // std::unique_ptr m_downsample_weight_framebuffer; + std::unique_ptr m_downsample_weight_texture; + std::unique_ptr m_downsample_weight_framebuffer; std::unique_ptr m_downsample_first_pass_pipeline; std::unique_ptr m_downsample_mid_pass_pipeline; std::unique_ptr m_downsample_blur_pass_pipeline; std::unique_ptr m_downsample_composite_pass_pipeline; + std::unique_ptr m_downsample_lod_sampler; + std::unique_ptr m_downsample_composite_sampler; // Statistics RendererStats m_renderer_stats = {}; diff --git a/src/core/gpu_hw_shadergen.cpp b/src/core/gpu_hw_shadergen.cpp index 4e2380308..4acb72713 100644 --- a/src/core/gpu_hw_shadergen.cpp +++ b/src/core/gpu_hw_shadergen.cpp @@ -1329,13 
+1329,37 @@ std::string GPU_HW_ShaderGen::GenerateVRAMUpdateDepthFragmentShader() return ss.str(); } +void GPU_HW_ShaderGen::WriteAdaptiveDownsampleUniformBuffer(std::stringstream& ss) +{ + DeclareUniformBuffer(ss, {"float2 u_uv_min", "float2 u_uv_max", "float2 u_rcp_resolution", "float u_lod"}, true); +} + +std::string GPU_HW_ShaderGen::GenerateAdaptiveDownsampleVertexShader() +{ + std::stringstream ss; + WriteHeader(ss); + WriteAdaptiveDownsampleUniformBuffer(ss); + DeclareVertexEntryPoint(ss, {}, 0, 1, {}, true); + ss << R"( +{ + v_tex0 = float2(float((v_id << 1) & 2u), float(v_id & 2u)); + v_pos = float4(v_tex0 * float2(2.0f, -2.0f) + float2(-1.0f, 1.0f), 0.0f, 1.0f); + v_tex0 = u_uv_min + (u_uv_max - u_uv_min) * v_tex0; + #if API_OPENGL || API_OPENGL_ES || API_VULKAN + v_pos.y = -v_pos.y; + #endif +} +)"; + return ss.str(); +} + std::string GPU_HW_ShaderGen::GenerateAdaptiveDownsampleMipFragmentShader(bool first_pass) { std::stringstream ss; WriteHeader(ss); WriteCommonFunctions(ss); + WriteAdaptiveDownsampleUniformBuffer(ss); DeclareTexture(ss, "samp0", 0, false); - DeclareUniformBuffer(ss, {"float2 u_uv_min", "float2 u_uv_max", "float2 u_rcp_resolution"}, true); DefineMacro(ss, "FIRST_PASS", first_pass); // mipmap_energy.glsl ported from parallel-rsx. 
@@ -1370,16 +1394,16 @@ float4 get_bias(float4 c00, float4 c01, float4 c10, float4 c11) { float2 uv = v_tex0 - (u_rcp_resolution * 0.25); #ifdef FIRST_PASS - vec3 c00 = SAMPLE_TEXTURE_OFFSET(samp0, uv, int2(0, 0)).rgb; - vec3 c01 = SAMPLE_TEXTURE_OFFSET(samp0, uv, int2(0, 1)).rgb; - vec3 c10 = SAMPLE_TEXTURE_OFFSET(samp0, uv, int2(1, 0)).rgb; - vec3 c11 = SAMPLE_TEXTURE_OFFSET(samp0, uv, int2(1, 1)).rgb; + vec3 c00 = SAMPLE_TEXTURE_LEVEL_OFFSET(samp0, uv, u_lod, int2(0, 0)).rgb; + vec3 c01 = SAMPLE_TEXTURE_LEVEL_OFFSET(samp0, uv, u_lod, int2(0, 1)).rgb; + vec3 c10 = SAMPLE_TEXTURE_LEVEL_OFFSET(samp0, uv, u_lod, int2(1, 0)).rgb; + vec3 c11 = SAMPLE_TEXTURE_LEVEL_OFFSET(samp0, uv, u_lod, int2(1, 1)).rgb; o_col0 = get_bias(c00, c01, c10, c11); #else - vec4 c00 = SAMPLE_TEXTURE_OFFSET(samp0, uv, int2(0, 0)); - vec4 c01 = SAMPLE_TEXTURE_OFFSET(samp0, uv, int2(0, 1)); - vec4 c10 = SAMPLE_TEXTURE_OFFSET(samp0, uv, int2(1, 0)); - vec4 c11 = SAMPLE_TEXTURE_OFFSET(samp0, uv, int2(1, 1)); + vec4 c00 = SAMPLE_TEXTURE_LEVEL_OFFSET(samp0, uv, u_lod, int2(0, 0)); + vec4 c01 = SAMPLE_TEXTURE_LEVEL_OFFSET(samp0, uv, u_lod, int2(0, 1)); + vec4 c10 = SAMPLE_TEXTURE_LEVEL_OFFSET(samp0, uv, u_lod, int2(1, 0)); + vec4 c11 = SAMPLE_TEXTURE_LEVEL_OFFSET(samp0, uv, u_lod, int2(1, 1)); o_col0 = get_bias(c00, c01, c10, c11); #endif } @@ -1394,8 +1418,7 @@ std::string GPU_HW_ShaderGen::GenerateAdaptiveDownsampleBlurFragmentShader() WriteHeader(ss); WriteCommonFunctions(ss); DeclareTexture(ss, "samp0", 0, false); - DeclareUniformBuffer(ss, {"float2 u_uv_min", "float2 u_uv_max", "float2 u_rcp_resolution", "float sample_level"}, - true); + WriteAdaptiveDownsampleUniformBuffer(ss); // mipmap_blur.glsl ported from parallel-rsx. 
DeclareFragmentEntryPoint(ss, 0, 1, {}, false, 1, false, false, false, false); diff --git a/src/core/gpu_hw_shadergen.h b/src/core/gpu_hw_shadergen.h index 099e2f518..bd13b465f 100644 --- a/src/core/gpu_hw_shadergen.h +++ b/src/core/gpu_hw_shadergen.h @@ -24,6 +24,7 @@ public: std::string GenerateVRAMFillFragmentShader(bool wrapped, bool interlaced); std::string GenerateVRAMUpdateDepthFragmentShader(); + std::string GenerateAdaptiveDownsampleVertexShader(); std::string GenerateAdaptiveDownsampleMipFragmentShader(bool first_pass); std::string GenerateAdaptiveDownsampleBlurFragmentShader(); std::string GenerateAdaptiveDownsampleCompositeFragmentShader(); @@ -36,6 +37,7 @@ private: void WriteCommonFunctions(std::stringstream& ss); void WriteBatchUniformBuffer(std::stringstream& ss); void WriteBatchTextureFilter(std::stringstream& ss, GPUTextureFilter texture_filter); + void WriteAdaptiveDownsampleUniformBuffer(std::stringstream& ss); u32 m_resolution_scale; u32 m_multisamples; diff --git a/src/core/shadergen.cpp b/src/core/shadergen.cpp index 7a425b59e..13ac24997 100644 --- a/src/core/shadergen.cpp +++ b/src/core/shadergen.cpp @@ -210,7 +210,7 @@ void ShaderGen::WriteHeader(std::stringstream& ss) ss << "#define SAMPLE_TEXTURE(name, coords) texture(name, coords)\n"; ss << "#define SAMPLE_TEXTURE_OFFSET(name, coords, offset) textureOffset(name, coords, offset)\n"; ss << "#define SAMPLE_TEXTURE_LEVEL(name, coords, level) textureLod(name, coords, level)\n"; - ss << "#define SAMPLE_TEXTURE_LEVEL_OFFSET(name, coords, level, offset) textureLod(name, coords, level, offset)\n"; + ss << "#define SAMPLE_TEXTURE_LEVEL_OFFSET(name, coords, level, offset) textureLodOffset(name, coords, level, offset)\n"; ss << "#define LOAD_TEXTURE(name, coords, mip) texelFetch(name, coords, mip)\n"; ss << "#define LOAD_TEXTURE_MS(name, coords, sample) texelFetch(name, coords, int(sample))\n"; ss << "#define LOAD_TEXTURE_OFFSET(name, coords, mip, offset) texelFetchOffset(name, coords, mip, 
offset)\n";