diff --git a/src/core/gpu_hw.cpp b/src/core/gpu_hw.cpp
index 792cef7e8..818c2839f 100644
--- a/src/core/gpu_hw.cpp
+++ b/src/core/gpu_hw.cpp
@@ -3,10 +3,10 @@
 
 #include "gpu_hw.h"
 #include "cpu_core.h"
+#include "cpu_pgxp.h"
 #include "gpu_hw_shadergen.h"
 #include "gpu_sw_backend.h"
 #include "host.h"
-#include "cpu_pgxp.h"
 #include "settings.h"
 #include "system.h"
 
@@ -653,30 +653,9 @@ bool GPU_HW::CreateBuffers()
   Log_InfoFmt("Created HW framebuffer of {}x{}", texture_width, texture_height);
 
   if (m_downsample_mode == GPUDownsampleMode::Adaptive)
-  {
-    const u32 levels = GetAdaptiveDownsamplingMipLevels();
-
-    if (!(m_downsample_texture = g_gpu_device->FetchTexture(texture_width, texture_height, 1, levels, 1,
-                                                            GPUTexture::Type::Texture, VRAM_RT_FORMAT)) ||
-        !(m_downsample_render_texture = g_gpu_device->FetchTexture(texture_width, texture_height, 1, 1, 1,
-                                                                   GPUTexture::Type::RenderTarget, VRAM_RT_FORMAT)) ||
-        !(m_downsample_weight_texture =
-            g_gpu_device->FetchTexture(texture_width >> (levels - 1), texture_height >> (levels - 1), 1, 1, 1,
-                                       GPUTexture::Type::RenderTarget, GPUTexture::Format::R8)))
-    {
-      return false;
-    }
-  }
+    m_downsample_scale_or_levels = GetAdaptiveDownsamplingMipLevels();
   else if (m_downsample_mode == GPUDownsampleMode::Box)
-  {
-    const u32 downsample_scale = GetBoxDownsampleScale(m_resolution_scale);
-    if (!(m_downsample_render_texture =
-            g_gpu_device->FetchTexture(VRAM_WIDTH * downsample_scale, VRAM_HEIGHT * downsample_scale, 1, 1, 1,
-                                       GPUTexture::Type::RenderTarget, VRAM_RT_FORMAT)))
-    {
-      return false;
-    }
-  }
+    m_downsample_scale_or_levels = m_resolution_scale / GetBoxDownsampleScale(m_resolution_scale);
 
   g_gpu_device->SetRenderTarget(m_vram_texture.get(), m_vram_depth_texture.get());
   SetFullVRAMDirtyRectangle();
@@ -700,8 +679,6 @@ void GPU_HW::DestroyBuffers()
   ClearDisplayTexture();
 
   m_vram_upload_buffer.reset();
-  g_gpu_device->RecycleTexture(std::move(m_downsample_weight_texture));
-  g_gpu_device->RecycleTexture(std::move(m_downsample_render_texture));
   g_gpu_device->RecycleTexture(std::move(m_downsample_texture));
   g_gpu_device->RecycleTexture(std::move(m_vram_read_texture));
   g_gpu_device->RecycleTexture(std::move(m_vram_depth_texture));
@@ -2846,21 +2823,39 @@ void GPU_HW::DownsampleFramebufferAdaptive(GPUTexture* source, u32 left, u32 top
     float lod;
   };
 
-  g_gpu_device->CopyTextureRegion(m_downsample_texture.get(), 0, 0, 0, 0, source, left, top, 0, 0, width, height);
-  g_gpu_device->SetTextureSampler(0, m_downsample_texture.get(), m_downsample_lod_sampler.get());
+  if (!m_downsample_texture || m_downsample_texture->GetWidth() != width || m_downsample_texture->GetHeight() != height)
+  {
+    g_gpu_device->RecycleTexture(std::move(m_downsample_texture));
+    m_downsample_texture =
+      g_gpu_device->FetchTexture(width, height, 1, 1, 1, GPUTexture::Type::RenderTarget, VRAM_RT_FORMAT);
+  }
+  std::unique_ptr<GPUTexture, GPUDevice::PooledTextureDeleter> level_texture = g_gpu_device->FetchAutoRecycleTexture(
+    width, height, 1, m_downsample_scale_or_levels, 1, GPUTexture::Type::Texture, VRAM_RT_FORMAT);
+  std::unique_ptr<GPUTexture, GPUDevice::PooledTextureDeleter> weight_texture =
+    g_gpu_device->FetchAutoRecycleTexture(std::max(width >> (m_downsample_scale_or_levels - 1), 1u),
+                                          std::max(height >> (m_downsample_scale_or_levels - 1), 1u), 1, 1, 1,
+                                          GPUTexture::Type::RenderTarget, GPUTexture::Format::R8);
+  if (!m_downsample_texture || !level_texture || !weight_texture)
+  {
+    Log_ErrorFmt("Failed to create {}x{} RTs for adaptive downsampling", width, height);
+    SetDisplayTexture(source, left, top, width, height);
+    return;
+  }
+
+  g_gpu_device->CopyTextureRegion(level_texture.get(), 0, 0, 0, 0, source, left, top, 0, 0, width, height);
+  g_gpu_device->SetTextureSampler(0, level_texture.get(), m_downsample_lod_sampler.get());
 
-  const u32 levels = m_downsample_texture->GetLevels();
   SmoothingUBOData uniforms;
 
   // create mip chain
-  for (u32 level = 1; level < levels; level++)
+  for (u32 level = 1; level < m_downsample_scale_or_levels; level++)
   {
     GL_SCOPE_FMT("Create miplevel {}", level);
 
     const u32 level_width = width >> level;
     const u32 level_height = height >> level;
-    const float rcp_width = 1.0f / static_cast<float>(m_downsample_texture->GetMipWidth(level));
-    const float rcp_height = 1.0f / static_cast<float>(m_downsample_texture->GetMipHeight(level));
+    const float rcp_width = 1.0f / static_cast<float>(level_texture->GetMipWidth(level));
+    const float rcp_height = 1.0f / static_cast<float>(level_texture->GetMipHeight(level));
     uniforms.min_uv[0] = 0.0f;
     uniforms.min_uv[1] = 0.0f;
     uniforms.max_uv[0] = static_cast<float>(level_width) * rcp_width;
@@ -2869,26 +2864,26 @@ void GPU_HW::DownsampleFramebufferAdaptive(GPUTexture* source, u32 left, u32 top
     uniforms.rcp_size[1] = rcp_height;
     uniforms.lod = static_cast<float>(level - 1);
 
-    g_gpu_device->ClearRenderTarget(m_downsample_render_texture.get(), 0);
-    g_gpu_device->SetRenderTarget(m_downsample_render_texture.get());
+    g_gpu_device->InvalidateRenderTarget(m_downsample_texture.get());
+    g_gpu_device->SetRenderTarget(m_downsample_texture.get());
     g_gpu_device->SetViewportAndScissor(0, 0, level_width, level_height);
     g_gpu_device->SetPipeline((level == 1) ? m_downsample_first_pass_pipeline.get() :
                                              m_downsample_mid_pass_pipeline.get());
     g_gpu_device->PushUniformBuffer(&uniforms, sizeof(uniforms));
     g_gpu_device->Draw(3, 0);
-    g_gpu_device->CopyTextureRegion(m_downsample_texture.get(), 0, 0, 0, level, m_downsample_render_texture.get(), 0, 0,
-                                    0, 0, level_width, level_height);
+    g_gpu_device->CopyTextureRegion(level_texture.get(), 0, 0, 0, level, m_downsample_texture.get(), 0, 0, 0, 0,
+                                    level_width, level_height);
   }
 
   // blur pass at lowest level
   {
     GL_SCOPE("Blur");
 
-    const u32 last_level = levels - 1;
-    const u32 last_width = width >> last_level;
-    const u32 last_height = height >> last_level;
-    const float rcp_width = 1.0f / static_cast<float>(m_downsample_render_texture->GetWidth());
-    const float rcp_height = 1.0f / static_cast<float>(m_downsample_render_texture->GetHeight());
+    const u32 last_level = m_downsample_scale_or_levels - 1;
+    const u32 last_width = level_texture->GetMipWidth(last_level);
+    const u32 last_height = level_texture->GetMipHeight(last_level);
+    const float rcp_width = 1.0f / static_cast<float>(m_downsample_texture->GetWidth());
+    const float rcp_height = 1.0f / static_cast<float>(m_downsample_texture->GetHeight());
     uniforms.min_uv[0] = 0.0f;
     uniforms.min_uv[1] = 0.0f;
     uniforms.max_uv[0] = static_cast<float>(last_width) * rcp_width;
@@ -2897,58 +2892,82 @@ void GPU_HW::DownsampleFramebufferAdaptive(GPUTexture* source, u32 left, u32 top
     uniforms.rcp_size[1] = rcp_height;
     uniforms.lod = 0.0f;
 
-    m_downsample_render_texture->MakeReadyForSampling();
-    g_gpu_device->ClearRenderTarget(m_downsample_weight_texture.get(), 0);
-    g_gpu_device->SetRenderTarget(m_downsample_weight_texture.get());
-    g_gpu_device->SetTextureSampler(0, m_downsample_render_texture.get(), g_gpu_device->GetNearestSampler());
+    m_downsample_texture->MakeReadyForSampling();
+    g_gpu_device->InvalidateRenderTarget(weight_texture.get());
+    g_gpu_device->SetRenderTarget(weight_texture.get());
+    g_gpu_device->SetTextureSampler(0, m_downsample_texture.get(), g_gpu_device->GetNearestSampler());
     g_gpu_device->SetViewportAndScissor(0, 0, last_width, last_height);
     g_gpu_device->SetPipeline(m_downsample_blur_pass_pipeline.get());
     g_gpu_device->PushUniformBuffer(&uniforms, sizeof(uniforms));
     g_gpu_device->Draw(3, 0);
-    m_downsample_weight_texture->MakeReadyForSampling();
+    weight_texture->MakeReadyForSampling();
   }
 
   // composite downsampled and upsampled images together
   {
     GL_SCOPE("Composite");
 
-    g_gpu_device->ClearRenderTarget(m_downsample_render_texture.get(), 0);
-    g_gpu_device->SetRenderTarget(m_downsample_render_texture.get());
-    g_gpu_device->SetTextureSampler(0, m_downsample_texture.get(), m_downsample_composite_sampler.get());
-    g_gpu_device->SetTextureSampler(1, m_downsample_weight_texture.get(), m_downsample_lod_sampler.get());
+    uniforms.min_uv[0] = 0.0f;
+    uniforms.min_uv[1] = 0.0f;
+    uniforms.max_uv[0] = 1.0f;
+    uniforms.max_uv[1] = 1.0f;
+
+    g_gpu_device->InvalidateRenderTarget(m_downsample_texture.get());
+    g_gpu_device->SetRenderTarget(m_downsample_texture.get());
+    g_gpu_device->SetTextureSampler(0, level_texture.get(), m_downsample_composite_sampler.get());
+    g_gpu_device->SetTextureSampler(1, weight_texture.get(), m_downsample_lod_sampler.get());
     g_gpu_device->SetViewportAndScissor(0, 0, width, height);
     g_gpu_device->SetPipeline(m_downsample_composite_pass_pipeline.get());
+    g_gpu_device->PushUniformBuffer(&uniforms, sizeof(uniforms));
     g_gpu_device->Draw(3, 0);
-    m_downsample_render_texture->MakeReadyForSampling();
+    m_downsample_texture->MakeReadyForSampling();
   }
 
   GL_POP();
 
   RestoreDeviceContext();
 
-  SetDisplayTexture(m_downsample_render_texture.get(), 0, 0, width, height);
+  SetDisplayTexture(m_downsample_texture.get(), 0, 0, width, height);
 }
 
 void GPU_HW::DownsampleFramebufferBoxFilter(GPUTexture* source, u32 left, u32 top, u32 width, u32 height)
 {
-  const u32 factor = m_resolution_scale / GetBoxDownsampleScale(m_resolution_scale);
-  const u32 ds_left = left / factor;
-  const u32 ds_top = top / factor;
-  const u32 ds_width = width / factor;
-  const u32 ds_height = height / factor;
+  GL_SCOPE_FMT("DownsampleFramebufferBoxFilter({},{} => {},{} ({}x{}))", left, top, left + width, top + height, width,
+               height);
+
+  const u32 ds_width = width / m_downsample_scale_or_levels;
+  const u32 ds_height = height / m_downsample_scale_or_levels;
+
+  if (!m_downsample_texture || m_downsample_texture->GetWidth() != ds_width ||
+      m_downsample_texture->GetHeight() != ds_height)
+  {
+    g_gpu_device->RecycleTexture(std::move(m_downsample_texture));
+    m_downsample_texture =
+      g_gpu_device->FetchTexture(ds_width, ds_height, 1, 1, 1, GPUTexture::Type::RenderTarget, VRAM_RT_FORMAT);
+  }
+  if (!m_downsample_texture)
+  {
+    Log_ErrorFmt("Failed to create {}x{} RT for box downsampling", ds_width, ds_height);
+    SetDisplayTexture(source, left, top, width, height);
+    return;
+  }
 
   source->MakeReadyForSampling();
 
-  g_gpu_device->ClearRenderTarget(m_downsample_render_texture.get(), 0);
-  g_gpu_device->SetRenderTarget(m_downsample_render_texture.get());
+  const u32 uniforms[4] = {left, top, 0u, 0u};
+
+  g_gpu_device->InvalidateRenderTarget(m_downsample_texture.get());
+  g_gpu_device->SetRenderTarget(m_downsample_texture.get());
   g_gpu_device->SetPipeline(m_downsample_first_pass_pipeline.get());
   g_gpu_device->SetTextureSampler(0, source, g_gpu_device->GetNearestSampler());
-  g_gpu_device->SetViewportAndScissor(ds_left, ds_top, ds_width, ds_height);
+  g_gpu_device->SetViewportAndScissor(0, 0, ds_width, ds_height);
+  // Bind the pipeline before pushing uniforms, same order as the adaptive passes.
+  g_gpu_device->PushUniformBuffer(uniforms, sizeof(uniforms));
   g_gpu_device->Draw(3, 0);
 
   RestoreDeviceContext();
 
-  SetDisplayTexture(m_downsample_render_texture.get(), ds_left, ds_top, ds_width, ds_height);
+  SetDisplayTexture(m_downsample_texture.get(), 0, 0, ds_width, ds_height);
 }
 
 void GPU_HW::DrawRendererStats(bool is_idle_frame)
diff --git a/src/core/gpu_hw.h b/src/core/gpu_hw.h
index 64857c82c..83ba3225d 100644
--- a/src/core/gpu_hw.h
+++ b/src/core/gpu_hw.h
@@ -282,14 +282,13 @@ private:
   std::unique_ptr<GPUPipeline> m_copy_pipeline;
 
   std::unique_ptr<GPUTexture> m_downsample_texture;
-  std::unique_ptr<GPUTexture> m_downsample_render_texture;
-  std::unique_ptr<GPUTexture> m_downsample_weight_texture;
   std::unique_ptr<GPUPipeline> m_downsample_first_pass_pipeline;
   std::unique_ptr<GPUPipeline> m_downsample_mid_pass_pipeline;
   std::unique_ptr<GPUPipeline> m_downsample_blur_pass_pipeline;
   std::unique_ptr<GPUPipeline> m_downsample_composite_pass_pipeline;
   std::unique_ptr<GPUSampler> m_downsample_lod_sampler;
   std::unique_ptr<GPUSampler> m_downsample_composite_sampler;
+  u32 m_downsample_scale_or_levels = 0;
 
   // Statistics
   RendererStats m_renderer_stats = {};
diff --git a/src/core/gpu_hw_shadergen.cpp b/src/core/gpu_hw_shadergen.cpp
index 6c8800567..06c117ab9 100644
--- a/src/core/gpu_hw_shadergen.cpp
+++ b/src/core/gpu_hw_shadergen.cpp
@@ -1582,10 +1582,9 @@ std::string GPU_HW_ShaderGen::GenerateAdaptiveDownsampleCompositeFragmentShader(
   DeclareFragmentEntryPoint(ss, 0, 1, {}, true, 1, false, false, false, false);
   ss << R"(
 {
-  float2 uv = v_pos.xy * RCP_VRAM_SIZE;
-  float bias = SAMPLE_TEXTURE(samp1, uv).r;
+  float bias = SAMPLE_TEXTURE(samp1, v_tex0).r;
   float mip = float(RESOLUTION_SCALE - 1u) * bias;
-  float3 color = SAMPLE_TEXTURE_LEVEL(samp0, uv, mip).rgb;
+  float3 color = SAMPLE_TEXTURE_LEVEL(samp0, v_tex0, mip).rgb;
   o_col0 = float4(color, 1.0);
 }
 )";
@@ -1598,6 +1597,7 @@ std::string GPU_HW_ShaderGen::GenerateBoxSampleDownsampleFragmentShader(u32 fact
   std::stringstream ss;
   WriteHeader(ss);
   WriteCommonFunctions(ss);
+  DeclareUniformBuffer(ss, {"uint2 u_base_coords"}, true);
   DeclareTexture(ss, "samp0", 0, false);
 
   ss << "#define FACTOR " << factor << "\n";
@@ -1606,7 +1606,7 @@ std::string GPU_HW_ShaderGen::GenerateBoxSampleDownsampleFragmentShader(u32 fact
   ss << R"(
 {
   float3 color = float3(0.0, 0.0, 0.0);
-  uint2 base_coords = uint2(v_pos.xy) * uint2(FACTOR, FACTOR);
+  uint2 base_coords = u_base_coords + uint2(v_pos.xy) * uint2(FACTOR, FACTOR);
   for (uint offset_x = 0u; offset_x < FACTOR; offset_x++)
   {
     for (uint offset_y = 0u; offset_y < FACTOR; offset_y++)