Reimplement adaptive downsampling

This commit is contained in:
Stenzek 2023-08-07 22:19:46 +10:00
parent 3291675e54
commit 4c35f3dcd4
7 changed files with 186 additions and 209 deletions

View File

@ -43,8 +43,7 @@ static void SetD3DDebugObjectName(ID3D11DeviceChild* obj, const std::string_view
if (SUCCEEDED(hr) && existing_data_size > 0) if (SUCCEEDED(hr) && existing_data_size > 0)
return; return;
const std::wstring wname = StringUtil::UTF8StringToWideString(name); obj->SetPrivateData(guid, static_cast<UINT>(name.length()), name.data());
obj->SetPrivateData(guid, static_cast<UINT>(wname.length()) * 2u, wname.c_str());
#endif #endif
} }
@ -478,10 +477,6 @@ bool D3D11Device::CreateDevice(const std::string_view& adapter)
if (m_window_info.type != WindowInfo::Type::Surfaceless && !CreateSwapChain()) if (m_window_info.type != WindowInfo::Type::Surfaceless && !CreateSwapChain())
return false; return false;
// Render a frame as soon as possible to clear out whatever was previously being displayed.
m_context->ClearRenderTargetView(m_swap_chain_rtv.Get(), s_clear_color.data());
m_swap_chain->Present(0, m_using_allow_tearing ? DXGI_PRESENT_ALLOW_TEARING : 0);
if (!CreateBuffers()) if (!CreateBuffers())
return false; return false;

View File

@ -74,6 +74,8 @@ public:
union Config union Config
{ {
static constexpr u8 LOD_MAX = 15;
BitField<u64, Filter, 0, 1> min_filter; BitField<u64, Filter, 0, 1> min_filter;
BitField<u64, Filter, 1, 1> mag_filter; BitField<u64, Filter, 1, 1> mag_filter;
BitField<u64, Filter, 2, 1> mip_filter; BitField<u64, Filter, 2, 1> mip_filter;

View File

@ -116,7 +116,6 @@ bool GPU_HW::Initialize()
m_max_resolution_scale = g_gpu_device->GetMaxTextureSize() / VRAM_WIDTH; m_max_resolution_scale = g_gpu_device->GetMaxTextureSize() / VRAM_WIDTH;
m_supports_dual_source_blend = features.dual_source_blend; m_supports_dual_source_blend = features.dual_source_blend;
m_supports_per_sample_shading = features.per_sample_shading; m_supports_per_sample_shading = features.per_sample_shading;
m_supports_adaptive_downsampling = features.mipmapped_render_targets;
m_supports_disable_color_perspective = features.noperspective_interpolation; m_supports_disable_color_perspective = features.noperspective_interpolation;
m_resolution_scale = CalculateResolutionScale(); m_resolution_scale = CalculateResolutionScale();
@ -148,14 +147,6 @@ bool GPU_HW::Initialize()
Settings::GetTextureFilterDisplayName(m_texture_filtering)); Settings::GetTextureFilterDisplayName(m_texture_filtering));
m_texture_filtering = GPUTextureFilter::Nearest; m_texture_filtering = GPUTextureFilter::Nearest;
} }
if (!m_supports_adaptive_downsampling && g_settings.gpu_resolution_scale > 1 &&
g_settings.gpu_downsample_mode == GPUDownsampleMode::Adaptive)
{
Host::AddOSDMessage(
Host::TranslateStdString(
"OSDMessage", "Adaptive downsampling is not supported with the current renderer, using box filter instead."),
20.0f);
}
if (!m_supports_disable_color_perspective && !ShouldDisableColorPerspective()) if (!m_supports_disable_color_perspective && !ShouldDisableColorPerspective())
Log_WarningPrint("Disable color perspective not supported, but should be used."); Log_WarningPrint("Disable color perspective not supported, but should be used.");
@ -382,8 +373,7 @@ u32 GPU_HW::CalculateResolutionScale() const
scale = static_cast<u32>(std::clamp<s32>(preferred_scale, 1, m_max_resolution_scale)); scale = static_cast<u32>(std::clamp<s32>(preferred_scale, 1, m_max_resolution_scale));
} }
if (g_settings.gpu_downsample_mode == GPUDownsampleMode::Adaptive && m_supports_adaptive_downsampling && scale > 1 && if (g_settings.gpu_downsample_mode == GPUDownsampleMode::Adaptive && scale > 1 && !Common::IsPow2(scale))
!Common::IsPow2(scale))
{ {
const u32 new_scale = Common::PreviousPow2(scale); const u32 new_scale = Common::PreviousPow2(scale);
Log_InfoPrintf("Resolution scale %ux not supported for adaptive smoothing, using %ux", scale, new_scale); Log_InfoPrintf("Resolution scale %ux not supported for adaptive smoothing, using %ux", scale, new_scale);
@ -412,13 +402,7 @@ void GPU_HW::UpdateResolutionScale()
// Picks the downsample mode for a given resolution scale. Any scale other than 1 uses the
// user-configured mode as-is; a scale of exactly 1 always reports downsampling as disabled.
GPUDownsampleMode GPU_HW::GetDownsampleMode(u32 resolution_scale) const
{
  if (resolution_scale != 1)
    return g_settings.gpu_downsample_mode;

  return GPUDownsampleMode::Disabled;
}
std::tuple<u32, u32> GPU_HW::GetEffectiveDisplayResolution(bool scaled /* = true */) std::tuple<u32, u32> GPU_HW::GetEffectiveDisplayResolution(bool scaled /* = true */)
@ -499,46 +483,28 @@ bool GPU_HW::CreateBuffers()
Log_InfoPrintf("Created HW framebuffer of %ux%u", texture_width, texture_height); Log_InfoPrintf("Created HW framebuffer of %ux%u", texture_width, texture_height);
#if 0
if (m_downsample_mode == GPUDownsampleMode::Adaptive) if (m_downsample_mode == GPUDownsampleMode::Adaptive)
{ {
const u32 levels = GetAdaptiveDownsamplingMipLevels(); const u32 levels = GetAdaptiveDownsamplingMipLevels();
if (!m_downsample_texture.Create(m_device.Get(), texture_width, texture_height, 1, static_cast<u16>(levels), 1, if (!(m_downsample_texture = g_gpu_device->CreateTexture(texture_width, texture_height, 1, levels, 1,
GPUTexture::Type::RenderTarget, texture_format) || GPUTexture::Type::Texture, VRAM_RT_FORMAT)) ||
!m_downsample_weight_texture.Create(m_device.Get(), texture_width >> (levels - 1), !(m_downsample_render_texture = g_gpu_device->CreateTexture(texture_width, texture_height, 1, 1, 1,
texture_height >> (levels - 1), 1, 1, 1, GPUTexture::Type::RenderTarget,
GPUTexture::Format::R8))
{
return false;
}
m_downsample_mip_views.resize(levels);
for (u32 i = 0; i < levels; i++)
{
const CD3D11_SHADER_RESOURCE_VIEW_DESC srv_desc(m_downsample_texture, D3D11_SRV_DIMENSION_TEXTURE2D,
m_downsample_texture.GetDXGIFormat(), i, 1);
const CD3D11_RENDER_TARGET_VIEW_DESC rtv_desc(m_downsample_texture, D3D11_RTV_DIMENSION_TEXTURE2D,
m_downsample_texture.GetDXGIFormat(), i, 1);
HRESULT hr = m_device->CreateShaderResourceView(m_downsample_texture, &srv_desc,
m_downsample_mip_views[i].first.GetAddressOf());
if (FAILED(hr))
return false;
hr = m_device->CreateRenderTargetView(m_downsample_texture, &rtv_desc,
m_downsample_mip_views[i].second.GetAddressOf());
if (FAILED(hr))
return false;
}
}
else
#endif
if (m_downsample_mode == GPUDownsampleMode::Box)
{
if (!(m_downsample_texture = g_gpu_device->CreateTexture(VRAM_WIDTH, VRAM_HEIGHT, 1, 1, 1,
GPUTexture::Type::RenderTarget, VRAM_RT_FORMAT)) || GPUTexture::Type::RenderTarget, VRAM_RT_FORMAT)) ||
!(m_downsample_framebuffer = g_gpu_device->CreateFramebuffer(m_downsample_texture.get()))) !(m_downsample_framebuffer = g_gpu_device->CreateFramebuffer(m_downsample_render_texture.get())) ||
!(m_downsample_weight_texture =
g_gpu_device->CreateTexture(texture_width >> (levels - 1), texture_height >> (levels - 1), 1, 1, 1,
GPUTexture::Type::RenderTarget, GPUTexture::Format::R8)) ||
!(m_downsample_weight_framebuffer = g_gpu_device->CreateFramebuffer(m_downsample_weight_texture.get())))
{
return false;
}
}
else if (m_downsample_mode == GPUDownsampleMode::Box)
{
if (!(m_downsample_render_texture = g_gpu_device->CreateTexture(VRAM_WIDTH, VRAM_HEIGHT, 1, 1, 1,
GPUTexture::Type::RenderTarget, VRAM_RT_FORMAT)) ||
!(m_downsample_framebuffer = g_gpu_device->CreateFramebuffer(m_downsample_render_texture.get())))
{ {
return false; return false;
} }
@ -561,7 +527,10 @@ void GPU_HW::ClearFramebuffer()
void GPU_HW::DestroyBuffers() void GPU_HW::DestroyBuffers()
{ {
m_vram_upload_buffer.reset(); m_vram_upload_buffer.reset();
m_downsample_weight_framebuffer.reset();
m_downsample_weight_texture.reset();
m_downsample_framebuffer.reset(); m_downsample_framebuffer.reset();
m_downsample_render_texture.reset();
m_downsample_texture.reset(); m_downsample_texture.reset();
m_display_framebuffer.reset(); m_display_framebuffer.reset();
m_vram_readback_framebuffer.reset(); m_vram_readback_framebuffer.reset();
@ -759,6 +728,8 @@ bool GPU_HW::CompilePipelines()
progress.Increment(); progress.Increment();
// common state // common state
plconfig.input_layout.vertex_attributes = {};
plconfig.input_layout.vertex_stride = 0;
plconfig.layout = GPUPipeline::Layout::SingleTexturePushConstants; plconfig.layout = GPUPipeline::Layout::SingleTexturePushConstants;
plconfig.per_sample_shading = false; plconfig.per_sample_shading = false;
plconfig.blend = GPUPipeline::BlendState::GetNoBlendingState(); plconfig.blend = GPUPipeline::BlendState::GetNoBlendingState();
@ -909,77 +880,65 @@ bool GPU_HW::CompilePipelines()
return false; return false;
} }
#if 0
if (m_downsample_mode == GPUDownsampleMode::Adaptive) if (m_downsample_mode == GPUDownsampleMode::Adaptive)
{ {
gpbuilder.Clear(); std::unique_ptr<GPUShader> vs =
gpbuilder.SetRenderPass(m_downsample_render_pass, 0); g_gpu_device->CreateShader(GPUShaderStage::Vertex, shadergen.GenerateAdaptiveDownsampleVertexShader());
gpbuilder.SetPipelineLayout(m_downsample_pipeline_layout); std::unique_ptr<GPUShader> fs =
gpbuilder.SetVertexShader(uv_quad_vertex_shader); g_gpu_device->CreateShader(GPUShaderStage::Fragment, shadergen.GenerateAdaptiveDownsampleMipFragmentShader(true));
gpbuilder.SetNoCullRasterizationState(); if (!vs || !fs)
gpbuilder.SetNoDepthTestState();
gpbuilder.SetNoBlendingState();
gpbuilder.SetDynamicViewportAndScissorState();
std::unique_ptr<GPUShader> fs = g_host_display->CreateShaderFromSource(
GPUShader::Stage::Pixel, shadergen.GenerateAdaptiveDownsampleMipFragmentShader(true));
if (fs == VK_NULL_HANDLE)
return false; return false;
GL_OBJECT_NAME(fs, "Downsample Vertex Shader");
gpbuilder.SetFragmentShader(fs); GL_OBJECT_NAME(fs, "Downsample First Pass Fragment Shader");
m_downsample_first_pass_pipeline = gpbuilder.Create(device, pipeline_cache, false); plconfig.vertex_shader = vs.get();
vkDestroyShaderModule(g_vulkan_context->GetDevice(), fs, nullptr); plconfig.fragment_shader = fs.get();
if (m_downsample_first_pass_pipeline == VK_NULL_HANDLE) if (!(m_downsample_first_pass_pipeline = g_gpu_device->CreatePipeline(plconfig)))
return false; return false;
Vulkan::Util::SetObjectName(g_vulkan_context->GetDevice(), m_downsample_first_pass_pipeline, GL_OBJECT_NAME(m_downsample_first_pass_pipeline, "Downsample First Pass Pipeline");
"Downsample First Pass Pipeline");
fs = g_host_display->CreateShaderFromSource(GPUShader::Stage::Pixel, fs = g_gpu_device->CreateShader(GPUShaderStage::Fragment,
shadergen.GenerateAdaptiveDownsampleMipFragmentShader(false)); shadergen.GenerateAdaptiveDownsampleMipFragmentShader(false));
if (fs == VK_NULL_HANDLE) if (!fs)
return false; return false;
GL_OBJECT_NAME(fs, "Downsample Mid Pass Fragment Shader");
gpbuilder.SetFragmentShader(fs); plconfig.fragment_shader = fs.get();
m_downsample_mid_pass_pipeline = gpbuilder.Create(device, pipeline_cache, false); if (!(m_downsample_mid_pass_pipeline = g_gpu_device->CreatePipeline(plconfig)))
vkDestroyShaderModule(g_vulkan_context->GetDevice(), fs, nullptr);
if (m_downsample_mid_pass_pipeline == VK_NULL_HANDLE)
return false; return false;
Vulkan::Util::SetObjectName(g_vulkan_context->GetDevice(), m_downsample_mid_pass_pipeline, GL_OBJECT_NAME(m_downsample_mid_pass_pipeline, "Downsample Mid Pass Pipeline");
"Downsample Mid Pass Pipeline");
fs = g_host_display->CreateShaderFromSource(GPUShader::Stage::Pixel, fs = g_gpu_device->CreateShader(GPUShaderStage::Fragment, shadergen.GenerateAdaptiveDownsampleBlurFragmentShader());
shadergen.GenerateAdaptiveDownsampleBlurFragmentShader()); if (!fs)
if (fs == VK_NULL_HANDLE)
return false; return false;
GL_OBJECT_NAME(fs, "Downsample Blur Pass Fragment Shader");
gpbuilder.SetFragmentShader(fs); plconfig.fragment_shader = fs.get();
gpbuilder.SetRenderPass(m_downsample_weight_render_pass, 0); plconfig.color_format = GPUTexture::Format::R8;
m_downsample_blur_pass_pipeline = gpbuilder.Create(device, pipeline_cache, false); if (!(m_downsample_blur_pass_pipeline = g_gpu_device->CreatePipeline(plconfig)))
vkDestroyShaderModule(g_vulkan_context->GetDevice(), fs, nullptr);
if (m_downsample_blur_pass_pipeline == VK_NULL_HANDLE)
return false; return false;
Vulkan::Util::SetObjectName(g_vulkan_context->GetDevice(), m_downsample_blur_pass_pipeline, GL_OBJECT_NAME(m_downsample_blur_pass_pipeline, "Downsample Blur Pass Pipeline");
"Downsample Blur Pass Pipeline");
fs = g_host_display->CreateShaderFromSource(GPUShader::Stage::Pixel, fs = g_gpu_device->CreateShader(GPUShaderStage::Fragment,
shadergen.GenerateAdaptiveDownsampleCompositeFragmentShader()); shadergen.GenerateAdaptiveDownsampleCompositeFragmentShader());
if (fs == VK_NULL_HANDLE) if (!fs)
return false; return false;
GL_OBJECT_NAME(fs, "Downsample Composite Pass Fragment Shader");
gpbuilder.SetFragmentShader(fs); plconfig.fragment_shader = fs.get();
gpbuilder.SetPipelineLayout(m_downsample_composite_pipeline_layout); plconfig.color_format = VRAM_RT_FORMAT;
gpbuilder.SetRenderPass(m_display_load_render_pass, 0); if (!(m_downsample_composite_pass_pipeline = g_gpu_device->CreatePipeline(plconfig)))
m_downsample_composite_pass_pipeline = gpbuilder.Create(device, pipeline_cache, false);
vkDestroyShaderModule(g_vulkan_context->GetDevice(), fs, nullptr);
if (m_downsample_composite_pass_pipeline == VK_NULL_HANDLE)
return false; return false;
GL_OBJECT_NAME(m_downsample_composite_pass_pipeline, "Downsample Blur Pass Pipeline");
Vulkan::Util::SetObjectName(g_vulkan_context->GetDevice(), m_downsample_composite_pass_pipeline, GPUSampler::Config config = GPUSampler::GetLinearConfig();
"Downsample Composite Pass Pipeline"); config.min_lod = 0;
config.max_lod = GPUSampler::Config::LOD_MAX;
if (!(m_downsample_lod_sampler = g_gpu_device->CreateSampler(config)))
return false;
GL_OBJECT_NAME(m_downsample_lod_sampler, "Downsample LOD Sampler");
config.mip_filter = GPUSampler::Filter::Linear;
if (!(m_downsample_composite_sampler = g_gpu_device->CreateSampler(config)))
return false;
GL_OBJECT_NAME(m_downsample_composite_sampler, "Downsample Trilinear Sampler");
} }
else else if (m_downsample_mode == GPUDownsampleMode::Box)
#endif
if (m_downsample_mode == GPUDownsampleMode::Box)
{ {
std::unique_ptr<GPUShader> fs = std::unique_ptr<GPUShader> fs =
g_gpu_device->CreateShader(GPUShaderStage::Fragment, shadergen.GenerateBoxSampleDownsampleFragmentShader()); g_gpu_device->CreateShader(GPUShaderStage::Fragment, shadergen.GenerateBoxSampleDownsampleFragmentShader());
@ -1023,6 +982,7 @@ void GPU_HW::DestroyPipelines()
destroy(m_downsample_mid_pass_pipeline); destroy(m_downsample_mid_pass_pipeline);
destroy(m_downsample_blur_pass_pipeline); destroy(m_downsample_blur_pass_pipeline);
destroy(m_downsample_composite_pass_pipeline); destroy(m_downsample_composite_pass_pipeline);
m_downsample_composite_sampler.reset();
m_display_pipelines.enumerate(destroy); m_display_pipelines.enumerate(destroy);
} }
@ -1283,23 +1243,6 @@ u32 GPU_HW::GetAdaptiveDownsamplingMipLevels() const
return levels; return levels;
} }
GPU_HW::SmoothingUBOData GPU_HW::GetSmoothingUBO(u32 level, u32 left, u32 top, u32 width, u32 height, u32 tex_width,
u32 tex_height) const
{
const float rcp_width = 1.0f / static_cast<float>(tex_width >> level);
const float rcp_height = 1.0f / static_cast<float>(tex_height >> level);
SmoothingUBOData data;
data.min_uv[0] = static_cast<float>(left >> level) * rcp_width;
data.min_uv[1] = static_cast<float>(top >> level) * rcp_height;
data.max_uv[0] = static_cast<float>((left + width) >> level) * rcp_width;
data.max_uv[1] = static_cast<float>((top + height) >> level) * rcp_height;
data.rcp_size[0] = rcp_width;
data.rcp_size[1] = rcp_height;
return data;
}
void GPU_HW::DrawLine(float x0, float y0, u32 col0, float x1, float y1, u32 col1, float depth) void GPU_HW::DrawLine(float x0, float y0, u32 col0, float x1, float y1, u32 col1, float depth)
{ {
const float dx = x1 - x0; const float dx = x1 - x0;
@ -2476,75 +2419,97 @@ void GPU_HW::DownsampleFramebuffer(GPUTexture* source, u32 left, u32 top, u32 wi
// Runs the adaptive downsampling filter over a region of `source` and presents the result.
//
// Passes, in order:
//   1. Copy the (left, top, width, height) region of `source` to the origin of m_downsample_texture.
//   2. For every further mip level, draw into m_downsample_render_texture while sampling the previous
//      level (u_lod = level - 1), then copy the drawn area into that level of m_downsample_texture.
//   3. Blur: sample m_downsample_render_texture (which still holds the last level drawn) into
//      m_downsample_weight_texture.
//   4. Composite: sample the mip chain (texture unit 0) and the weight texture (unit 1) into
//      m_downsample_render_texture.
// The composited image is then set as the display texture at (0, 0, width, height).
//
// source:               texture to read the region from.
// left, top:            origin of the region inside `source`.
// width, height:        size of the region; each mip level covers (width >> level, height >> level).
void GPU_HW::DownsampleFramebufferAdaptive(GPUTexture* source, u32 left, u32 top, u32 width, u32 height)
{
  // Label shows the region as (left, top) => (right, bottom); every argument is unsigned.
  GL_PUSH("DownsampleFramebufferAdaptive (%u,%u => %u,%u)", left, top, left + width, top + height);

  // Uniform data pushed before the mip and blur draws. The field order mirrors the uniform block
  // declared by GPU_HW_ShaderGen::WriteAdaptiveDownsampleUniformBuffer()
  // (u_uv_min, u_uv_max, u_rcp_resolution, u_lod).
  struct SmoothingUBOData
  {
    float min_uv[2];
    float max_uv[2];
    float rcp_size[2];
    float lod;
  };

  // NOTE(review): argument order looks like (dst, dst_x, dst_y, dst_layer, dst_level, src, src_x, src_y,
  // src_layer, src_level, width, height), judging by the per-level copy below - confirm against GPUDevice.
  g_gpu_device->CopyTextureRegion(m_downsample_texture.get(), 0, 0, 0, 0, source, left, top, 0, 0, width, height);
  g_gpu_device->SetTextureSampler(0, m_downsample_texture.get(), m_downsample_lod_sampler.get());

  const u32 levels = m_downsample_texture->GetLevels();
  SmoothingUBOData uniforms;

  // create mip chain
  for (u32 level = 1; level < levels; level++)
  {
    GL_SCOPE("Create miplevel %u", level);

    const u32 level_width = width >> level;
    const u32 level_height = height >> level;
    const float rcp_width = 1.0f / static_cast<float>(m_downsample_texture->GetMipWidth(level));
    const float rcp_height = 1.0f / static_cast<float>(m_downsample_texture->GetMipHeight(level));
    uniforms.min_uv[0] = 0.0f;
    uniforms.min_uv[1] = 0.0f;
    uniforms.max_uv[0] = static_cast<float>(level_width) * rcp_width;
    uniforms.max_uv[1] = static_cast<float>(level_height) * rcp_height;
    uniforms.rcp_size[0] = rcp_width;
    uniforms.rcp_size[1] = rcp_height;
    // Sample from the level produced by the previous iteration (or the copied source for level 1).
    uniforms.lod = static_cast<float>(level - 1);

    g_gpu_device->InvalidateRenderTarget(m_downsample_render_texture.get());
    g_gpu_device->SetFramebuffer(m_downsample_framebuffer.get());
    g_gpu_device->SetViewportAndScissor(0, 0, level_width, level_height);
    g_gpu_device->SetPipeline((level == 1) ? m_downsample_first_pass_pipeline.get() :
                                             m_downsample_mid_pass_pipeline.get());
    g_gpu_device->PushUniformBuffer(&uniforms, sizeof(uniforms));
    g_gpu_device->Draw(3, 0);

    // The draw went to the separate render texture; store the result in this level of the mip chain.
    g_gpu_device->CopyTextureRegion(m_downsample_texture.get(), 0, 0, 0, level, m_downsample_render_texture.get(), 0, 0,
                                    0, 0, level_width, level_height);
  }

  // blur pass at lowest level
  {
    GL_SCOPE("Blur");

    const u32 last_level = levels - 1;
    const u32 last_width = width >> last_level;
    const u32 last_height = height >> last_level;
    // UVs here are relative to the render texture, which is what this pass samples.
    const float rcp_width = 1.0f / static_cast<float>(m_downsample_render_texture->GetWidth());
    const float rcp_height = 1.0f / static_cast<float>(m_downsample_render_texture->GetHeight());
    uniforms.min_uv[0] = 0.0f;
    uniforms.min_uv[1] = 0.0f;
    uniforms.max_uv[0] = static_cast<float>(last_width) * rcp_width;
    uniforms.max_uv[1] = static_cast<float>(last_height) * rcp_height;
    uniforms.rcp_size[0] = rcp_width;
    uniforms.rcp_size[1] = rcp_height;
    uniforms.lod = 0.0f;

    m_downsample_render_texture->MakeReadyForSampling();
    g_gpu_device->InvalidateRenderTarget(m_downsample_weight_texture.get());
    g_gpu_device->SetFramebuffer(m_downsample_weight_framebuffer.get());
    g_gpu_device->SetTextureSampler(0, m_downsample_render_texture.get(), g_gpu_device->GetNearestSampler());
    g_gpu_device->SetViewportAndScissor(0, 0, last_width, last_height);
    g_gpu_device->SetPipeline(m_downsample_blur_pass_pipeline.get());
    g_gpu_device->PushUniformBuffer(&uniforms, sizeof(uniforms));
    g_gpu_device->Draw(3, 0);
    m_downsample_weight_texture->MakeReadyForSampling();
  }

  // composite downsampled and upsampled images together
  {
    GL_SCOPE("Composite");

    g_gpu_device->InvalidateRenderTarget(m_downsample_render_texture.get());
    g_gpu_device->SetFramebuffer(m_downsample_framebuffer.get());
    g_gpu_device->SetTextureSampler(0, m_downsample_texture.get(), m_downsample_composite_sampler.get());
    g_gpu_device->SetTextureSampler(1, m_downsample_weight_texture.get(), m_downsample_lod_sampler.get());
    g_gpu_device->SetViewportAndScissor(0, 0, width, height);
    g_gpu_device->SetPipeline(m_downsample_composite_pass_pipeline.get());
    g_gpu_device->Draw(3, 0);
    m_downsample_render_texture->MakeReadyForSampling();
  }

  GL_POP();

  RestoreGraphicsAPIState();

  g_gpu_device->SetDisplayTexture(m_downsample_render_texture.get(), 0, 0, width, height);
}
void GPU_HW::DownsampleFramebufferBoxFilter(GPUTexture* source, u32 left, u32 top, u32 width, u32 height) void GPU_HW::DownsampleFramebufferBoxFilter(GPUTexture* source, u32 left, u32 top, u32 width, u32 height)
@ -2556,7 +2521,7 @@ void GPU_HW::DownsampleFramebufferBoxFilter(GPUTexture* source, u32 left, u32 to
source->MakeReadyForSampling(); source->MakeReadyForSampling();
g_gpu_device->ClearRenderTarget(m_downsample_texture.get(), 0); g_gpu_device->ClearRenderTarget(m_downsample_render_texture.get(), 0);
g_gpu_device->SetFramebuffer(m_downsample_framebuffer.get()); g_gpu_device->SetFramebuffer(m_downsample_framebuffer.get());
g_gpu_device->SetPipeline(m_downsample_first_pass_pipeline.get()); g_gpu_device->SetPipeline(m_downsample_first_pass_pipeline.get());
g_gpu_device->SetTextureSampler(0, source, g_gpu_device->GetNearestSampler()); g_gpu_device->SetTextureSampler(0, source, g_gpu_device->GetNearestSampler());
@ -2565,7 +2530,7 @@ void GPU_HW::DownsampleFramebufferBoxFilter(GPUTexture* source, u32 left, u32 to
RestoreGraphicsAPIState(); RestoreGraphicsAPIState();
g_gpu_device->SetDisplayTexture(m_downsample_texture.get(), ds_left, ds_top, ds_width, ds_height); g_gpu_device->SetDisplayTexture(m_downsample_render_texture.get(), ds_left, ds_top, ds_width, ds_height);
} }
void GPU_HW::DrawRendererStats(bool is_idle_frame) void GPU_HW::DrawRendererStats(bool is_idle_frame)

View File

@ -341,21 +341,9 @@ protected:
void SetBatchDepthBuffer(bool enabled); void SetBatchDepthBuffer(bool enabled);
void CheckForDepthClear(const BatchVertex* vertices, u32 num_vertices); void CheckForDepthClear(const BatchVertex* vertices, u32 num_vertices);
/// UBO data for adaptive smoothing.
struct SmoothingUBOData
{
float min_uv[2];
float max_uv[2];
float rcp_size[2];
};
/// Returns the number of mipmap levels used for adaptive smoothing. /// Returns the number of mipmap levels used for adaptive smoothing.
u32 GetAdaptiveDownsamplingMipLevels() const; u32 GetAdaptiveDownsamplingMipLevels() const;
/// Returns the UBO data for an adaptive smoothing pass.
SmoothingUBOData GetSmoothingUBO(u32 level, u32 left, u32 top, u32 width, u32 height, u32 tex_width,
u32 tex_height) const;
void DownsampleFramebuffer(GPUTexture* source, u32 left, u32 top, u32 width, u32 height); void DownsampleFramebuffer(GPUTexture* source, u32 left, u32 top, u32 width, u32 height);
void DownsampleFramebufferAdaptive(GPUTexture* source, u32 left, u32 top, u32 width, u32 height); void DownsampleFramebufferAdaptive(GPUTexture* source, u32 left, u32 top, u32 width, u32 height);
void DownsampleFramebufferBoxFilter(GPUTexture* source, u32 left, u32 top, u32 width, u32 height); void DownsampleFramebufferBoxFilter(GPUTexture* source, u32 left, u32 top, u32 width, u32 height);
@ -397,12 +385,11 @@ protected:
{ {
BitField<u8, bool, 0, 1> m_supports_per_sample_shading; BitField<u8, bool, 0, 1> m_supports_per_sample_shading;
BitField<u8, bool, 1, 1> m_supports_dual_source_blend; BitField<u8, bool, 1, 1> m_supports_dual_source_blend;
BitField<u8, bool, 2, 1> m_supports_adaptive_downsampling; BitField<u8, bool, 2, 1> m_supports_disable_color_perspective;
BitField<u8, bool, 3, 1> m_supports_disable_color_perspective; BitField<u8, bool, 3, 1> m_per_sample_shading;
BitField<u8, bool, 4, 1> m_per_sample_shading; BitField<u8, bool, 4, 1> m_scaled_dithering;
BitField<u8, bool, 5, 1> m_scaled_dithering; BitField<u8, bool, 5, 1> m_chroma_smoothing;
BitField<u8, bool, 6, 1> m_chroma_smoothing; BitField<u8, bool, 6, 1> m_disable_color_perspective;
BitField<u8, bool, 7, 1> m_disable_color_perspective;
u8 bits = 0; u8 bits = 0;
}; };
@ -441,13 +428,16 @@ protected:
std::unique_ptr<GPUPipeline> m_copy_pipeline; std::unique_ptr<GPUPipeline> m_copy_pipeline;
std::unique_ptr<GPUTexture> m_downsample_texture; std::unique_ptr<GPUTexture> m_downsample_texture;
std::unique_ptr<GPUTexture> m_downsample_render_texture;
std::unique_ptr<GPUFramebuffer> m_downsample_framebuffer; std::unique_ptr<GPUFramebuffer> m_downsample_framebuffer;
// std::unique_ptr<GPUTexture> m_downsample_weight_texture; std::unique_ptr<GPUTexture> m_downsample_weight_texture;
// std::unique_ptr<GPUFramebuffer> m_downsample_weight_framebuffer; std::unique_ptr<GPUFramebuffer> m_downsample_weight_framebuffer;
std::unique_ptr<GPUPipeline> m_downsample_first_pass_pipeline; std::unique_ptr<GPUPipeline> m_downsample_first_pass_pipeline;
std::unique_ptr<GPUPipeline> m_downsample_mid_pass_pipeline; std::unique_ptr<GPUPipeline> m_downsample_mid_pass_pipeline;
std::unique_ptr<GPUPipeline> m_downsample_blur_pass_pipeline; std::unique_ptr<GPUPipeline> m_downsample_blur_pass_pipeline;
std::unique_ptr<GPUPipeline> m_downsample_composite_pass_pipeline; std::unique_ptr<GPUPipeline> m_downsample_composite_pass_pipeline;
std::unique_ptr<GPUSampler> m_downsample_lod_sampler;
std::unique_ptr<GPUSampler> m_downsample_composite_sampler;
// Statistics // Statistics
RendererStats m_renderer_stats = {}; RendererStats m_renderer_stats = {};

View File

@ -1329,13 +1329,37 @@ std::string GPU_HW_ShaderGen::GenerateVRAMUpdateDepthFragmentShader()
return ss.str(); return ss.str();
} }
// Writes the uniform block declaration used by the adaptive downsampling shaders to `ss`.
// The vertex, mip and blur shader generators all call this, so they agree on one layout:
//   u_uv_min / u_uv_max - UV range the vertex shader maps its texture coordinate into,
//   u_rcp_resolution    - reciprocal size used by the mip shader to offset its sample position,
//   u_lod               - mip level passed to SAMPLE_TEXTURE_LEVEL_OFFSET by the mip shader.
// The member order matches the SmoothingUBOData struct filled in by
// GPU_HW::DownsampleFramebufferAdaptive (min_uv, max_uv, rcp_size, lod); keep the two in sync.
// NOTE(review): the trailing `true` presumably requests a push-constant style block - confirm
// against ShaderGen::DeclareUniformBuffer.
void GPU_HW_ShaderGen::WriteAdaptiveDownsampleUniformBuffer(std::stringstream& ss)
{
DeclareUniformBuffer(ss, {"float2 u_uv_min", "float2 u_uv_max", "float2 u_rcp_resolution", "float u_lod"}, true);
}
// Builds the source of the vertex shader used by the adaptive downsampling pipelines and returns it
// as a string.
//
// The generated shader derives everything from the vertex ID. For v_id 0, 1, 2 the first assignment
// gives v_tex0 = (0,0), (2,0), (0,2), which places v_pos at (-1,1), (3,1), (-1,-3): a single triangle
// that covers the whole [-1,1] clip-space square. v_tex0 is then remapped so that the 0..1 range
// spans u_uv_min..u_uv_max from the shared adaptive-downsample uniform block.
// v_pos.y is negated when API_OPENGL, API_OPENGL_ES or API_VULKAN is set.
std::string GPU_HW_ShaderGen::GenerateAdaptiveDownsampleVertexShader()
{
std::stringstream ss;
WriteHeader(ss);
// Uniforms must be declared before the entry point, since the shader body reads u_uv_min/u_uv_max.
WriteAdaptiveDownsampleUniformBuffer(ss);
// NOTE(review): arguments presumably mean no vertex attributes, 0 colour outputs, 1 texcoord output
// (v_tex0), no extra outputs, and vertex ID (v_id) declared - confirm against
// ShaderGen::DeclareVertexEntryPoint.
DeclareVertexEntryPoint(ss, {}, 0, 1, {}, true);
// The raw string below is shader source emitted verbatim; it is not C++.
ss << R"(
{
v_tex0 = float2(float((v_id << 1) & 2u), float(v_id & 2u));
v_pos = float4(v_tex0 * float2(2.0f, -2.0f) + float2(-1.0f, 1.0f), 0.0f, 1.0f);
v_tex0 = u_uv_min + (u_uv_max - u_uv_min) * v_tex0;
#if API_OPENGL || API_OPENGL_ES || API_VULKAN
v_pos.y = -v_pos.y;
#endif
}
)";
return ss.str();
}
std::string GPU_HW_ShaderGen::GenerateAdaptiveDownsampleMipFragmentShader(bool first_pass) std::string GPU_HW_ShaderGen::GenerateAdaptiveDownsampleMipFragmentShader(bool first_pass)
{ {
std::stringstream ss; std::stringstream ss;
WriteHeader(ss); WriteHeader(ss);
WriteCommonFunctions(ss); WriteCommonFunctions(ss);
WriteAdaptiveDownsampleUniformBuffer(ss);
DeclareTexture(ss, "samp0", 0, false); DeclareTexture(ss, "samp0", 0, false);
DeclareUniformBuffer(ss, {"float2 u_uv_min", "float2 u_uv_max", "float2 u_rcp_resolution"}, true);
DefineMacro(ss, "FIRST_PASS", first_pass); DefineMacro(ss, "FIRST_PASS", first_pass);
// mipmap_energy.glsl ported from parallel-rsx. // mipmap_energy.glsl ported from parallel-rsx.
@ -1370,16 +1394,16 @@ float4 get_bias(float4 c00, float4 c01, float4 c10, float4 c11)
{ {
float2 uv = v_tex0 - (u_rcp_resolution * 0.25); float2 uv = v_tex0 - (u_rcp_resolution * 0.25);
#ifdef FIRST_PASS #ifdef FIRST_PASS
vec3 c00 = SAMPLE_TEXTURE_OFFSET(samp0, uv, int2(0, 0)).rgb; vec3 c00 = SAMPLE_TEXTURE_LEVEL_OFFSET(samp0, uv, u_lod, int2(0, 0)).rgb;
vec3 c01 = SAMPLE_TEXTURE_OFFSET(samp0, uv, int2(0, 1)).rgb; vec3 c01 = SAMPLE_TEXTURE_LEVEL_OFFSET(samp0, uv, u_lod, int2(0, 1)).rgb;
vec3 c10 = SAMPLE_TEXTURE_OFFSET(samp0, uv, int2(1, 0)).rgb; vec3 c10 = SAMPLE_TEXTURE_LEVEL_OFFSET(samp0, uv, u_lod, int2(1, 0)).rgb;
vec3 c11 = SAMPLE_TEXTURE_OFFSET(samp0, uv, int2(1, 1)).rgb; vec3 c11 = SAMPLE_TEXTURE_LEVEL_OFFSET(samp0, uv, u_lod, int2(1, 1)).rgb;
o_col0 = get_bias(c00, c01, c10, c11); o_col0 = get_bias(c00, c01, c10, c11);
#else #else
vec4 c00 = SAMPLE_TEXTURE_OFFSET(samp0, uv, int2(0, 0)); vec4 c00 = SAMPLE_TEXTURE_LEVEL_OFFSET(samp0, uv, u_lod, int2(0, 0));
vec4 c01 = SAMPLE_TEXTURE_OFFSET(samp0, uv, int2(0, 1)); vec4 c01 = SAMPLE_TEXTURE_LEVEL_OFFSET(samp0, uv, u_lod, int2(0, 1));
vec4 c10 = SAMPLE_TEXTURE_OFFSET(samp0, uv, int2(1, 0)); vec4 c10 = SAMPLE_TEXTURE_LEVEL_OFFSET(samp0, uv, u_lod, int2(1, 0));
vec4 c11 = SAMPLE_TEXTURE_OFFSET(samp0, uv, int2(1, 1)); vec4 c11 = SAMPLE_TEXTURE_LEVEL_OFFSET(samp0, uv, u_lod, int2(1, 1));
o_col0 = get_bias(c00, c01, c10, c11); o_col0 = get_bias(c00, c01, c10, c11);
#endif #endif
} }
@ -1394,8 +1418,7 @@ std::string GPU_HW_ShaderGen::GenerateAdaptiveDownsampleBlurFragmentShader()
WriteHeader(ss); WriteHeader(ss);
WriteCommonFunctions(ss); WriteCommonFunctions(ss);
DeclareTexture(ss, "samp0", 0, false); DeclareTexture(ss, "samp0", 0, false);
DeclareUniformBuffer(ss, {"float2 u_uv_min", "float2 u_uv_max", "float2 u_rcp_resolution", "float sample_level"}, WriteAdaptiveDownsampleUniformBuffer(ss);
true);
// mipmap_blur.glsl ported from parallel-rsx. // mipmap_blur.glsl ported from parallel-rsx.
DeclareFragmentEntryPoint(ss, 0, 1, {}, false, 1, false, false, false, false); DeclareFragmentEntryPoint(ss, 0, 1, {}, false, 1, false, false, false, false);

View File

@ -24,6 +24,7 @@ public:
std::string GenerateVRAMFillFragmentShader(bool wrapped, bool interlaced); std::string GenerateVRAMFillFragmentShader(bool wrapped, bool interlaced);
std::string GenerateVRAMUpdateDepthFragmentShader(); std::string GenerateVRAMUpdateDepthFragmentShader();
std::string GenerateAdaptiveDownsampleVertexShader();
std::string GenerateAdaptiveDownsampleMipFragmentShader(bool first_pass); std::string GenerateAdaptiveDownsampleMipFragmentShader(bool first_pass);
std::string GenerateAdaptiveDownsampleBlurFragmentShader(); std::string GenerateAdaptiveDownsampleBlurFragmentShader();
std::string GenerateAdaptiveDownsampleCompositeFragmentShader(); std::string GenerateAdaptiveDownsampleCompositeFragmentShader();
@ -36,6 +37,7 @@ private:
void WriteCommonFunctions(std::stringstream& ss); void WriteCommonFunctions(std::stringstream& ss);
void WriteBatchUniformBuffer(std::stringstream& ss); void WriteBatchUniformBuffer(std::stringstream& ss);
void WriteBatchTextureFilter(std::stringstream& ss, GPUTextureFilter texture_filter); void WriteBatchTextureFilter(std::stringstream& ss, GPUTextureFilter texture_filter);
void WriteAdaptiveDownsampleUniformBuffer(std::stringstream& ss);
u32 m_resolution_scale; u32 m_resolution_scale;
u32 m_multisamples; u32 m_multisamples;

View File

@ -210,7 +210,7 @@ void ShaderGen::WriteHeader(std::stringstream& ss)
ss << "#define SAMPLE_TEXTURE(name, coords) texture(name, coords)\n"; ss << "#define SAMPLE_TEXTURE(name, coords) texture(name, coords)\n";
ss << "#define SAMPLE_TEXTURE_OFFSET(name, coords, offset) textureOffset(name, coords, offset)\n"; ss << "#define SAMPLE_TEXTURE_OFFSET(name, coords, offset) textureOffset(name, coords, offset)\n";
ss << "#define SAMPLE_TEXTURE_LEVEL(name, coords, level) textureLod(name, coords, level)\n"; ss << "#define SAMPLE_TEXTURE_LEVEL(name, coords, level) textureLod(name, coords, level)\n";
ss << "#define SAMPLE_TEXTURE_LEVEL_OFFSET(name, coords, level, offset) textureLod(name, coords, level, offset)\n"; ss << "#define SAMPLE_TEXTURE_LEVEL_OFFSET(name, coords, level, offset) textureLodOffset(name, coords, level, offset)\n";
ss << "#define LOAD_TEXTURE(name, coords, mip) texelFetch(name, coords, mip)\n"; ss << "#define LOAD_TEXTURE(name, coords, mip) texelFetch(name, coords, mip)\n";
ss << "#define LOAD_TEXTURE_MS(name, coords, sample) texelFetch(name, coords, int(sample))\n"; ss << "#define LOAD_TEXTURE_MS(name, coords, sample) texelFetch(name, coords, int(sample))\n";
ss << "#define LOAD_TEXTURE_OFFSET(name, coords, mip, offset) texelFetchOffset(name, coords, mip, offset)\n"; ss << "#define LOAD_TEXTURE_OFFSET(name, coords, mip, offset) texelFetchOffset(name, coords, mip, offset)\n";