Reimplement adaptive downsampling

This commit is contained in:
Stenzek 2023-08-07 22:19:46 +10:00
parent 3291675e54
commit 4c35f3dcd4
7 changed files with 186 additions and 209 deletions

View File

@ -43,8 +43,7 @@ static void SetD3DDebugObjectName(ID3D11DeviceChild* obj, const std::string_view
if (SUCCEEDED(hr) && existing_data_size > 0)
return;
const std::wstring wname = StringUtil::UTF8StringToWideString(name);
obj->SetPrivateData(guid, static_cast<UINT>(wname.length()) * 2u, wname.c_str());
obj->SetPrivateData(guid, static_cast<UINT>(name.length()), name.data());
#endif
}
@ -478,10 +477,6 @@ bool D3D11Device::CreateDevice(const std::string_view& adapter)
if (m_window_info.type != WindowInfo::Type::Surfaceless && !CreateSwapChain())
return false;
// Render a frame as soon as possible to clear out whatever was previously being displayed.
m_context->ClearRenderTargetView(m_swap_chain_rtv.Get(), s_clear_color.data());
m_swap_chain->Present(0, m_using_allow_tearing ? DXGI_PRESENT_ALLOW_TEARING : 0);
if (!CreateBuffers())
return false;

View File

@ -74,6 +74,8 @@ public:
union Config
{
static constexpr u8 LOD_MAX = 15;
BitField<u64, Filter, 0, 1> min_filter;
BitField<u64, Filter, 1, 1> mag_filter;
BitField<u64, Filter, 2, 1> mip_filter;

View File

@ -116,7 +116,6 @@ bool GPU_HW::Initialize()
m_max_resolution_scale = g_gpu_device->GetMaxTextureSize() / VRAM_WIDTH;
m_supports_dual_source_blend = features.dual_source_blend;
m_supports_per_sample_shading = features.per_sample_shading;
m_supports_adaptive_downsampling = features.mipmapped_render_targets;
m_supports_disable_color_perspective = features.noperspective_interpolation;
m_resolution_scale = CalculateResolutionScale();
@ -148,14 +147,6 @@ bool GPU_HW::Initialize()
Settings::GetTextureFilterDisplayName(m_texture_filtering));
m_texture_filtering = GPUTextureFilter::Nearest;
}
if (!m_supports_adaptive_downsampling && g_settings.gpu_resolution_scale > 1 &&
g_settings.gpu_downsample_mode == GPUDownsampleMode::Adaptive)
{
Host::AddOSDMessage(
Host::TranslateStdString(
"OSDMessage", "Adaptive downsampling is not supported with the current renderer, using box filter instead."),
20.0f);
}
if (!m_supports_disable_color_perspective && !ShouldDisableColorPerspective())
Log_WarningPrint("Disable color perspective not supported, but should be used.");
@ -382,8 +373,7 @@ u32 GPU_HW::CalculateResolutionScale() const
scale = static_cast<u32>(std::clamp<s32>(preferred_scale, 1, m_max_resolution_scale));
}
if (g_settings.gpu_downsample_mode == GPUDownsampleMode::Adaptive && m_supports_adaptive_downsampling && scale > 1 &&
!Common::IsPow2(scale))
if (g_settings.gpu_downsample_mode == GPUDownsampleMode::Adaptive && scale > 1 && !Common::IsPow2(scale))
{
const u32 new_scale = Common::PreviousPow2(scale);
Log_InfoPrintf("Resolution scale %ux not supported for adaptive smoothing, using %ux", scale, new_scale);
@ -412,13 +402,7 @@ void GPU_HW::UpdateResolutionScale()
GPUDownsampleMode GPU_HW::GetDownsampleMode(u32 resolution_scale) const
{
if (resolution_scale == 1)
return GPUDownsampleMode::Disabled;
if (g_settings.gpu_downsample_mode == GPUDownsampleMode::Adaptive)
return m_supports_adaptive_downsampling ? GPUDownsampleMode::Adaptive : GPUDownsampleMode::Box;
return g_settings.gpu_downsample_mode;
return (resolution_scale == 1) ? GPUDownsampleMode::Disabled : g_settings.gpu_downsample_mode;
}
std::tuple<u32, u32> GPU_HW::GetEffectiveDisplayResolution(bool scaled /* = true */)
@ -499,46 +483,28 @@ bool GPU_HW::CreateBuffers()
Log_InfoPrintf("Created HW framebuffer of %ux%u", texture_width, texture_height);
#if 0
if (m_downsample_mode == GPUDownsampleMode::Adaptive)
{
const u32 levels = GetAdaptiveDownsamplingMipLevels();
if (!m_downsample_texture.Create(m_device.Get(), texture_width, texture_height, 1, static_cast<u16>(levels), 1,
GPUTexture::Type::RenderTarget, texture_format) ||
!m_downsample_weight_texture.Create(m_device.Get(), texture_width >> (levels - 1),
texture_height >> (levels - 1), 1, 1, 1, GPUTexture::Type::RenderTarget,
GPUTexture::Format::R8))
if (!(m_downsample_texture = g_gpu_device->CreateTexture(texture_width, texture_height, 1, levels, 1,
GPUTexture::Type::Texture, VRAM_RT_FORMAT)) ||
!(m_downsample_render_texture = g_gpu_device->CreateTexture(texture_width, texture_height, 1, 1, 1,
GPUTexture::Type::RenderTarget, VRAM_RT_FORMAT)) ||
!(m_downsample_framebuffer = g_gpu_device->CreateFramebuffer(m_downsample_render_texture.get())) ||
!(m_downsample_weight_texture =
g_gpu_device->CreateTexture(texture_width >> (levels - 1), texture_height >> (levels - 1), 1, 1, 1,
GPUTexture::Type::RenderTarget, GPUTexture::Format::R8)) ||
!(m_downsample_weight_framebuffer = g_gpu_device->CreateFramebuffer(m_downsample_weight_texture.get())))
{
return false;
}
m_downsample_mip_views.resize(levels);
for (u32 i = 0; i < levels; i++)
{
const CD3D11_SHADER_RESOURCE_VIEW_DESC srv_desc(m_downsample_texture, D3D11_SRV_DIMENSION_TEXTURE2D,
m_downsample_texture.GetDXGIFormat(), i, 1);
const CD3D11_RENDER_TARGET_VIEW_DESC rtv_desc(m_downsample_texture, D3D11_RTV_DIMENSION_TEXTURE2D,
m_downsample_texture.GetDXGIFormat(), i, 1);
HRESULT hr = m_device->CreateShaderResourceView(m_downsample_texture, &srv_desc,
m_downsample_mip_views[i].first.GetAddressOf());
if (FAILED(hr))
return false;
hr = m_device->CreateRenderTargetView(m_downsample_texture, &rtv_desc,
m_downsample_mip_views[i].second.GetAddressOf());
if (FAILED(hr))
return false;
}
}
else
#endif
if (m_downsample_mode == GPUDownsampleMode::Box)
else if (m_downsample_mode == GPUDownsampleMode::Box)
{
if (!(m_downsample_texture = g_gpu_device->CreateTexture(VRAM_WIDTH, VRAM_HEIGHT, 1, 1, 1,
GPUTexture::Type::RenderTarget, VRAM_RT_FORMAT)) ||
!(m_downsample_framebuffer = g_gpu_device->CreateFramebuffer(m_downsample_texture.get())))
if (!(m_downsample_render_texture = g_gpu_device->CreateTexture(VRAM_WIDTH, VRAM_HEIGHT, 1, 1, 1,
GPUTexture::Type::RenderTarget, VRAM_RT_FORMAT)) ||
!(m_downsample_framebuffer = g_gpu_device->CreateFramebuffer(m_downsample_render_texture.get())))
{
return false;
}
@ -561,7 +527,10 @@ void GPU_HW::ClearFramebuffer()
void GPU_HW::DestroyBuffers()
{
m_vram_upload_buffer.reset();
m_downsample_weight_framebuffer.reset();
m_downsample_weight_texture.reset();
m_downsample_framebuffer.reset();
m_downsample_render_texture.reset();
m_downsample_texture.reset();
m_display_framebuffer.reset();
m_vram_readback_framebuffer.reset();
@ -759,6 +728,8 @@ bool GPU_HW::CompilePipelines()
progress.Increment();
// common state
plconfig.input_layout.vertex_attributes = {};
plconfig.input_layout.vertex_stride = 0;
plconfig.layout = GPUPipeline::Layout::SingleTexturePushConstants;
plconfig.per_sample_shading = false;
plconfig.blend = GPUPipeline::BlendState::GetNoBlendingState();
@ -909,77 +880,65 @@ bool GPU_HW::CompilePipelines()
return false;
}
#if 0
if (m_downsample_mode == GPUDownsampleMode::Adaptive)
{
gpbuilder.Clear();
gpbuilder.SetRenderPass(m_downsample_render_pass, 0);
gpbuilder.SetPipelineLayout(m_downsample_pipeline_layout);
gpbuilder.SetVertexShader(uv_quad_vertex_shader);
gpbuilder.SetNoCullRasterizationState();
gpbuilder.SetNoDepthTestState();
gpbuilder.SetNoBlendingState();
gpbuilder.SetDynamicViewportAndScissorState();
std::unique_ptr<GPUShader> fs = g_host_display->CreateShaderFromSource(
GPUShader::Stage::Pixel, shadergen.GenerateAdaptiveDownsampleMipFragmentShader(true));
if (fs == VK_NULL_HANDLE)
std::unique_ptr<GPUShader> vs =
g_gpu_device->CreateShader(GPUShaderStage::Vertex, shadergen.GenerateAdaptiveDownsampleVertexShader());
std::unique_ptr<GPUShader> fs =
g_gpu_device->CreateShader(GPUShaderStage::Fragment, shadergen.GenerateAdaptiveDownsampleMipFragmentShader(true));
if (!vs || !fs)
return false;
gpbuilder.SetFragmentShader(fs);
m_downsample_first_pass_pipeline = gpbuilder.Create(device, pipeline_cache, false);
vkDestroyShaderModule(g_vulkan_context->GetDevice(), fs, nullptr);
if (m_downsample_first_pass_pipeline == VK_NULL_HANDLE)
GL_OBJECT_NAME(fs, "Downsample Vertex Shader");
GL_OBJECT_NAME(fs, "Downsample First Pass Fragment Shader");
plconfig.vertex_shader = vs.get();
plconfig.fragment_shader = fs.get();
if (!(m_downsample_first_pass_pipeline = g_gpu_device->CreatePipeline(plconfig)))
return false;
Vulkan::Util::SetObjectName(g_vulkan_context->GetDevice(), m_downsample_first_pass_pipeline,
"Downsample First Pass Pipeline");
GL_OBJECT_NAME(m_downsample_first_pass_pipeline, "Downsample First Pass Pipeline");
fs = g_host_display->CreateShaderFromSource(GPUShader::Stage::Pixel,
shadergen.GenerateAdaptiveDownsampleMipFragmentShader(false));
if (fs == VK_NULL_HANDLE)
fs = g_gpu_device->CreateShader(GPUShaderStage::Fragment,
shadergen.GenerateAdaptiveDownsampleMipFragmentShader(false));
if (!fs)
return false;
gpbuilder.SetFragmentShader(fs);
m_downsample_mid_pass_pipeline = gpbuilder.Create(device, pipeline_cache, false);
vkDestroyShaderModule(g_vulkan_context->GetDevice(), fs, nullptr);
if (m_downsample_mid_pass_pipeline == VK_NULL_HANDLE)
GL_OBJECT_NAME(fs, "Downsample Mid Pass Fragment Shader");
plconfig.fragment_shader = fs.get();
if (!(m_downsample_mid_pass_pipeline = g_gpu_device->CreatePipeline(plconfig)))
return false;
Vulkan::Util::SetObjectName(g_vulkan_context->GetDevice(), m_downsample_mid_pass_pipeline,
"Downsample Mid Pass Pipeline");
GL_OBJECT_NAME(m_downsample_mid_pass_pipeline, "Downsample Mid Pass Pipeline");
fs = g_host_display->CreateShaderFromSource(GPUShader::Stage::Pixel,
shadergen.GenerateAdaptiveDownsampleBlurFragmentShader());
if (fs == VK_NULL_HANDLE)
fs = g_gpu_device->CreateShader(GPUShaderStage::Fragment, shadergen.GenerateAdaptiveDownsampleBlurFragmentShader());
if (!fs)
return false;
gpbuilder.SetFragmentShader(fs);
gpbuilder.SetRenderPass(m_downsample_weight_render_pass, 0);
m_downsample_blur_pass_pipeline = gpbuilder.Create(device, pipeline_cache, false);
vkDestroyShaderModule(g_vulkan_context->GetDevice(), fs, nullptr);
if (m_downsample_blur_pass_pipeline == VK_NULL_HANDLE)
GL_OBJECT_NAME(fs, "Downsample Blur Pass Fragment Shader");
plconfig.fragment_shader = fs.get();
plconfig.color_format = GPUTexture::Format::R8;
if (!(m_downsample_blur_pass_pipeline = g_gpu_device->CreatePipeline(plconfig)))
return false;
Vulkan::Util::SetObjectName(g_vulkan_context->GetDevice(), m_downsample_blur_pass_pipeline,
"Downsample Blur Pass Pipeline");
GL_OBJECT_NAME(m_downsample_blur_pass_pipeline, "Downsample Blur Pass Pipeline");
fs = g_host_display->CreateShaderFromSource(GPUShader::Stage::Pixel,
shadergen.GenerateAdaptiveDownsampleCompositeFragmentShader());
if (fs == VK_NULL_HANDLE)
fs = g_gpu_device->CreateShader(GPUShaderStage::Fragment,
shadergen.GenerateAdaptiveDownsampleCompositeFragmentShader());
if (!fs)
return false;
gpbuilder.SetFragmentShader(fs);
gpbuilder.SetPipelineLayout(m_downsample_composite_pipeline_layout);
gpbuilder.SetRenderPass(m_display_load_render_pass, 0);
m_downsample_composite_pass_pipeline = gpbuilder.Create(device, pipeline_cache, false);
vkDestroyShaderModule(g_vulkan_context->GetDevice(), fs, nullptr);
if (m_downsample_composite_pass_pipeline == VK_NULL_HANDLE)
GL_OBJECT_NAME(fs, "Downsample Composite Pass Fragment Shader");
plconfig.fragment_shader = fs.get();
plconfig.color_format = VRAM_RT_FORMAT;
if (!(m_downsample_composite_pass_pipeline = g_gpu_device->CreatePipeline(plconfig)))
return false;
GL_OBJECT_NAME(m_downsample_composite_pass_pipeline, "Downsample Blur Pass Pipeline");
Vulkan::Util::SetObjectName(g_vulkan_context->GetDevice(), m_downsample_composite_pass_pipeline,
"Downsample Composite Pass Pipeline");
GPUSampler::Config config = GPUSampler::GetLinearConfig();
config.min_lod = 0;
config.max_lod = GPUSampler::Config::LOD_MAX;
if (!(m_downsample_lod_sampler = g_gpu_device->CreateSampler(config)))
return false;
GL_OBJECT_NAME(m_downsample_lod_sampler, "Downsample LOD Sampler");
config.mip_filter = GPUSampler::Filter::Linear;
if (!(m_downsample_composite_sampler = g_gpu_device->CreateSampler(config)))
return false;
GL_OBJECT_NAME(m_downsample_composite_sampler, "Downsample Trilinear Sampler");
}
else
#endif
if (m_downsample_mode == GPUDownsampleMode::Box)
else if (m_downsample_mode == GPUDownsampleMode::Box)
{
std::unique_ptr<GPUShader> fs =
g_gpu_device->CreateShader(GPUShaderStage::Fragment, shadergen.GenerateBoxSampleDownsampleFragmentShader());
@ -1023,6 +982,7 @@ void GPU_HW::DestroyPipelines()
destroy(m_downsample_mid_pass_pipeline);
destroy(m_downsample_blur_pass_pipeline);
destroy(m_downsample_composite_pass_pipeline);
m_downsample_composite_sampler.reset();
m_display_pipelines.enumerate(destroy);
}
@ -1283,23 +1243,6 @@ u32 GPU_HW::GetAdaptiveDownsamplingMipLevels() const
return levels;
}
/// Builds the uniform data for an adaptive smoothing pass operating on mip `level`.
/// The rectangle (left, top, width, height) and the texture size (tex_width, tex_height)
/// are each shifted right by `level` before being converted to normalized coordinates.
GPU_HW::SmoothingUBOData GPU_HW::GetSmoothingUBO(u32 level, u32 left, u32 top, u32 width, u32 height, u32 tex_width,
                                                 u32 tex_height) const
{
  // Reciprocal of the texture dimensions at the requested mip level.
  const float inv_level_width = 1.0f / static_cast<float>(tex_width >> level);
  const float inv_level_height = 1.0f / static_cast<float>(tex_height >> level);

  // Far edges of the rectangle, still in unshifted texels.
  const u32 right = left + width;
  const u32 bottom = top + height;

  SmoothingUBOData ubo;
  ubo.rcp_size[0] = inv_level_width;
  ubo.rcp_size[1] = inv_level_height;
  ubo.min_uv[0] = static_cast<float>(left >> level) * inv_level_width;
  ubo.min_uv[1] = static_cast<float>(top >> level) * inv_level_height;
  ubo.max_uv[0] = static_cast<float>(right >> level) * inv_level_width;
  ubo.max_uv[1] = static_cast<float>(bottom >> level) * inv_level_height;
  return ubo;
}
void GPU_HW::DrawLine(float x0, float y0, u32 col0, float x1, float y1, u32 col1, float depth)
{
const float dx = x1 - x0;
@ -2476,75 +2419,97 @@ void GPU_HW::DownsampleFramebuffer(GPUTexture* source, u32 left, u32 top, u32 wi
void GPU_HW::DownsampleFramebufferAdaptive(GPUTexture* source, u32 left, u32 top, u32 width, u32 height)
{
#if 0
CD3D11_BOX src_box(left, top, 0, left + width, top + height, 1);
m_context->OMSetDepthStencilState(m_depth_disabled_state.Get(), 0);
m_context->OMSetBlendState(m_blend_disabled_state.Get(), nullptr, 0xFFFFFFFFu);
m_context->CopySubresourceRegion(m_downsample_texture, 0, left, top, 0, source->GetD3DTexture(), 0, &src_box);
m_context->PSSetSamplers(0, 1, m_point_sampler_state.GetAddressOf());
m_context->VSSetShader(m_uv_quad_vertex_shader.Get(), nullptr, 0);
GL_PUSH("DownsampleFramebufferAdaptive (%u,%u => %u,%d)", left, top, left + width, left + height);
struct SmoothingUBOData
{
float min_uv[2];
float max_uv[2];
float rcp_size[2];
float lod;
};
g_gpu_device->CopyTextureRegion(m_downsample_texture.get(), 0, 0, 0, 0, source, left, top, 0, 0, width, height);
g_gpu_device->SetTextureSampler(0, m_downsample_texture.get(), m_downsample_lod_sampler.get());
const u32 levels = m_downsample_texture->GetLevels();
SmoothingUBOData uniforms;
// create mip chain
const u32 levels = m_downsample_texture.GetLevels();
for (u32 level = 1; level < levels; level++)
{
static constexpr float clear_color[4] = {};
GL_SCOPE("Create miplevel %u", level);
SetViewportAndScissor(left >> level, top >> level, width >> level, height >> level);
m_context->ClearRenderTargetView(m_downsample_mip_views[level].second.Get(), clear_color);
m_context->OMSetRenderTargets(1, m_downsample_mip_views[level].second.GetAddressOf(), nullptr);
m_context->PSSetShaderResources(0, 1, m_downsample_mip_views[level - 1].first.GetAddressOf());
const u32 level_width = width >> level;
const u32 level_height = height >> level;
const float rcp_width = 1.0f / static_cast<float>(m_downsample_texture->GetMipWidth(level));
const float rcp_height = 1.0f / static_cast<float>(m_downsample_texture->GetMipHeight(level));
uniforms.min_uv[0] = 0.0f;
uniforms.min_uv[1] = 0.0f;
uniforms.max_uv[0] = static_cast<float>(level_width) * rcp_width;
uniforms.max_uv[1] = static_cast<float>(level_height) * rcp_height;
uniforms.rcp_size[0] = rcp_width;
uniforms.rcp_size[1] = rcp_height;
uniforms.lod = static_cast<float>(level - 1);
const SmoothingUBOData ubo = GetSmoothingUBO(level, left, top, width, height, m_downsample_texture.GetWidth(),
m_downsample_texture.GetHeight());
m_context->PSSetShader(
(level == 1) ? m_downsample_first_pass_pixel_shader.Get() : m_downsample_mid_pass_pixel_shader.Get(), nullptr, 0);
UploadUniformBuffer(&ubo, sizeof(ubo));
m_context->Draw(3, 0);
g_gpu_device->InvalidateRenderTarget(m_downsample_render_texture.get());
g_gpu_device->SetFramebuffer(m_downsample_framebuffer.get());
g_gpu_device->SetViewportAndScissor(0, 0, level_width, level_height);
g_gpu_device->SetPipeline((level == 1) ? m_downsample_first_pass_pipeline.get() :
m_downsample_mid_pass_pipeline.get());
g_gpu_device->PushUniformBuffer(&uniforms, sizeof(uniforms));
g_gpu_device->Draw(3, 0);
g_gpu_device->CopyTextureRegion(m_downsample_texture.get(), 0, 0, 0, level, m_downsample_render_texture.get(), 0, 0,
0, 0, level_width, level_height);
}
// blur pass at lowest level
{
GL_SCOPE("Blur");
const u32 last_level = levels - 1;
static constexpr float clear_color[4] = {};
const u32 last_width = width >> last_level;
const u32 last_height = height >> last_level;
const float rcp_width = 1.0f / static_cast<float>(m_downsample_render_texture->GetWidth());
const float rcp_height = 1.0f / static_cast<float>(m_downsample_render_texture->GetHeight());
uniforms.min_uv[0] = 0.0f;
uniforms.min_uv[1] = 0.0f;
uniforms.max_uv[0] = static_cast<float>(last_width) * rcp_width;
uniforms.max_uv[1] = static_cast<float>(last_height) * rcp_height;
uniforms.rcp_size[0] = rcp_width;
uniforms.rcp_size[1] = rcp_height;
uniforms.lod = 0.0f;
SetViewportAndScissor(left >> last_level, top >> last_level, width >> last_level, height >> last_level);
m_context->ClearRenderTargetView(m_downsample_weight_texture.GetD3DRTV(), clear_color);
m_context->OMSetRenderTargets(1, m_downsample_weight_texture.GetD3DRTVArray(), nullptr);
m_context->PSSetShaderResources(0, 1, m_downsample_mip_views.back().first.GetAddressOf());
m_context->PSSetShader(m_downsample_blur_pass_pixel_shader.Get(), nullptr, 0);
const SmoothingUBOData ubo = GetSmoothingUBO(last_level, left, top, width, height, m_downsample_texture.GetWidth(),
m_downsample_texture.GetHeight());
m_context->PSSetShader(m_downsample_blur_pass_pixel_shader.Get(), nullptr, 0);
UploadUniformBuffer(&ubo, sizeof(ubo));
m_context->Draw(3, 0);
m_downsample_render_texture->MakeReadyForSampling();
g_gpu_device->InvalidateRenderTarget(m_downsample_weight_texture.get());
g_gpu_device->SetFramebuffer(m_downsample_weight_framebuffer.get());
g_gpu_device->SetTextureSampler(0, m_downsample_render_texture.get(), g_gpu_device->GetNearestSampler());
g_gpu_device->SetViewportAndScissor(0, 0, last_width, last_height);
g_gpu_device->SetPipeline(m_downsample_blur_pass_pipeline.get());
g_gpu_device->PushUniformBuffer(&uniforms, sizeof(uniforms));
g_gpu_device->Draw(3, 0);
m_downsample_weight_texture->MakeReadyForSampling();
}
// composite downsampled and upsampled images together
{
SetViewportAndScissor(left, top, width, height);
m_context->OMSetRenderTargets(1, GetDisplayTexture()->GetD3DRTVArray(), nullptr);
GL_SCOPE("Composite");
ID3D11ShaderResourceView* const srvs[2] = {m_downsample_texture.GetD3DSRV(),
m_downsample_weight_texture.GetD3DSRV()};
ID3D11SamplerState* const samplers[2] = {m_trilinear_sampler_state.Get(), m_linear_sampler_state.Get()};
m_context->PSSetShaderResources(0, countof(srvs), srvs);
m_context->PSSetSamplers(0, countof(samplers), samplers);
m_context->PSSetShader(m_downsample_composite_pixel_shader.Get(), nullptr, 0);
m_context->Draw(3, 0);
g_gpu_device->InvalidateRenderTarget(m_downsample_render_texture.get());
g_gpu_device->SetFramebuffer(m_downsample_framebuffer.get());
g_gpu_device->SetTextureSampler(0, m_downsample_texture.get(), m_downsample_composite_sampler.get());
g_gpu_device->SetTextureSampler(1, m_downsample_weight_texture.get(), m_downsample_lod_sampler.get());
g_gpu_device->SetViewportAndScissor(0, 0, width, height);
g_gpu_device->SetPipeline(m_downsample_composite_pass_pipeline.get());
g_gpu_device->Draw(3, 0);
m_downsample_render_texture->MakeReadyForSampling();
}
ID3D11ShaderResourceView* const null_srvs[2] = {};
m_context->PSSetShaderResources(0, countof(null_srvs), null_srvs);
m_batch_ubo_dirty = true;
GL_POP();
RestoreGraphicsAPIState();
g_host_display->SetDisplayTexture(m_display_texture.get(), left, top, width, height);
#else
Panic("Not implemented");
#endif
g_gpu_device->SetDisplayTexture(m_downsample_render_texture.get(), 0, 0, width, height);
}
void GPU_HW::DownsampleFramebufferBoxFilter(GPUTexture* source, u32 left, u32 top, u32 width, u32 height)
@ -2556,7 +2521,7 @@ void GPU_HW::DownsampleFramebufferBoxFilter(GPUTexture* source, u32 left, u32 to
source->MakeReadyForSampling();
g_gpu_device->ClearRenderTarget(m_downsample_texture.get(), 0);
g_gpu_device->ClearRenderTarget(m_downsample_render_texture.get(), 0);
g_gpu_device->SetFramebuffer(m_downsample_framebuffer.get());
g_gpu_device->SetPipeline(m_downsample_first_pass_pipeline.get());
g_gpu_device->SetTextureSampler(0, source, g_gpu_device->GetNearestSampler());
@ -2565,7 +2530,7 @@ void GPU_HW::DownsampleFramebufferBoxFilter(GPUTexture* source, u32 left, u32 to
RestoreGraphicsAPIState();
g_gpu_device->SetDisplayTexture(m_downsample_texture.get(), ds_left, ds_top, ds_width, ds_height);
g_gpu_device->SetDisplayTexture(m_downsample_render_texture.get(), ds_left, ds_top, ds_width, ds_height);
}
void GPU_HW::DrawRendererStats(bool is_idle_frame)

View File

@ -341,21 +341,9 @@ protected:
void SetBatchDepthBuffer(bool enabled);
void CheckForDepthClear(const BatchVertex* vertices, u32 num_vertices);
/// UBO data for adaptive smoothing.
/// Filled in by GetSmoothingUBO() for a given mip level and source rectangle.
struct SmoothingUBOData
{
// Top-left corner of the rectangle at the mip level, scaled by rcp_size.
float min_uv[2];
// Bottom-right corner of the rectangle at the mip level, scaled by rcp_size.
float max_uv[2];
// 1.0 / (texture width, height) at the mip level.
float rcp_size[2];
};
/// Returns the number of mipmap levels used for adaptive smoothing.
u32 GetAdaptiveDownsamplingMipLevels() const;
/// Returns the UBO data for an adaptive smoothing pass.
SmoothingUBOData GetSmoothingUBO(u32 level, u32 left, u32 top, u32 width, u32 height, u32 tex_width,
u32 tex_height) const;
void DownsampleFramebuffer(GPUTexture* source, u32 left, u32 top, u32 width, u32 height);
void DownsampleFramebufferAdaptive(GPUTexture* source, u32 left, u32 top, u32 width, u32 height);
void DownsampleFramebufferBoxFilter(GPUTexture* source, u32 left, u32 top, u32 width, u32 height);
@ -397,12 +385,11 @@ protected:
{
BitField<u8, bool, 0, 1> m_supports_per_sample_shading;
BitField<u8, bool, 1, 1> m_supports_dual_source_blend;
BitField<u8, bool, 2, 1> m_supports_adaptive_downsampling;
BitField<u8, bool, 3, 1> m_supports_disable_color_perspective;
BitField<u8, bool, 4, 1> m_per_sample_shading;
BitField<u8, bool, 5, 1> m_scaled_dithering;
BitField<u8, bool, 6, 1> m_chroma_smoothing;
BitField<u8, bool, 7, 1> m_disable_color_perspective;
BitField<u8, bool, 2, 1> m_supports_disable_color_perspective;
BitField<u8, bool, 3, 1> m_per_sample_shading;
BitField<u8, bool, 4, 1> m_scaled_dithering;
BitField<u8, bool, 5, 1> m_chroma_smoothing;
BitField<u8, bool, 6, 1> m_disable_color_perspective;
u8 bits = 0;
};
@ -441,13 +428,16 @@ protected:
std::unique_ptr<GPUPipeline> m_copy_pipeline;
std::unique_ptr<GPUTexture> m_downsample_texture;
std::unique_ptr<GPUTexture> m_downsample_render_texture;
std::unique_ptr<GPUFramebuffer> m_downsample_framebuffer;
// std::unique_ptr<GPUTexture> m_downsample_weight_texture;
// std::unique_ptr<GPUFramebuffer> m_downsample_weight_framebuffer;
std::unique_ptr<GPUTexture> m_downsample_weight_texture;
std::unique_ptr<GPUFramebuffer> m_downsample_weight_framebuffer;
std::unique_ptr<GPUPipeline> m_downsample_first_pass_pipeline;
std::unique_ptr<GPUPipeline> m_downsample_mid_pass_pipeline;
std::unique_ptr<GPUPipeline> m_downsample_blur_pass_pipeline;
std::unique_ptr<GPUPipeline> m_downsample_composite_pass_pipeline;
std::unique_ptr<GPUSampler> m_downsample_lod_sampler;
std::unique_ptr<GPUSampler> m_downsample_composite_sampler;
// Statistics
RendererStats m_renderer_stats = {};

View File

@ -1329,13 +1329,37 @@ std::string GPU_HW_ShaderGen::GenerateVRAMUpdateDepthFragmentShader()
return ss.str();
}
/// Writes the uniform buffer declaration used by the adaptive downsampling shaders
/// (the adaptive downsample vertex shader and the mip/blur fragment shaders call this).
/// Declares, in order: u_uv_min, u_uv_max, u_rcp_resolution (float2 each) and u_lod (float).
/// NOTE(review): the member order appears to mirror the SmoothingUBOData struct pushed by
/// GPU_HW::DownsampleFramebufferAdaptive() - keep the two in sync; confirm when changing either.
void GPU_HW_ShaderGen::WriteAdaptiveDownsampleUniformBuffer(std::stringstream& ss)
{
// NOTE(review): the trailing `true` is passed through to DeclareUniformBuffer(); its meaning is not visible here.
DeclareUniformBuffer(ss, {"float2 u_uv_min", "float2 u_uv_max", "float2 u_rcp_resolution", "float u_lod"}, true);
}
/// Generates the source of the vertex shader used by the adaptive downsampling passes.
/// The emitted entry point derives v_tex0 and v_pos from the vertex index (v_id), remaps
/// v_tex0 into the [u_uv_min, u_uv_max] range from the adaptive downsample uniform buffer,
/// and negates v_pos.y when API_OPENGL, API_OPENGL_ES or API_VULKAN is set.
std::string GPU_HW_ShaderGen::GenerateAdaptiveDownsampleVertexShader()
{
  // Body of the entry point; appended verbatim after the generated declaration.
  static constexpr const char* entry_point_body = R"(
{
v_tex0 = float2(float((v_id << 1) & 2u), float(v_id & 2u));
v_pos = float4(v_tex0 * float2(2.0f, -2.0f) + float2(-1.0f, 1.0f), 0.0f, 1.0f);
v_tex0 = u_uv_min + (u_uv_max - u_uv_min) * v_tex0;
#if API_OPENGL || API_OPENGL_ES || API_VULKAN
v_pos.y = -v_pos.y;
#endif
}
)";

  std::stringstream shader;

  // Preamble: common header, then the uniforms the entry point reads.
  WriteHeader(shader);
  WriteAdaptiveDownsampleUniformBuffer(shader);

  DeclareVertexEntryPoint(shader, {}, 0, 1, {}, true);
  shader << entry_point_body;

  return shader.str();
}
std::string GPU_HW_ShaderGen::GenerateAdaptiveDownsampleMipFragmentShader(bool first_pass)
{
std::stringstream ss;
WriteHeader(ss);
WriteCommonFunctions(ss);
WriteAdaptiveDownsampleUniformBuffer(ss);
DeclareTexture(ss, "samp0", 0, false);
DeclareUniformBuffer(ss, {"float2 u_uv_min", "float2 u_uv_max", "float2 u_rcp_resolution"}, true);
DefineMacro(ss, "FIRST_PASS", first_pass);
// mipmap_energy.glsl ported from parallel-rsx.
@ -1370,16 +1394,16 @@ float4 get_bias(float4 c00, float4 c01, float4 c10, float4 c11)
{
float2 uv = v_tex0 - (u_rcp_resolution * 0.25);
#ifdef FIRST_PASS
vec3 c00 = SAMPLE_TEXTURE_OFFSET(samp0, uv, int2(0, 0)).rgb;
vec3 c01 = SAMPLE_TEXTURE_OFFSET(samp0, uv, int2(0, 1)).rgb;
vec3 c10 = SAMPLE_TEXTURE_OFFSET(samp0, uv, int2(1, 0)).rgb;
vec3 c11 = SAMPLE_TEXTURE_OFFSET(samp0, uv, int2(1, 1)).rgb;
vec3 c00 = SAMPLE_TEXTURE_LEVEL_OFFSET(samp0, uv, u_lod, int2(0, 0)).rgb;
vec3 c01 = SAMPLE_TEXTURE_LEVEL_OFFSET(samp0, uv, u_lod, int2(0, 1)).rgb;
vec3 c10 = SAMPLE_TEXTURE_LEVEL_OFFSET(samp0, uv, u_lod, int2(1, 0)).rgb;
vec3 c11 = SAMPLE_TEXTURE_LEVEL_OFFSET(samp0, uv, u_lod, int2(1, 1)).rgb;
o_col0 = get_bias(c00, c01, c10, c11);
#else
vec4 c00 = SAMPLE_TEXTURE_OFFSET(samp0, uv, int2(0, 0));
vec4 c01 = SAMPLE_TEXTURE_OFFSET(samp0, uv, int2(0, 1));
vec4 c10 = SAMPLE_TEXTURE_OFFSET(samp0, uv, int2(1, 0));
vec4 c11 = SAMPLE_TEXTURE_OFFSET(samp0, uv, int2(1, 1));
vec4 c00 = SAMPLE_TEXTURE_LEVEL_OFFSET(samp0, uv, u_lod, int2(0, 0));
vec4 c01 = SAMPLE_TEXTURE_LEVEL_OFFSET(samp0, uv, u_lod, int2(0, 1));
vec4 c10 = SAMPLE_TEXTURE_LEVEL_OFFSET(samp0, uv, u_lod, int2(1, 0));
vec4 c11 = SAMPLE_TEXTURE_LEVEL_OFFSET(samp0, uv, u_lod, int2(1, 1));
o_col0 = get_bias(c00, c01, c10, c11);
#endif
}
@ -1394,8 +1418,7 @@ std::string GPU_HW_ShaderGen::GenerateAdaptiveDownsampleBlurFragmentShader()
WriteHeader(ss);
WriteCommonFunctions(ss);
DeclareTexture(ss, "samp0", 0, false);
DeclareUniformBuffer(ss, {"float2 u_uv_min", "float2 u_uv_max", "float2 u_rcp_resolution", "float sample_level"},
true);
WriteAdaptiveDownsampleUniformBuffer(ss);
// mipmap_blur.glsl ported from parallel-rsx.
DeclareFragmentEntryPoint(ss, 0, 1, {}, false, 1, false, false, false, false);

View File

@ -24,6 +24,7 @@ public:
std::string GenerateVRAMFillFragmentShader(bool wrapped, bool interlaced);
std::string GenerateVRAMUpdateDepthFragmentShader();
std::string GenerateAdaptiveDownsampleVertexShader();
std::string GenerateAdaptiveDownsampleMipFragmentShader(bool first_pass);
std::string GenerateAdaptiveDownsampleBlurFragmentShader();
std::string GenerateAdaptiveDownsampleCompositeFragmentShader();
@ -36,6 +37,7 @@ private:
void WriteCommonFunctions(std::stringstream& ss);
void WriteBatchUniformBuffer(std::stringstream& ss);
void WriteBatchTextureFilter(std::stringstream& ss, GPUTextureFilter texture_filter);
void WriteAdaptiveDownsampleUniformBuffer(std::stringstream& ss);
u32 m_resolution_scale;
u32 m_multisamples;

View File

@ -210,7 +210,7 @@ void ShaderGen::WriteHeader(std::stringstream& ss)
ss << "#define SAMPLE_TEXTURE(name, coords) texture(name, coords)\n";
ss << "#define SAMPLE_TEXTURE_OFFSET(name, coords, offset) textureOffset(name, coords, offset)\n";
ss << "#define SAMPLE_TEXTURE_LEVEL(name, coords, level) textureLod(name, coords, level)\n";
ss << "#define SAMPLE_TEXTURE_LEVEL_OFFSET(name, coords, level, offset) textureLod(name, coords, level, offset)\n";
ss << "#define SAMPLE_TEXTURE_LEVEL_OFFSET(name, coords, level, offset) textureLodOffset(name, coords, level, offset)\n";
ss << "#define LOAD_TEXTURE(name, coords, mip) texelFetch(name, coords, mip)\n";
ss << "#define LOAD_TEXTURE_MS(name, coords, sample) texelFetch(name, coords, int(sample))\n";
ss << "#define LOAD_TEXTURE_OFFSET(name, coords, mip, offset) texelFetchOffset(name, coords, mip, offset)\n";