This commit is contained in:
Stenzek 2025-01-07 21:54:15 +10:00
parent ac41ace972
commit 113614c2f5
No known key found for this signature in database
6 changed files with 337 additions and 31 deletions

View File

@ -41,8 +41,8 @@ LOG_CHANNEL(GPU_HW);
// TODO: instead of full state restore, only restore what changed
static constexpr GPUTexture::Format VRAM_RT_FORMAT = GPUTexture::Format::RGBA8;
static constexpr GPUTexture::Format VRAM_DS_FORMAT = GPUTexture::Format::D16;
static constexpr GPUTexture::Format VRAM_DS_DEPTH_FORMAT = GPUTexture::Format::D32F;
static constexpr GPUTexture::Format VRAM_DS_FORMAT = GPUTexture::Format::D32FS8;
static constexpr GPUTexture::Format VRAM_DS_DEPTH_FORMAT = GPUTexture::Format::D32FS8;
static constexpr GPUTexture::Format VRAM_DS_COLOR_FORMAT = GPUTexture::Format::R32F;
#if defined(_DEBUG) || defined(_DEVEL)
@ -427,11 +427,13 @@ void GPU_HW::DoMemoryState(StateWrapper& sw, System::MemorySaveState& mss)
// Re-applies the device state used for VRAM drawing: binds the VRAM read texture to slot 0 with the
// nearest sampler, sets the VRAM render target, re-applies the batch's stencil reference when the
// stencil buffer is in use, sets the viewport to the VRAM texture's rectangle and calls SetScissor().
void GPU_HW::RestoreDeviceContext()
{
m_batch_ubo_dirty = true;
g_gpu_device->SetTextureSampler(0, m_vram_read_texture.get(), g_gpu_device->GetNearestSampler());
SetVRAMRenderTarget();
// Re-apply the stencil reference tracked for the current batch.
if (UseStencilBuffer())
g_gpu_device->SetStencilRef(m_batch.stencil_reference);
g_gpu_device->SetViewport(m_vram_texture->GetRect());
SetScissor();
// NOTE(review): m_batch_ubo_dirty is also set at the top of this function in this view; one of the
// two assignments is probably the pre-change line of the diff - confirm only one survives.
// Presumably forces the batch uniform buffer to be re-uploaded before the next draw - confirm.
m_batch_ubo_dirty = true;
}
void GPU_HW::UpdateSettings(const GPUSettings& old_settings)
@ -447,7 +449,9 @@ void GPU_HW::UpdateSettings(const GPUSettings& old_settings)
(m_resolution_scale != resolution_scale || m_multisamples != multisamples ||
g_gpu_settings.IsUsingAccurateBlending() != old_settings.IsUsingAccurateBlending() ||
m_pgxp_depth_buffer != g_gpu_settings.UsingPGXPDepthBuffer() ||
(!old_settings.gpu_texture_cache && g_gpu_settings.gpu_texture_cache));
(!old_settings.gpu_texture_cache && g_gpu_settings.gpu_texture_cache) ||
(GetDownsampleMode(resolution_scale) == GPUDownsampleMode::AdaptiveStencil) !=
(m_downsample_mode == GPUDownsampleMode::AdaptiveStencil));
const bool shaders_changed =
((m_resolution_scale > 1) != (resolution_scale > 1) || m_multisamples != multisamples ||
m_true_color != g_gpu_settings.gpu_true_color ||
@ -466,7 +470,9 @@ void GPU_HW::UpdateSettings(const GPUSettings& old_settings)
g_gpu_settings.gpu_pgxp_color_correction != old_settings.gpu_pgxp_color_correction) ||
m_allow_sprite_mode != ShouldAllowSpriteMode(m_resolution_scale, g_gpu_settings.gpu_texture_filter,
g_gpu_settings.gpu_sprite_texture_filter) ||
(!old_settings.gpu_texture_cache && g_gpu_settings.gpu_texture_cache));
(!old_settings.gpu_texture_cache && g_gpu_settings.gpu_texture_cache) ||
(GetDownsampleMode(resolution_scale) == GPUDownsampleMode::AdaptiveStencil) !=
(m_downsample_mode == GPUDownsampleMode::AdaptiveStencil));
const bool resolution_dependent_shaders_changed =
(m_resolution_scale != resolution_scale || m_multisamples != multisamples);
const bool downsampling_shaders_changed =
@ -889,6 +895,7 @@ void GPU_HW::PrintSettingsToLog()
// Returns the texture format used for the depth buffer:
//  - PGXP depth buffer with ROV shader blending: VRAM_DS_COLOR_FORMAT
//  - PGXP depth buffer without ROV shader blending: VRAM_DS_DEPTH_FORMAT
//  - no PGXP depth buffer: VRAM_DS_FORMAT
GPUTexture::Format GPU_HW::GetDepthBufferFormat() const
{
// Use 32-bit depth for PGXP depth buffer, otherwise 16-bit for mask bit.
// NOTE(review): the "16-bit" wording above looks stale if VRAM_DS_FORMAT is no longer a 16-bit
// depth format - confirm against the VRAM_DS_* constants at the top of the file.
// TODO: AMD doesn't support D24S8
return m_pgxp_depth_buffer ? (m_use_rov_for_shader_blend ? VRAM_DS_COLOR_FORMAT : VRAM_DS_DEPTH_FORMAT) :
VRAM_DS_FORMAT;
}
@ -978,6 +985,10 @@ bool GPU_HW::CreateBuffers(Error* error)
SetVRAMRenderTarget();
SetFullVRAMDirtyRectangle();
if (UseStencilBuffer())
g_gpu_device->ClearStencil(m_vram_depth_texture.get(), 0);
return true;
}
@ -990,6 +1001,9 @@ void GPU_HW::ClearFramebuffer()
g_gpu_device->ClearRenderTarget(m_vram_depth_texture.get(), 0xFF);
else
g_gpu_device->ClearDepth(m_vram_depth_texture.get(), m_pgxp_depth_buffer ? 1.0f : 0.0f);
if (UseStencilBuffer())
g_gpu_device->ClearStencil(m_vram_depth_texture.get(), 0);
}
ClearVRAMDirtyRectangle();
if (m_use_texture_cache)
@ -1419,13 +1433,27 @@ bool GPU_HW::CompilePipelines(Error* error)
{
plconfig.depth.depth_test =
m_pgxp_depth_buffer ?
(depth_test ? GPUPipeline::DepthFunc::LessEqual : GPUPipeline::DepthFunc::Always) :
(check_mask ? GPUPipeline::DepthFunc::GreaterEqual : GPUPipeline::DepthFunc::Always);
(depth_test ? GPUPipeline::ComparisonFunc::LessEqual : GPUPipeline::ComparisonFunc::Always) :
(check_mask ? GPUPipeline::ComparisonFunc::GreaterEqual : GPUPipeline::ComparisonFunc::Always);
// Don't write for transparent, but still test.
plconfig.depth.depth_write =
!m_pgxp_depth_buffer ||
(depth_test && transparency_mode == static_cast<u8>(GPUTransparencyMode::Disabled));
if (UseStencilBuffer())
{
const bool replace = (transparency_mode == static_cast<u8>(GPUTransparencyMode::Disabled) ||
render_mode == static_cast<u8>(BatchRenderMode::TransparencyDisabled) ||
render_mode == static_cast<u8>(BatchRenderMode::OnlyOpaque));
plconfig.depth.stencil_enable = true;
plconfig.depth.back_stencil_func = GPUPipeline::ComparisonFunc::Always;
plconfig.depth.back_stencil_pass_op =
replace ? GPUPipeline::StencilOp::Replace : GPUPipeline::StencilOp::Keep;
plconfig.depth.front_stencil_func = GPUPipeline::ComparisonFunc::Always;
plconfig.depth.front_stencil_pass_op =
replace ? GPUPipeline::StencilOp::Replace : GPUPipeline::StencilOp::Keep;
}
}
plconfig.SetTargetFormats(use_rov ? GPUTexture::Format::Unknown : VRAM_RT_FORMAT,
@ -1563,6 +1591,15 @@ bool GPU_HW::CompilePipelines(Error* error)
plconfig.blend = GPUPipeline::BlendState::GetNoBlendingState();
plconfig.color_formats[1] = needs_rov_depth ? VRAM_DS_COLOR_FORMAT : GPUTexture::Format::Unknown;
if (UseStencilBuffer())
{
plconfig.depth.stencil_enable = true;
plconfig.depth.back_stencil_func = GPUPipeline::ComparisonFunc::Always;
plconfig.depth.back_stencil_pass_op = GPUPipeline::StencilOp::Replace;
plconfig.depth.front_stencil_func = GPUPipeline::ComparisonFunc::Always;
plconfig.depth.front_stencil_pass_op = GPUPipeline::StencilOp::Replace;
}
// VRAM fill
for (u8 wrapped = 0; wrapped < 2; wrapped++)
{
@ -1577,8 +1614,9 @@ bool GPU_HW::CompilePipelines(Error* error)
return false;
plconfig.fragment_shader = fs.get();
plconfig.depth = needs_real_depth_buffer ? GPUPipeline::DepthState::GetAlwaysWriteState() :
GPUPipeline::DepthState::GetNoTestsState();
plconfig.depth.depth_test =
needs_real_depth_buffer ? GPUPipeline::ComparisonFunc::Always : GPUPipeline::ComparisonFunc::Never;
plconfig.depth.depth_write = needs_real_depth_buffer;
if (!(m_vram_fill_pipelines[wrapped][interlaced] = g_gpu_device->CreatePipeline(plconfig, error)))
return false;
@ -1604,7 +1642,7 @@ bool GPU_HW::CompilePipelines(Error* error)
plconfig.depth.depth_write = needs_real_depth_buffer;
plconfig.depth.depth_test =
(depth_test != 0) ? GPUPipeline::DepthFunc::GreaterEqual : GPUPipeline::DepthFunc::Always;
(depth_test != 0) ? GPUPipeline::ComparisonFunc::GreaterEqual : GPUPipeline::ComparisonFunc::Always;
if (!(m_vram_copy_pipelines[depth_test] = g_gpu_device->CreatePipeline(plconfig), error))
return false;
@ -1636,7 +1674,7 @@ bool GPU_HW::CompilePipelines(Error* error)
plconfig.depth.depth_write = needs_real_depth_buffer;
plconfig.depth.depth_test =
(depth_test != 0) ? GPUPipeline::DepthFunc::GreaterEqual : GPUPipeline::DepthFunc::Always;
(depth_test != 0) ? GPUPipeline::ComparisonFunc::GreaterEqual : GPUPipeline::ComparisonFunc::Always;
if (!(m_vram_write_pipelines[depth_test] = g_gpu_device->CreatePipeline(plconfig, error)))
return false;
@ -1657,7 +1695,9 @@ bool GPU_HW::CompilePipelines(Error* error)
plconfig.fragment_shader = fs.get();
plconfig.layout = GPUPipeline::Layout::SingleTextureAndPushConstants;
plconfig.depth = GPUPipeline::DepthState::GetNoTestsState();
plconfig.depth.depth_write = needs_real_depth_buffer;
plconfig.depth.depth_test = GPUPipeline::ComparisonFunc::Always;
if (!(m_vram_write_replacement_pipeline = g_gpu_device->CreatePipeline(plconfig, error)))
return false;
@ -1669,6 +1709,7 @@ bool GPU_HW::CompilePipelines(Error* error)
plconfig.primitive = GPUPipeline::Primitive::Triangles;
plconfig.input_layout.vertex_attributes = {};
plconfig.input_layout.vertex_stride = 0;
plconfig.depth = GPUPipeline::DepthState::GetNoTestsState();
// VRAM update depth
if (m_write_mask_as_depth)
@ -1882,6 +1923,61 @@ bool GPU_HW::CompileDownsamplePipelines(Error* error)
}
GL_OBJECT_NAME(m_downsample_composite_sampler, "Downsample Trilinear Sampler");
}
else if (m_downsample_mode == GPUDownsampleMode::AdaptiveStencil)
{
std::unique_ptr<GPUShader> fs = g_gpu_device->CreateShader(
GPUShaderStage::Fragment, shadergen.GetLanguage(),
shadergen.GenerateAdaptiveStencilDownsampleBlurFragmentShader(m_resolution_scale, m_multisamples), error);
if (!fs)
return false;
GL_OBJECT_NAME(fs, "Adaptive Stencil Downsample Fragment Shader");
plconfig.fragment_shader = fs.get();
plconfig.layout = GPUPipeline::Layout::MultiTextureAndPushConstants;
if (!(m_downsample_blur_pipeline = g_gpu_device->CreatePipeline(plconfig, error)))
return false;
GL_OBJECT_NAME(m_downsample_blur_pipeline, "Adaptive Stencil Downsample Pipeline");
fs = g_gpu_device->CreateShader(GPUShaderStage::Fragment, shadergen.GetLanguage(),
shadergen.GenerateAdaptiveStencilDownsampleCompositeFragmentShader(), error);
if (!fs)
return false;
GL_OBJECT_NAME(fs, "Adaptive Stencil Composite Fragment Shader");
plconfig.fragment_shader = fs.get();
plconfig.SetTargetFormats(VRAM_RT_FORMAT);
if (!(m_downsample_composite_pipeline = g_gpu_device->CreatePipeline(plconfig, error)))
return false;
GL_OBJECT_NAME(m_downsample_composite_pipeline, "Adaptive Stencil Composite Pipeline");
fs = g_gpu_device->CreateShader(GPUShaderStage::Fragment, shadergen.GetLanguage(),
shadergen.GenerateFillFragmentShader(), error);
if (!fs)
return false;
GL_OBJECT_NAME(fs, "Adaptive Stencil Mark Fragment Shader");
plconfig.fragment_shader = fs.get();
plconfig.layout = GPUPipeline::Layout::SingleTextureAndPushConstants;
plconfig.SetTargetFormats(GPUTexture::Format::R8, GetDepthBufferFormat());
plconfig.samples = m_multisamples;
plconfig.depth = GPUPipeline::DepthState::GetNoTestsState();
plconfig.depth.stencil_enable = true;
plconfig.depth.front_stencil_pass_op = GPUPipeline::StencilOp::Keep;
plconfig.depth.front_stencil_fail_op = GPUPipeline::StencilOp::Keep;
plconfig.depth.front_stencil_func = GPUPipeline::ComparisonFunc::Equal;
plconfig.depth.back_stencil_pass_op = GPUPipeline::StencilOp::Keep;
plconfig.depth.back_stencil_fail_op = GPUPipeline::StencilOp::Keep;
plconfig.depth.back_stencil_func = GPUPipeline::ComparisonFunc::Equal;
if (!(m_downsample_pass_pipeline = g_gpu_device->CreatePipeline(plconfig, error)))
return false;
GL_OBJECT_NAME(m_downsample_pass_pipeline, "Adaptive Stencil Downsample Pipeline");
}
else if (m_downsample_mode == GPUDownsampleMode::Box)
{
std::unique_ptr<GPUShader> fs =
@ -2471,10 +2567,28 @@ void GPU_HW::SetBatchSpriteMode(const GPUBackendDrawCommand* cmd, bool enabled)
m_batch.sprite_mode = enabled;
}
/// Switches the stencil reference value used by subsequent batched draws.
/// Does nothing when the stencil buffer is unused or the batch already has this reference. Otherwise any
/// indices already queued are flushed first (and vertex space for `cmd` is re-ensured), then the new value
/// is recorded in the batch state and applied to the device.
void GPU_HW::SetBatchStencilReference(const GPUBackendDrawCommand* cmd, u8 value)
{
  const bool reference_changes = (UseStencilBuffer() && m_batch.stencil_reference != value);
  if (reference_changes)
  {
    // Queued geometry was batched under the old reference, so it has to be drawn before switching.
    const bool has_queued_indices = (m_batch_index_count > 0);
    if (has_queued_indices)
    {
      FlushRender();
      EnsureVertexBufferSpaceForCommand(cmd);
    }

    GL_INS_FMT("Stencil reference is now {}", value);
    m_batch.stencil_reference = value;
    g_gpu_device->SetStencilRef(value);
  }
}
void GPU_HW::DrawLine(const GPUBackendDrawLineCommand* cmd)
{
PrepareDraw(cmd);
SetBatchDepthBuffer(cmd, false);
SetBatchStencilReference(cmd, 0);
const u32 num_vertices = cmd->num_vertices;
DebugAssert(m_batch_vertex_space >= (num_vertices * 4) && m_batch_index_space >= (num_vertices * 6));
@ -2519,6 +2633,7 @@ void GPU_HW::DrawPreciseLine(const GPUBackendDrawPreciseLineCommand* cmd)
const bool use_depth = m_pgxp_depth_buffer && cmd->valid_w;
SetBatchDepthBuffer(cmd, use_depth);
SetBatchStencilReference(cmd, BoolToUInt8(use_depth));
const u32 num_vertices = cmd->num_vertices;
DebugAssert(m_batch_vertex_space >= (num_vertices * 4) && m_batch_index_space >= (num_vertices * 6));
@ -2678,6 +2793,7 @@ void GPU_HW::DrawSprite(const GPUBackendDrawRectangleCommand* cmd)
PrepareDraw(cmd);
SetBatchDepthBuffer(cmd, false);
SetBatchSpriteMode(cmd, m_allow_sprite_mode);
SetBatchStencilReference(cmd, 0);
DebugAssert(m_batch_vertex_space >= MAX_VERTICES_FOR_RECTANGLE && m_batch_index_space >= MAX_VERTICES_FOR_RECTANGLE);
const s32 pos_x = cmd->x;
@ -2955,10 +3071,12 @@ ALWAYS_INLINE_RELEASE void GPU_HW::FinishPolygonDraw(const GPUBackendDrawCommand
const GSVector4i clamped_draw_rect_123)
{
// Use PGXP to exclude primitives that are definitely 3D.
const bool really_3d = is_precise ? is_3d : IsPossibleSpritePolygon(vertices.data());
if (m_resolution_scale > 1 && !is_3d && cmd->quad_polygon)
HandleFlippedQuadTextureCoordinates(cmd, vertices.data());
else if (m_allow_sprite_mode)
SetBatchSpriteMode(cmd, is_precise ? !is_3d : IsPossibleSpritePolygon(vertices.data()));
SetBatchSpriteMode(cmd, !really_3d);
SetBatchStencilReference(cmd, BoolToUInt8(really_3d));
if (cmd->texture_enable && m_compute_uv_range)
ComputePolygonUVLimits(cmd, vertices.data(), num_vertices);
@ -3020,6 +3138,9 @@ bool GPU_HW::BlitVRAMReplacementTexture(GPUTexture* tex, u32 dst_x, u32 dst_y, u
g_gpu_device->SetTextureSampler(0, tex, g_gpu_device->GetLinearSampler());
g_gpu_device->SetPipeline(m_vram_write_replacement_pipeline.get());
if (UseStencilBuffer())
g_gpu_device->SetStencilRef(0);
const GSVector4i rect(dst_x, dst_y, dst_x + width, dst_y + height);
g_gpu_device->SetScissor(rect);
DrawScreenQuad(rect);
@ -3213,6 +3334,11 @@ void GPU_HW::ResetBatchVertexDepth()
m_current_depth = 1;
}
/// Returns true when the current downsample mode is AdaptiveStencil, the only mode for which this
/// backend fills and reads the stencil buffer.
ALWAYS_INLINE bool GPU_HW::UseStencilBuffer() const
{
  const bool stencil_downsampling = (GPUDownsampleMode::AdaptiveStencil == m_downsample_mode);
  return stencil_downsampling;
}
ALWAYS_INLINE float GPU_HW::GetCurrentNormalizedVertexDepth() const
{
return 1.0f - (static_cast<float>(m_current_depth) / 65535.0f);
@ -3291,6 +3417,9 @@ void GPU_HW::FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color, bool inter
uniforms.u_interlaced_displayed_field = active_line_lsb;
g_gpu_device->PushUniformBuffer(&uniforms, sizeof(uniforms));
if (UseStencilBuffer())
g_gpu_device->SetStencilRef(0);
const GSVector4i scaled_bounds = bounds.mul32l(GSVector4i(m_resolution_scale));
g_gpu_device->SetScissor(scaled_bounds);
DrawScreenQuad(scaled_bounds);
@ -3456,6 +3585,9 @@ void GPU_HW::UpdateVRAMOnGPU(u32 x, u32 y, u32 width, u32 height, const void* da
g_gpu_device->SetPipeline(m_vram_write_pipelines[BoolToUInt8(check_mask && m_write_mask_as_depth)].get());
g_gpu_device->PushUniformBuffer(&uniforms, sizeof(uniforms));
if (UseStencilBuffer())
g_gpu_device->SetStencilRef(0);
if (upload_texture)
g_gpu_device->SetTextureSampler(0, upload_texture.get(), g_gpu_device->GetNearestSampler());
else
@ -3539,6 +3671,9 @@ void GPU_HW::CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32
g_gpu_device->SetTextureSampler(0, m_vram_read_texture.get(), g_gpu_device->GetNearestSampler());
g_gpu_device->PushUniformBuffer(&uniforms, sizeof(uniforms));
if (UseStencilBuffer())
g_gpu_device->SetStencilRef(0);
const GSVector4i dst_bounds_scaled = dst_bounds.mul32l(GSVector4i(m_resolution_scale));
g_gpu_device->SetScissor(dst_bounds_scaled);
DrawScreenQuad(dst_bounds_scaled);
@ -4012,7 +4147,23 @@ void GPU_HW::UpdateDisplay(const GPUBackendUpdateDisplayCommand* cmd)
if (m_downsample_mode != GPUDownsampleMode::Disabled && !cmd->display_24bit)
{
DebugAssert(m_display_texture);
DownsampleFramebuffer();
if (m_downsample_mode == GPUDownsampleMode::Adaptive)
{
DownsampleFramebufferAdaptive(m_display_texture, m_display_texture_view_x, m_display_texture_view_y,
m_display_texture_view_width, m_display_texture_view_height);
}
else if (m_downsample_mode == GPUDownsampleMode::AdaptiveStencil)
{
DownsampleFramebufferAdaptiveStencil(m_display_texture, m_display_texture_view_x, m_display_texture_view_y,
m_display_texture_view_width, m_display_texture_view_height,
scaled_vram_offset_x, scaled_vram_offset_y, line_skip);
}
else
{
DownsampleFramebufferBoxFilter(m_display_texture, m_display_texture_view_x, m_display_texture_view_y,
m_display_texture_view_width, m_display_texture_view_height);
}
}
if (drew_anything)
@ -4031,6 +4182,10 @@ void GPU_HW::UpdateDownsamplingLevels()
current_width /= 2;
}
}
else if (m_downsample_mode == GPUDownsampleMode::AdaptiveStencil)
{
m_downsample_scale_or_levels = m_resolution_scale;
}
else if (m_downsample_mode == GPUDownsampleMode::Box)
{
m_downsample_scale_or_levels = m_resolution_scale / GetBoxDownsampleScale(m_resolution_scale);
@ -4050,20 +4205,6 @@ void GPU_HW::OnBufferSwapped()
m_depth_was_copied = false;
}
void GPU_HW::DownsampleFramebuffer()
{
GPUTexture* source = m_display_texture;
const u32 left = m_display_texture_view_x;
const u32 top = m_display_texture_view_y;
const u32 width = m_display_texture_view_width;
const u32 height = m_display_texture_view_height;
if (m_downsample_mode == GPUDownsampleMode::Adaptive)
DownsampleFramebufferAdaptive(source, left, top, width, height);
else
DownsampleFramebufferBoxFilter(source, left, top, width, height);
}
void GPU_HW::DownsampleFramebufferAdaptive(GPUTexture* source, u32 left, u32 top, u32 width, u32 height)
{
GL_PUSH_FMT("DownsampleFramebufferAdaptive ({},{} => {},{})", left, top, left + width, left + height);
@ -4184,6 +4325,94 @@ void GPU_HW::DownsampleFramebufferAdaptive(GPUTexture* source, u32 left, u32 top
SetDisplayTexture(m_downsample_texture.get(), m_display_depth_buffer, 0, 0, width, height);
}
/// Downsamples a region of `source` using the stencil buffer to decide, per output pixel, whether to
/// keep the native-resolution colour or the box-filtered colour.
///
/// Three passes are issued:
///  1. Weights: m_downsample_pass_pipeline is drawn twice into an R8 texture the size of VRAM, with the
///     VRAM depth texture attached - once with stencil reference 0 pushing zeros, once with stencil
///     reference 1 pushing ones.
///  2. Box downsample: m_downsample_blur_pipeline reads `source` (slot 0) and the weight texture (slot 1)
///     and renders into a (width / scale) x (height / scale) target.
///  3. Composite: m_downsample_composite_pipeline reads the downsampled target (slot 0, linear sampler)
///     and `source` (slot 1, nearest sampler) and renders into m_downsample_texture at width x height.
///
/// On success the device context is restored and m_downsample_texture becomes the display texture.
/// If any of the textures cannot be created, an error is logged and the display texture is left as-is.
///
/// @param source    Texture holding the rendered display image.
/// @param left      X origin of the region within `source`.
/// @param top       Y origin of the region within `source`.
/// @param width     Width of the region; also the width of the output texture.
/// @param height    Height of the region; also the height of the output texture.
/// @param fb_left   X origin used for the viewport/scissor of the weights pass.
/// @param fb_top    Y origin used for the viewport/scissor of the weights pass.
/// @param line_skip Shift applied to `height` for the weights pass viewport, and passed to the blur shader.
void GPU_HW::DownsampleFramebufferAdaptiveStencil(GPUTexture* source, u32 left, u32 top, u32 width, u32 height,
                                                  u32 fb_left, u32 fb_top, u32 line_skip)
{
  GL_PUSH_FMT("DownsampleFramebufferAdaptiveStencil({},{} => {},{} ({}x{})", left, top, left + width, top + height,
              width, height);

  const u32 ds_width = width / m_downsample_scale_or_levels;
  const u32 ds_height = height / m_downsample_scale_or_levels;

  // TODO: Weight texture is broken with MSAA
  const bool output_texture_ok =
    g_gpu_device->ResizeTexture(&m_downsample_texture, width, height, GPUTexture::Type::RenderTarget, VRAM_RT_FORMAT,
                                GPUTexture::Flags::None, false);
  GPUDevice::AutoRecycleTexture downsample_texture = g_gpu_device->FetchAutoRecycleTexture(
    ds_width, ds_height, 1, 1, 1, GPUTexture::Type::RenderTarget, VRAM_RT_FORMAT, GPUTexture::Flags::None);
  GPUDevice::AutoRecycleTexture weight_texture = g_gpu_device->FetchAutoRecycleTexture(
    m_vram_texture->GetWidth(), m_vram_texture->GetHeight(), 1, 1, m_vram_texture->GetSamples(),
    GPUTexture::Type::RenderTarget, GPUTexture::Format::R8, GPUTexture::Flags::None);
  if (!output_texture_ok || !downsample_texture || !weight_texture)
  {
    ERROR_LOG("Failed to create {}x{} RT for adaptive stencil downsampling", width, height);

    // Close the debug group opened above so the failure path does not leave it unbalanced.
    GL_POP();
    return;
  }

  {
    // fill weight texture
    GL_SCOPE("Weights");

    const float fill_uniforms_unmarked[4] = {0.0f, 0.0f, 0.0f, 0.0f};
    const float fill_uniforms_marked[4] = {1.0f, 1.0f, 1.0f, 1.0f};

    g_gpu_device->SetViewportAndScissor(fb_left, fb_top, width, height << line_skip);
    g_gpu_device->InvalidateRenderTarget(weight_texture.get());
    g_gpu_device->SetRenderTarget(weight_texture.get(), m_vram_depth_texture.get());
    g_gpu_device->SetPipeline(m_downsample_pass_pipeline.get());

    // First draw uses stencil reference 0 and writes the "unmarked" value.
    g_gpu_device->SetStencilRef(0);
    g_gpu_device->PushUniformBuffer(fill_uniforms_unmarked, sizeof(fill_uniforms_unmarked));
    g_gpu_device->Draw(3, 0);

    // Second draw uses stencil reference 1 and writes the "marked" value.
    g_gpu_device->SetStencilRef(1);
    g_gpu_device->PushUniformBuffer(fill_uniforms_marked, sizeof(fill_uniforms_marked));
    g_gpu_device->Draw(3, 0);
  }

  // box downsample
  {
    GL_SCOPE("Box downsample");

    source->MakeReadyForSampling();

    // Only the first five elements are set explicitly; the remaining four are zero-initialised.
    // NOTE(review): the array has nine elements but the blur shader's uniform block appears to declare
    // only five 32-bit values - confirm whether the extra space is required padding.
    const u32 uniforms[9] = {left, top, fb_left, fb_top, line_skip};

    g_gpu_device->InvalidateRenderTarget(downsample_texture.get());
    g_gpu_device->SetRenderTarget(downsample_texture.get());
    g_gpu_device->SetPipeline(m_downsample_blur_pipeline.get());
    g_gpu_device->SetTextureSampler(0, source, g_gpu_device->GetNearestSampler());
    g_gpu_device->SetTextureSampler(1, weight_texture.get(), g_gpu_device->GetNearestSampler());
    g_gpu_device->SetViewportAndScissor(0, 0, ds_width, ds_height);
    g_gpu_device->PushUniformBuffer(uniforms, sizeof(uniforms));
    g_gpu_device->Draw(3, 0);
  }

  // composite
  {
    GL_SCOPE("Composite");

    // Region rectangle divided by the source texture size (x/width, y/height, w/width, h/height).
    const GSVector4 nat_uniforms =
      GSVector4(GSVector4i(left, top, width, height)) / GSVector4(GSVector4i::xyxy(source->GetSizeVec()));

    g_gpu_device->InvalidateRenderTarget(m_downsample_texture.get());
    g_gpu_device->SetRenderTarget(m_downsample_texture.get());
    g_gpu_device->SetPipeline(m_downsample_composite_pipeline.get());
    g_gpu_device->SetTextureSampler(0, downsample_texture.get(), g_gpu_device->GetLinearSampler());
    g_gpu_device->SetTextureSampler(1, source, g_gpu_device->GetNearestSampler());
    g_gpu_device->SetViewportAndScissor(0, 0, width, height);
    g_gpu_device->PushUniformBuffer(&nat_uniforms, sizeof(nat_uniforms));
    g_gpu_device->Draw(3, 0);

    m_downsample_texture->MakeReadyForSampling();
  }

  GL_POP();

  RestoreDeviceContext();

  SetDisplayTexture(m_downsample_texture.get(), m_display_depth_buffer, 0, 0, width, height);
}
void GPU_HW::DownsampleFramebufferBoxFilter(GPUTexture* source, u32 left, u32 top, u32 width, u32 height)
{
GL_SCOPE_FMT("DownsampleFramebufferBoxFilter({},{} => {},{} ({}x{})", left, top, left + width, top + height, width,

View File

@ -151,6 +151,7 @@ private:
bool check_mask_before_draw = false;
bool use_depth_buffer = false;
bool sprite_mode = false;
u8 stencil_reference = 0;
// Returns the render mode for this batch.
BatchRenderMode GetRenderMode() const;
@ -234,6 +235,9 @@ private:
const GSVector4i clamped_draw_rect_123);
void ResetBatchVertexDepth();
/// Returns true if the stencil buffer should be filled.
bool UseStencilBuffer() const;
/// Returns the value to be written to the depth buffer for the current operation for mask bit emulation.
float GetCurrentNormalizedVertexDepth() const;
@ -267,11 +271,13 @@ private:
void SetBatchDepthBuffer(const GPUBackendDrawCommand* cmd, bool enabled);
void CheckForDepthClear(const GPUBackendDrawCommand* cmd, const BatchVertex* vertices, u32 num_vertices);
void SetBatchSpriteMode(const GPUBackendDrawCommand* cmd, bool enabled);
void SetBatchStencilReference(const GPUBackendDrawCommand* cmd, u8 value);
void UpdateDownsamplingLevels();
void DownsampleFramebuffer();
void DownsampleFramebufferAdaptive(GPUTexture* source, u32 left, u32 top, u32 width, u32 height);
void DownsampleFramebufferAdaptiveStencil(GPUTexture* source, u32 left, u32 top, u32 width, u32 height, u32 fb_left,
u32 fb_top, u32 line_skip);
void DownsampleFramebufferBoxFilter(GPUTexture* source, u32 left, u32 top, u32 width, u32 height);
std::unique_ptr<GPUTexture> m_vram_texture;

View File

@ -1824,6 +1824,73 @@ std::string GPU_HW_ShaderGen::GenerateBoxSampleDownsampleFragmentShader(u32 fact
return ss.str();
}
/// Builds the fragment shader for the box-downsample pass of the adaptive stencil downsampler.
/// For each output pixel the shader averages a FACTOR x FACTOR block of samp0 into rgb, and sums the
/// matching block of samp1 (every sample when MULTISAMPLES > 1); alpha is 1.0 when that sum is non-zero
/// and 0.0 otherwise.
/// `factor` is emitted as the FACTOR macro and `multisamples` as the MULTISAMPLES macro; samp1 is
/// declared as a multisampled texture when `multisamples` is greater than one.
std::string GPU_HW_ShaderGen::GenerateAdaptiveStencilDownsampleBlurFragmentShader(u32 factor, u8 multisamples) const
{
  const bool weights_multisampled = (multisamples > 1);

  std::stringstream shader;
  WriteHeader(shader);
  DefineMacro(shader, "MULTISAMPLES", multisamples);
  DeclareUniformBuffer(shader, {"uint2 u_base_coords", "uint2 u_fb_base_coords", "uint u_line_skip"}, true);
  DeclareTexture(shader, "samp0", 0, false);
  DeclareTexture(shader, "samp1", 1, weights_multisampled);

  shader << "#define FACTOR ";
  shader << factor;
  shader << "u\n";

  DeclareFragmentEntryPoint(shader, 0, 1, {}, true);
  shader << R"(
{
float3 color = float3(0.0, 0.0, 0.0);
float weight = 0.0;
uint2 base_coords = u_base_coords + uint2(v_pos.xy) * uint2(FACTOR, FACTOR);
uint2 fb_base_coords = u_fb_base_coords + uint2(uint(v_pos.x) * FACTOR, (uint(v_pos.y) << u_line_skip) * FACTOR);
for (uint offset_x = 0u; offset_x < FACTOR; offset_x++)
{
for (uint offset_y = 0u; offset_y < FACTOR; offset_y++)
{
int2 lcoords = int2(base_coords + uint2(offset_x, offset_y));
color += LOAD_TEXTURE(samp0, lcoords, 0).rgb;
int2 fbcoords = int2(fb_base_coords + uint2(offset_x, offset_y << u_line_skip));
#if MULTISAMPLES > 1
for (int i = 0; i < MULTISAMPLES; i++)
weight += LOAD_TEXTURE_MS(samp1, fbcoords, i).r;
#else
weight += LOAD_TEXTURE(samp1, fbcoords, 0).r;
#endif
}
}
color /= float(FACTOR * FACTOR);
o_col0 = float4(color, float(weight != 0.0));
}
)";

  return shader.str();
}
/// Builds the fragment shader for the composite pass of the adaptive stencil downsampler.
/// samp0 is the downsampled image, whose alpha channel selects the result: the native-resolution colour
/// from samp1 when alpha is non-zero, the downsampled colour otherwise. Native texture coordinates are
/// computed as u_native_rect.xy + v_tex0 * u_native_rect.zw.
/// Only the two textures the shader actually samples are declared; a third, never-sampled texture slot
/// was previously declared as well.
std::string GPU_HW_ShaderGen::GenerateAdaptiveStencilDownsampleCompositeFragmentShader() const
{
  std::stringstream ss;
  WriteHeader(ss);
  DeclareUniformBuffer(ss, {"float4 u_native_rect"}, true);
  DeclareTexture(ss, "samp0", 0, false);
  DeclareTexture(ss, "samp1", 1, false);
  DeclareFragmentEntryPoint(ss, 0, 1, {}, true);
  ss << R"(
{
float4 downsample_color = SAMPLE_TEXTURE(samp0, v_tex0);
float native_weight = float(downsample_color.a != 0.0);
float2 native_coords = u_native_rect.xy + v_tex0 * u_native_rect.zw;
float4 native_color = SAMPLE_TEXTURE(samp1, native_coords);
o_col0 = lerp(downsample_color, native_color, native_weight);
})";
  return ss.str();
}
std::string GPU_HW_ShaderGen::GenerateReplacementMergeFragmentShader(bool semitransparent, bool bilinear_filter) const
{
std::stringstream ss;

View File

@ -43,6 +43,8 @@ public:
std::string GenerateAdaptiveDownsampleBlurFragmentShader() const;
std::string GenerateAdaptiveDownsampleCompositeFragmentShader() const;
std::string GenerateBoxSampleDownsampleFragmentShader(u32 factor) const;
/// Generates the box-downsample fragment shader for adaptive stencil downsampling. `factor` is the
/// side length of the averaged block; `multisamples` is the sample count of the weight texture.
std::string GenerateAdaptiveStencilDownsampleBlurFragmentShader(u32 factor, u8 multisamples) const;
std::string GenerateAdaptiveStencilDownsampleCompositeFragmentShader() const;
std::string GenerateReplacementMergeFragmentShader(bool semitransparent, bool bilinear_filter) const;

View File

@ -1565,11 +1565,12 @@ const char* Settings::GetLineDetectModeDisplayName(GPULineDetectMode mode)
"GPULineDetectMode");
}
static constexpr const std::array s_downsample_mode_names = {"Disabled", "Box", "Adaptive"};
static constexpr const std::array s_downsample_mode_names = {"Disabled", "Box", "Adaptive", "AdaptiveStencil"};
static constexpr const std::array s_downsample_mode_display_names = {
TRANSLATE_DISAMBIG_NOOP("Settings", "Disabled", "GPUDownsampleMode"),
TRANSLATE_DISAMBIG_NOOP("Settings", "Box (Downsample 3D/Smooth All)", "GPUDownsampleMode"),
TRANSLATE_DISAMBIG_NOOP("Settings", "Adaptive (Preserve 3D/Smooth 2D)", "GPUDownsampleMode")};
TRANSLATE_DISAMBIG_NOOP("Settings", "Adaptive (Preserve 3D/Smooth 2D)", "GPUDownsampleMode"),
TRANSLATE_DISAMBIG_NOOP("Settings", "Adaptive Sharp (Preserve 3D/Smooth 2D)", "GPUDownsampleMode")};
std::optional<GPUDownsampleMode> Settings::ParseDownsampleModeName(const char* str)
{

View File

@ -105,6 +105,7 @@ enum class GPUDownsampleMode : u8
Disabled,
Box,
Adaptive,
AdaptiveStencil,
Count
};