From 5f14c1a0c20af827234d5adf22926b8665b71a6d Mon Sep 17 00:00:00 2001 From: Stenzek Date: Sat, 16 Mar 2024 02:02:03 +1000 Subject: [PATCH] GPU: Rewrite deinterlacing and add adaptive/blend modes --- src/core/fullscreen_ui.cpp | 8 + src/core/gpu.cpp | 429 ++++++++++++++++-- src/core/gpu.h | 20 +- src/core/gpu_hw.cpp | 214 +++++---- src/core/gpu_hw.h | 20 +- src/core/gpu_hw_shadergen.cpp | 74 +-- src/core/gpu_hw_shadergen.h | 3 +- src/core/gpu_shadergen.cpp | 178 ++++++++ src/core/gpu_shadergen.h | 7 + src/core/gpu_sw.cpp | 220 +++++---- src/core/gpu_sw.h | 16 +- src/core/settings.cpp | 44 ++ src/core/settings.h | 6 + src/core/system.cpp | 1 + src/core/types.h | 9 + src/duckstation-qt/graphicssettingswidget.cpp | 38 +- src/duckstation-qt/graphicssettingswidget.ui | 26 +- src/util/gpu_device.cpp | 40 ++ src/util/gpu_device.h | 2 + 19 files changed, 970 insertions(+), 385 deletions(-) diff --git a/src/core/fullscreen_ui.cpp b/src/core/fullscreen_ui.cpp index d32218d90..d869fc2c8 100644 --- a/src/core/fullscreen_ui.cpp +++ b/src/core/fullscreen_ui.cpp @@ -3941,6 +3941,14 @@ void FullscreenUI::DrawDisplaySettingsPage() &Settings::GetDisplayAspectRatioName, &Settings::GetDisplayAspectRatioDisplayName, DisplayAspectRatio::Count); + DrawEnumSetting( + bsi, FSUI_CSTR("Deinterlacing Mode"), + FSUI_CSTR( + "Determines which algorithm is used to convert interlaced frames to progressive for display on your system."), + "Display", "DeinterlacingMode", Settings::DEFAULT_DISPLAY_DEINTERLACING_MODE, + &Settings::ParseDisplayDeinterlacingMode, &Settings::GetDisplayDeinterlacingModeName, + &Settings::GetDisplayDeinterlacingModeDisplayName, DisplayDeinterlacingMode::Count); + DrawEnumSetting(bsi, FSUI_CSTR("Crop Mode"), FSUI_CSTR("Determines how much of the area typically not visible on a consumer TV set to crop/hide."), "Display", "CropMode", Settings::DEFAULT_DISPLAY_CROP_MODE, &Settings::ParseDisplayCropMode, diff --git a/src/core/gpu.cpp b/src/core/gpu.cpp index 
829567104..e1154652e 100644 --- a/src/core/gpu.cpp +++ b/src/core/gpu.cpp @@ -56,6 +56,8 @@ GPU::GPU() GPU::~GPU() { JoinScreenshotThreads(); + DestroyDeinterlaceTextures(); + g_gpu_device->RecycleTexture(std::move(m_chroma_smoothing_texture)); if (g_gpu_device) g_gpu_device->SetGPUTimingEnabled(false); @@ -78,7 +80,7 @@ bool GPU::Initialize() m_console_is_pal = System::IsPALRegion(); UpdateCRTCConfig(); - if (!CompileDisplayPipeline()) + if (!CompileDisplayPipelines(true, true, g_settings.gpu_24bit_chroma_smoothing)) { Host::ReportErrorAsync("Error", "Failed to compile base GPU pipelines."); return false; @@ -107,10 +109,20 @@ void GPU::UpdateSettings(const Settings& old_settings) // Crop mode calls this, so recalculate the display area UpdateCRTCDisplayParameters(); - if (g_settings.display_scaling != old_settings.display_scaling) + if (g_settings.display_scaling != old_settings.display_scaling || + g_settings.display_deinterlacing_mode != old_settings.display_deinterlacing_mode || + g_settings.gpu_24bit_chroma_smoothing != old_settings.gpu_24bit_chroma_smoothing) { - if (!CompileDisplayPipeline()) + // Toss buffers on mode change. + if (g_settings.display_deinterlacing_mode != old_settings.display_deinterlacing_mode) + DestroyDeinterlaceTextures(); + + if (!CompileDisplayPipelines(g_settings.display_scaling != old_settings.display_scaling, + g_settings.display_deinterlacing_mode != old_settings.display_deinterlacing_mode, + g_settings.gpu_24bit_chroma_smoothing != old_settings.gpu_24bit_chroma_smoothing)) + { Panic("Failed to compile display pipeline on settings change."); + } } g_gpu_device->SetGPUTimingEnabled(g_settings.display_show_gpu_usage); @@ -1359,6 +1371,10 @@ void GPU::HandleGetGPUInfoCommand(u32 value) void GPU::ClearDisplay() { + ClearDisplayTexture(); + + // Just recycle the textures, it'll get re-fetched. 
+ DestroyDeinterlaceTextures(); } void GPU::UpdateDisplay() @@ -1587,56 +1603,183 @@ void GPU::SetTextureWindow(u32 value) m_draw_mode.texture_window_changed = true; } -bool GPU::CompileDisplayPipeline() +bool GPU::CompileDisplayPipelines(bool display, bool deinterlace, bool chroma_smoothing) { GPUShaderGen shadergen(g_gpu_device->GetRenderAPI(), g_gpu_device->GetFeatures().dual_source_blend, g_gpu_device->GetFeatures().framebuffer_fetch); GPUPipeline::GraphicsConfig plconfig; - plconfig.layout = GPUPipeline::Layout::SingleTextureAndPushConstants; plconfig.input_layout.vertex_stride = 0; plconfig.primitive = GPUPipeline::Primitive::Triangles; plconfig.rasterization = GPUPipeline::RasterizationState::GetNoCullState(); plconfig.depth = GPUPipeline::DepthState::GetNoTestsState(); plconfig.blend = GPUPipeline::BlendState::GetNoBlendingState(); - plconfig.SetTargetFormats(g_gpu_device->HasSurface() ? g_gpu_device->GetWindowFormat() : GPUTexture::Format::RGBA8); plconfig.depth_format = GPUTexture::Format::Unknown; plconfig.samples = 1; plconfig.per_sample_shading = false; + plconfig.geometry_shader = nullptr; - std::string vs = shadergen.GenerateDisplayVertexShader(); - std::string fs; - switch (g_settings.display_scaling) + if (display) { - case DisplayScalingMode::BilinearSharp: - fs = shadergen.GenerateDisplaySharpBilinearFragmentShader(); - break; + plconfig.layout = GPUPipeline::Layout::SingleTextureAndPushConstants; + plconfig.SetTargetFormats(g_gpu_device->HasSurface() ? 
g_gpu_device->GetWindowFormat() : GPUTexture::Format::RGBA8); - case DisplayScalingMode::BilinearSmooth: - fs = shadergen.GenerateDisplayFragmentShader(true); - break; + std::string vs = shadergen.GenerateDisplayVertexShader(); + std::string fs; + switch (g_settings.display_scaling) + { + case DisplayScalingMode::BilinearSharp: + fs = shadergen.GenerateDisplaySharpBilinearFragmentShader(); + break; - case DisplayScalingMode::Nearest: - case DisplayScalingMode::NearestInteger: - default: - fs = shadergen.GenerateDisplayFragmentShader(false); - break; + case DisplayScalingMode::BilinearSmooth: + fs = shadergen.GenerateDisplayFragmentShader(true); + break; + + case DisplayScalingMode::Nearest: + case DisplayScalingMode::NearestInteger: + default: + fs = shadergen.GenerateDisplayFragmentShader(false); + break; + } + + std::unique_ptr vso = g_gpu_device->CreateShader(GPUShaderStage::Vertex, vs); + std::unique_ptr fso = g_gpu_device->CreateShader(GPUShaderStage::Fragment, fs); + if (!vso || !fso) + return false; + GL_OBJECT_NAME(vso, "Display Vertex Shader"); + GL_OBJECT_NAME_FMT(fso, "Display Fragment Shader [{}]", + Settings::GetDisplayScalingName(g_settings.display_scaling)); + + plconfig.vertex_shader = vso.get(); + plconfig.fragment_shader = fso.get(); + if (!(m_display_pipeline = g_gpu_device->CreatePipeline(plconfig))) + return false; + GL_OBJECT_NAME_FMT(m_display_pipeline, "Display Pipeline [{}]", + Settings::GetDisplayScalingName(g_settings.display_scaling)); } - std::unique_ptr vso = g_gpu_device->CreateShader(GPUShaderStage::Vertex, vs); - std::unique_ptr fso = g_gpu_device->CreateShader(GPUShaderStage::Fragment, fs); - if (!vso || !fso) - return false; - GL_OBJECT_NAME(vso, "Display Vertex Shader"); - GL_OBJECT_NAME_FMT(fso, "Display Fragment Shader [{}]", Settings::GetDisplayScalingName(g_settings.display_scaling)); + if (deinterlace) + { + plconfig.SetTargetFormats(GPUTexture::Format::RGBA8); - plconfig.vertex_shader = vso.get(); - plconfig.fragment_shader 
= fso.get(); - plconfig.geometry_shader = nullptr; - if (!(m_display_pipeline = g_gpu_device->CreatePipeline(plconfig))) - return false; - GL_OBJECT_NAME_FMT(m_display_pipeline, "Display Pipeline [{}]", - Settings::GetDisplayScalingName(g_settings.display_scaling)); + std::unique_ptr vso = + g_gpu_device->CreateShader(GPUShaderStage::Vertex, shadergen.GenerateScreenQuadVertexShader()); + if (!vso) + return false; + GL_OBJECT_NAME(vso, "Deinterlace Vertex Shader"); + + std::unique_ptr fso; + if (!(fso = g_gpu_device->CreateShader(GPUShaderStage::Fragment, + shadergen.GenerateInterleavedFieldExtractFragmentShader()))) + { + return false; + } + + GL_OBJECT_NAME(fso, "Deinterlace Field Extract Fragment Shader"); + + plconfig.layout = GPUPipeline::Layout::SingleTextureAndPushConstants; + plconfig.vertex_shader = vso.get(); + plconfig.fragment_shader = fso.get(); + if (!(m_deinterlace_extract_pipeline = g_gpu_device->CreatePipeline(plconfig))) + return false; + + GL_OBJECT_NAME(m_deinterlace_extract_pipeline, "Deinterlace Field Extract Pipeline"); + + switch (g_settings.display_deinterlacing_mode) + { + case DisplayDeinterlacingMode::Disabled: + break; + + case DisplayDeinterlacingMode::Weave: + { + if (!(fso = g_gpu_device->CreateShader(GPUShaderStage::Fragment, + shadergen.GenerateDeinterlaceWeaveFragmentShader()))) + { + return false; + } + + GL_OBJECT_NAME(fso, "Weave Deinterlace Fragment Shader"); + + plconfig.layout = GPUPipeline::Layout::SingleTextureAndPushConstants; + plconfig.vertex_shader = vso.get(); + plconfig.fragment_shader = fso.get(); + if (!(m_deinterlace_pipeline = g_gpu_device->CreatePipeline(plconfig))) + return false; + + GL_OBJECT_NAME(m_deinterlace_pipeline, "Weave Deinterlace Pipeline"); + } + break; + + case DisplayDeinterlacingMode::Blend: + { + if (!(fso = g_gpu_device->CreateShader(GPUShaderStage::Fragment, + shadergen.GenerateDeinterlaceBlendFragmentShader()))) + { + return false; + } + + GL_OBJECT_NAME(fso, "Blend Deinterlace Fragment 
Shader"); + + plconfig.layout = GPUPipeline::Layout::MultiTextureAndPushConstants; + plconfig.vertex_shader = vso.get(); + plconfig.fragment_shader = fso.get(); + if (!(m_deinterlace_pipeline = g_gpu_device->CreatePipeline(plconfig))) + return false; + + GL_OBJECT_NAME(m_deinterlace_pipeline, "Blend Deinterlace Pipeline"); + } + break; + + case DisplayDeinterlacingMode::Adaptive: + { + fso = + g_gpu_device->CreateShader(GPUShaderStage::Fragment, shadergen.GenerateFastMADReconstructFragmentShader()); + if (!fso) + return false; + + GL_OBJECT_NAME(fso, "FastMAD Reconstruct Fragment Shader"); + + plconfig.layout = GPUPipeline::Layout::MultiTextureAndPushConstants; + plconfig.fragment_shader = fso.get(); + if (!(m_deinterlace_pipeline = g_gpu_device->CreatePipeline(plconfig))) + return false; + + GL_OBJECT_NAME(m_deinterlace_pipeline, "FastMAD Reconstruct Pipeline"); + } + break; + + default: + UnreachableCode(); + } + } + + if (chroma_smoothing) + { + m_chroma_smoothing_pipeline.reset(); + g_gpu_device->RecycleTexture(std::move(m_chroma_smoothing_texture)); + + if (g_settings.gpu_24bit_chroma_smoothing) + { + plconfig.layout = GPUPipeline::Layout::SingleTextureAndPushConstants; + plconfig.SetTargetFormats(GPUTexture::Format::RGBA8); + + std::unique_ptr vso = + g_gpu_device->CreateShader(GPUShaderStage::Vertex, shadergen.GenerateScreenQuadVertexShader()); + std::unique_ptr fso = + g_gpu_device->CreateShader(GPUShaderStage::Fragment, shadergen.GenerateChromaSmoothingFragmentShader()); + if (!vso || !fso) + return false; + GL_OBJECT_NAME(vso, "Chroma Smoothing Vertex Shader"); + GL_OBJECT_NAME(fso, "Chroma Smoothing Fragment Shader"); + + plconfig.vertex_shader = vso.get(); + plconfig.fragment_shader = fso.get(); + if (!(m_chroma_smoothing_pipeline = g_gpu_device->CreatePipeline(plconfig))) + return false; + GL_OBJECT_NAME(m_chroma_smoothing_pipeline, "Chroma Smoothing Pipeline"); + } + } return true; } @@ -1804,6 +1947,226 @@ bool GPU::RenderDisplay(GPUTexture* target, 
const Common::Rectangle& draw_r } } +void GPU::DestroyDeinterlaceTextures() +{ + for (std::unique_ptr& tex : m_deinterlace_buffers) + g_gpu_device->RecycleTexture(std::move(tex)); + g_gpu_device->RecycleTexture(std::move(m_deinterlace_texture)); + m_current_deinterlace_buffer = 0; +} + +bool GPU::Deinterlace(GPUTexture* src, u32 x, u32 y, u32 width, u32 height, u32 field, u32 line_skip) +{ + switch (g_settings.display_deinterlacing_mode) + { + case DisplayDeinterlacingMode::Disabled: + { + if (line_skip == 0) + { + SetDisplayTexture(src, x, y, width, height); + return true; + } + + // Still have to extract the field. + if (!DeinterlaceExtractField(0, src, x, y, width, height, line_skip)) [[unlikely]] + return false; + + SetDisplayTexture(m_deinterlace_buffers[0].get(), 0, 0, width, height); + return true; + } + + case DisplayDeinterlacingMode::Weave: + { + GL_SCOPE_FMT("DeinterlaceWeave({{{},{}}}, {}x{}, field={}, line_skip={})", x, y, width, height, field, line_skip); + + const u32 full_height = height * 2; + if (!DeinterlaceSetTargetSize(width, full_height, true)) [[unlikely]] + { + ClearDisplayTexture(); + return false; + } + + src->MakeReadyForSampling(); + + g_gpu_device->SetRenderTarget(m_deinterlace_texture.get()); + g_gpu_device->SetPipeline(m_deinterlace_pipeline.get()); + g_gpu_device->SetTextureSampler(0, src, g_gpu_device->GetNearestSampler()); + const u32 uniforms[] = {x, y, field, line_skip}; + g_gpu_device->PushUniformBuffer(uniforms, sizeof(uniforms)); + g_gpu_device->SetViewportAndScissor(0, 0, width, full_height); + g_gpu_device->Draw(3, 0); + + m_deinterlace_texture->MakeReadyForSampling(); + SetDisplayTexture(m_deinterlace_texture.get(), 0, 0, width, full_height); + return true; + } + + case DisplayDeinterlacingMode::Blend: + { + constexpr u32 NUM_BLEND_BUFFERS = 2; + + GL_SCOPE_FMT("DeinterlaceBlend({{{},{}}}, {}x{}, field={}, line_skip={})", x, y, width, height, field, line_skip); + + const u32 this_buffer = m_current_deinterlace_buffer; + 
m_current_deinterlace_buffer = (m_current_deinterlace_buffer + 1u) % NUM_BLEND_BUFFERS; + GL_INS_FMT("Current buffer: {}", this_buffer); + if (!DeinterlaceExtractField(this_buffer, src, x, y, width, height, line_skip) || + !DeinterlaceSetTargetSize(width, height, false)) [[unlikely]] + { + ClearDisplayTexture(); + return false; + } + + // TODO: could be implemented with alpha blending instead.. + + g_gpu_device->InvalidateRenderTarget(m_deinterlace_texture.get()); + g_gpu_device->SetRenderTarget(m_deinterlace_texture.get()); + g_gpu_device->SetPipeline(m_deinterlace_pipeline.get()); + g_gpu_device->SetTextureSampler(0, m_deinterlace_buffers[this_buffer].get(), g_gpu_device->GetNearestSampler()); + g_gpu_device->SetTextureSampler(1, m_deinterlace_buffers[(this_buffer - 1) % NUM_BLEND_BUFFERS].get(), + g_gpu_device->GetNearestSampler()); + g_gpu_device->SetViewportAndScissor(0, 0, width, height); + g_gpu_device->Draw(3, 0); + + m_deinterlace_texture->MakeReadyForSampling(); + SetDisplayTexture(m_deinterlace_texture.get(), 0, 0, width, height); + return true; + } + + case DisplayDeinterlacingMode::Adaptive: + { + GL_SCOPE_FMT("DeinterlaceAdaptive({{{},{}}}, {}x{}, field={}, line_skip={})", x, y, width, height, field, + line_skip); + + const u32 full_height = height * 2; + const u32 this_buffer = m_current_deinterlace_buffer; + m_current_deinterlace_buffer = (m_current_deinterlace_buffer + 1u) % DEINTERLACE_BUFFER_COUNT; + GL_INS_FMT("Current buffer: {}", this_buffer); + if (!DeinterlaceExtractField(this_buffer, src, x, y, width, height, line_skip) || + !DeinterlaceSetTargetSize(width, full_height, false)) [[unlikely]] + { + ClearDisplayTexture(); + return false; + } + + g_gpu_device->SetRenderTarget(m_deinterlace_texture.get()); + g_gpu_device->SetPipeline(m_deinterlace_pipeline.get()); + g_gpu_device->SetTextureSampler(0, m_deinterlace_buffers[this_buffer].get(), g_gpu_device->GetNearestSampler()); + g_gpu_device->SetTextureSampler(1, 
m_deinterlace_buffers[(this_buffer - 1) % DEINTERLACE_BUFFER_COUNT].get(), + g_gpu_device->GetNearestSampler()); + g_gpu_device->SetTextureSampler(2, m_deinterlace_buffers[(this_buffer - 2) % DEINTERLACE_BUFFER_COUNT].get(), + g_gpu_device->GetNearestSampler()); + g_gpu_device->SetTextureSampler(3, m_deinterlace_buffers[(this_buffer - 3) % DEINTERLACE_BUFFER_COUNT].get(), + g_gpu_device->GetNearestSampler()); + const u32 uniforms[] = {field, full_height}; + g_gpu_device->PushUniformBuffer(uniforms, sizeof(uniforms)); + g_gpu_device->SetViewportAndScissor(0, 0, width, full_height); + g_gpu_device->Draw(3, 0); + + m_deinterlace_texture->MakeReadyForSampling(); + SetDisplayTexture(m_deinterlace_texture.get(), 0, 0, width, full_height); + return true; + } + + default: + UnreachableCode(); + } +} + +bool GPU::DeinterlaceExtractField(u32 dst_bufidx, GPUTexture* src, u32 x, u32 y, u32 width, u32 height, u32 line_skip) +{ + if (!m_deinterlace_buffers[dst_bufidx] || m_deinterlace_buffers[dst_bufidx]->GetWidth() != width || + m_deinterlace_buffers[dst_bufidx]->GetHeight() != height) + { + if (!g_gpu_device->ResizeTexture(&m_deinterlace_buffers[dst_bufidx], width, height, GPUTexture::Type::RenderTarget, + GPUTexture::Format::RGBA8, false)) [[unlikely]] + { + return false; + } + + GL_OBJECT_NAME_FMT(m_deinterlace_buffers[dst_bufidx], "Blend Deinterlace Buffer {}", dst_bufidx); + } + + GPUTexture* dst = m_deinterlace_buffers[dst_bufidx].get(); + g_gpu_device->InvalidateRenderTarget(dst); + + // If we're not skipping lines, then we can simply copy the texture. 
+ if (line_skip == 0 && src->GetFormat() == dst->GetFormat()) + { + GL_INS_FMT("DeinterlaceExtractField({{{},{}}} {}x{} line_skip={}) => copy direct", x, y, width, height, line_skip); + g_gpu_device->CopyTextureRegion(dst, 0, 0, 0, 0, src, x, y, 0, 0, width, height); + } + else + { + GL_SCOPE_FMT("DeinterlaceExtractField({{{},{}}} {}x{} line_skip={}) => shader copy", x, y, width, height, + line_skip); + + // Otherwise, we need to extract every other line from the texture. + src->MakeReadyForSampling(); + g_gpu_device->SetRenderTarget(dst); + g_gpu_device->SetPipeline(m_deinterlace_extract_pipeline.get()); + g_gpu_device->SetTextureSampler(0, src, g_gpu_device->GetNearestSampler()); + const u32 uniforms[] = {x, y, line_skip}; + g_gpu_device->PushUniformBuffer(uniforms, sizeof(uniforms)); + g_gpu_device->SetViewportAndScissor(0, 0, width, height); + g_gpu_device->Draw(3, 0); + + GL_POP(); + } + + dst->MakeReadyForSampling(); + return true; +} + +bool GPU::DeinterlaceSetTargetSize(u32 width, u32 height, bool preserve) +{ + if (!m_deinterlace_texture || m_deinterlace_texture->GetWidth() != width || + m_deinterlace_texture->GetHeight() != height) + { + if (!g_gpu_device->ResizeTexture(&m_deinterlace_texture, width, height, GPUTexture::Type::RenderTarget, + GPUTexture::Format::RGBA8, preserve)) [[unlikely]] + { + return false; + } + + GL_OBJECT_NAME(m_deinterlace_texture, "Deinterlace target texture"); + } + + return true; +} + +bool GPU::ApplyChromaSmoothing(GPUTexture* src, u32 x, u32 y, u32 width, u32 height) +{ + if (!m_chroma_smoothing_texture || m_chroma_smoothing_texture->GetWidth() != width || + m_chroma_smoothing_texture->GetHeight() != height) + { + if (!g_gpu_device->ResizeTexture(&m_chroma_smoothing_texture, width, height, GPUTexture::Type::RenderTarget, + GPUTexture::Format::RGBA8, false)) + { + ClearDisplayTexture(); + return false; + } + + GL_OBJECT_NAME(m_chroma_smoothing_texture, "Chroma smoothing texture"); + } + + 
GL_SCOPE_FMT("ApplyChromaSmoothing({{{},{}}}, {}x{})", x, y, width, height); + + src->MakeReadyForSampling(); + g_gpu_device->InvalidateRenderTarget(m_chroma_smoothing_texture.get()); + g_gpu_device->SetRenderTarget(m_chroma_smoothing_texture.get()); + g_gpu_device->SetPipeline(m_chroma_smoothing_pipeline.get()); + g_gpu_device->SetTextureSampler(0, src, g_gpu_device->GetNearestSampler()); + const u32 uniforms[] = {x, y, width - 1, height - 1}; + g_gpu_device->PushUniformBuffer(uniforms, sizeof(uniforms)); + g_gpu_device->SetViewportAndScissor(0, 0, width, height); + g_gpu_device->Draw(3, 0); + + m_chroma_smoothing_texture->MakeReadyForSampling(); + SetDisplayTexture(m_chroma_smoothing_texture.get(), 0, 0, width, height); + return true; +} + Common::Rectangle GPU::CalculateDrawRect(s32 window_width, s32 window_height, float* out_left_padding, float* out_top_padding, float* out_scale, float* out_x_scale, bool apply_aspect_ratio /* = true */) const diff --git a/src/core/gpu.h b/src/core/gpu.h index 390872d87..a4f516a5c 100644 --- a/src/core/gpu.h +++ b/src/core/gpu.h @@ -61,6 +61,7 @@ public: DOT_TIMER_INDEX = 0, HBLANK_TIMER_INDEX = 1, MAX_RESOLUTION_SCALE = 32, + DEINTERLACE_BUFFER_COUNT = 4, }; enum : u16 @@ -239,6 +240,7 @@ protected: bool remove_alpha); void SoftReset(); + void ClearDisplay(); // Sets dots per scanline void UpdateCRTCConfig(); @@ -313,7 +315,6 @@ protected: virtual void UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data, bool set_mask, bool check_mask); virtual void CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height); virtual void DispatchRenderCommand(); - virtual void ClearDisplay(); virtual void UpdateDisplay(); virtual void DrawRendererStats(); @@ -578,6 +579,12 @@ protected: bool RenderDisplay(GPUTexture* target, const Common::Rectangle& draw_rect, bool postfx); + bool Deinterlace(GPUTexture* src, u32 x, u32 y, u32 width, u32 height, u32 field, u32 line_skip); + bool DeinterlaceExtractField(u32 
dst_bufidx, GPUTexture* src, u32 x, u32 y, u32 width, u32 height, u32 line_skip); + bool DeinterlaceSetTargetSize(u32 width, u32 height, bool preserve); + void DestroyDeinterlaceTextures(); + bool ApplyChromaSmoothing(GPUTexture* src, u32 x, u32 y, u32 width, u32 height); + s32 m_display_width = 0; s32 m_display_height = 0; s32 m_display_active_left = 0; @@ -586,6 +593,15 @@ protected: s32 m_display_active_height = 0; float m_display_aspect_ratio = 1.0f; + u32 m_current_deinterlace_buffer = 0; + std::unique_ptr m_deinterlace_pipeline; + std::unique_ptr m_deinterlace_extract_pipeline; + std::array, DEINTERLACE_BUFFER_COUNT> m_deinterlace_buffers; + std::unique_ptr m_deinterlace_texture; + + std::unique_ptr m_chroma_smoothing_pipeline; + std::unique_ptr m_chroma_smoothing_texture; + std::unique_ptr m_display_pipeline; GPUTexture* m_display_texture = nullptr; s32 m_display_texture_view_x = 0; @@ -619,7 +635,7 @@ protected: Stats m_stats = {}; private: - bool CompileDisplayPipeline(); + bool CompileDisplayPipelines(bool display, bool deinterlace, bool chroma_smoothing); using GP0CommandHandler = bool (GPU::*)(); using GP0CommandHandlerTable = std::array; diff --git a/src/core/gpu_hw.cpp b/src/core/gpu_hw.cpp index 737fbace3..e0e55d79c 100644 --- a/src/core/gpu_hw.cpp +++ b/src/core/gpu_hw.cpp @@ -203,7 +203,6 @@ bool GPU_HW::Initialize() m_line_detect_mode = (m_resolution_scale > 1) ? 
g_settings.gpu_line_detect_mode : GPULineDetectMode::Disabled; m_clamp_uvs = ShouldClampUVs(); m_compute_uv_range = m_clamp_uvs; - m_chroma_smoothing = g_settings.gpu_24bit_chroma_smoothing; m_downsample_mode = GetDownsampleMode(m_resolution_scale); m_wireframe_mode = g_settings.gpu_wireframe_mode; m_disable_color_perspective = features.noperspective_interpolation && ShouldDisableColorPerspective(); @@ -336,7 +335,7 @@ void GPU_HW::UpdateSettings(const Settings& old_settings) m_true_color != g_settings.gpu_true_color || m_debanding != g_settings.gpu_debanding || m_per_sample_shading != per_sample_shading || m_scaled_dithering != g_settings.gpu_scaled_dithering || m_texture_filtering != g_settings.gpu_texture_filter || m_clamp_uvs != clamp_uvs || - m_chroma_smoothing != g_settings.gpu_24bit_chroma_smoothing || m_downsample_mode != downsample_mode || + m_downsample_mode != downsample_mode || (m_downsample_mode == GPUDownsampleMode::Box && g_settings.gpu_downsample_scale != old_settings.gpu_downsample_scale) || m_wireframe_mode != wireframe_mode || m_pgxp_depth_buffer != g_settings.UsingPGXPDepthBuffer() || @@ -389,7 +388,6 @@ void GPU_HW::UpdateSettings(const Settings& old_settings) m_line_detect_mode = (m_resolution_scale > 1) ? g_settings.gpu_line_detect_mode : GPULineDetectMode::Disabled; m_clamp_uvs = clamp_uvs; m_compute_uv_range = m_clamp_uvs; - m_chroma_smoothing = g_settings.gpu_24bit_chroma_smoothing; m_downsample_mode = downsample_mode; m_wireframe_mode = wireframe_mode; m_disable_color_perspective = disable_color_perspective; @@ -701,10 +699,6 @@ void GPU_HW::ClearFramebuffer() g_gpu_device->ClearRenderTarget(m_vram_texture.get(), 0); g_gpu_device->ClearDepth(m_vram_depth_texture.get(), m_pgxp_depth_buffer ? 
1.0f : 0.0f); ClearVRAMDirtyRectangle(); - - if (m_display_private_texture) - g_gpu_device->ClearRenderTarget(m_display_private_texture.get(), 0); - m_last_depth_z = 1.0f; } @@ -719,11 +713,11 @@ void GPU_HW::DestroyBuffers() m_vram_upload_buffer.reset(); m_vram_readback_download_texture.reset(); g_gpu_device->RecycleTexture(std::move(m_downsample_texture)); + g_gpu_device->RecycleTexture(std::move(m_vram_extract_texture)); g_gpu_device->RecycleTexture(std::move(m_vram_read_texture)); g_gpu_device->RecycleTexture(std::move(m_vram_depth_texture)); g_gpu_device->RecycleTexture(std::move(m_vram_texture)); g_gpu_device->RecycleTexture(std::move(m_vram_readback_texture)); - g_gpu_device->RecycleTexture(std::move(m_display_private_texture)); } bool GPU_HW::CompilePipelines() @@ -1124,22 +1118,17 @@ bool GPU_HW::CompilePipelines() { for (u8 depth_24 = 0; depth_24 < 2; depth_24++) { - for (u8 interlace_mode = 0; interlace_mode < 3; interlace_mode++) - { - std::unique_ptr fs = g_gpu_device->CreateShader( - GPUShaderStage::Fragment, - shadergen.GenerateDisplayFragmentShader( - ConvertToBoolUnchecked(depth_24), static_cast(interlace_mode), m_chroma_smoothing)); - if (!fs) - return false; + std::unique_ptr fs = g_gpu_device->CreateShader( + GPUShaderStage::Fragment, shadergen.GenerateVRAMExtractFragmentShader(ConvertToBoolUnchecked(depth_24))); + if (!fs) + return false; - plconfig.fragment_shader = fs.get(); + plconfig.fragment_shader = fs.get(); - if (!(m_display_pipelines[depth_24][interlace_mode] = g_gpu_device->CreatePipeline(plconfig))) - return false; + if (!(m_vram_extract_pipeline[depth_24] = g_gpu_device->CreatePipeline(plconfig))) + return false; - progress.Increment(); - } + progress.Increment(); } } @@ -1242,6 +1231,9 @@ void GPU_HW::DestroyPipelines() for (std::unique_ptr& p : m_vram_copy_pipelines) destroy(p); + for (std::unique_ptr& p : m_vram_extract_pipeline) + destroy(p); + destroy(m_vram_readback_pipeline); destroy(m_vram_update_depth_pipeline); 
destroy(m_vram_write_replacement_pipeline); @@ -1251,8 +1243,6 @@ void GPU_HW::DestroyPipelines() destroy(m_downsample_blur_pass_pipeline); destroy(m_downsample_composite_pass_pipeline); m_downsample_composite_sampler.reset(); - - m_display_pipelines.enumerate(destroy); } GPU_HW::BatchRenderMode GPU_HW::BatchConfig::GetRenderMode() const @@ -1407,14 +1397,6 @@ ALWAYS_INLINE_RELEASE void GPU_HW::DrawBatchVertices(BatchRenderMode render_mode g_gpu_device->DrawIndexed(num_indices, base_index, base_vertex); } -void GPU_HW::ClearDisplay() -{ - ClearDisplayTexture(); - - if (m_display_private_texture) - g_gpu_device->ClearRenderTarget(m_display_private_texture.get(), 0xFF000000u); -} - ALWAYS_INLINE_RELEASE void GPU_HW::HandleFlippedQuadTextureCoordinates(BatchVertex* vertices) { // Taken from beetle-psx gpu_polygon.cpp @@ -2417,19 +2399,6 @@ ALWAYS_INLINE bool GPU_HW::IsFlushed() const return (m_batch_index_count == 0); } -GPU_HW::InterlacedRenderMode GPU_HW::GetInterlacedRenderMode() const -{ - if (IsInterlacedDisplayEnabled()) - { - return m_GPUSTAT.vertical_resolution ? 
InterlacedRenderMode::InterleavedFields : - InterlacedRenderMode::SeparateFields; - } - else - { - return InterlacedRenderMode::None; - } -} - ALWAYS_INLINE_RELEASE bool GPU_HW::NeedsTwoPassRendering() const { // We need two-pass rendering when using BG-FG blending and texturing, as the transparency can be enabled @@ -3104,6 +3073,8 @@ void GPU_HW::UpdateDisplay() { FlushRender(); + GL_SCOPE("UpdateDisplay()"); + if (g_settings.debugging.show_vram) { if (IsUsingMultisampling()) @@ -3119,89 +3090,106 @@ void GPU_HW::UpdateDisplay() SetDisplayParameters(VRAM_WIDTH, VRAM_HEIGHT, 0, 0, VRAM_WIDTH, VRAM_HEIGHT, static_cast(VRAM_WIDTH) / static_cast(VRAM_HEIGHT)); + return; + } + + SetDisplayParameters(m_crtc_state.display_width, m_crtc_state.display_height, m_crtc_state.display_origin_left, + m_crtc_state.display_origin_top, m_crtc_state.display_vram_width, + m_crtc_state.display_vram_height, ComputeDisplayAspectRatio()); + + const bool interlaced = IsInterlacedDisplayEnabled(); + const u32 interlaced_field = GetInterlacedDisplayField(); + const u32 resolution_scale = m_GPUSTAT.display_area_color_depth_24 ? 1 : m_resolution_scale; + const u32 scaled_vram_offset_x = m_crtc_state.display_vram_left * resolution_scale; + const u32 scaled_vram_offset_y = (m_crtc_state.display_vram_top * resolution_scale) + + ((interlaced && m_GPUSTAT.vertical_resolution) ? interlaced_field : 0); + const u32 scaled_display_width = m_crtc_state.display_vram_width * resolution_scale; + const u32 scaled_display_height = m_crtc_state.display_vram_height * resolution_scale; + const u32 read_height = interlaced ? 
(scaled_display_height / 2u) : scaled_display_height; + const u32 line_skip = m_GPUSTAT.vertical_resolution; + bool drew_anything = false; + + if (IsDisplayDisabled()) + { + ClearDisplayTexture(); + return; + } + else if (!m_GPUSTAT.display_area_color_depth_24 && !IsUsingMultisampling() && + (scaled_vram_offset_x + scaled_display_width) <= m_vram_texture->GetWidth() && + (scaled_vram_offset_y + scaled_display_height) <= m_vram_texture->GetHeight()) + { + // Fast path if no copies are needed. + if (interlaced) + { + GL_INS("Deinterlace fast path"); + drew_anything = true; + Deinterlace(m_vram_texture.get(), scaled_vram_offset_x, scaled_vram_offset_y, scaled_display_width, read_height, + interlaced_field, line_skip); + } + else + { + GL_INS("Direct display"); + SetDisplayTexture(m_vram_texture.get(), scaled_vram_offset_x, scaled_vram_offset_y, scaled_display_width, + scaled_display_height); + } } else { - // TODO: use a dynamically sized texture - SetDisplayParameters(m_crtc_state.display_width, m_crtc_state.display_height, m_crtc_state.display_origin_left, - m_crtc_state.display_origin_top, m_crtc_state.display_vram_width, - m_crtc_state.display_vram_height, ComputeDisplayAspectRatio()); - - const u32 resolution_scale = m_GPUSTAT.display_area_color_depth_24 ? 
1 : m_resolution_scale; - const u32 vram_offset_x = m_crtc_state.display_vram_left; - const u32 vram_offset_y = m_crtc_state.display_vram_top; - const u32 scaled_vram_offset_x = vram_offset_x * resolution_scale; - const u32 scaled_vram_offset_y = vram_offset_y * resolution_scale; - const u32 display_width = m_crtc_state.display_vram_width; - const u32 display_height = m_crtc_state.display_vram_height; - const u32 scaled_display_width = display_width * resolution_scale; - const u32 scaled_display_height = display_height * resolution_scale; - const InterlacedRenderMode interlaced = GetInterlacedRenderMode(); - - if (IsDisplayDisabled()) + if (!m_vram_extract_texture || m_vram_extract_texture->GetWidth() != scaled_display_width || + m_vram_extract_texture->GetHeight() != read_height) { - ClearDisplayTexture(); - } - else if (!m_GPUSTAT.display_area_color_depth_24 && interlaced == InterlacedRenderMode::None && - !IsUsingMultisampling() && (scaled_vram_offset_x + scaled_display_width) <= m_vram_texture->GetWidth() && - (scaled_vram_offset_y + scaled_display_height) <= m_vram_texture->GetHeight()) - { - - if (IsUsingDownsampling()) + if (!g_gpu_device->ResizeTexture(&m_vram_extract_texture, scaled_display_width, read_height, + GPUTexture::Type::RenderTarget, GPUTexture::Format::RGBA8)) [[unlikely]] { - DownsampleFramebuffer(m_vram_texture.get(), scaled_vram_offset_x, scaled_vram_offset_y, scaled_display_width, - scaled_display_height); + ClearDisplayTexture(); + return; } - else + } + + g_gpu_device->InvalidateRenderTarget(m_vram_extract_texture.get()); + g_gpu_device->SetRenderTarget(m_vram_extract_texture.get()); + g_gpu_device->SetPipeline(m_vram_extract_pipeline[BoolToUInt8(m_GPUSTAT.display_area_color_depth_24)].get()); + g_gpu_device->SetTextureSampler(0, m_vram_texture.get(), g_gpu_device->GetNearestSampler()); + + const u32 reinterpret_start_x = m_crtc_state.regs.X * resolution_scale; + const u32 skip_x = (m_crtc_state.display_vram_left - m_crtc_state.regs.X) * 
resolution_scale; + GL_INS_FMT("Convert 16bpp to 24bpp, skip_x = {}, line_skip = {}", skip_x, line_skip); + + const u32 uniforms[4] = {reinterpret_start_x, scaled_vram_offset_y, skip_x, line_skip}; + g_gpu_device->PushUniformBuffer(uniforms, sizeof(uniforms)); + + g_gpu_device->SetViewportAndScissor(0, 0, scaled_display_width, read_height); + g_gpu_device->Draw(3, 0); + + m_vram_extract_texture->MakeReadyForSampling(); + drew_anything = true; + + if (g_settings.gpu_24bit_chroma_smoothing) + { + if (ApplyChromaSmoothing(m_vram_extract_texture.get(), 0, 0, scaled_display_width, read_height)) { - SetDisplayTexture(m_vram_texture.get(), scaled_vram_offset_x, scaled_vram_offset_y, scaled_display_width, - scaled_display_height); + if (interlaced) + Deinterlace(m_display_texture, 0, 0, scaled_display_width, read_height, interlaced_field, 0); } } else { - if (!m_display_private_texture || m_display_private_texture->GetWidth() != scaled_display_width || - m_display_private_texture->GetHeight() != scaled_display_height) - { - g_gpu_device->RecycleTexture(std::move(m_display_private_texture)); - if (!(m_display_private_texture = g_gpu_device->FetchTexture( - scaled_display_width, scaled_display_height, 1, 1, 1, GPUTexture::Type::RenderTarget, VRAM_RT_FORMAT))) - { - Log_ErrorFmt("Failed to create {}x{} display texture", scaled_display_width, scaled_display_height); - ClearDisplayTexture(); - return; - } - - GL_OBJECT_NAME(m_display_private_texture, "Display Texture"); - } - - // TODO: discard vs load for interlaced - if (interlaced == InterlacedRenderMode::None) - g_gpu_device->InvalidateRenderTarget(m_display_private_texture.get()); - - g_gpu_device->SetRenderTarget(m_display_private_texture.get()); - g_gpu_device->SetPipeline( - m_display_pipelines[BoolToUInt8(m_GPUSTAT.display_area_color_depth_24)][static_cast(interlaced)].get()); - g_gpu_device->SetTextureSampler(0, m_vram_texture.get(), g_gpu_device->GetNearestSampler()); - - const u32 reinterpret_field_offset = 
(interlaced != InterlacedRenderMode::None) ? GetInterlacedDisplayField() : 0; - const u32 reinterpret_start_x = m_crtc_state.regs.X * resolution_scale; - const u32 reinterpret_crop_left = (m_crtc_state.display_vram_left - m_crtc_state.regs.X) * resolution_scale; - const u32 uniforms[4] = {reinterpret_start_x, scaled_vram_offset_y + reinterpret_field_offset, - reinterpret_crop_left, reinterpret_field_offset}; - g_gpu_device->PushUniformBuffer(uniforms, sizeof(uniforms)); - - g_gpu_device->SetViewportAndScissor(0, 0, scaled_display_width, scaled_display_height); - g_gpu_device->Draw(3, 0); - - if (IsUsingDownsampling()) - DownsampleFramebuffer(m_display_private_texture.get(), 0, 0, scaled_display_width, scaled_display_height); + if (interlaced) + Deinterlace(m_vram_extract_texture.get(), 0, 0, scaled_display_width, read_height, interlaced_field, 0); else - SetDisplayTexture(m_display_private_texture.get(), 0, 0, scaled_display_width, scaled_display_height); - - RestoreDeviceContext(); + SetDisplayTexture(m_vram_extract_texture.get(), 0, 0, scaled_display_width, read_height); } } + + if (m_downsample_mode != GPUDownsampleMode::Disabled) + { + DebugAssert(m_display_texture); + DownsampleFramebuffer(m_display_texture, m_display_texture_view_x, m_display_texture_view_y, + m_display_texture_view_width, m_display_texture_view_height); + } + + if (drew_anything) + RestoreDeviceContext(); } void GPU_HW::DownsampleFramebuffer(GPUTexture* source, u32 left, u32 top, u32 width, u32 height) diff --git a/src/core/gpu_hw.h b/src/core/gpu_hw.h index 02702b557..35a0aa484 100644 --- a/src/core/gpu_hw.h +++ b/src/core/gpu_hw.h @@ -32,13 +32,6 @@ public: OnlyTransparent }; - enum class InterlacedRenderMode : u8 - { - None, - InterleavedFields, - SeparateFields - }; - GPU_HW(); ~GPU_HW() override; @@ -56,7 +49,6 @@ public: std::tuple GetEffectiveDisplayResolution(bool scaled = true) override final; std::tuple GetFullDisplayResolution(bool scaled = true) override final; - void 
ClearDisplay() override; void UpdateDisplay() override; private: @@ -162,9 +154,6 @@ private: /// Returns the value to be written to the depth buffer for the current operation for mask bit emulation. float GetCurrentNormalizedVertexDepth() const; - /// Returns the interlaced mode to use when scanning out/displaying. - InterlacedRenderMode GetInterlacedRenderMode() const; - /// Returns if the draw needs to be broken into opaque/transparent passes. bool NeedsTwoPassRendering() const; @@ -212,7 +201,6 @@ private: std::unique_ptr m_vram_readback_texture; std::unique_ptr m_vram_readback_download_texture; std::unique_ptr m_vram_replacement_texture; - std::unique_ptr m_display_private_texture; // TODO: Move to base. std::unique_ptr m_vram_upload_buffer; std::unique_ptr m_vram_write_texture; @@ -237,7 +225,6 @@ private: bool m_supports_framebuffer_fetch : 1 = false; bool m_per_sample_shading : 1 = false; bool m_scaled_dithering : 1 = false; - bool m_chroma_smoothing : 1 = false; bool m_disable_color_perspective : 1 = false; GPUTextureFilter m_texture_filtering = GPUTextureFilter::Nearest; @@ -275,12 +262,11 @@ private: std::unique_ptr m_vram_readback_pipeline; std::unique_ptr m_vram_update_depth_pipeline; - - // [depth_24][interlace_mode] - DimensionalArray, 3, 2> m_display_pipelines{}; - std::unique_ptr m_vram_write_replacement_pipeline; + std::array, 2> m_vram_extract_pipeline; // [24bit] + std::unique_ptr m_vram_extract_texture; + std::unique_ptr m_downsample_texture; std::unique_ptr m_downsample_first_pass_pipeline; std::unique_ptr m_downsample_mid_pass_pipeline; diff --git a/src/core/gpu_hw_shadergen.cpp b/src/core/gpu_hw_shadergen.cpp index 994f23784..0e7feda18 100644 --- a/src/core/gpu_hw_shadergen.cpp +++ b/src/core/gpu_hw_shadergen.cpp @@ -1019,36 +1019,18 @@ float3 ApplyDebanding(float2 frag_coord) return ss.str(); } -std::string GPU_HW_ShaderGen::GenerateDisplayFragmentShader(bool depth_24bit, - GPU_HW::InterlacedRenderMode interlace_mode, - bool smooth_chroma) 
+std::string GPU_HW_ShaderGen::GenerateVRAMExtractFragmentShader(bool depth_24bit) { std::stringstream ss; WriteHeader(ss); DefineMacro(ss, "DEPTH_24BIT", depth_24bit); - DefineMacro(ss, "INTERLACED", interlace_mode != GPU_HW::InterlacedRenderMode::None); - DefineMacro(ss, "INTERLEAVED", interlace_mode == GPU_HW::InterlacedRenderMode::InterleavedFields); - DefineMacro(ss, "SMOOTH_CHROMA", smooth_chroma); + DefineMacro(ss, "MULTISAMPLED", UsingMSAA()); WriteCommonFunctions(ss); - DeclareUniformBuffer(ss, {"uint2 u_vram_offset", "uint u_crop_left", "uint u_field_offset"}, true); + DeclareUniformBuffer(ss, {"uint2 u_vram_offset", "uint u_skip_x", "uint u_line_skip"}, true); DeclareTexture(ss, "samp0", 0, UsingMSAA()); ss << R"( -float3 RGBToYUV(float3 rgb) -{ - return float3(dot(rgb.rgb, float3(0.299f, 0.587f, 0.114f)), - dot(rgb.rgb, float3(-0.14713f, -0.28886f, 0.436f)), - dot(rgb.rgb, float3(0.615f, -0.51499f, -0.10001f))); -} - -float3 YUVToRGB(float3 yuv) -{ - return float3(dot(yuv, float3(1.0f, 0.0f, 1.13983f)), - dot(yuv, float3(1.0f, -0.39465f, -0.58060f)), - dot(yuv, float3(1.0f, 2.03211f, 0.0f))); -} - float4 LoadVRAM(int2 coords) { #if MULTISAMPLING @@ -1079,61 +1061,15 @@ float3 SampleVRAM24(uint2 icoords) return float3(float(s1s0 & 0xFFu) / 255.0, float((s1s0 >> 8u) & 0xFFu) / 255.0, float((s1s0 >> 16u) & 0xFFu) / 255.0); } - -float3 SampleVRAMAverage2x2(uint2 icoords) -{ - float3 value = SampleVRAM24(icoords); - value += SampleVRAM24(icoords + uint2(0, 1)); - value += SampleVRAM24(icoords + uint2(1, 0)); - value += SampleVRAM24(icoords + uint2(1, 1)); - return value * 0.25; -} - -float3 SampleVRAM24Smoothed(uint2 icoords) -{ - int2 base = int2(icoords) - 1; - uint2 low = uint2(max(base & ~1, int2(0, 0))); - uint2 high = low + 2u; - float2 coeff = vec2(base & 1) * 0.5 + 0.25; - - float3 p = SampleVRAM24(icoords); - float3 p00 = SampleVRAMAverage2x2(low); - float3 p01 = SampleVRAMAverage2x2(uint2(low.x, high.y)); - float3 p10 = 
SampleVRAMAverage2x2(uint2(high.x, low.y)); - float3 p11 = SampleVRAMAverage2x2(high); - - float3 s = lerp(lerp(p00, p10, coeff.x), - lerp(p01, p11, coeff.x), - coeff.y); - - float y = RGBToYUV(p).x; - float2 uv = RGBToYUV(s).yz; - return YUVToRGB(float3(y, uv)); -} )"; DeclareFragmentEntryPoint(ss, 0, 1, {}, true, 1); ss << R"( { - uint2 icoords = uint2(v_pos.xy) + uint2(u_crop_left, 0u); - - #if INTERLACED - if ((icoords.y & 1u) != u_field_offset) - discard; - - #if !INTERLEAVED - icoords.y /= 2u; - #else - icoords.y &= ~1u; - #endif - #endif + uint2 icoords = uint2(uint(v_pos.x) + u_skip_x, uint(v_pos.y) << u_line_skip); #if DEPTH_24BIT - #if SMOOTH_CHROMA - o_col0 = float4(SampleVRAM24Smoothed(icoords), 1.0); - #else - o_col0 = float4(SampleVRAM24(icoords), 1.0); - #endif + o_col0 = float4(SampleVRAM24(icoords), 1.0); #else o_col0 = float4(LoadVRAM(int2((icoords + u_vram_offset) % VRAM_SIZE)).rgb, 1.0); #endif diff --git a/src/core/gpu_hw_shadergen.h b/src/core/gpu_hw_shadergen.h index 938e6e142..f281d8e51 100644 --- a/src/core/gpu_hw_shadergen.h +++ b/src/core/gpu_hw_shadergen.h @@ -17,8 +17,6 @@ public: std::string GenerateBatchVertexShader(bool textured); std::string GenerateBatchFragmentShader(GPU_HW::BatchRenderMode render_mode, GPUTransparencyMode transparency, GPUTextureMode texture_mode, bool dithering, bool interlacing); - std::string GenerateDisplayFragmentShader(bool depth_24bit, GPU_HW::InterlacedRenderMode interlace_mode, - bool smooth_chroma); std::string GenerateWireframeGeometryShader(); std::string GenerateWireframeFragmentShader(); std::string GenerateVRAMReadFragmentShader(); @@ -26,6 +24,7 @@ public: std::string GenerateVRAMCopyFragmentShader(); std::string GenerateVRAMFillFragmentShader(bool wrapped, bool interlaced); std::string GenerateVRAMUpdateDepthFragmentShader(); + std::string GenerateVRAMExtractFragmentShader(bool depth_24bit); std::string GenerateAdaptiveDownsampleVertexShader(); std::string 
GenerateAdaptiveDownsampleMipFragmentShader(bool first_pass); diff --git a/src/core/gpu_shadergen.cpp b/src/core/gpu_shadergen.cpp index 76583063b..473293376 100644 --- a/src/core/gpu_shadergen.cpp +++ b/src/core/gpu_shadergen.cpp @@ -83,3 +83,181 @@ std::string GPUShaderGen::GenerateDisplaySharpBilinearFragmentShader() return ss.str(); } + +std::string GPUShaderGen::GenerateInterleavedFieldExtractFragmentShader() +{ + std::stringstream ss; + WriteHeader(ss); + DeclareUniformBuffer(ss, {"uint2 u_src_offset", "uint u_line_skip"}, true); + DeclareTexture(ss, "samp0", 0, false); + + DeclareFragmentEntryPoint(ss, 0, 1, {}, true); + ss << R"( +{ + uint2 tcoord = u_src_offset + uint2(uint(v_pos.x), uint(v_pos.y) << u_line_skip); + o_col0 = LOAD_TEXTURE(samp0, int2(tcoord), 0); +} +)"; + + return ss.str(); +} + +std::string GPUShaderGen::GenerateDeinterlaceWeaveFragmentShader() +{ + std::stringstream ss; + WriteHeader(ss); + DeclareUniformBuffer(ss, {"uint2 u_src_offset", "uint u_render_field", "uint u_line_skip"}, true); + DeclareTexture(ss, "samp0", 0, false); + + DeclareFragmentEntryPoint(ss, 0, 1, {}, true); + ss << R"( +{ + uint2 fcoord = uint2(v_pos.xy); + if ((fcoord.y & 1) != u_render_field) + discard; + + uint2 tcoord = u_src_offset + uint2(fcoord.x, (fcoord.y / 2u) << u_line_skip); + o_col0 = LOAD_TEXTURE(samp0, int2(tcoord), 0); +})"; + + return ss.str(); +} + +std::string GPUShaderGen::GenerateDeinterlaceBlendFragmentShader() +{ + std::stringstream ss; + WriteHeader(ss); + DeclareTexture(ss, "samp0", 0, false); + DeclareTexture(ss, "samp1", 1, false); + + DeclareFragmentEntryPoint(ss, 0, 1, {}, true); + ss << R"( +{ + uint2 uv = uint2(v_pos.xy); + float4 c0 = LOAD_TEXTURE(samp0, int2(uv), 0); + float4 c1 = LOAD_TEXTURE(samp1, int2(uv), 0); + o_col0 = (c0 + c1) * 0.5f; +} +)"; + + return ss.str(); +} + +std::string GPUShaderGen::GenerateFastMADReconstructFragmentShader() +{ + std::stringstream ss; + WriteHeader(ss); + DeclareUniformBuffer(ss, {"uint 
u_current_field", "uint u_height"}, true); + DeclareTexture(ss, "samp0", 0, false); + DeclareTexture(ss, "samp1", 1, false); + DeclareTexture(ss, "samp2", 2, false); + DeclareTexture(ss, "samp3", 3, false); + + ss << R"( +CONSTANT float3 SENSITIVITY = float3(0.08f, 0.08f, 0.08f); +)"; + + DeclareFragmentEntryPoint(ss, 0, 1, {}, true); + ss << R"( +{ + int2 uv = int2(int(v_pos.x), int(v_pos.y) >> 1); + float3 cur = LOAD_TEXTURE(samp0, uv, 0).rgb; + + float3 hn = LOAD_TEXTURE(samp0, uv + int2(0, -1), 0).rgb; + float3 cn = LOAD_TEXTURE(samp1, uv, 0).rgb; + float3 ln = LOAD_TEXTURE(samp0, uv + int2(0, 1), 0).rgb; + + float3 ho = LOAD_TEXTURE(samp2, uv + int2(0, -1), 0).rgb; + float3 co = LOAD_TEXTURE(samp3, uv, 0).rgb; + float3 lo = LOAD_TEXTURE(samp2, uv + int2(0, 1), 0).rgb; + + float3 mh = abs(hn.rgb - ho.rgb) - SENSITIVITY; + float3 mc = abs(cn.rgb - co.rgb) - SENSITIVITY; + float3 ml = abs(ln.rgb - lo.rgb) - SENSITIVITY; + float3 mmaxv = max(mh, max(mc, ml)); + float mmax = max(mmaxv.r, max(mmaxv.g, mmaxv.b)); + + // Is pixel F [n][ x , y ] present in the Current Field f [n] ? + uint row = uint(v_pos.y); + if ((row & 1u) == u_current_field) + { + // Directly uses the pixel from the Current Field + o_col0.rgb = cur; + } + else if (row > 0 && row < u_height && mmax > 0.0f) + { + // Reconstructs the missing pixel as the average of the same pixel from the line above and the + // line below it in the Current Field. + o_col0.rgb = (hn + ln) / 2.0; + } + else + { + // Reconstructs the missing pixel as the same pixel from the Previous Field. 
+ o_col0.rgb = cn; + } + o_col0.a = 1.0f; +} +)"; + + return ss.str(); +} + +std::string GPUShaderGen::GenerateChromaSmoothingFragmentShader() +{ + std::stringstream ss; + WriteHeader(ss); + DeclareUniformBuffer(ss, {"uint2 u_sample_offset", "uint2 u_clamp_size"}, true); + DeclareTexture(ss, "samp0", 0); + + ss << R"( +float3 RGBToYUV(float3 rgb) +{ + return float3(dot(rgb.rgb, float3(0.299f, 0.587f, 0.114f)), + dot(rgb.rgb, float3(-0.14713f, -0.28886f, 0.436f)), + dot(rgb.rgb, float3(0.615f, -0.51499f, -0.10001f))); +} + +float3 YUVToRGB(float3 yuv) +{ + return float3(dot(yuv, float3(1.0f, 0.0f, 1.13983f)), + dot(yuv, float3(1.0f, -0.39465f, -0.58060f)), + dot(yuv, float3(1.0f, 2.03211f, 0.0f))); +} + +float3 SampleVRAMAverage2x2(uint2 icoords) +{ + float3 value = LOAD_TEXTURE(samp0, icoords, 0).rgb; + value += LOAD_TEXTURE(samp0, icoords + uint2(0, 1), 0).rgb; + value += LOAD_TEXTURE(samp0, icoords + uint2(1, 0), 0).rgb; + value += LOAD_TEXTURE(samp0, icoords + uint2(1, 1), 0).rgb; + return value * 0.25; +} +)"; + + DeclareFragmentEntryPoint(ss, 0, 1, {}, true, 1); + ss << R"( +{ + uint2 icoords = uint2(v_pos.xy) + u_sample_offset; + int2 base = int2(icoords) - 1; + uint2 low = uint2(max(base & ~1, int2(0, 0))); + uint2 high = min(low + 2u, u_clamp_size); + float2 coeff = vec2(base & 1) * 0.5 + 0.25; + + float3 p = LOAD_TEXTURE(samp0, icoords, 0); + float3 p00 = SampleVRAMAverage2x2(low); + float3 p01 = SampleVRAMAverage2x2(uint2(low.x, high.y)); + float3 p10 = SampleVRAMAverage2x2(uint2(high.x, low.y)); + float3 p11 = SampleVRAMAverage2x2(high); + + float3 s = lerp(lerp(p00, p10, coeff.x), + lerp(p01, p11, coeff.x), + coeff.y); + + float y = RGBToYUV(p).x; + float2 uv = RGBToYUV(s).yz; + o_col0 = float4(YUVToRGB(float3(y, uv)), 1.0); +} +)"; + + return ss.str(); +} diff --git a/src/core/gpu_shadergen.h b/src/core/gpu_shadergen.h index 8a4d0cac7..171bc0a96 100644 --- a/src/core/gpu_shadergen.h +++ b/src/core/gpu_shadergen.h @@ -15,6 +15,13 @@ public: std::string 
GenerateDisplayFragmentShader(bool clamp_uv); std::string GenerateDisplaySharpBilinearFragmentShader(); + std::string GenerateInterleavedFieldExtractFragmentShader(); + std::string GenerateDeinterlaceWeaveFragmentShader(); + std::string GenerateDeinterlaceBlendFragmentShader(); + std::string GenerateFastMADReconstructFragmentShader(); + + std::string GenerateChromaSmoothingFragmentShader(); + private: void WriteDisplayUniformBuffer(std::stringstream& ss); }; diff --git a/src/core/gpu_sw.cpp b/src/core/gpu_sw.cpp index ee61bc1e2..052798981 100644 --- a/src/core/gpu_sw.cpp +++ b/src/core/gpu_sw.cpp @@ -28,7 +28,7 @@ GPU_SW::GPU_SW() = default; GPU_SW::~GPU_SW() { - g_gpu_device->RecycleTexture(std::move(m_private_display_texture)); + g_gpu_device->RecycleTexture(std::move(m_upload_texture)); m_backend.Shutdown(); } @@ -92,18 +92,18 @@ void GPU_SW::UpdateSettings(const Settings& old_settings) GPUTexture* GPU_SW::GetDisplayTexture(u32 width, u32 height, GPUTexture::Format format) { - if (!m_private_display_texture || m_private_display_texture->GetWidth() != width || - m_private_display_texture->GetHeight() != height || m_private_display_texture->GetFormat() != format) + if (!m_upload_texture || m_upload_texture->GetWidth() != width || m_upload_texture->GetHeight() != height || + m_upload_texture->GetFormat() != format) { ClearDisplayTexture(); - g_gpu_device->RecycleTexture(std::move(m_private_display_texture)); - m_private_display_texture = + g_gpu_device->RecycleTexture(std::move(m_upload_texture)); + m_upload_texture = g_gpu_device->FetchTexture(width, height, 1, 1, 1, GPUTexture::Type::DynamicTexture, format, nullptr, 0); - if (!m_private_display_texture) + if (!m_upload_texture) Log_ErrorPrintf("Failed to create %ux%u %u texture", width, height, static_cast(format)); } - return m_private_display_texture.get(); + return m_upload_texture.get(); } template @@ -240,35 +240,26 @@ ALWAYS_INLINE void CopyOutRow16(const u16* src_p } template -void GPU_SW::CopyOut15Bit(u32 
src_x, u32 src_y, u32 width, u32 height, u32 field, bool interlaced, bool interleaved) +ALWAYS_INLINE_RELEASE bool GPU_SW::CopyOut15Bit(u32 src_x, u32 src_y, u32 width, u32 height, u32 line_skip) { using OutputPixelType = std::conditional_t; GPUTexture* texture = GetDisplayTexture(width, height, display_format); - if (!texture) - return; + if (!texture) [[unlikely]] + return false; - u32 dst_stride = GPU_MAX_DISPLAY_WIDTH * sizeof(OutputPixelType); - u8* dst_ptr = m_display_texture_buffer.data() + (interlaced ? (field != 0 ? dst_stride : 0) : 0); - - const bool mapped = - (!interlaced && texture->Map(reinterpret_cast(&dst_ptr), &dst_stride, 0, 0, width, height)); - - const u32 output_stride = dst_stride; - const u8 interlaced_shift = BoolToUInt8(interlaced); - const u8 interleaved_shift = BoolToUInt8(interleaved); + u32 dst_stride = width * sizeof(OutputPixelType); + u8* dst_ptr = m_upload_buffer.data(); + const bool mapped = texture->Map(reinterpret_cast(&dst_ptr), &dst_stride, 0, 0, width, height); // Fast path when not wrapping around. 
if ((src_x + width) <= VRAM_WIDTH && (src_y + height) <= VRAM_HEIGHT) { - const u32 rows = height >> interlaced_shift; - dst_stride <<= interlaced_shift; - const u16* src_ptr = &g_vram[src_y * VRAM_WIDTH + src_x]; - const u32 src_step = VRAM_WIDTH << interleaved_shift; - for (u32 row = 0; row < rows; row++) + const u32 src_step = VRAM_WIDTH << line_skip; + for (u32 row = 0; row < height; row++) { CopyOutRow16(src_ptr, reinterpret_cast(dst_ptr), width); src_ptr += src_step; @@ -277,11 +268,9 @@ void GPU_SW::CopyOut15Bit(u32 src_x, u32 src_y, u32 width, u32 height, u32 field } else { - const u32 rows = height >> interlaced_shift; - dst_stride <<= interlaced_shift; - const u32 end_x = src_x + width; - for (u32 row = 0; row < rows; row++) + const u32 y_step = (1 << line_skip); + for (u32 row = 0; row < height; row++) { const u16* src_row_ptr = &g_vram[(src_y % VRAM_HEIGHT) * VRAM_WIDTH]; OutputPixelType* dst_row_ptr = reinterpret_cast(dst_ptr); @@ -289,7 +278,7 @@ void GPU_SW::CopyOut15Bit(u32 src_x, u32 src_y, u32 width, u32 height, u32 field for (u32 col = src_x; col < end_x; col++) *(dst_row_ptr++) = VRAM16ToOutput(src_row_ptr[col % VRAM_WIDTH]); - src_y += (1 << interleaved_shift); + src_y += y_step; dst_ptr += dst_stride; } } @@ -297,61 +286,31 @@ void GPU_SW::CopyOut15Bit(u32 src_x, u32 src_y, u32 width, u32 height, u32 field if (mapped) texture->Unmap(); else - texture->Update(0, 0, width, height, m_display_texture_buffer.data(), output_stride); + texture->Update(0, 0, width, height, m_upload_buffer.data(), dst_stride); - SetDisplayTexture(texture, 0, 0, width, height); -} - -void GPU_SW::CopyOut15Bit(GPUTexture::Format display_format, u32 src_x, u32 src_y, u32 width, u32 height, u32 field, - bool interlaced, bool interleaved) -{ - switch (display_format) - { - case GPUTexture::Format::RGBA5551: - CopyOut15Bit(src_x, src_y, width, height, field, interlaced, interleaved); - break; - case GPUTexture::Format::RGB565: - CopyOut15Bit(src_x, src_y, width, height, 
field, interlaced, interleaved); - break; - case GPUTexture::Format::RGBA8: - CopyOut15Bit(src_x, src_y, width, height, field, interlaced, interleaved); - break; - case GPUTexture::Format::BGRA8: - CopyOut15Bit(src_x, src_y, width, height, field, interlaced, interleaved); - break; - default: - break; - } + return true; } template -void GPU_SW::CopyOut24Bit(u32 src_x, u32 src_y, u32 skip_x, u32 width, u32 height, u32 field, bool interlaced, - bool interleaved) +ALWAYS_INLINE_RELEASE bool GPU_SW::CopyOut24Bit(u32 src_x, u32 src_y, u32 skip_x, u32 width, u32 height, u32 line_skip) { using OutputPixelType = std::conditional_t; GPUTexture* texture = GetDisplayTexture(width, height, display_format); - if (!texture) - return; + if (!texture) [[unlikely]] + return false; u32 dst_stride = Common::AlignUpPow2(width * sizeof(OutputPixelType), 4); - u8* dst_ptr = m_display_texture_buffer.data() + (interlaced ? (field != 0 ? dst_stride : 0) : 0); - const bool mapped = - (!interlaced && texture->Map(reinterpret_cast(&dst_ptr), &dst_stride, 0, 0, width, height)); + u8* dst_ptr = m_upload_buffer.data(); + const bool mapped = texture->Map(reinterpret_cast(&dst_ptr), &dst_stride, 0, 0, width, height); - const u32 output_stride = dst_stride; - const u8 interlaced_shift = BoolToUInt8(interlaced); - const u8 interleaved_shift = BoolToUInt8(interleaved); - const u32 rows = height >> interlaced_shift; - dst_stride <<= interlaced_shift; - - if ((src_x + width) <= VRAM_WIDTH && (src_y + (rows << interleaved_shift)) <= VRAM_HEIGHT) + if ((src_x + width) <= VRAM_WIDTH && (src_y + (height << line_skip)) <= VRAM_HEIGHT) { const u8* src_ptr = reinterpret_cast(&g_vram[src_y * VRAM_WIDTH + src_x]) + (skip_x * 3); - const u32 src_stride = (VRAM_WIDTH << interleaved_shift) * sizeof(u16); - for (u32 row = 0; row < rows; row++) + const u32 src_stride = (VRAM_WIDTH << line_skip) * sizeof(u16); + for (u32 row = 0; row < height; row++) { if constexpr (display_format == GPUTexture::Format::RGBA8) { @@ 
-407,7 +366,9 @@ void GPU_SW::CopyOut24Bit(u32 src_x, u32 src_y, u32 skip_x, u32 width, u32 heigh } else { - for (u32 row = 0; row < rows; row++) + const u32 y_step = (1 << line_skip); + + for (u32 row = 0; row < height; row++) { const u16* src_row_ptr = &g_vram[(src_y % VRAM_HEIGHT) * VRAM_WIDTH]; OutputPixelType* dst_row_ptr = reinterpret_cast(dst_ptr); @@ -438,7 +399,7 @@ void GPU_SW::CopyOut24Bit(u32 src_x, u32 src_y, u32 skip_x, u32 width, u32 heigh } } - src_y += (1 << interleaved_shift); + src_y += y_step; dst_ptr += dst_stride; } } @@ -446,36 +407,55 @@ void GPU_SW::CopyOut24Bit(u32 src_x, u32 src_y, u32 skip_x, u32 width, u32 heigh if (mapped) texture->Unmap(); else - texture->Update(0, 0, width, height, m_display_texture_buffer.data(), output_stride); + texture->Update(0, 0, width, height, m_upload_buffer.data(), dst_stride); - SetDisplayTexture(texture, 0, 0, width, height); + return true; } -void GPU_SW::CopyOut24Bit(GPUTexture::Format display_format, u32 src_x, u32 src_y, u32 skip_x, u32 width, u32 height, - u32 field, bool interlaced, bool interleaved) +bool GPU_SW::CopyOut(u32 src_x, u32 src_y, u32 skip_x, u32 width, u32 height, u32 line_skip, bool is_24bit) { - switch (display_format) + if (!is_24bit) { - case GPUTexture::Format::RGBA5551: - CopyOut24Bit(src_x, src_y, skip_x, width, height, field, interlaced, interleaved); - break; - case GPUTexture::Format::RGB565: - CopyOut24Bit(src_x, src_y, skip_x, width, height, field, interlaced, interleaved); - break; - case GPUTexture::Format::RGBA8: - CopyOut24Bit(src_x, src_y, skip_x, width, height, field, interlaced, interleaved); - break; - case GPUTexture::Format::BGRA8: - CopyOut24Bit(src_x, src_y, skip_x, width, height, field, interlaced, interleaved); - break; - default: - break; - } -} + DebugAssert(skip_x == 0); -void GPU_SW::ClearDisplay() -{ - std::memset(m_display_texture_buffer.data(), 0, m_display_texture_buffer.size()); + switch (m_16bit_display_format) + { + case 
GPUTexture::Format::RGBA5551: + return CopyOut15Bit(src_x, src_y, width, height, line_skip); + + case GPUTexture::Format::RGB565: + return CopyOut15Bit(src_x, src_y, width, height, line_skip); + + case GPUTexture::Format::RGBA8: + return CopyOut15Bit(src_x, src_y, width, height, line_skip); + + case GPUTexture::Format::BGRA8: + return CopyOut15Bit(src_x, src_y, width, height, line_skip); + + default: + UnreachableCode(); + } + } + else + { + switch (m_24bit_display_format) + { + case GPUTexture::Format::RGBA5551: + return CopyOut24Bit(src_x, src_y, skip_x, width, height, line_skip); + + case GPUTexture::Format::RGB565: + return CopyOut24Bit(src_x, src_y, skip_x, width, height, line_skip); + + case GPUTexture::Format::RGBA8: + return CopyOut24Bit(src_x, src_y, skip_x, width, height, line_skip); + + case GPUTexture::Format::BGRA8: + return CopyOut24Bit(src_x, src_y, skip_x, width, height, line_skip); + + default: + UnreachableCode(); + } + } } void GPU_SW::UpdateDisplay() @@ -495,45 +475,49 @@ void GPU_SW::UpdateDisplay() return; } - const u32 vram_offset_y = m_crtc_state.display_vram_top; - const u32 display_width = m_crtc_state.display_vram_width; - const u32 display_height = m_crtc_state.display_vram_height; + const bool is_24bit = m_GPUSTAT.display_area_color_depth_24; + const bool interlaced = IsInterlacedDisplayEnabled(); + const u32 field = GetInterlacedDisplayField(); + const u32 vram_offset_x = is_24bit ? m_crtc_state.regs.X : m_crtc_state.display_vram_left; + const u32 vram_offset_y = + m_crtc_state.display_vram_top + ((interlaced && m_GPUSTAT.vertical_resolution) ? field : 0); + const u32 skip_x = is_24bit ? (m_crtc_state.display_vram_left - m_crtc_state.regs.X) : 0; + const u32 read_width = m_crtc_state.display_vram_width; + const u32 read_height = interlaced ? 
(m_crtc_state.display_vram_height / 2) : m_crtc_state.display_vram_height; if (IsInterlacedDisplayEnabled()) { - const u32 field = GetInterlacedDisplayField(); - if (m_GPUSTAT.display_area_color_depth_24) + const u32 line_skip = m_GPUSTAT.vertical_resolution; + if (CopyOut(vram_offset_x, vram_offset_y, skip_x, read_width, read_height, line_skip, is_24bit)) { - CopyOut24Bit(m_24bit_display_format, m_crtc_state.regs.X, vram_offset_y + field, - m_crtc_state.display_vram_left - m_crtc_state.regs.X, display_width, display_height, field, true, - m_GPUSTAT.vertical_resolution); - } - else - { - CopyOut15Bit(m_16bit_display_format, m_crtc_state.display_vram_left, vram_offset_y + field, display_width, - display_height, field, true, m_GPUSTAT.vertical_resolution); + if (is_24bit && g_settings.gpu_24bit_chroma_smoothing) + { + if (ApplyChromaSmoothing(m_upload_texture.get(), 0, 0, read_width, read_height)) + Deinterlace(m_display_texture, 0, 0, read_width, read_height, field, 0); + } + else + { + Deinterlace(m_upload_texture.get(), 0, 0, read_width, read_height, field, 0); + } } } else { - if (m_GPUSTAT.display_area_color_depth_24) + if (CopyOut(vram_offset_x, vram_offset_y, skip_x, read_width, read_height, 0, is_24bit)) { - CopyOut24Bit(m_24bit_display_format, m_crtc_state.regs.X, vram_offset_y, - m_crtc_state.display_vram_left - m_crtc_state.regs.X, display_width, display_height, 0, false, - false); - } - else - { - CopyOut15Bit(m_16bit_display_format, m_crtc_state.display_vram_left, vram_offset_y, display_width, - display_height, 0, false, false); + if (is_24bit && g_settings.gpu_24bit_chroma_smoothing) + ApplyChromaSmoothing(m_upload_texture.get(), 0, 0, read_width, read_height); + else + SetDisplayTexture(m_upload_texture.get(), 0, 0, read_width, read_height); } } } else { - CopyOut15Bit(m_16bit_display_format, 0, 0, VRAM_WIDTH, VRAM_HEIGHT, 0, false, false); SetDisplayParameters(VRAM_WIDTH, VRAM_HEIGHT, 0, 0, VRAM_WIDTH, VRAM_HEIGHT, static_cast(VRAM_WIDTH) / 
static_cast(VRAM_HEIGHT)); + if (CopyOut(0, 0, 0, VRAM_WIDTH, VRAM_HEIGHT, 0, false)) + SetDisplayTexture(m_upload_texture.get(), 0, 0, VRAM_WIDTH, VRAM_HEIGHT); } } diff --git a/src/core/gpu_sw.h b/src/core/gpu_sw.h index 70d13c0ec..483fc91a3 100644 --- a/src/core/gpu_sw.h +++ b/src/core/gpu_sw.h @@ -42,17 +42,13 @@ protected: void CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height) override; template - void CopyOut15Bit(u32 src_x, u32 src_y, u32 width, u32 height, u32 field, bool interlaced, bool interleaved); - void CopyOut15Bit(GPUTexture::Format display_format, u32 src_x, u32 src_y, u32 width, u32 height, u32 field, - bool interlaced, bool interleaved); + bool CopyOut15Bit(u32 src_x, u32 src_y, u32 width, u32 height, u32 line_skip); template - void CopyOut24Bit(u32 src_x, u32 src_y, u32 skip_x, u32 width, u32 height, u32 field, bool interlaced, - bool interleaved); - void CopyOut24Bit(GPUTexture::Format display_format, u32 src_x, u32 src_y, u32 skip_x, u32 width, u32 height, - u32 field, bool interlaced, bool interleaved); + bool CopyOut24Bit(u32 src_x, u32 src_y, u32 skip_x, u32 width, u32 height, u32 line_skip); + + bool CopyOut(u32 src_x, u32 src_y, u32 skip_x, u32 width, u32 height, u32 line_skip, bool is_24bit); - void ClearDisplay() override; void UpdateDisplay() override; void DispatchRenderCommand() override; @@ -62,10 +58,10 @@ protected: GPUTexture* GetDisplayTexture(u32 width, u32 height, GPUTexture::Format format); - FixedHeapArray m_display_texture_buffer; + FixedHeapArray m_upload_buffer; GPUTexture::Format m_16bit_display_format = GPUTexture::Format::RGB565; GPUTexture::Format m_24bit_display_format = GPUTexture::Format::RGBA8; - std::unique_ptr m_private_display_texture; // TODO: Move to base. 
+ std::unique_ptr m_upload_texture; GPU_SW_Backend m_backend; }; diff --git a/src/core/settings.cpp b/src/core/settings.cpp index e693918c3..da1079f8d 100644 --- a/src/core/settings.cpp +++ b/src/core/settings.cpp @@ -224,6 +224,11 @@ void Settings::Load(SettingsInterface& si) gpu_pgxp_depth_buffer = si.GetBoolValue("GPU", "PGXPDepthBuffer", false); SetPGXPDepthClearThreshold(si.GetFloatValue("GPU", "PGXPDepthClearThreshold", DEFAULT_GPU_PGXP_DEPTH_THRESHOLD)); + display_deinterlacing_mode = + ParseDisplayDeinterlacingMode(si.GetStringValue("Display", "DeinterlacingMode", + GetDisplayDeinterlacingModeName(DEFAULT_DISPLAY_DEINTERLACING_MODE)) + .c_str()) + .value_or(DEFAULT_DISPLAY_DEINTERLACING_MODE); display_crop_mode = ParseDisplayCropMode( si.GetStringValue("Display", "CropMode", GetDisplayCropModeName(DEFAULT_DISPLAY_CROP_MODE)).c_str()) @@ -498,6 +503,7 @@ void Settings::Save(SettingsInterface& si) const si.SetBoolValue("GPU", "PGXPDepthBuffer", gpu_pgxp_depth_buffer); si.SetFloatValue("GPU", "PGXPDepthClearThreshold", GetPGXPDepthClearThreshold()); + si.SetStringValue("Display", "DeinterlacingMode", GetDisplayDeinterlacingModeName(display_deinterlacing_mode)); si.SetStringValue("Display", "CropMode", GetDisplayCropModeName(display_crop_mode)); si.SetIntValue("Display", "ActiveStartOffset", display_active_start_offset); si.SetIntValue("Display", "ActiveEndOffset", display_active_end_offset); @@ -1198,6 +1204,44 @@ const char* Settings::GetGPUWireframeModeDisplayName(GPUWireframeMode mode) return Host::TranslateToCString("GPUWireframeMode", s_wireframe_mode_display_names[static_cast(mode)]); } +static constexpr const std::array s_display_deinterlacing_mode_names = { + "Disabled", + "Weave", + "Blend", + "Adaptive", +}; +static constexpr const std::array s_display_deinterlacing_mode_display_names = { + TRANSLATE_NOOP("DisplayDeinterlacingMode", "Disabled (Flickering)"), + TRANSLATE_NOOP("DisplayDeinterlacingMode", "Weave (Combing)"), + 
TRANSLATE_NOOP("DisplayDeinterlacingMode", "Blend (Blur)"), + TRANSLATE_NOOP("DisplayDeinterlacingMode", "Adaptive (FastMAD)"), +}; + +std::optional Settings::ParseDisplayDeinterlacingMode(const char* str) +{ + int index = 0; + for (const char* name : s_display_deinterlacing_mode_names) + { + if (StringUtil::Strcasecmp(name, str) == 0) + return static_cast(index); + + index++; + } + + return std::nullopt; +} + +const char* Settings::GetDisplayDeinterlacingModeName(DisplayDeinterlacingMode mode) +{ + return s_display_deinterlacing_mode_names[static_cast(mode)]; +} + +const char* Settings::GetDisplayDeinterlacingModeDisplayName(DisplayDeinterlacingMode mode) +{ + return Host::TranslateToCString("DisplayDeinterlacingMode", + s_display_deinterlacing_mode_display_names[static_cast(mode)]); +} + static constexpr const std::array s_display_crop_mode_names = {"None", "Overscan", "Borders"}; static constexpr const std::array s_display_crop_mode_display_names = { TRANSLATE_NOOP("DisplayCropMode", "None"), TRANSLATE_NOOP("DisplayCropMode", "Only Overscan Area"), diff --git a/src/core/settings.h b/src/core/settings.h index 706096fe6..4e8342b32 100644 --- a/src/core/settings.h +++ b/src/core/settings.h @@ -129,6 +129,7 @@ struct Settings bool gpu_pgxp_cpu : 1 = false; bool gpu_pgxp_preserve_proj_fp : 1 = false; bool gpu_pgxp_depth_buffer : 1 = false; + DisplayDeinterlacingMode display_deinterlacing_mode = DEFAULT_DISPLAY_DEINTERLACING_MODE; DisplayCropMode display_crop_mode = DEFAULT_DISPLAY_CROP_MODE; DisplayAspectRatio display_aspect_ratio = DEFAULT_DISPLAY_ASPECT_RATIO; DisplayAlignment display_alignment = DEFAULT_DISPLAY_ALIGNMENT; @@ -394,6 +395,10 @@ struct Settings static const char* GetGPUWireframeModeName(GPUWireframeMode mode); static const char* GetGPUWireframeModeDisplayName(GPUWireframeMode mode); + static std::optional ParseDisplayDeinterlacingMode(const char* str); + static const char* GetDisplayDeinterlacingModeName(DisplayDeinterlacingMode mode); + static const 
char* GetDisplayDeinterlacingModeDisplayName(DisplayDeinterlacingMode mode); + static std::optional ParseDisplayCropMode(const char* str); static const char* GetDisplayCropModeName(DisplayCropMode crop_mode); static const char* GetDisplayCropModeDisplayName(DisplayCropMode crop_mode); @@ -483,6 +488,7 @@ struct Settings static constexpr AudioBackend DEFAULT_AUDIO_BACKEND = AudioBackend::Null; #endif + static constexpr DisplayDeinterlacingMode DEFAULT_DISPLAY_DEINTERLACING_MODE = DisplayDeinterlacingMode::Adaptive; static constexpr DisplayCropMode DEFAULT_DISPLAY_CROP_MODE = DisplayCropMode::Overscan; static constexpr DisplayAspectRatio DEFAULT_DISPLAY_ASPECT_RATIO = DisplayAspectRatio::Auto; static constexpr DisplayAlignment DEFAULT_DISPLAY_ALIGNMENT = DisplayAlignment::Center; diff --git a/src/core/system.cpp b/src/core/system.cpp index 88ab90480..2a2ec9893 100644 --- a/src/core/system.cpp +++ b/src/core/system.cpp @@ -3684,6 +3684,7 @@ void System::CheckForSettingsChanges(const Settings& old_settings) g_settings.gpu_downsample_mode != old_settings.gpu_downsample_mode || g_settings.gpu_downsample_scale != old_settings.gpu_downsample_scale || g_settings.gpu_wireframe_mode != old_settings.gpu_wireframe_mode || + g_settings.display_deinterlacing_mode != old_settings.display_deinterlacing_mode || g_settings.display_crop_mode != old_settings.display_crop_mode || g_settings.display_aspect_ratio != old_settings.display_aspect_ratio || g_settings.display_alignment != old_settings.display_alignment || diff --git a/src/core/types.h b/src/core/types.h index 151506b96..57b45e744 100644 --- a/src/core/types.h +++ b/src/core/types.h @@ -77,6 +77,15 @@ enum class GPURenderer : u8 Count }; +enum class DisplayDeinterlacingMode : u8 +{ + Disabled, + Weave, + Blend, + Adaptive, + Count +}; + enum class GPUTextureFilter : u8 { Nearest, diff --git a/src/duckstation-qt/graphicssettingswidget.cpp b/src/duckstation-qt/graphicssettingswidget.cpp index 2706361d6..891842a88 100644 --- 
a/src/duckstation-qt/graphicssettingswidget.cpp +++ b/src/duckstation-qt/graphicssettingswidget.cpp @@ -66,6 +66,9 @@ GraphicsSettingsWidget::GraphicsSettingsWidget(SettingsWindow* dialog, QWidget* SettingWidgetBinder::BindWidgetToIntSetting(sif, m_ui.customAspectRatioDenominator, "Display", "CustomAspectRatioDenominator", 1); SettingWidgetBinder::BindWidgetToBoolSetting(sif, m_ui.widescreenHack, "GPU", "WidescreenHack", false); + SettingWidgetBinder::BindWidgetToEnumSetting( + sif, m_ui.displayDeinterlacing, "Display", "DeinterlacingMode", &Settings::ParseDisplayDeinterlacingMode, + &Settings::GetDisplayDeinterlacingModeName, Settings::DEFAULT_DISPLAY_DEINTERLACING_MODE); SettingWidgetBinder::BindWidgetToEnumSetting(sif, m_ui.displayCropMode, "Display", "CropMode", &Settings::ParseDisplayCropMode, &Settings::GetDisplayCropModeName, Settings::DEFAULT_DISPLAY_CROP_MODE); @@ -251,7 +254,7 @@ GraphicsSettingsWidget::GraphicsSettingsWidget(SettingsWindow* dialog, QWidget* dialog->registerWidgetHelp( m_ui.gpuDownsampleMode, tr("Down-Sampling"), tr("Disabled"), tr("Downsamples the rendered image prior to displaying it. Can improve overall image quality in mixed 2D/3D games, " - "but should be disabled for pure 3D games. Only applies to the hardware renderers.")); + "but should be disabled for pure 3D games.")); dialog->registerWidgetHelp(m_ui.gpuDownsampleScale, tr("Down-Sampling Display Scale"), tr("1x"), tr("Selects the resolution scale that will be applied to the final image. 1x will " "downsample to the original console resolution.")); @@ -259,15 +262,21 @@ GraphicsSettingsWidget::GraphicsSettingsWidget(SettingsWindow* dialog, QWidget* m_ui.textureFiltering, tr("Texture Filtering"), QString::fromUtf8(Settings::GetTextureFilterDisplayName(Settings::DEFAULT_GPU_TEXTURE_FILTER)), tr("Smooths out the blockiness of magnified textures on 3D object by using filtering.
Will have a " - "greater effect on higher resolution scales. Only applies to the hardware renderers.
The JINC2 and " - "especially xBR filtering modes are very demanding, and may not be worth the speed penalty.")); + "greater effect on higher resolution scales.
The JINC2 and especially xBR filtering modes are very " + "demanding, and may not be worth the speed penalty.")); dialog->registerWidgetHelp( m_ui.displayAspectRatio, tr("Aspect Ratio"), QString::fromUtf8(Settings::GetDisplayAspectRatioDisplayName(Settings::DEFAULT_DISPLAY_ASPECT_RATIO)), tr("Changes the aspect ratio used to display the console's output to the screen. The default is Auto (Game Native) " "which automatically adjusts the aspect ratio to match how a game would be shown on a typical TV of the era.")); dialog->registerWidgetHelp( - m_ui.displayCropMode, tr("Crop Mode"), + m_ui.displayDeinterlacing, tr("Deinterlacing"), + QString::fromUtf8(Settings::GetDisplayDeinterlacingModeDisplayName(Settings::DEFAULT_DISPLAY_DEINTERLACING_MODE)), + tr("Determines which algorithm is used to convert interlaced frames to progressive for display on your system. " + "Generally, the \"Disable Interlacing\" enhancement provides better quality output, but some games require " + "interlaced rendering.")); + dialog->registerWidgetHelp( + m_ui.displayCropMode, tr("Crop"), QString::fromUtf8(Settings::GetDisplayCropModeDisplayName(Settings::DEFAULT_DISPLAY_CROP_MODE)), tr("Determines how much of the area typically not visible on a consumer TV set to crop/hide. Some games display " "content in the overscan area, or use it for screen effects. May not display correctly with the \"All Borders\" " @@ -285,16 +294,15 @@ GraphicsSettingsWidget::GraphicsSettingsWidget(SettingsWindow* dialog, QWidget* "channel. This produces nicer looking gradients at the cost of making some colours look slightly different. " "Disabling the option also enables dithering, which makes the transition between colours less sharp by applying " "a pattern around those pixels. Most games are compatible with this option, but there is a number which aren't " - "and will have broken effects with it enabled. 
Only applies to the hardware renderers.")); + "and will have broken effects with it enabled.")); dialog->registerWidgetHelp( m_ui.widescreenHack, tr("Widescreen Rendering"), tr("Unchecked"), tr("Scales vertex positions in screen-space to a widescreen aspect ratio, essentially " "increasing the field of view from 4:3 to the chosen display aspect ratio in 3D games. May not be " "compatible with all games.")); - dialog->registerWidgetHelp( - m_ui.pgxpEnable, tr("PGXP Geometry Correction"), tr("Unchecked"), - tr("Reduces \"wobbly\" polygons and \"warping\" textures that are common in PS1 games.
Only " - "works with the hardware renderers. May not be compatible with all games.")); + dialog->registerWidgetHelp(m_ui.pgxpEnable, tr("PGXP Geometry Correction"), tr("Unchecked"), + tr("Reduces \"wobbly\" polygons and \"warping\" textures that are common in PS1 games. " + "May not be compatible with all games.")); dialog->registerWidgetHelp( m_ui.pgxpDepthBuffer, tr("PGXP Depth Buffer"), tr("Unchecked"), tr("Attempts to reduce polygon Z-fighting by testing pixels against the depth values from PGXP. Low compatibility, " @@ -303,8 +311,7 @@ GraphicsSettingsWidget::GraphicsSettingsWidget(SettingsWindow* dialog, QWidget* m_ui.force43For24Bit, tr("Force 4:3 For FMVs"), tr("Unchecked"), tr("Switches back to 4:3 display aspect ratio when displaying 24-bit content, usually FMVs.")); dialog->registerWidgetHelp(m_ui.chromaSmoothingFor24Bit, tr("FMV Chroma Smoothing"), tr("Unchecked"), - tr("Smooths out blockyness between colour transitions in 24-bit content, usually FMVs. " - "Only applies to the hardware renderers.")); + tr("Smooths out blockyness between colour transitions in 24-bit content, usually FMVs.")); dialog->registerWidgetHelp( m_ui.disableInterlacing, tr("Disable Interlacing"), tr("Checked"), tr( @@ -364,7 +371,7 @@ GraphicsSettingsWidget::GraphicsSettingsWidget(SettingsWindow* dialog, QWidget* dialog->registerWidgetHelp( m_ui.scaledDithering, tr("Scaled Dithering"), tr("Checked"), tr("Scales the dither pattern to the resolution scale of the emulated GPU. This makes the dither pattern much less " - "obvious at higher resolutions.
Usually safe to enable, and only supported by the hardware renderers.")); + "obvious at higher resolutions. Usually safe to enable.")); dialog->registerWidgetHelp( m_ui.useSoftwareRendererForReadbacks, tr("Software Renderer Readbacks"), tr("Unchecked"), tr("Runs the software renderer in parallel for VRAM readbacks. On some systems, this may result in greater " @@ -525,6 +532,12 @@ void GraphicsSettingsWidget::setupAdditionalUi() QString::fromUtf8(Settings::GetDisplayAspectRatioDisplayName(static_cast(i)))); } + for (u32 i = 0; i < static_cast(DisplayDeinterlacingMode::Count); i++) + { + m_ui.displayDeinterlacing->addItem( + QString::fromUtf8(Settings::GetDisplayDeinterlacingModeDisplayName(static_cast(i)))); + } + for (u32 i = 0; i < static_cast(DisplayCropMode::Count); i++) { m_ui.displayCropMode->addItem( @@ -641,7 +654,6 @@ void GraphicsSettingsWidget::updateRendererDependentOptions() m_ui.gpuDownsampleScale->setEnabled(is_hardware); m_ui.trueColor->setEnabled(is_hardware); m_ui.pgxpEnable->setEnabled(is_hardware); - m_ui.chromaSmoothingFor24Bit->setEnabled(is_hardware); m_ui.gpuLineDetectMode->setEnabled(is_hardware); m_ui.gpuLineDetectModeLabel->setEnabled(is_hardware); diff --git a/src/duckstation-qt/graphicssettingswidget.ui b/src/duckstation-qt/graphicssettingswidget.ui index e26f84d75..8df401b71 100644 --- a/src/duckstation-qt/graphicssettingswidget.ui +++ b/src/duckstation-qt/graphicssettingswidget.ui @@ -7,7 +7,7 @@ 0 0 584 - 434 + 450 @@ -260,37 +260,37 @@ - + Crop: - + - + Scaling: - + - + VSync: - + - + @@ -350,6 +350,16 @@ + + + + Deinterlacing: + + + + + + diff --git a/src/util/gpu_device.cpp b/src/util/gpu_device.cpp index 3b9668c36..187610214 100644 --- a/src/util/gpu_device.cpp +++ b/src/util/gpu_device.cpp @@ -959,6 +959,46 @@ void GPUDevice::SetDisplayMaxFPS(float max_fps) m_display_frame_interval = (max_fps > 0.0f) ? 
(1.0f / max_fps) : 0.0f; } +bool GPUDevice::ResizeTexture(std::unique_ptr* tex, u32 new_width, u32 new_height, GPUTexture::Type type, + GPUTexture::Format format, bool preserve /* = true */) +{ + GPUTexture* old_tex = tex->get(); + DebugAssert(!old_tex || (old_tex->GetLayers() == 1 && old_tex->GetLevels() == 1 && old_tex->GetSamples() == 1)); + std::unique_ptr new_tex = FetchTexture(new_width, new_height, 1, 1, 1, type, format); + if (!new_tex) [[unlikely]] + { + Log_ErrorFmt("Failed to create new {}x{} texture", new_width, new_height); + return false; + } + + if (old_tex) + { + if (old_tex->GetState() == GPUTexture::State::Cleared) + { + if (type == GPUTexture::Type::RenderTarget) + ClearRenderTarget(new_tex.get(), old_tex->GetClearColor()); + } + else if (old_tex->GetState() == GPUTexture::State::Dirty) + { + const u32 copy_width = std::min(new_width, old_tex->GetWidth()); + const u32 copy_height = std::min(new_height, old_tex->GetHeight()); + if (type == GPUTexture::Type::RenderTarget) + ClearRenderTarget(new_tex.get(), 0); + CopyTextureRegion(new_tex.get(), 0, 0, 0, 0, old_tex, 0, 0, 0, 0, copy_width, copy_height); + } + } + else if (preserve) + { + // If we're expecting data to be there, make sure to clear it. 
+ if (type == GPUTexture::Type::RenderTarget) + ClearRenderTarget(new_tex.get(), 0); + } + + RecycleTexture(std::move(*tex)); + *tex = std::move(new_tex); + return true; +} + bool GPUDevice::ShouldSkipDisplayingFrame() { if (m_display_frame_interval == 0.0f) diff --git a/src/util/gpu_device.h b/src/util/gpu_device.h index 8074dea22..d77eb6a53 100644 --- a/src/util/gpu_device.h +++ b/src/util/gpu_device.h @@ -650,6 +650,8 @@ public: bool UsesLowerLeftOrigin() const; static Common::Rectangle FlipToLowerLeft(const Common::Rectangle& rc, s32 target_height); void SetDisplayMaxFPS(float max_fps); + bool ResizeTexture(std::unique_ptr* tex, u32 new_width, u32 new_height, GPUTexture::Type type, + GPUTexture::Format format, bool preserve = true); bool ShouldSkipDisplayingFrame(); void ThrottlePresentation();