From 25292d94fd623eb28de48ff574654e5e63ffb90b Mon Sep 17 00:00:00 2001 From: Stenzek Date: Mon, 22 Jul 2024 01:12:33 +1000 Subject: [PATCH] GPU/HW: Support using ROV for accurate blending --- src/core/game_database.cpp | 11 +- src/core/game_database.h | 1 + src/core/gpu_hw.cpp | 462 ++++++++++++------ src/core/gpu_hw.h | 8 +- src/core/gpu_hw_shadergen.cpp | 222 +++++---- src/core/gpu_hw_shadergen.h | 2 +- src/core/host.cpp | 6 +- src/core/settings.cpp | 4 + src/core/settings.h | 3 + src/core/shader_cache_version.h | 2 +- src/core/system.cpp | 3 + src/duckstation-qt/graphicssettingswidget.cpp | 17 +- src/duckstation-qt/graphicssettingswidget.ui | 85 ++-- src/util/shadergen.cpp | 93 +++- src/util/shadergen.h | 6 +- 15 files changed, 639 insertions(+), 286 deletions(-) diff --git a/src/core/game_database.cpp b/src/core/game_database.cpp index c2dbaf0ce..d0f45287e 100644 --- a/src/core/game_database.cpp +++ b/src/core/game_database.cpp @@ -34,7 +34,7 @@ namespace GameDatabase { enum : u32 { GAME_DATABASE_CACHE_SIGNATURE = 0x45434C48, - GAME_DATABASE_CACHE_VERSION = 11, + GAME_DATABASE_CACHE_VERSION = 12, }; static Entry* GetMutableEntry(std::string_view serial); @@ -64,6 +64,7 @@ static constexpr const std::array(GameDatabase::Tr "ForceSoftwareRenderer", "ForceSoftwareRendererForReadbacks", "ForceRoundTextureCoordinates", + "ForceAccurateBlending", "ForceInterlacing", "DisableTrueColor", "DisableUpscaling", @@ -492,6 +493,14 @@ void GameDatabase::Entry::ApplySettings(Settings& settings, bool display_osd_mes settings.gpu_force_round_texcoords = true; } + if (HasTrait(Trait::ForceAccurateBlending)) + { + if (display_osd_messages && !settings.IsUsingSoftwareRenderer() && !settings.gpu_accurate_blending) + APPEND_MESSAGE(ICON_FA_MAGIC, TRANSLATE_SV("GameDatabase", "Accurate blending enabled.")); + + settings.gpu_accurate_blending = true; + } + if (HasTrait(Trait::ForceInterlacing)) { if (display_osd_messages && settings.gpu_disable_interlacing) diff --git a/src/core/game_database.h b/src/core/game_database.h index 432072d9e..ec5abdf60 100644 --- a/src/core/game_database.h +++ b/src/core/game_database.h @@ -32,6 +32,7 @@ enum class Trait : u32 ForceSoftwareRenderer, ForceSoftwareRendererForReadbacks, ForceRoundUpscaledTextureCoordinates, + ForceAccurateBlending, ForceInterlacing, DisableTrueColor, DisableUpscaling, diff --git a/src/core/gpu_hw.cpp b/src/core/gpu_hw.cpp index 9f8660306..022ab1c4d 100644 --- a/src/core/gpu_hw.cpp +++ b/src/core/gpu_hw.cpp @@ -37,10 +37,29 @@ Log_SetChannel(GPU_HW); static constexpr GPUTexture::Format VRAM_RT_FORMAT = GPUTexture::Format::RGBA8; static constexpr GPUTexture::Format VRAM_DS_FORMAT = GPUTexture::Format::D16; static constexpr GPUTexture::Format VRAM_DS_DEPTH_FORMAT = GPUTexture::Format::D32F; -static constexpr GPUTexture::Format VRAM_DS_EXTRACT_FORMAT = GPUTexture::Format::R32F; +static constexpr GPUTexture::Format VRAM_DS_COLOR_FORMAT = GPUTexture::Format::R32F; #ifdef _DEBUG + static u32 s_draw_number = 0; + +static constexpr const std::array s_transparency_modes = { + "HalfBackgroundPlusHalfForeground", + "BackgroundPlusForeground", + "BackgroundMinusForeground", + "BackgroundPlusQuarterForeground", + "Disabled", +}; + +static constexpr const std::array s_batch_texture_modes = { + "Palette4Bit", "Palette8Bit", "Direct16Bit", "Disabled", + "SpritePalette4Bit", "SpritePalette8Bit", "SpriteDirect16Bit", +}; + +static constexpr const std::array s_batch_render_modes = { + "TransparencyDisabled", "TransparentAndOpaque", "OnlyOpaque", "OnlyTransparent", "ShaderBlend", +}; + #endif /// Returns the distance between two rectangles. @@ -370,9 +389,9 @@ void GPU_HW::UpdateSettings(const Settings& old_settings) const u8 resolution_scale = Truncate8(CalculateResolutionScale()); const u8 multisamples = Truncate8(std::min(g_settings.gpu_multisamples, g_gpu_device->GetMaxMultisamples())); const bool clamp_uvs = ShouldClampUVs(m_texture_filtering) || ShouldClampUVs(m_sprite_texture_filtering); - const bool framebuffer_changed = - (m_resolution_scale != resolution_scale || m_multisamples != multisamples || - (static_cast(m_vram_depth_texture) != (g_settings.UsingPGXPDepthBuffer() || !m_supports_framebuffer_fetch))); + const bool framebuffer_changed = (m_resolution_scale != resolution_scale || m_multisamples != multisamples || + g_settings.IsUsingAccurateBlending() != old_settings.IsUsingAccurateBlending() || + m_pgxp_depth_buffer != g_settings.UsingPGXPDepthBuffer()); const bool shaders_changed = (m_resolution_scale != resolution_scale || m_multisamples != multisamples || m_true_color != g_settings.gpu_true_color || g_settings.gpu_debanding != old_settings.gpu_debanding || @@ -380,6 +399,7 @@ void GPU_HW::UpdateSettings(const Settings& old_settings) (resolution_scale > 1 && g_settings.gpu_scaled_dithering != old_settings.gpu_scaled_dithering) || (resolution_scale > 1 && g_settings.gpu_texture_filter == GPUTextureFilter::Nearest && g_settings.gpu_force_round_texcoords != old_settings.gpu_force_round_texcoords) || + g_settings.IsUsingAccurateBlending() != old_settings.IsUsingAccurateBlending() || m_texture_filtering != g_settings.gpu_texture_filter || m_sprite_texture_filtering != g_settings.gpu_sprite_texture_filter || m_clamp_uvs != clamp_uvs || (resolution_scale > 1 && (g_settings.gpu_downsample_mode != old_settings.gpu_downsample_mode || @@ -442,24 +462,16 @@ void GPU_HW::UpdateSettings(const Settings& old_settings) m_allow_sprite_mode = ShouldAllowSpriteMode(resolution_scale, m_texture_filtering, m_sprite_texture_filtering); m_batch.sprite_mode = (m_allow_sprite_mode && m_batch.sprite_mode); - CheckSettings(); - - if (m_pgxp_depth_buffer != g_settings.UsingPGXPDepthBuffer()) + const bool depth_buffer_changed = (m_pgxp_depth_buffer != g_settings.UsingPGXPDepthBuffer()); + if (depth_buffer_changed) { m_pgxp_depth_buffer = g_settings.UsingPGXPDepthBuffer(); m_batch.use_depth_buffer = false; m_depth_was_copied = false; - - // might be null when resizing - if (m_vram_depth_texture) - { - if (m_pgxp_depth_buffer) - ClearDepthBuffer(); - else - UpdateDepthBufferFromMaskBit(); - } } + CheckSettings(); + UpdateSoftwareRenderer(true); PrintSettingsToLog(); @@ -489,9 +501,17 @@ void GPU_HW::UpdateSettings(const Settings& old_settings) UpdateDownsamplingLevels(); RestoreDeviceContext(); UpdateVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT, g_vram, false, false); - UpdateDepthBufferFromMaskBit(); + if (m_write_mask_as_depth) + UpdateDepthBufferFromMaskBit(); UpdateDisplay(); } + else if (m_vram_depth_texture && depth_buffer_changed) + { + if (m_pgxp_depth_buffer) + ClearDepthBuffer(); + else if (m_write_mask_as_depth) + UpdateDepthBufferFromMaskBit(); + } if (g_settings.gpu_downsample_mode != old_settings.gpu_downsample_mode || (g_settings.gpu_downsample_mode == GPUDownsampleMode::Box && @@ -536,6 +556,37 @@ void GPU_HW::CheckSettings() m_allow_sprite_mode = ShouldAllowSpriteMode(m_resolution_scale, m_texture_filtering, m_sprite_texture_filtering); } + if (g_settings.IsUsingAccurateBlending() && !m_supports_framebuffer_fetch && !features.feedback_loops && + !features.raster_order_views) + { + // m_allow_shader_blend/m_prefer_shader_blend will be cleared in pipeline compile. + Host::AddIconOSDMessage( + "AccurateBlendingUnsupported", ICON_FA_PAINT_BRUSH, + TRANSLATE_STR("GPU_HW", "Accurate blending is not supported by your current GPU.\nIt requires framebuffer fetch, " + "feedback loops, or rasterizer order views."), + Host::OSD_WARNING_DURATION); + } + else if (IsUsingMultisampling() && !features.framebuffer_fetch && + ((g_settings.IsUsingAccurateBlending() && features.raster_order_views) || + (m_pgxp_depth_buffer && features.raster_order_views && !features.feedback_loops))) + { + Host::AddIconOSDMessage( + "AccurateBlendingUnsupported", ICON_FA_PAINT_BRUSH, + TRANSLATE_STR("GPU_HW", "Multisample anti-aliasing is not supported when using ROV blending."), + Host::OSD_WARNING_DURATION); + m_multisamples = 1; + } + + if (m_pgxp_depth_buffer && !features.feedback_loops && !features.framebuffer_fetch && !features.raster_order_views) + { + Host::AddIconOSDMessage( + "AccurateBlendingUnsupported", ICON_FA_PAINT_BRUSH, + TRANSLATE_STR("GPU_HW", "PGXP depth buffer is not supported by your current GPU or renderer.\nIt requires " + "framebuffer fetch, feedback loops, or rasterizer order views."), + Host::OSD_WARNING_DURATION); + m_pgxp_depth_buffer = false; + } + if (!features.noperspective_interpolation && !ShouldDisableColorPerspective()) WARNING_LOG("Disable color perspective not supported, but should be used."); @@ -734,16 +785,11 @@ void GPU_HW::PrintSettingsToLog() INFO_LOG("Separate sprite shaders: {}", m_allow_sprite_mode ? "YES" : "NO"); } -bool GPU_HW::NeedsDepthBuffer() const -{ - // PGXP depth, or no fbfetch, which means we need depth for the mask bit. - return (m_pgxp_depth_buffer || !m_supports_framebuffer_fetch); -} - GPUTexture::Format GPU_HW::GetDepthBufferFormat() const { // Use 32-bit depth for PGXP depth buffer, otherwise 16-bit for mask bit. - return m_pgxp_depth_buffer ? VRAM_DS_DEPTH_FORMAT : VRAM_DS_FORMAT; + return m_pgxp_depth_buffer ? (m_use_rov_for_shader_blend ? VRAM_DS_COLOR_FORMAT : VRAM_DS_DEPTH_FORMAT) : + VRAM_DS_FORMAT; } bool GPU_HW::CreateBuffers() @@ -754,22 +800,25 @@ bool GPU_HW::CreateBuffers() const u32 texture_width = VRAM_WIDTH * m_resolution_scale; const u32 texture_height = VRAM_HEIGHT * m_resolution_scale; const u8 samples = static_cast(m_multisamples); - const bool needs_depth_buffer = NeedsDepthBuffer(); - DEV_LOG("Depth buffer is {}needed in {}", needs_depth_buffer ? "" : "NOT ", - GPUTexture::GetFormatName(GetDepthBufferFormat())); + const bool needs_depth_buffer = m_write_mask_as_depth || m_pgxp_depth_buffer; // Needed for Metal resolve. const GPUTexture::Type read_texture_type = (g_gpu_device->GetRenderAPI() == RenderAPI::Metal && m_multisamples > 1) ? GPUTexture::Type::RWTexture : GPUTexture::Type::Texture; + const GPUTexture::Type vram_texture_type = + m_use_rov_for_shader_blend ? GPUTexture::Type::RWTexture : GPUTexture::Type::RenderTarget; + const GPUTexture::Type depth_texture_type = + m_use_rov_for_shader_blend ? GPUTexture::Type::RWTexture : GPUTexture::Type::DepthStencil; - if (!(m_vram_texture = g_gpu_device->FetchTexture(texture_width, texture_height, 1, 1, samples, - GPUTexture::Type::RenderTarget, VRAM_RT_FORMAT)) || + if (!(m_vram_texture = g_gpu_device->FetchTexture(texture_width, texture_height, 1, 1, samples, vram_texture_type, + VRAM_RT_FORMAT)) || (needs_depth_buffer && - (!(m_vram_depth_texture = g_gpu_device->FetchTexture(texture_width, texture_height, 1, 1, samples, - GPUTexture::Type::DepthStencil, GetDepthBufferFormat())) || - !(m_vram_depth_copy_texture = g_gpu_device->FetchTexture( - texture_width, texture_height, 1, 1, samples, GPUTexture::Type::RenderTarget, VRAM_DS_EXTRACT_FORMAT)))) || + !(m_vram_depth_texture = g_gpu_device->FetchTexture(texture_width, texture_height, 1, 1, samples, + depth_texture_type, GetDepthBufferFormat()))) || + (m_pgxp_depth_buffer && !(m_vram_depth_copy_texture = + g_gpu_device->FetchTexture(texture_width, texture_height, 1, 1, samples, + GPUTexture::Type::RenderTarget, VRAM_DS_COLOR_FORMAT))) || !(m_vram_read_texture = g_gpu_device->FetchTexture(texture_width, texture_height, 1, 1, 1, read_texture_type, VRAM_RT_FORMAT)) || !(m_vram_readback_texture = g_gpu_device->FetchTexture(VRAM_WIDTH / 2, VRAM_HEIGHT, 1, 1, 1, @@ -826,15 +875,43 @@ void GPU_HW::ClearFramebuffer() { g_gpu_device->ClearRenderTarget(m_vram_texture.get(), 0); if (m_vram_depth_texture) - g_gpu_device->ClearDepth(m_vram_depth_texture.get(), m_pgxp_depth_buffer ? 1.0f : 0.0f); + { + if (m_use_rov_for_shader_blend) + g_gpu_device->ClearRenderTarget(m_vram_depth_texture.get(), 0xFF); + else + g_gpu_device->ClearDepth(m_vram_depth_texture.get(), m_pgxp_depth_buffer ? 1.0f : 0.0f); + } ClearVRAMDirtyRectangle(); m_last_depth_z = 1.0f; } void GPU_HW::SetVRAMRenderTarget() { - g_gpu_device->SetRenderTarget(m_vram_texture.get(), m_vram_depth_texture.get(), - m_allow_shader_blend ? GPUPipeline::ColorFeedbackLoop : GPUPipeline::NoRenderPassFlags); + if (m_use_rov_for_shader_blend) + { + GPUTexture* rts[2] = {m_vram_texture.get(), m_vram_depth_texture.get()}; + const u32 num_rts = m_pgxp_depth_buffer ? 2 : 1; + g_gpu_device->SetRenderTargets( + rts, num_rts, nullptr, m_rov_active ? GPUPipeline::BindRenderTargetsAsImages : GPUPipeline::NoRenderPassFlags); + } + else + { + g_gpu_device->SetRenderTarget( + m_vram_texture.get(), m_vram_depth_texture.get(), + ((m_allow_shader_blend && !m_supports_framebuffer_fetch && !m_use_rov_for_shader_blend) ? + GPUPipeline::ColorFeedbackLoop : + GPUPipeline::NoRenderPassFlags)); + } +} + +void GPU_HW::DeactivateROV() +{ + if (!m_rov_active) + return; + + GL_INS("Deactivating ROV."); + m_rov_active = false; + SetVRAMRenderTarget(); } void GPU_HW::DestroyBuffers() @@ -863,42 +940,73 @@ bool GPU_HW::CompilePipelines(Error* error) const bool per_sample_shading = g_settings.gpu_per_sample_shading && features.per_sample_shading; const bool force_round_texcoords = (m_resolution_scale > 1 && m_texture_filtering == GPUTextureFilter::Nearest && g_settings.gpu_force_round_texcoords); - const bool needs_depth_buffer = NeedsDepthBuffer(); - const bool write_mask_as_depth = (!m_pgxp_depth_buffer && needs_depth_buffer); + + // Determine when to use shader blending. + // FBFetch is free, we need it for filtering without DSB, or when accurate blending is forced. + // But, don't bother with accurate blending if true colour is on. The result will be the same. + // Prefer ROV over barriers/feedback loops without FBFetch, it'll be faster. + // Abuse the depth buffer for the mask bit when it's free (FBFetch), or PGXP depth buffering is enabled. + m_allow_shader_blend = (features.feedback_loops || features.raster_order_views || features.framebuffer_fetch) && + (m_pgxp_depth_buffer || g_settings.gpu_accurate_blending || + (!m_supports_dual_source_blend && (IsBlendedTextureFiltering(m_texture_filtering) || + IsBlendedTextureFiltering(m_sprite_texture_filtering)))); + m_prefer_shader_blend = (m_allow_shader_blend && g_settings.gpu_accurate_blending && !g_settings.gpu_true_color); + m_use_rov_for_shader_blend = (m_allow_shader_blend && !features.framebuffer_fetch && features.raster_order_views && + (m_prefer_shader_blend || !features.feedback_loops)); + m_write_mask_as_depth = (!m_pgxp_depth_buffer && !features.framebuffer_fetch && !m_prefer_shader_blend); + + // ROV doesn't support MSAA in DirectX. + Assert(!m_use_rov_for_shader_blend || !IsUsingMultisampling()); + + const bool needs_depth_buffer = (m_pgxp_depth_buffer || m_write_mask_as_depth); + const bool needs_rov_depth = (m_pgxp_depth_buffer && m_use_rov_for_shader_blend); + const bool needs_real_depth_buffer = (needs_depth_buffer && !needs_rov_depth); + const bool needs_feedback_loop = (m_allow_shader_blend && features.feedback_loops && !m_use_rov_for_shader_blend); const GPUTexture::Format depth_buffer_format = needs_depth_buffer ? GetDepthBufferFormat() : GPUTexture::Format::Unknown; - m_allow_shader_blend = (features.feedback_loops && (m_pgxp_depth_buffer || !needs_depth_buffer)); + // Logging in case something goes wrong. + INFO_LOG("Shader blending allowed: {}", m_allow_shader_blend ? "YES" : "NO"); + INFO_LOG("Shader blending preferred: {}", m_prefer_shader_blend ? "YES" : "NO"); + INFO_LOG("Use ROV for shader blending: {}", m_use_rov_for_shader_blend ? "YES" : "NO"); + INFO_LOG("Write mask as depth: {}", m_write_mask_as_depth ? "YES" : "NO"); + INFO_LOG("Depth buffer is {}needed in {}.", needs_depth_buffer ? "" : "NOT ", + GPUTexture::GetFormatName(GetDepthBufferFormat())); + INFO_LOG("Using ROV depth: {}", needs_rov_depth ? "YES" : "NO"); + INFO_LOG("Using real depth buffer: {}", needs_real_depth_buffer ? "YES" : "NO"); + INFO_LOG("Using feedback loops: {}", needs_feedback_loop ? "YES" : "NO"); + + // Start generating shaders. GPU_HW_ShaderGen shadergen(g_gpu_device->GetRenderAPI(), m_resolution_scale, m_multisamples, per_sample_shading, m_true_color, (m_resolution_scale > 1 && g_settings.gpu_scaled_dithering), - write_mask_as_depth, ShouldDisableColorPerspective(), m_supports_dual_source_blend, + m_write_mask_as_depth, ShouldDisableColorPerspective(), m_supports_dual_source_blend, m_supports_framebuffer_fetch, g_settings.gpu_true_color && g_settings.gpu_debanding); const u32 active_texture_modes = m_allow_sprite_mode ? NUM_TEXTURE_MODES : (NUM_TEXTURE_MODES - (NUM_TEXTURE_MODES - static_cast(BatchTextureMode::SpriteStart))); const u32 total_pipelines = - (m_allow_sprite_mode ? 5 : 3) + // vertex shaders - (active_texture_modes * 5 * 9 * 2 * 2 * 2) + // fragment shaders - ((m_pgxp_depth_buffer ? 2 : 1) * 5 * 5 * active_texture_modes * 2 * 2 * 2) + // batch pipelines - ((m_wireframe_mode != GPUWireframeMode::Disabled) ? 1 : 0) + // wireframe - 1 + // fullscreen quad VS - (2 * 2) + // vram fill - (1 + BoolToUInt32(write_mask_as_depth)) + // vram copy - (1 + BoolToUInt32(write_mask_as_depth)) + // vram write - 1 + // vram write replacement - (needs_depth_buffer ? 1 : 0) + // mask -> depth - 1 + // vram read - 2 + // extract/display - ((m_downsample_mode != GPUDownsampleMode::Disabled) ? 1 : 0); // downsample + (m_allow_sprite_mode ? 5 : 3) + // vertex shaders + (active_texture_modes * 5 * 9 * 2 * 2 * 2 * (1 + BoolToUInt32(needs_rov_depth))) + // fragment shaders + ((m_pgxp_depth_buffer ? 2 : 1) * 5 * 5 * active_texture_modes * 2 * 2 * 2) + // batch pipelines + ((m_wireframe_mode != GPUWireframeMode::Disabled) ? 1 : 0) + // wireframe + 1 + // fullscreen quad VS + (2 * 2) + // vram fill + (1 + BoolToUInt32(m_write_mask_as_depth)) + // vram copy + (1 + BoolToUInt32(m_write_mask_as_depth)) + // vram write + 1 + // vram write replacement + (m_write_mask_as_depth ? 1 : 0) + // mask -> depth + 1 + // vram read + 2 + // extract/display + ((m_downsample_mode != GPUDownsampleMode::Disabled) ? 1 : 0); // downsample ShaderCompileProgressTracker progress("Compiling Pipelines", total_pipelines); // vertex shaders - [textured/palette/sprite] - // fragment shaders - [render_mode][transparency_mode][texture_mode][check_mask][dithering][interlacing] + // fragment shaders - [depth_test][render_mode][transparency_mode][texture_mode][check_mask][dithering][interlacing] static constexpr auto destroy_shader = [](std::unique_ptr& s) { s.reset(); }; DimensionalArray, 2, 2, 2> batch_vertex_shaders{}; - DimensionalArray, 2, 2, 2, NUM_TEXTURE_MODES, 5, 5> batch_fragment_shaders{}; + DimensionalArray, 2, 2, 2, NUM_TEXTURE_MODES, 5, 5, 2> batch_fragment_shaders{}; ScopedGuard batch_shader_guard([&batch_vertex_shaders, &batch_fragment_shaders]() { batch_vertex_shaders.enumerate(destroy_shader); batch_fragment_shaders.enumerate(destroy_shader); @@ -924,56 +1032,71 @@ bool GPU_HW::CompilePipelines(Error* error) } } - for (u8 render_mode = 0; render_mode < 5; render_mode++) + for (u8 depth_test = 0; depth_test < 2; depth_test++) { - for (u8 transparency_mode = 0; transparency_mode < 5; transparency_mode++) + if (depth_test && !needs_rov_depth) { - if ( - // Can't generate shader blending. - ((render_mode == static_cast(BatchRenderMode::ShaderBlend) && !features.feedback_loops) || - (render_mode != static_cast(BatchRenderMode::ShaderBlend) && - transparency_mode != static_cast(GPUTransparencyMode::Disabled))) || - // Don't need multipass shaders. - (m_supports_framebuffer_fetch && (render_mode == static_cast(BatchRenderMode::OnlyOpaque) || - render_mode == static_cast(BatchRenderMode::OnlyTransparent)))) - { - progress.Increment(active_texture_modes * 2 * 2 * 2); - continue; - } + // Don't need to do depth testing in the shader. + continue; + } - for (u8 texture_mode = 0; texture_mode < active_texture_modes; texture_mode++) + for (u8 render_mode = 0; render_mode < 5; render_mode++) + { + for (u8 transparency_mode = 0; transparency_mode < 5; transparency_mode++) { - for (u8 check_mask = 0; check_mask < 2; check_mask++) + if ( + // Can't generate shader blending. + ((render_mode == static_cast(BatchRenderMode::ShaderBlend) && !m_allow_shader_blend) || + (render_mode != static_cast(BatchRenderMode::ShaderBlend) && + transparency_mode != static_cast(GPUTransparencyMode::Disabled))) || + // Don't need multipass shaders if we're preferring shader blend or have (free) FBFetch. + ((m_supports_framebuffer_fetch || m_prefer_shader_blend) && + (render_mode == static_cast(BatchRenderMode::OnlyOpaque) || + render_mode == static_cast(BatchRenderMode::OnlyTransparent))) || + // If using ROV depth, we only draw with shader blending. + (needs_rov_depth && render_mode != static_cast(BatchRenderMode::ShaderBlend))) { - if (check_mask && render_mode != static_cast(BatchRenderMode::ShaderBlend)) - { - // mask bit testing is only valid with shader blending. - progress.Increment(2 * 2); - continue; - } + progress.Increment(active_texture_modes * 2 * 2 * 2); + continue; + } - for (u8 dithering = 0; dithering < 2; dithering++) + for (u8 texture_mode = 0; texture_mode < active_texture_modes; texture_mode++) + { + for (u8 check_mask = 0; check_mask < 2; check_mask++) { - for (u8 interlacing = 0; interlacing < 2; interlacing++) + if (check_mask && render_mode != static_cast(BatchRenderMode::ShaderBlend)) { - const bool sprite = (static_cast(texture_mode) >= BatchTextureMode::SpriteStart); - const bool uv_limits = ShouldClampUVs(sprite ? m_sprite_texture_filtering : m_texture_filtering); - const BatchTextureMode shader_texmode = static_cast( - texture_mode - (sprite ? static_cast(BatchTextureMode::SpriteStart) : 0)); - const std::string fs = shadergen.GenerateBatchFragmentShader( - static_cast(render_mode), static_cast(transparency_mode), - shader_texmode, sprite ? m_sprite_texture_filtering : m_texture_filtering, uv_limits, - !sprite && force_round_texcoords, ConvertToBoolUnchecked(dithering), - ConvertToBoolUnchecked(interlacing), ConvertToBoolUnchecked(check_mask)); + // mask bit testing is only valid with shader blending. + progress.Increment(2 * 2); + continue; + } - if (!(batch_fragment_shaders[render_mode][transparency_mode][texture_mode][check_mask][dithering] - [interlacing] = g_gpu_device->CreateShader( - GPUShaderStage::Fragment, shadergen.GetLanguage(), fs, error))) + for (u8 dithering = 0; dithering < 2; dithering++) + { + for (u8 interlacing = 0; interlacing < 2; interlacing++) { - return false; - } + const bool sprite = (static_cast(texture_mode) >= BatchTextureMode::SpriteStart); + const bool uv_limits = ShouldClampUVs(sprite ? m_sprite_texture_filtering : m_texture_filtering); + const BatchTextureMode shader_texmode = static_cast( + texture_mode - (sprite ? static_cast(BatchTextureMode::SpriteStart) : 0)); + const bool use_rov = + (render_mode == static_cast(BatchRenderMode::ShaderBlend) && m_use_rov_for_shader_blend); + const std::string fs = shadergen.GenerateBatchFragmentShader( + static_cast(render_mode), static_cast(transparency_mode), + shader_texmode, sprite ? m_sprite_texture_filtering : m_texture_filtering, uv_limits, + !sprite && force_round_texcoords, ConvertToBoolUnchecked(dithering), + ConvertToBoolUnchecked(interlacing), ConvertToBoolUnchecked(check_mask), use_rov, needs_rov_depth, + (depth_test != 0)); - progress.Increment(); + if (!(batch_fragment_shaders[depth_test][render_mode][transparency_mode][texture_mode][check_mask] + [dithering][interlacing] = g_gpu_device->CreateShader( + GPUShaderStage::Fragment, shadergen.GetLanguage(), fs, error))) + { + return false; + } + + progress.Increment(); + } } } } @@ -1003,10 +1126,8 @@ bool GPU_HW::CompilePipelines(Error* error) plconfig.rasterization = GPUPipeline::RasterizationState::GetNoCullState(); plconfig.primitive = GPUPipeline::Primitive::Triangles; plconfig.geometry_shader = nullptr; - plconfig.SetTargetFormats(VRAM_RT_FORMAT, depth_buffer_format); plconfig.samples = m_multisamples; plconfig.per_sample_shading = per_sample_shading; - plconfig.render_pass_flags = m_allow_shader_blend ? GPUPipeline::ColorFeedbackLoop : GPUPipeline::NoRenderPassFlags; plconfig.depth = GPUPipeline::DepthState::GetNoTestsState(); // [depth_test][transparency_mode][render_mode][texture_mode][dithering][interlacing][check_mask] @@ -1026,8 +1147,11 @@ bool GPU_HW::CompilePipelines(Error* error) // Can't generate shader blending. (render_mode == static_cast(BatchRenderMode::ShaderBlend) && !m_allow_shader_blend) || // Don't need multipass shaders. - (m_supports_framebuffer_fetch && (render_mode == static_cast(BatchRenderMode::OnlyOpaque) || - render_mode == static_cast(BatchRenderMode::OnlyTransparent)))) + ((m_supports_framebuffer_fetch || m_prefer_shader_blend) && + (render_mode == static_cast(BatchRenderMode::OnlyOpaque) || + render_mode == static_cast(BatchRenderMode::OnlyTransparent))) || + // If using ROV depth, we only draw with shader blending. + (needs_rov_depth && render_mode != static_cast(BatchRenderMode::ShaderBlend))) { progress.Increment(9 * 2 * 2 * 2); continue; @@ -1049,12 +1173,12 @@ bool GPU_HW::CompilePipelines(Error* error) static_cast(texture_mode) == BatchTextureMode::SpritePalette8Bit); const bool sprite = (static_cast(texture_mode) >= BatchTextureMode::SpriteStart); const bool uv_limits = ShouldClampUVs(sprite ? m_sprite_texture_filtering : m_texture_filtering); + const bool use_rov = + (render_mode == static_cast(BatchRenderMode::ShaderBlend) && m_use_rov_for_shader_blend); const bool use_shader_blending = - (render_mode == static_cast(BatchRenderMode::ShaderBlend) && - ((textured && - NeedsShaderBlending(static_cast(transparency_mode), (check_mask != 0))) || - check_mask)); - + (use_rov || ((render_mode == static_cast(BatchRenderMode::ShaderBlend) && + NeedsShaderBlending(static_cast(transparency_mode), + static_cast(texture_mode), (check_mask != 0))))); plconfig.input_layout.vertex_attributes = textured ? (uv_limits ? std::span( @@ -1066,14 +1190,14 @@ bool GPU_HW::CompilePipelines(Error* error) plconfig.vertex_shader = batch_vertex_shaders[BoolToUInt8(textured)][BoolToUInt8(palette)][BoolToUInt8(sprite)].get(); plconfig.fragment_shader = - batch_fragment_shaders[render_mode] + batch_fragment_shaders[BoolToUInt8(depth_test && needs_rov_depth)][render_mode] [use_shader_blending ? transparency_mode : static_cast(GPUTransparencyMode::Disabled)] [texture_mode][use_shader_blending ? check_mask : 0][dithering][interlacing] .get(); Assert(plconfig.vertex_shader && plconfig.fragment_shader); - if (needs_depth_buffer) + if (needs_real_depth_buffer) { plconfig.depth.depth_test = m_pgxp_depth_buffer ? @@ -1086,14 +1210,25 @@ bool GPU_HW::CompilePipelines(Error* error) (depth_test && transparency_mode == static_cast(GPUTransparencyMode::Disabled)); } + plconfig.SetTargetFormats(use_rov ? GPUTexture::Format::Unknown : VRAM_RT_FORMAT, + needs_rov_depth ? GPUTexture::Format::Unknown : depth_buffer_format); + plconfig.color_formats[1] = needs_rov_depth ? VRAM_DS_COLOR_FORMAT : GPUTexture::Format::Unknown; + plconfig.render_pass_flags = + use_rov ? GPUPipeline::BindRenderTargetsAsImages : + (needs_feedback_loop ? GPUPipeline::ColorFeedbackLoop : GPUPipeline::NoRenderPassFlags); + plconfig.blend = GPUPipeline::BlendState::GetNoBlendingState(); - if (!use_shader_blending && - ((static_cast(transparency_mode) != GPUTransparencyMode::Disabled && - (static_cast(render_mode) != BatchRenderMode::TransparencyDisabled && - static_cast(render_mode) != BatchRenderMode::OnlyOpaque)) || - (textured && - IsBlendedTextureFiltering(sprite ? m_sprite_texture_filtering : m_texture_filtering)))) + if (use_rov) + { + plconfig.blend.write_mask = 0; + } + else if (!use_shader_blending && + ((static_cast(transparency_mode) != GPUTransparencyMode::Disabled && + (static_cast(render_mode) != BatchRenderMode::TransparencyDisabled && + static_cast(render_mode) != BatchRenderMode::OnlyOpaque)) || + (textured && + IsBlendedTextureFiltering(sprite ? m_sprite_texture_filtering : m_texture_filtering)))) { plconfig.blend.enable = true; plconfig.blend.src_alpha_blend = GPUPipeline::BlendFunc::One; @@ -1151,6 +1286,9 @@ bool GPU_HW::CompilePipelines(Error* error) } } + plconfig.SetTargetFormats(VRAM_RT_FORMAT, needs_rov_depth ? GPUTexture::Format::Unknown : depth_buffer_format); + plconfig.render_pass_flags = needs_feedback_loop ? GPUPipeline::ColorFeedbackLoop : GPUPipeline::NoRenderPassFlags; + if (m_wireframe_mode != GPUWireframeMode::Disabled) { std::unique_ptr gs = g_gpu_device->CreateShader(GPUShaderStage::Geometry, shadergen.GetLanguage(), @@ -1203,6 +1341,7 @@ bool GPU_HW::CompilePipelines(Error* error) plconfig.per_sample_shading = false; plconfig.blend = GPUPipeline::BlendState::GetNoBlendingState(); plconfig.vertex_shader = fullscreen_quad_vertex_shader.get(); + plconfig.color_formats[1] = needs_rov_depth ? VRAM_DS_COLOR_FORMAT : GPUTexture::Format::Unknown; // VRAM fill for (u8 wrapped = 0; wrapped < 2; wrapped++) @@ -1217,8 +1356,8 @@ bool GPU_HW::CompilePipelines(Error* error) return false; plconfig.fragment_shader = fs.get(); - plconfig.depth = needs_depth_buffer ? GPUPipeline::DepthState::GetAlwaysWriteState() : - GPUPipeline::DepthState::GetNoTestsState(); + plconfig.depth = needs_real_depth_buffer ? GPUPipeline::DepthState::GetAlwaysWriteState() : + GPUPipeline::DepthState::GetNoTestsState(); if (!(m_vram_fill_pipelines[wrapped][interlaced] = g_gpu_device->CreatePipeline(plconfig, error))) return false; @@ -1237,10 +1376,10 @@ bool GPU_HW::CompilePipelines(Error* error) plconfig.fragment_shader = fs.get(); for (u8 depth_test = 0; depth_test < 2; depth_test++) { - if (depth_test && !write_mask_as_depth) + if (depth_test && !m_write_mask_as_depth) continue; - plconfig.depth.depth_write = needs_depth_buffer; + plconfig.depth.depth_write = needs_real_depth_buffer; plconfig.depth.depth_test = (depth_test != 0) ? GPUPipeline::DepthFunc::GreaterEqual : GPUPipeline::DepthFunc::Always; @@ -1268,10 +1407,10 @@ bool GPU_HW::CompilePipelines(Error* error) plconfig.fragment_shader = fs.get(); for (u8 depth_test = 0; depth_test < 2; depth_test++) { - if (depth_test && !write_mask_as_depth) + if (depth_test && !m_write_mask_as_depth) continue; - plconfig.depth.depth_write = needs_depth_buffer; + plconfig.depth.depth_write = needs_real_depth_buffer; plconfig.depth.depth_test = (depth_test != 0) ? GPUPipeline::DepthFunc::GreaterEqual : GPUPipeline::DepthFunc::Always; @@ -1301,10 +1440,8 @@ bool GPU_HW::CompilePipelines(Error* error) progress.Increment(); } - plconfig.render_pass_flags = GPUPipeline::NoRenderPassFlags; - // VRAM update depth - if (needs_depth_buffer) + if (m_write_mask_as_depth) { std::unique_ptr fs = g_gpu_device->CreateShader( GPUShaderStage::Fragment, shadergen.GetLanguage(), shadergen.GenerateVRAMUpdateDepthFragmentShader(), error); @@ -1325,6 +1462,7 @@ bool GPU_HW::CompilePipelines(Error* error) } plconfig.SetTargetFormats(VRAM_RT_FORMAT); + plconfig.render_pass_flags = GPUPipeline::NoRenderPassFlags; plconfig.depth = GPUPipeline::DepthState::GetNoTestsState(); plconfig.blend = GPUPipeline::BlendState::GetNoBlendingState(); plconfig.samples = 1; @@ -1366,7 +1504,7 @@ bool GPU_HW::CompilePipelines(Error* error) plconfig.layout = depth_extract ? GPUPipeline::Layout::MultiTextureAndPushConstants : GPUPipeline::Layout::SingleTextureAndPushConstants; - plconfig.color_formats[1] = depth_extract ? VRAM_DS_EXTRACT_FORMAT : GPUTexture::Format::Unknown; + plconfig.color_formats[1] = depth_extract ? VRAM_DS_COLOR_FORMAT : GPUTexture::Format::Unknown; if (!(m_vram_extract_pipeline[shader] = g_gpu_device->CreatePipeline(plconfig, error))) return false; @@ -1385,7 +1523,7 @@ bool GPU_HW::CompilePipelines(Error* error) return false; plconfig.fragment_shader = fs.get(); - plconfig.SetTargetFormats(VRAM_DS_EXTRACT_FORMAT); + plconfig.SetTargetFormats(VRAM_DS_COLOR_FORMAT); if (!(m_copy_depth_pipeline = g_gpu_device->CreatePipeline(plconfig, error))) return false; } @@ -1588,8 +1726,7 @@ void GPU_HW::UpdateVRAMReadTexture(bool drawn, bool written) void GPU_HW::UpdateDepthBufferFromMaskBit() { - if (m_pgxp_depth_buffer || !m_vram_depth_texture) - return; + DebugAssert(!m_pgxp_depth_buffer && m_vram_depth_texture && m_write_mask_as_depth); // Viewport should already be set full, only need to fudge the scissor. g_gpu_device->SetScissor(m_vram_texture->GetRect()); @@ -1639,7 +1776,10 @@ void GPU_HW::ClearDepthBuffer() { GL_SCOPE("GPU_HW::ClearDepthBuffer()"); DebugAssert(m_pgxp_depth_buffer); - g_gpu_device->ClearDepth(m_vram_depth_texture.get(), 1.0f); + if (m_use_rov_for_shader_blend) + g_gpu_device->ClearRenderTarget(m_vram_depth_texture.get(), 0xFF); + else + g_gpu_device->ClearDepth(m_vram_depth_texture.get(), 1.0f); m_last_depth_z = 1.0f; } @@ -1690,10 +1830,41 @@ ALWAYS_INLINE_RELEASE void GPU_HW::DrawBatchVertices(BatchRenderMode render_mode render_mode)][texture_mode][BoolToUInt8(m_batch.dithering)][BoolToUInt8(m_batch.interlacing)][check_mask] .get()); - if (render_mode != BatchRenderMode::ShaderBlend || m_supports_framebuffer_fetch) - g_gpu_device->DrawIndexed(num_indices, base_index, base_vertex); + GL_INS_FMT("Texture mode: {}", s_batch_texture_modes[texture_mode]); + GL_INS_FMT("Transparency mode: {}", s_transparency_modes[static_cast(m_batch.transparency_mode)]); + GL_INS_FMT("Render mode: {}", s_batch_render_modes[static_cast(render_mode)]); + GL_INS_FMT("Mask bit test: {}", m_batch.check_mask_before_draw); + GL_INS_FMT("Interlacing: {}", m_batch.check_mask_before_draw); + + // Activating ROV? + if (render_mode == BatchRenderMode::ShaderBlend) + { + if (m_use_rov_for_shader_blend) + { + if (!m_rov_active) + { + GL_INS("Activating ROV."); + m_rov_active = true; + SetVRAMRenderTarget(); + } + + g_gpu_device->DrawIndexed(num_indices, base_index, base_vertex); + } + else if (m_supports_framebuffer_fetch) + { + // No barriers needed for FBFetch. + g_gpu_device->DrawIndexed(num_indices, base_index, base_vertex); + } + else + { + // Barriers. Yucky. + g_gpu_device->DrawIndexedWithBarrier(num_indices, base_index, base_vertex, GPUDevice::DrawBarrier::Full); + } + } else - g_gpu_device->DrawIndexedWithBarrier(num_indices, base_index, base_vertex, GPUDevice::DrawBarrier::Full); + { + g_gpu_device->DrawIndexed(num_indices, base_index, base_vertex); + } } ALWAYS_INLINE_RELEASE void GPU_HW::HandleFlippedQuadTextureCoordinates(BatchVertex* vertices) @@ -2733,12 +2904,14 @@ ALWAYS_INLINE_RELEASE bool GPU_HW::NeedsTwoPassRendering() const (!m_supports_dual_source_blend && m_batch.transparency_mode != GPUTransparencyMode::Disabled))); } -ALWAYS_INLINE_RELEASE bool GPU_HW::NeedsShaderBlending(GPUTransparencyMode transparency, bool check_mask) const +ALWAYS_INLINE_RELEASE bool GPU_HW::NeedsShaderBlending(GPUTransparencyMode transparency, BatchTextureMode texture_mode, + bool check_mask) const { return (m_allow_shader_blend && - ((check_mask && (m_pgxp_depth_buffer || !m_vram_depth_texture)) || - transparency == GPUTransparencyMode::BackgroundMinusForeground || - (!m_supports_dual_source_blend && + ((check_mask && !m_write_mask_as_depth) || + (transparency != GPUTransparencyMode::Disabled && m_prefer_shader_blend) || + (transparency == GPUTransparencyMode::BackgroundMinusForeground) || + (!m_supports_dual_source_blend && texture_mode != BatchTextureMode::Disabled && (transparency != GPUTransparencyMode::Disabled || IsBlendedTextureFiltering(m_texture_filtering) || IsBlendedTextureFiltering(m_sprite_texture_filtering))))); } @@ -2799,7 +2972,7 @@ void GPU_HW::ResetBatchVertexDepth() { DEV_LOG("Resetting batch vertex depth"); - if (m_vram_depth_texture && !m_pgxp_depth_buffer) + if (m_write_mask_as_depth) UpdateDepthBufferFromMaskBit(); m_current_depth = 1; @@ -2874,6 +3047,7 @@ void GPU_HW::FillDrawCommand(GPUBackendDrawCommand* cmd, GPURenderCommand rc) co void GPU_HW::FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color) { GL_SCOPE_FMT("FillVRAM({},{} => {},{} ({}x{}) with 0x{:08X}", x, y, x + width, y + height, width, height, color); + DeactivateROV(); if (m_sw_renderer) { @@ -3027,6 +3201,8 @@ void GPU_HW::UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data, b void GPU_HW::UpdateVRAMOnGPU(u32 x, u32 y, u32 width, u32 height, const void* data, u32 data_pitch, bool set_mask, bool check_mask, const GSVector4i bounds) { + DeactivateROV(); + std::unique_ptr upload_texture; u32 map_index; @@ -3070,8 +3246,7 @@ void GPU_HW::UpdateVRAMOnGPU(u32 x, u32 y, u32 width, u32 height, const void* da // the viewport should already be set to the full vram, so just adjust the scissor const GSVector4i scaled_bounds = bounds.mul32l(GSVector4i(m_resolution_scale)); g_gpu_device->SetScissor(scaled_bounds.left, scaled_bounds.top, scaled_bounds.width(), scaled_bounds.height()); - g_gpu_device->SetPipeline( - m_vram_write_pipelines[BoolToUInt8(check_mask && !m_pgxp_depth_buffer && NeedsDepthBuffer())].get()); + g_gpu_device->SetPipeline(m_vram_write_pipelines[BoolToUInt8(check_mask && m_write_mask_as_depth)].get()); g_gpu_device->PushUniformBuffer(&uniforms, sizeof(uniforms)); if (upload_texture) { @@ -3121,6 +3296,8 @@ void GPU_HW::CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 UpdateVRAMReadTexture(intersect_with_draw, intersect_with_write); AddUnclampedDrawnRectangle(dst_bounds); + DeactivateROV(); + struct VRAMCopyUBOData { u32 u_src_x; @@ -3149,8 +3326,7 @@ void GPU_HW::CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 const GSVector4i dst_bounds_scaled = dst_bounds.mul32l(GSVector4i(m_resolution_scale)); g_gpu_device->SetViewportAndScissor(dst_bounds_scaled); g_gpu_device->SetPipeline( - m_vram_copy_pipelines[BoolToUInt8(m_GPUSTAT.check_mask_before_draw && !m_pgxp_depth_buffer && NeedsDepthBuffer())] - .get()); + m_vram_copy_pipelines[BoolToUInt8(m_GPUSTAT.check_mask_before_draw && m_write_mask_as_depth)].get()); g_gpu_device->PushUniformBuffer(&uniforms, sizeof(uniforms)); g_gpu_device->Draw(3, 0); RestoreDeviceContext(); @@ -3285,8 +3461,8 @@ void GPU_HW::DispatchRenderCommand() { // transparency mode change const bool check_mask_before_draw = m_GPUSTAT.check_mask_before_draw; - if (transparency_mode != GPUTransparencyMode::Disabled && - (texture_mode == BatchTextureMode::Disabled || !NeedsShaderBlending(transparency_mode, check_mask_before_draw))) + if (transparency_mode != GPUTransparencyMode::Disabled && !m_rov_active && !m_prefer_shader_blend && + !NeedsShaderBlending(transparency_mode, texture_mode, check_mask_before_draw)) { static constexpr float transparent_alpha[4][2] = {{0.5f, 0.5f}, {1.0f, 1.0f}, {1.0f, 1.0f}, {0.25f, 1.0f}}; @@ -3399,7 +3575,8 @@ void GPU_HW::FlushRender() if (m_wireframe_mode != GPUWireframeMode::OnlyWireframe) { - if (NeedsShaderBlending(m_batch.transparency_mode, m_batch.check_mask_before_draw)) + if (NeedsShaderBlending(m_batch.transparency_mode, m_batch.texture_mode, m_batch.check_mask_before_draw) || + m_rov_active || (m_use_rov_for_shader_blend && m_pgxp_depth_buffer)) { DrawBatchVertices(BatchRenderMode::ShaderBlend, index_count, base_index, base_vertex); } @@ -3416,6 +3593,8 @@ void GPU_HW::FlushRender() if (m_wireframe_mode != GPUWireframeMode::Disabled) { + // This'll be less than ideal, but wireframe is for debugging, so take the perf hit. + DeactivateROV(); g_gpu_device->SetPipeline(m_wireframe_pipeline.get()); g_gpu_device->DrawIndexed(index_count, base_index, base_vertex); } @@ -3424,6 +3603,7 @@ void GPU_HW::FlushRender() void GPU_HW::UpdateDisplay() { FlushRender(); + DeactivateROV(); GL_SCOPE("UpdateDisplay()"); @@ -3506,7 +3686,7 @@ void GPU_HW::UpdateDisplay() ((m_vram_extract_depth_texture && m_vram_extract_depth_texture->GetWidth() == scaled_display_width && m_vram_extract_depth_texture->GetHeight() == scaled_display_height) || !g_gpu_device->ResizeTexture(&m_vram_extract_depth_texture, scaled_display_width, scaled_display_height, - GPUTexture::Type::RenderTarget, VRAM_DS_EXTRACT_FORMAT))) + GPUTexture::Type::RenderTarget, VRAM_DS_COLOR_FORMAT))) { depth_source->MakeReadyForSampling(); g_gpu_device->InvalidateRenderTarget(m_vram_extract_depth_texture.get()); diff --git a/src/core/gpu_hw.h b/src/core/gpu_hw.h index dd0a81863..7eccdd751 100644 --- a/src/core/gpu_hw.h +++ b/src/core/gpu_hw.h @@ -144,7 +144,6 @@ private: std::numeric_limits::min()); /// Returns true if a depth buffer should be created. - bool NeedsDepthBuffer() const; GPUTexture::Format GetDepthBufferFormat() const; bool CreateBuffers(); @@ -165,6 +164,7 @@ private: void ClearDepthBuffer(); void SetScissor(); void SetVRAMRenderTarget(); + void DeactivateROV(); void MapGPUBuffer(u32 required_vertices, u32 required_indices); void UnmapGPUBuffer(u32 used_vertices, u32 used_indices); void DrawBatchVertices(BatchRenderMode render_mode, u32 num_indices, u32 base_index, u32 base_vertex); @@ -197,7 +197,7 @@ private: bool NeedsTwoPassRendering() const; /// Returns true if the draw is going to use shader blending/framebuffer fetch. - bool NeedsShaderBlending(GPUTransparencyMode transparency, bool check_mask) const; + bool NeedsShaderBlending(GPUTransparencyMode transparency, BatchTextureMode texture, bool check_mask) const; void FillBackendCommandParameters(GPUBackendCommand* cmd) const; void FillDrawCommand(GPUBackendDrawCommand* cmd, GPURenderCommand rc) const; @@ -281,8 +281,12 @@ private: bool m_compute_uv_range : 1 = false; bool m_allow_sprite_mode : 1 = false; bool m_allow_shader_blend : 1 = false; + bool m_prefer_shader_blend : 1 = false; + bool m_use_rov_for_shader_blend : 1 = false; + bool m_write_mask_as_depth : 1 = false; bool m_depth_was_copied : 1 = false; bool m_texture_window_active : 1 = false; + bool m_rov_active : 1 = false; u8 m_texpage_dirty = 0; diff --git a/src/core/gpu_hw_shadergen.cpp b/src/core/gpu_hw_shadergen.cpp index d67b3d1b2..ab57a5d10 100644 --- a/src/core/gpu_hw_shadergen.cpp +++ b/src/core/gpu_hw_shadergen.cpp @@ -77,7 +77,8 @@ std::string GPU_HW_ShaderGen::GenerateBatchVertexShader(bool textured, bool pale { DeclareVertexEntryPoint( ss, {"float4 a_pos", "float4 a_col0", "uint a_texcoord", "uint a_texpage", "float4 a_uv_limits"}, 1, 1, - {{"nointerpolation", palette ? "uint4 v_texpage" : "uint2 v_texpage"}, {"nointerpolation", "float4 v_uv_limits"}}, + {{"nointerpolation", palette ? "uint4 v_texpage" : "uint2 v_texpage"}, + {"nointerpolation", "float4 v_uv_limits"}}, false, "", UsingMSAA(), UsingPerSampleShading(), m_disable_color_perspective); } else @@ -647,28 +648,26 @@ void FilteredSampleFromVRAM(TEXPAGE_VALUE texpage, float2 coords, float4 uv_limi } } -std::string GPU_HW_ShaderGen::GenerateBatchFragmentShader(GPU_HW::BatchRenderMode render_mode, - GPUTransparencyMode transparency, - GPU_HW::BatchTextureMode texture_mode, - GPUTextureFilter texture_filtering, bool uv_limits, - bool force_round_texcoords, bool dithering, bool interlacing, - bool check_mask) +std::string GPU_HW_ShaderGen::GenerateBatchFragmentShader( + GPU_HW::BatchRenderMode render_mode, GPUTransparencyMode transparency, GPU_HW::BatchTextureMode texture_mode, + GPUTextureFilter texture_filtering, bool uv_limits, bool force_round_texcoords, bool dithering, bool interlacing, + bool check_mask, bool use_rov, bool use_rov_depth, bool rov_depth_test) { // TODO: don't write depth for shader blend DebugAssert(transparency == GPUTransparencyMode::Disabled || render_mode == GPU_HW::BatchRenderMode::ShaderBlend); + DebugAssert(!rov_depth_test || (use_rov && use_rov_depth)); const bool textured = (texture_mode != GPU_HW::BatchTextureMode::Disabled); const bool palette = (texture_mode == GPU_HW::BatchTextureMode::Palette4Bit || texture_mode == GPU_HW::BatchTextureMode::Palette8Bit); - const bool shader_blending = (render_mode == GPU_HW::BatchRenderMode::ShaderBlend && - (transparency != GPUTransparencyMode::Disabled || check_mask)); - const bool use_dual_source = (!shader_blending && m_supports_dual_source_blend && + const bool shader_blending = (render_mode == GPU_HW::BatchRenderMode::ShaderBlend); + const bool use_dual_source = (!shader_blending && !use_rov && m_supports_dual_source_blend && ((render_mode != GPU_HW::BatchRenderMode::TransparencyDisabled && render_mode != GPU_HW::BatchRenderMode::OnlyOpaque) || texture_filtering != GPUTextureFilter::Nearest)); std::stringstream ss; - WriteHeader(ss); + WriteHeader(ss, use_rov); DefineMacro(ss, "TRANSPARENCY", render_mode != GPU_HW::BatchRenderMode::TransparencyDisabled); DefineMacro(ss, "TRANSPARENCY_ONLY_OPAQUE", render_mode == GPU_HW::BatchRenderMode::OnlyOpaque); DefineMacro(ss, "TRANSPARENCY_ONLY_TRANSPARENT", render_mode == GPU_HW::BatchRenderMode::OnlyTransparent); @@ -687,6 +686,9 @@ std::string GPU_HW_ShaderGen::GenerateBatchFragmentShader(GPU_HW::BatchRenderMod DefineMacro(ss, "TRUE_COLOR", m_true_color); DefineMacro(ss, "TEXTURE_FILTERING", texture_filtering != GPUTextureFilter::Nearest); DefineMacro(ss, "UV_LIMITS", uv_limits); + DefineMacro(ss, "USE_ROV", use_rov); + DefineMacro(ss, "USE_ROV_DEPTH", use_rov_depth); + DefineMacro(ss, "ROV_DEPTH_TEST", rov_depth_test); DefineMacro(ss, "USE_DUAL_SOURCE", use_dual_source); DefineMacro(ss, "WRITE_MASK_AS_DEPTH", m_write_mask_as_depth); DefineMacro(ss, "FORCE_ROUND_TEXCOORDS", force_round_texcoords); @@ -696,6 +698,13 @@ std::string GPU_HW_ShaderGen::GenerateBatchFragmentShader(GPU_HW::BatchRenderMod WriteBatchUniformBuffer(ss); DeclareTexture(ss, "samp0", 0); + if (use_rov) + { + DeclareImage(ss, "rov_color", 0); + if (use_rov_depth) + DeclareImage(ss, "rov_depth", 1, true); + } + if (m_glsl) ss << "CONSTANT int[16] s_dither_values = int[16]( "; else @@ -825,6 +834,7 @@ float3 ApplyDebanding(float2 frag_coord) } )"; + const u32 num_fragment_outputs = use_rov ? 0 : (use_dual_source ? 2 : 1); if (textured) { if (texture_filtering != GPUTextureFilter::Nearest) @@ -835,26 +845,29 @@ float3 ApplyDebanding(float2 frag_coord) DeclareFragmentEntryPoint(ss, 1, 1, {{"nointerpolation", palette ? "uint4 v_texpage" : "uint2 v_texpage"}, {"nointerpolation", "float4 v_uv_limits"}}, - true, use_dual_source ? 2 : 1, use_dual_source, m_write_mask_as_depth, UsingMSAA(), - UsingPerSampleShading(), false, m_disable_color_perspective, shader_blending); + true, num_fragment_outputs, use_dual_source, m_write_mask_as_depth, UsingMSAA(), + UsingPerSampleShading(), false, m_disable_color_perspective, + shader_blending && !use_rov, use_rov); } else { DeclareFragmentEntryPoint(ss, 1, 1, {{"nointerpolation", palette ? "uint4 v_texpage" : "uint2 v_texpage"}}, true, - use_dual_source ? 2 : 1, use_dual_source, m_write_mask_as_depth, UsingMSAA(), - UsingPerSampleShading(), false, m_disable_color_perspective, shader_blending); + num_fragment_outputs, use_dual_source, m_write_mask_as_depth, UsingMSAA(), + UsingPerSampleShading(), false, m_disable_color_perspective, + shader_blending && !use_rov, use_rov); } } else { - DeclareFragmentEntryPoint(ss, 1, 0, {}, true, use_dual_source ? 2 : 1, use_dual_source, m_write_mask_as_depth, + DeclareFragmentEntryPoint(ss, 1, 0, {}, true, num_fragment_outputs, use_dual_source, m_write_mask_as_depth, UsingMSAA(), UsingPerSampleShading(), false, m_disable_color_perspective, - shader_blending); + shader_blending && !use_rov, use_rov); } ss << R"( { uint3 vertcol = uint3(v_col0.rgb * float3(255.0, 255.0, 255.0) + ApplyDebanding(v_pos.xy)); + uint2 fragpos = uint2(v_pos.xy); bool semitransparent; uint3 icolor; @@ -862,7 +875,7 @@ float3 ApplyDebanding(float2 frag_coord) float oalpha; #if INTERLACING - if ((uint(v_pos.y) & 1u) == u_interlaced_displayed_field) + if ((fragpos.y & 1u) == u_interlaced_displayed_field) discard; #endif @@ -891,7 +904,7 @@ float3 ApplyDebanding(float2 frag_coord) icolor = uint3(texcol.rgb * float3(255.0, 255.0, 255.0)) >> 3; icolor = (icolor * vertcol) >> 4; #if DITHERING - icolor = ApplyDithering(uint2(v_pos.xy), icolor); + icolor = ApplyDithering(fragpos, icolor); #else icolor = min(icolor >> 3, uint3(31u, 31u, 31u)); #endif @@ -899,7 +912,7 @@ float3 ApplyDebanding(float2 frag_coord) icolor = uint3(texcol.rgb * float3(255.0, 255.0, 255.0) + ApplyDebanding(v_pos.xy)); icolor = (icolor * vertcol) >> 7; #if DITHERING - icolor = ApplyDithering(uint2(v_pos.xy), icolor); + icolor = ApplyDithering(fragpos, icolor); #else icolor = min(icolor, uint3(255u, 255u, 255u)); #endif @@ -914,7 +927,7 @@ float3 ApplyDebanding(float2 frag_coord) ialpha = 1.0; #if DITHERING - icolor = ApplyDithering(uint2(v_pos.xy), icolor); + icolor = ApplyDithering(fragpos, icolor); #else #if !TRUE_COLOR icolor >>= 3; @@ -925,29 +938,34 @@ float3 ApplyDebanding(float2 frag_coord) oalpha = float(u_set_mask_while_drawing); #endif - // Premultiply alpha so we don't need to use a colour output for it. - float premultiply_alpha = ialpha; - #if TRANSPARENCY && !SHADER_BLENDING - premultiply_alpha = ialpha * (semitransparent ? u_src_alpha_factor : 1.0); - #endif - - float3 color; - #if !TRUE_COLOR - // We want to apply the alpha before the truncation to 16-bit, otherwise we'll be passing a 32-bit precision color - // into the blend unit, which can cause a small amount of error to accumulate. - color = floor(float3(icolor) * premultiply_alpha) / float3(31.0, 31.0, 31.0); - #else - // True color is actually simpler here since we want to preserve the precision. - color = (float3(icolor) * premultiply_alpha) / float3(255.0, 255.0, 255.0); - #endif - #if SHADER_BLENDING - float4 bg_col = LAST_FRAG_COLOR; - float4 fg_col = float4(color, oalpha); + #if USE_ROV + BEGIN_ROV_REGION; + float4 bg_col = ROV_LOAD(rov_color, fragpos); + float4 o_col0; + bool discarded = false; - #if CHECK_MASK_BIT - if (bg_col.a != 0.0) - discard; + #if ROV_DEPTH_TEST + float bg_depth = ROV_LOAD(rov_depth, fragpos).r; + discarded = (v_pos.z > bg_depth); + #endif + #if CHECK_MASK_BIT + discarded = discarded || (bg_col.a != 0.0); + #endif + #else + float4 bg_col = LAST_FRAG_COLOR; + #if CHECK_MASK_BIT + if (bg_col.a != 0.0) + discard; + #endif + #endif + + // Work in normalized space for true colour, matches HW blend. + float4 fg_col = float4(float3(icolor), oalpha); + #if TRUE_COLOR + fg_col.rgb /= 255.0; + #elif TRANSPARENCY // rgb not used in check-mask only + bg_col.rgb = roundEven(bg_col.rgb * 31.0); #endif #if TEXTURE_FILTERING @@ -969,14 +987,87 @@ float3 ApplyDebanding(float2 frag_coord) #else o_col0.rgb = fg_col.rgb; #endif + + // 16-bit truncation. + #if !TRUE_COLOR && TRANSPARENCY + o_col0.rgb = floor(o_col0.rgb); + #endif + #if TRANSPARENCY // If pixel isn't marked as semitransparent, replace with previous colour. o_col0 = semitransparent ? o_col0 : fg_col; #endif - #elif TRANSPARENCY && TEXTURED - // Apply semitransparency. If not a semitransparent texel, destination alpha is ignored. - if (semitransparent) - { + + // Normalize for non-true-color. + #if !TRUE_COLOR + o_col0.rgb /= 31.0; + #endif + + #if USE_ROV + if (!discarded) + { + ROV_STORE(rov_color, fragpos, o_col0); + #if USE_ROV_DEPTH + ROV_STORE(rov_depth, fragpos, float4(v_pos.z, 0.0, 0.0, 0.0)); + #endif + } + END_ROV_REGION; + #endif + #else + // Premultiply alpha so we don't need to use a colour output for it. + float premultiply_alpha = ialpha; + #if TRANSPARENCY + premultiply_alpha = ialpha * (semitransparent ? u_src_alpha_factor : 1.0); + #endif + + float3 color; + #if !TRUE_COLOR + // We want to apply the alpha before the truncation to 16-bit, otherwise we'll be passing a 32-bit precision color + // into the blend unit, which can cause a small amount of error to accumulate. + color = floor(float3(icolor) * premultiply_alpha) / 31.0; + #else + // True color is actually simpler here since we want to preserve the precision. + color = (float3(icolor) * premultiply_alpha) / 255.0; + #endif + + #if TRANSPARENCY && TEXTURED + // Apply semitransparency. If not a semitransparent texel, destination alpha is ignored. + if (semitransparent) + { + #if USE_DUAL_SOURCE + o_col0 = float4(color, oalpha); + o_col1 = float4(0.0, 0.0, 0.0, u_dst_alpha_factor / ialpha); + #else + o_col0 = float4(color, oalpha); + #endif + + #if WRITE_MASK_AS_DEPTH + o_depth = oalpha * v_pos.z; + #endif + + #if TRANSPARENCY_ONLY_OPAQUE + discard; + #endif + } + else + { + #if USE_DUAL_SOURCE + o_col0 = float4(color, oalpha); + o_col1 = float4(0.0, 0.0, 0.0, 1.0 - ialpha); + #else + o_col0 = float4(color, oalpha); + #endif + + #if WRITE_MASK_AS_DEPTH + o_depth = oalpha * v_pos.z; + #endif + + #if TRANSPARENCY_ONLY_TRANSPARENT + discard; + #endif + } + #elif TRANSPARENCY + // We shouldn't be rendering opaque geometry only when untextured, so no need to test/discard here. #if USE_DUAL_SOURCE o_col0 = float4(color, oalpha); o_col1 = float4(0.0, 0.0, 0.0, u_dst_alpha_factor / ialpha); @@ -987,50 +1078,17 @@ float3 ApplyDebanding(float2 frag_coord) #if WRITE_MASK_AS_DEPTH o_depth = oalpha * v_pos.z; #endif + #else + // Non-transparency won't enable blending so we can write the mask here regardless. + o_col0 = float4(color, oalpha); - #if TRANSPARENCY_ONLY_OPAQUE - discard; - #endif - } - else - { #if USE_DUAL_SOURCE - o_col0 = float4(color, oalpha); o_col1 = float4(0.0, 0.0, 0.0, 1.0 - ialpha); - #else - o_col0 = float4(color, oalpha); #endif #if WRITE_MASK_AS_DEPTH o_depth = oalpha * v_pos.z; #endif - - #if TRANSPARENCY_ONLY_TRANSPARENT - discard; - #endif - } - #elif TRANSPARENCY - // We shouldn't be rendering opaque geometry only when untextured, so no need to test/discard here. - #if USE_DUAL_SOURCE - o_col0 = float4(color, oalpha); - o_col1 = float4(0.0, 0.0, 0.0, u_dst_alpha_factor / ialpha); - #else - o_col0 = float4(color, oalpha); - #endif - - #if WRITE_MASK_AS_DEPTH - o_depth = oalpha * v_pos.z; - #endif - #else - // Non-transparency won't enable blending so we can write the mask here regardless. - o_col0 = float4(color, oalpha); - - #if USE_DUAL_SOURCE - o_col1 = float4(0.0, 0.0, 0.0, 1.0 - ialpha); - #endif - - #if WRITE_MASK_AS_DEPTH - o_depth = oalpha * v_pos.z; #endif #endif } diff --git a/src/core/gpu_hw_shadergen.h b/src/core/gpu_hw_shadergen.h index f456392ac..fc7cc2e53 100644 --- a/src/core/gpu_hw_shadergen.h +++ b/src/core/gpu_hw_shadergen.h @@ -18,7 +18,7 @@ public: std::string GenerateBatchFragmentShader(GPU_HW::BatchRenderMode render_mode, GPUTransparencyMode transparency, GPU_HW::BatchTextureMode texture_mode, GPUTextureFilter texture_filtering, bool uv_limits, bool force_round_texcoords, bool dithering, bool interlacing, - bool check_mask); + bool check_mask, bool use_rov, bool use_rov_depth, bool rov_depth_test); std::string GenerateWireframeGeometryShader(); std::string GenerateWireframeFragmentShader(); std::string GenerateVRAMReadFragmentShader(); diff --git a/src/core/host.cpp b/src/core/host.cpp index 871748edb..ce55996d6 100644 --- a/src/core/host.cpp +++ b/src/core/host.cpp @@ -271,8 +271,10 @@ bool Host::CreateGPUDevice(RenderAPI api, Error* error) disabled_features |= GPUDevice::FEATURE_MASK_FRAMEBUFFER_FETCH; if (g_settings.gpu_disable_texture_buffers) disabled_features |= GPUDevice::FEATURE_MASK_TEXTURE_BUFFERS; - if (g_settings.gpu_disable_texture_copy_to_self) - disabled_features |= GPUDevice::FEATURE_MASK_TEXTURE_COPY_TO_SELF; + if (g_settings.gpu_disable_memory_import) + disabled_features |= GPUDevice::FEATURE_MASK_MEMORY_IMPORT; + if (g_settings.gpu_disable_raster_order_views) + disabled_features |= GPUDevice::FEATURE_MASK_RASTER_ORDER_VIEWS; Error create_error; if (!g_gpu_device || !g_gpu_device->Create(g_settings.gpu_adapter, diff --git a/src/core/settings.cpp b/src/core/settings.cpp index 5aacfe0af..e74018644 100644 --- a/src/core/settings.cpp +++ b/src/core/settings.cpp @@ -191,6 +191,7 @@ void Settings::Load(SettingsInterface& si) gpu_disable_texture_buffers = si.GetBoolValue("GPU", "DisableTextureBuffers", false); gpu_disable_texture_copy_to_self = si.GetBoolValue("GPU", "DisableTextureCopyToSelf", false); gpu_disable_memory_import = si.GetBoolValue("GPU", "DisableMemoryImport", false); + gpu_disable_raster_order_views = si.GetBoolValue("GPU", "DisableRasterOrderViews", false); gpu_per_sample_shading = si.GetBoolValue("GPU", "PerSampleShading", false); gpu_use_thread = si.GetBoolValue("GPU", "UseThread", true); gpu_use_software_renderer_for_readbacks = si.GetBoolValue("GPU", "UseSoftwareRendererForReadbacks", false); @@ -199,6 +200,7 @@ void Settings::Load(SettingsInterface& si) gpu_debanding = si.GetBoolValue("GPU", "Debanding", false); gpu_scaled_dithering = si.GetBoolValue("GPU", "ScaledDithering", true); gpu_force_round_texcoords = si.GetBoolValue("GPU", "ForceRoundTextureCoordinates", false); + gpu_accurate_blending = si.GetBoolValue("GPU", "AccurateBlending", false); gpu_texture_filter = ParseTextureFilterName( si.GetStringValue("GPU", "TextureFilter", GetTextureFilterName(DEFAULT_GPU_TEXTURE_FILTER)).c_str()) @@ -494,6 +496,7 @@ void Settings::Save(SettingsInterface& si, bool ignore_base) const si.SetBoolValue("GPU", "DisableTextureBuffers", gpu_disable_texture_buffers); si.SetBoolValue("GPU", "DisableTextureCopyToSelf", gpu_disable_texture_copy_to_self); si.SetBoolValue("GPU", "DisableMemoryImport", gpu_disable_memory_import); + si.SetBoolValue("GPU", "DisableRasterOrderViews", gpu_disable_raster_order_views); } si.SetBoolValue("GPU", "PerSampleShading", gpu_per_sample_shading); @@ -504,6 +507,7 @@ void Settings::Save(SettingsInterface& si, bool ignore_base) const si.SetBoolValue("GPU", "Debanding", gpu_debanding); si.SetBoolValue("GPU", "ScaledDithering", gpu_scaled_dithering); si.SetBoolValue("GPU", "ForceRoundTextureCoordinates", gpu_force_round_texcoords); + si.SetBoolValue("GPU", "AccurateBlending", gpu_accurate_blending); si.SetStringValue("GPU", "TextureFilter", GetTextureFilterName(gpu_texture_filter)); si.SetStringValue( "GPU", "SpriteTextureFilter", diff --git a/src/core/settings.h b/src/core/settings.h index 933c983aa..821535949 100644 --- a/src/core/settings.h +++ b/src/core/settings.h @@ -114,11 +114,13 @@ struct Settings bool gpu_disable_texture_buffers : 1 = false; bool gpu_disable_texture_copy_to_self : 1 = false; bool gpu_disable_memory_import : 1 = false; + bool gpu_disable_raster_order_views : 1 = false; bool gpu_per_sample_shading : 1 = false; bool gpu_true_color : 1 = true; bool gpu_debanding : 1 = false; bool gpu_scaled_dithering : 1 = true; bool gpu_force_round_texcoords : 1 = false; + bool gpu_accurate_blending : 1 = false; bool gpu_disable_interlacing : 1 = true; bool gpu_force_ntsc_timings : 1 = false; bool gpu_widescreen_hack : 1 = false; @@ -280,6 +282,7 @@ struct Settings bool log_to_file : 1 = false; ALWAYS_INLINE bool IsUsingSoftwareRenderer() const { return (gpu_renderer == GPURenderer::Software); } + ALWAYS_INLINE bool IsUsingAccurateBlending() const { return (gpu_accurate_blending && !gpu_true_color); } ALWAYS_INLINE bool IsRunaheadEnabled() const { return (runahead_frames > 0); } ALWAYS_INLINE PGXPMode GetPGXPMode() diff --git a/src/core/shader_cache_version.h b/src/core/shader_cache_version.h index 5217c0dfb..bcf27217c 100644 --- a/src/core/shader_cache_version.h +++ b/src/core/shader_cache_version.h @@ -4,4 +4,4 @@ #pragma once #include "common/types.h" -static constexpr u32 SHADER_CACHE_VERSION = 17; +static constexpr u32 SHADER_CACHE_VERSION = 18; diff --git a/src/core/system.cpp b/src/core/system.cpp index 348143408..34625cd7e 100644 --- a/src/core/system.cpp +++ b/src/core/system.cpp @@ -3938,6 +3938,7 @@ void System::CheckForSettingsChanges(const Settings& old_settings) g_settings.gpu_disable_texture_buffers != old_settings.gpu_disable_texture_buffers || g_settings.gpu_disable_texture_copy_to_self != old_settings.gpu_disable_texture_copy_to_self || g_settings.gpu_disable_memory_import != old_settings.gpu_disable_memory_import || + g_settings.gpu_disable_raster_order_views != old_settings.gpu_disable_raster_order_views || g_settings.display_exclusive_fullscreen_control != old_settings.display_exclusive_fullscreen_control)) { // if debug device/threaded presentation change, we need to recreate the whole display @@ -3950,6 +3951,7 @@ void System::CheckForSettingsChanges(const Settings& old_settings) g_settings.gpu_disable_texture_buffers != old_settings.gpu_disable_texture_buffers || g_settings.gpu_disable_texture_copy_to_self != old_settings.gpu_disable_texture_copy_to_self || g_settings.gpu_disable_memory_import != old_settings.gpu_disable_memory_import || + g_settings.gpu_disable_raster_order_views != old_settings.gpu_disable_raster_order_views || g_settings.display_exclusive_fullscreen_control != old_settings.display_exclusive_fullscreen_control); Host::AddIconOSDMessage("RendererSwitch", ICON_FA_PAINT_ROLLER, @@ -4055,6 +4057,7 @@ void System::CheckForSettingsChanges(const Settings& old_settings) g_settings.gpu_debanding != old_settings.gpu_debanding || g_settings.gpu_scaled_dithering != old_settings.gpu_scaled_dithering || g_settings.gpu_force_round_texcoords != old_settings.gpu_force_round_texcoords || + g_settings.gpu_accurate_blending != old_settings.gpu_accurate_blending || g_settings.gpu_texture_filter != old_settings.gpu_texture_filter || g_settings.gpu_sprite_texture_filter != old_settings.gpu_sprite_texture_filter || g_settings.gpu_line_detect_mode != old_settings.gpu_line_detect_mode || diff --git a/src/duckstation-qt/graphicssettingswidget.cpp b/src/duckstation-qt/graphicssettingswidget.cpp index c4ecb5932..155cd3f54 100644 --- a/src/duckstation-qt/graphicssettingswidget.cpp +++ b/src/duckstation-qt/graphicssettingswidget.cpp @@ -138,6 +138,7 @@ GraphicsSettingsWidget::GraphicsSettingsWidget(SettingsWindow* dialog, QWidget* "UseSoftwareRendererForReadbacks", false); SettingWidgetBinder::BindWidgetToBoolSetting(sif, m_ui.forceRoundedTexcoords, "GPU", "ForceRoundTextureCoordinates", false); + SettingWidgetBinder::BindWidgetToBoolSetting(sif, m_ui.accurateBlending, "GPU", "AccurateBlending", false); SettingWidgetBinder::SetAvailability(m_ui.scaledDithering, !m_dialog->hasGameTrait(GameDatabase::Trait::DisableScaledDithering)); @@ -388,6 +389,10 @@ GraphicsSettingsWidget::GraphicsSettingsWidget(SettingsWindow* dialog, QWidget* m_ui.forceRoundedTexcoords, tr("Round Upscaled Texture Coordinates"), tr("Unchecked"), tr("Rounds texture coordinates instead of flooring when upscaling. Can fix misaligned textures in some games, but " "break others, and is incompatible with texture filtering.")); + dialog->registerWidgetHelp( + m_ui.accurateBlending, tr("Accurate Blending"), tr("Unchecked"), + tr("Forces blending to be done in the shader at 16-bit precision, when not using true color. Very few games " + "actually require this, and there is a non-trivial performance cost.")); // PGXP Tab @@ -520,6 +525,12 @@ GraphicsSettingsWidget::GraphicsSettingsWidget(SettingsWindow* dialog, QWidget* dialog->registerWidgetHelp(m_ui.disableTextureCopyToSelf, tr("Disable Texture Copies To Self"), tr("Unchecked"), tr("Disables the use of self-copy updates for the VRAM texture. Useful for testing broken " "graphics drivers. Only for developer use.")); + dialog->registerWidgetHelp(m_ui.disableMemoryImport, tr("Disable Memory Import"), tr("Unchecked"), + tr("Disables the use of host memory importing. Useful for testing broken graphics " + "drivers. Only for developer use.")); + dialog->registerWidgetHelp(m_ui.disableRasterOrderViews, tr("Disable Rasterizer Order Views"), tr("Unchecked"), + tr("Disables the use of rasterizer order views. Useful for testing broken graphics " + "drivers. Only for developer use.")); } GraphicsSettingsWidget::~GraphicsSettingsWidget() = default; @@ -669,6 +680,8 @@ void GraphicsSettingsWidget::updateRendererDependentOptions() m_ui.debanding->setEnabled(is_hardware); m_ui.scaledDithering->setEnabled(is_hardware && !m_dialog->hasGameTrait(GameDatabase::Trait::DisableScaledDithering)); m_ui.useSoftwareRendererForReadbacks->setEnabled(is_hardware); + m_ui.forceRoundedTexcoords->setEnabled(is_hardware); + m_ui.accurateBlending->setEnabled(is_hardware); m_ui.tabs->setTabEnabled(TAB_INDEX_TEXTURE_REPLACEMENTS, is_hardware); @@ -881,9 +894,9 @@ void GraphicsSettingsWidget::onTrueColorChanged() const bool true_color = m_dialog->getEffectiveBoolValue("GPU", "TrueColor", false); const bool allow_scaled_dithering = (resolution_scale != 1 && !true_color && !m_dialog->hasGameTrait(GameDatabase::Trait::DisableScaledDithering)); - const bool allow_debanding = true_color; m_ui.scaledDithering->setEnabled(allow_scaled_dithering); - m_ui.debanding->setEnabled(allow_debanding); + m_ui.debanding->setEnabled(true_color); + m_ui.accurateBlending->setEnabled(!true_color); } void GraphicsSettingsWidget::onDownsampleModeChanged() diff --git a/src/duckstation-qt/graphicssettingswidget.ui b/src/duckstation-qt/graphicssettingswidget.ui index 662f9a1db..06ca84fd5 100644 --- a/src/duckstation-qt/graphicssettingswidget.ui +++ b/src/duckstation-qt/graphicssettingswidget.ui @@ -392,13 +392,6 @@ - - - - Threaded Rendering - - - @@ -406,13 +399,6 @@ - - - - Software Renderer Readbacks - - - @@ -420,13 +406,34 @@ - + + + + Software Renderer Readbacks + + + + Round Upscaled Texture Coordinates + + + + Threaded Rendering + + + + + + + Accurate Blending + + + @@ -993,13 +1000,6 @@ - - - - Disable Framebuffer Fetch - - - @@ -1007,13 +1007,6 @@ - - - - Disable Dual-Source Blending - - - @@ -1021,10 +1014,17 @@ - - + + - Disable Shader Cache + Disable Dual-Source Blending + + + + + + + Disable Framebuffer Fetch @@ -1035,6 +1035,27 @@ + + + + Disable Shader Cache + + + + + + + Disable Memory Import + + + + + + + Disable Rasterizer Order Views + + + diff --git a/src/util/shadergen.cpp b/src/util/shadergen.cpp index 8ecc4c947..3a9257f5d 100644 --- a/src/util/shadergen.cpp +++ b/src/util/shadergen.cpp @@ -149,7 +149,7 @@ TinyString ShaderGen::GetGLSLVersionString(RenderAPI render_api, u32 version) } #endif -void ShaderGen::WriteHeader(std::stringstream& ss) +void ShaderGen::WriteHeader(std::stringstream& ss, bool enable_rov /* = false */) { if (m_shader_language == GPUShaderLanguage::GLSL || m_shader_language == GPUShaderLanguage::GLSLES) ss << m_glsl_version_string << "\n\n"; @@ -211,6 +211,11 @@ void ShaderGen::WriteHeader(std::stringstream& ss) if (!GLAD_GL_VERSION_4_3 && !GLAD_GL_ES_VERSION_3_1 && GLAD_GL_ARB_shader_storage_buffer_object) ss << "#extension GL_ARB_shader_storage_buffer_object : require\n"; } + else if (m_shader_language == GPUShaderLanguage::GLSLVK) + { + if (enable_rov) + ss << "#extension GL_ARB_fragment_shader_interlock : require\n"; + } #endif DefineMacro(ss, "API_OPENGL", m_render_api == RenderAPI::OpenGL); @@ -413,6 +418,27 @@ void ShaderGen::DeclareTextureBuffer(std::stringstream& ss, const char* name, u3 } } +void ShaderGen::DeclareImage(std::stringstream& ss, const char* name, u32 index, bool is_float /* = false */, + bool is_int /* = false */, bool is_unsigned /* = false */) +{ + if (m_glsl) + { + if (m_spirv) + ss << "layout(set = " << (m_has_uniform_buffer ? 2 : 1) << ", binding = " << index; + else + ss << "layout(binding = " << index; + + ss << ", " << (is_int ? (is_unsigned ? "rgba8ui" : "rgba8i") : "rgba8") << ") " + << "uniform restrict coherent image2D " << name << ";\n"; + } + else + { + ss << "RasterizerOrderedTexture2D<" + << (is_int ? (is_unsigned ? "uint4" : "int4") : (is_float ? "float4" : "unorm float4")) << "> " << name + << " : register(u" << index << ");\n"; + } +} + const char* ShaderGen::GetInterpolationQualifier(bool interface_block, bool centroid_interpolation, bool sample_interpolation, bool is_out) const { @@ -545,7 +571,8 @@ void ShaderGen::DeclareFragmentEntryPoint( const std::initializer_list>& additional_inputs /* = */, bool declare_fragcoord /* = false */, u32 num_color_outputs /* = 1 */, bool dual_source_output /* = false */, bool depth_output /* = false */, bool msaa /* = false */, bool ssaa /* = false */, - bool declare_sample_id /* = false */, bool noperspective_color /* = false */, bool feedback_loop /* = false */) + bool declare_sample_id /* = false */, bool noperspective_color /* = false */, bool feedback_loop /* = false */, + bool rov /* = false */) { if (m_glsl) { @@ -603,6 +630,8 @@ void ShaderGen::DeclareFragmentEntryPoint( if (feedback_loop) { + Assert(!rov); + #ifdef ENABLE_OPENGL if (m_render_api == RenderAPI::OpenGL || m_render_api == RenderAPI::OpenGLES) { @@ -647,6 +676,14 @@ void ShaderGen::DeclareFragmentEntryPoint( } #endif } + else if (rov) + { + ss << "layout(pixel_interlock_ordered) in;\n"; + ss << "#define ROV_LOAD(name, coords) imageLoad(name, ivec2(coords))\n"; + ss << "#define ROV_STORE(name, coords, value) imageStore(name, ivec2(coords), value)\n"; + ss << "#define BEGIN_ROV_REGION beginInvocationInterlockARB()\n"; + ss << "#define END_ROV_REGION endInvocationInterlockARB()\n"; + } if (m_use_glsl_binding_layout) { @@ -679,48 +716,64 @@ void ShaderGen::DeclareFragmentEntryPoint( } else { + if (rov) + { + ss << "#define ROV_LOAD(name, coords) name[uint2(coords)]\n"; + ss << "#define ROV_STORE(name, coords, value) name[uint2(coords)] = value\n"; + ss << "#define BEGIN_ROV_REGION\n"; + ss << "#define END_ROV_REGION\n"; + } + const char* qualifier = GetInterpolationQualifier(false, msaa, ssaa, false); ss << "void main(\n"; + bool first = true; for (u32 i = 0; i < num_color_inputs; i++) - ss << " " << qualifier << (noperspective_color ? "noperspective " : "") << "in float4 v_col" << i << " : COLOR" - << i << ",\n"; + { + ss << (first ? "" : ",\n") << " " << qualifier << (noperspective_color ? "noperspective " : "") + << "in float4 v_col" << i << " : COLOR" << i; + first = false; + } for (u32 i = 0; i < num_texcoord_inputs; i++) - ss << " " << qualifier << "in float2 v_tex" << i << " : TEXCOORD" << i << ",\n"; + { + ss << (first ? "" : ",\n") << " " << qualifier << "in float2 v_tex" << i << " : TEXCOORD" << i; + first = false; + } u32 additional_counter = num_texcoord_inputs; for (const auto& [qualifiers, name] : additional_inputs) { const char* qualifier_to_use = (std::strlen(qualifiers) > 0) ? qualifiers : qualifier; - ss << " " << qualifier_to_use << " in " << name << " : TEXCOORD" << additional_counter << ",\n"; + ss << (first ? "" : ",\n") << " " << qualifier_to_use << " in " << name << " : TEXCOORD" << additional_counter; additional_counter++; + first = false; } if (declare_fragcoord) - ss << " in float4 v_pos : SV_Position,\n"; + { + ss << (first ? "" : ",\n") << " in float4 v_pos : SV_Position"; + first = false; + } if (declare_sample_id) - ss << " in uint f_sample_index : SV_SampleIndex,\n"; + { + ss << (first ? "" : ",\n") << " in uint f_sample_index : SV_SampleIndex"; + first = false; + } if (depth_output) { - ss << " out float o_depth : SV_Depth"; - if (num_color_outputs > 0) - ss << ",\n"; - else - ss << ")\n"; + ss << (first ? "" : ",\n") << " out float o_depth : SV_Depth"; + first = false; } - for (u32 i = 0; i < num_color_outputs; i++) { - ss << " out float4 o_col" << i << " : SV_Target" << i; - - if (i == (num_color_outputs - 1)) - ss << ")\n"; - else - ss << ",\n"; + ss << (first ? "" : ",\n") << " out float4 o_col" << i << " : SV_Target" << i; + first = false; } + + ss << ")"; } } diff --git a/src/util/shadergen.h b/src/util/shadergen.h index 82e8ec1a5..3d38cca49 100644 --- a/src/util/shadergen.h +++ b/src/util/shadergen.h @@ -44,13 +44,15 @@ protected: void DefineMacro(std::stringstream& ss, const char* name, bool enabled); void DefineMacro(std::stringstream& ss, const char* name, s32 value); - void WriteHeader(std::stringstream& ss); + void WriteHeader(std::stringstream& ss, bool enable_rov = false); void WriteUniformBufferDeclaration(std::stringstream& ss, bool push_constant_on_vulkan); void DeclareUniformBuffer(std::stringstream& ss, const std::initializer_list& members, bool push_constant_on_vulkan); void DeclareTexture(std::stringstream& ss, const char* name, u32 index, bool multisampled = false, bool is_int = false, bool is_unsigned = false); void DeclareTextureBuffer(std::stringstream& ss, const char* name, u32 index, bool is_int, bool is_unsigned); + void DeclareImage(std::stringstream& ss, const char* name, u32 index, bool is_float = false, bool is_int = false, + bool is_unsigned = false); void DeclareVertexEntryPoint(std::stringstream& ss, const std::initializer_list& attributes, u32 num_color_outputs, u32 num_texcoord_outputs, const std::initializer_list>& additional_outputs, @@ -62,7 +64,7 @@ protected: bool declare_fragcoord = false, u32 num_color_outputs = 1, bool dual_source_output = false, bool depth_output = false, bool msaa = false, bool ssaa = false, bool declare_sample_id = false, bool noperspective_color = false, - bool feedback_loop = false); + bool feedback_loop = false, bool rov = false); RenderAPI m_render_api; GPUShaderLanguage m_shader_language;