diff --git a/src/core/gpu_hw.cpp b/src/core/gpu_hw.cpp index 0aebe2e96..a92698366 100644 --- a/src/core/gpu_hw.cpp +++ b/src/core/gpu_hw.cpp @@ -304,7 +304,7 @@ bool GPU_HW::DoState(StateWrapper& sw, GPUTexture** host_texture, bool update_di void GPU_HW::RestoreDeviceContext() { g_gpu_device->SetTextureSampler(0, m_vram_read_texture.get(), g_gpu_device->GetNearestSampler()); - g_gpu_device->SetRenderTarget(m_vram_texture.get(), m_vram_depth_texture.get()); + SetVRAMRenderTarget(); g_gpu_device->SetViewport(0, 0, m_vram_texture->GetWidth(), m_vram_texture->GetHeight()); SetScissor(); m_batch_ubo_dirty = true; @@ -620,6 +620,12 @@ void GPU_HW::PrintSettingsToLog() Log_InfoFmt("Using software renderer for readbacks: {}", m_sw_renderer ? "YES" : "NO"); } +bool GPU_HW::NeedsDepthBuffer() const +{ + // PGXP depth, or no fbfetch, which means we need depth for the mask bit. + return (m_pgxp_depth_buffer || !m_supports_framebuffer_fetch); +} + bool GPU_HW::CreateBuffers() { DestroyBuffers(); @@ -628,6 +634,8 @@ bool GPU_HW::CreateBuffers() const u32 texture_width = VRAM_WIDTH * m_resolution_scale; const u32 texture_height = VRAM_HEIGHT * m_resolution_scale; const u8 samples = static_cast(m_multisamples); + const bool needs_depth_buffer = NeedsDepthBuffer(); + Log_DevFmt("Depth buffer is {}needed", needs_depth_buffer ? "" : "NOT "); // Needed for Metal resolve. const GPUTexture::Type read_texture_type = (g_gpu_device->GetRenderAPI() == RenderAPI::Metal && m_multisamples > 1) ? @@ -636,8 +644,9 @@ bool GPU_HW::CreateBuffers() if (!(m_vram_texture = g_gpu_device->FetchTexture(texture_width, texture_height, 1, 1, samples, GPUTexture::Type::RenderTarget, VRAM_RT_FORMAT)) || - !(m_vram_depth_texture = g_gpu_device->FetchTexture(texture_width, texture_height, 1, 1, samples, - GPUTexture::Type::DepthStencil, VRAM_DS_FORMAT)) || + (needs_depth_buffer && + !(m_vram_depth_texture = g_gpu_device->FetchTexture(texture_width, texture_height, 1, 1, samples, + GPUTexture::Type::DepthStencil, VRAM_DS_FORMAT))) || !(m_vram_read_texture = g_gpu_device->FetchTexture(texture_width, texture_height, 1, 1, 1, read_texture_type, VRAM_RT_FORMAT)) || !(m_vram_readback_texture = g_gpu_device->FetchTexture(VRAM_WIDTH / 2, VRAM_HEIGHT, 1, 1, 1, @@ -647,7 +656,8 @@ bool GPU_HW::CreateBuffers() } GL_OBJECT_NAME(m_vram_texture, "VRAM Texture"); - GL_OBJECT_NAME(m_vram_depth_texture, "VRAM Depth Texture"); + if (m_vram_depth_texture) + GL_OBJECT_NAME(m_vram_depth_texture, "VRAM Depth Texture"); GL_OBJECT_NAME(m_vram_read_texture, "VRAM Read Texture"); GL_OBJECT_NAME(m_vram_readback_texture, "VRAM Readback Texture"); @@ -689,7 +699,7 @@ bool GPU_HW::CreateBuffers() else if (m_downsample_mode == GPUDownsampleMode::Box) m_downsample_scale_or_levels = m_resolution_scale / GetBoxDownsampleScale(m_resolution_scale); - g_gpu_device->SetRenderTarget(m_vram_texture.get(), m_vram_depth_texture.get()); + SetVRAMRenderTarget(); SetFullVRAMDirtyRectangle(); return true; } @@ -697,11 +707,18 @@ bool GPU_HW::CreateBuffers() void GPU_HW::ClearFramebuffer() { g_gpu_device->ClearRenderTarget(m_vram_texture.get(), 0); - g_gpu_device->ClearDepth(m_vram_depth_texture.get(), m_pgxp_depth_buffer ? 1.0f : 0.0f); + if (m_vram_depth_texture) + g_gpu_device->ClearDepth(m_vram_depth_texture.get(), m_pgxp_depth_buffer ? 1.0f : 0.0f); ClearVRAMDirtyRectangle(); m_last_depth_z = 1.0f; } +void GPU_HW::SetVRAMRenderTarget() +{ + g_gpu_device->SetRenderTarget(m_vram_texture.get(), m_vram_depth_texture.get(), + m_allow_shader_blend ? GPUPipeline::ColorFeedbackLoop : GPUPipeline::NoRenderPassFlags); +} + void GPU_HW::DestroyBuffers() { ClearDisplayTexture(); @@ -723,19 +740,23 @@ void GPU_HW::DestroyBuffers() bool GPU_HW::CompilePipelines() { const GPUDevice::Features features = g_gpu_device->GetFeatures(); + const bool needs_depth_buffer = NeedsDepthBuffer(); + m_allow_shader_blend = (features.feedback_loops && (m_pgxp_depth_buffer || !needs_depth_buffer)); + GPU_HW_ShaderGen shadergen(g_gpu_device->GetRenderAPI(), m_resolution_scale, m_multisamples, m_per_sample_shading, m_true_color, m_scaled_dithering, m_texture_filtering, m_clamp_uvs, m_pgxp_depth_buffer, m_disable_color_perspective, m_supports_dual_source_blend, m_supports_framebuffer_fetch, m_debanding); - ShaderCompileProgressTracker progress("Compiling Pipelines", 2 + (4 * 5 * 9 * 2 * 2) + (3 * 4 * 5 * 9 * 2 * 2) + 1 + - 2 + (2 * 2) + 2 + 1 + 1 + (2 * 3) + 1); + ShaderCompileProgressTracker progress("Compiling Pipelines", 2 + (5 * 5 * 9 * 2 * 2 * 2) + + (2 * 5 * 5 * 9 * 2 * 2 * 2) + 1 + 2 + (2 * 2) + 2 + 1 + + 1 + (2 * 3) + 1); // vertex shaders - [textured] - // fragment shaders - [render_mode][texture_mode][dithering][interlacing] + // fragment shaders - [render_mode][transparency_mode][texture_mode][check_mask][dithering][interlacing] static constexpr auto destroy_shader = [](std::unique_ptr& s) { s.reset(); }; DimensionalArray, 2> batch_vertex_shaders{}; - DimensionalArray, 2, 2, 9, 5, 4> batch_fragment_shaders{}; + DimensionalArray, 2, 2, 2, 9, 5, 5> batch_fragment_shaders{}; ScopedGuard batch_shader_guard([&batch_vertex_shaders, &batch_fragment_shaders]() { batch_vertex_shaders.enumerate(destroy_shader); batch_fragment_shaders.enumerate(destroy_shader); @@ -750,48 +771,51 @@ bool GPU_HW::CompilePipelines() progress.Increment(); } - for (u8 render_mode = 0; render_mode < 4; render_mode++) + for (u8 render_mode = 0; render_mode < 5; render_mode++) { for (u8 transparency_mode = 0; transparency_mode < 5; transparency_mode++) { - if (m_supports_framebuffer_fetch) - { - // Don't need multipass shaders. - if (render_mode != static_cast(BatchRenderMode::TransparencyDisabled) && - render_mode != static_cast(BatchRenderMode::TransparentAndOpaque)) - { - progress.Increment(2 * 2 * 9); - continue; - } - } - else - { + if ( // Can't generate shader blending. - if (transparency_mode != static_cast(GPUTransparencyMode::Disabled)) - { - progress.Increment(2 * 2 * 9); - continue; - } + ((render_mode == static_cast(BatchRenderMode::ShaderBlend) && !features.feedback_loops) || + (render_mode != static_cast(BatchRenderMode::ShaderBlend) && + transparency_mode != static_cast(GPUTransparencyMode::Disabled))) || + // Don't need multipass shaders. + (m_supports_framebuffer_fetch && (render_mode == static_cast(BatchRenderMode::OnlyOpaque) || + render_mode == static_cast(BatchRenderMode::OnlyTransparent)))) + { + progress.Increment(9 * 2 * 2 * 2); + continue; } for (u8 texture_mode = 0; texture_mode < 9; texture_mode++) { - for (u8 dithering = 0; dithering < 2; dithering++) + for (u8 check_mask = 0; check_mask < 2; check_mask++) { - for (u8 interlacing = 0; interlacing < 2; interlacing++) + if (check_mask && render_mode != static_cast(BatchRenderMode::ShaderBlend)) { - const std::string fs = shadergen.GenerateBatchFragmentShader( - static_cast(render_mode), static_cast(transparency_mode), - static_cast(texture_mode), ConvertToBoolUnchecked(dithering), - ConvertToBoolUnchecked(interlacing)); + // mask bit testing is only valid with shader blending. + progress.Increment(2 * 2); + continue; + } - if (!(batch_fragment_shaders[render_mode][transparency_mode][texture_mode][dithering][interlacing] = - g_gpu_device->CreateShader(GPUShaderStage::Fragment, fs))) + for (u8 dithering = 0; dithering < 2; dithering++) + { + for (u8 interlacing = 0; interlacing < 2; interlacing++) { - return false; - } + const std::string fs = shadergen.GenerateBatchFragmentShader( + static_cast(render_mode), static_cast(transparency_mode), + static_cast(texture_mode), ConvertToBoolUnchecked(dithering), + ConvertToBoolUnchecked(interlacing), ConvertToBoolUnchecked(check_mask)); - progress.Increment(); + if (!(batch_fragment_shaders[render_mode][transparency_mode][texture_mode][check_mask][dithering] + [interlacing] = g_gpu_device->CreateShader(GPUShaderStage::Fragment, fs))) + { + return false; + } + + progress.Increment(); + } } } } @@ -820,116 +844,139 @@ bool GPU_HW::CompilePipelines() plconfig.rasterization = GPUPipeline::RasterizationState::GetNoCullState(); plconfig.primitive = GPUPipeline::Primitive::Triangles; plconfig.geometry_shader = nullptr; - plconfig.SetTargetFormats(VRAM_RT_FORMAT, VRAM_DS_FORMAT); + plconfig.SetTargetFormats(VRAM_RT_FORMAT, needs_depth_buffer ? VRAM_DS_FORMAT : GPUTexture::Format::Unknown); plconfig.samples = m_multisamples; plconfig.per_sample_shading = m_per_sample_shading; - plconfig.render_pass_flags = GPUPipeline::NoRenderPassFlags; + plconfig.render_pass_flags = m_allow_shader_blend ? GPUPipeline::ColorFeedbackLoop : GPUPipeline::NoRenderPassFlags; + plconfig.depth = GPUPipeline::DepthState::GetNoTestsState(); - // [depth_test][render_mode][texture_mode][transparency_mode][dithering][interlacing] - for (u8 depth_test = 0; depth_test < 3; depth_test++) + // [depth_test][transparency_mode][render_mode][texture_mode][dithering][interlacing][check_mask] + for (u8 depth_test = 0; depth_test < 2; depth_test++) { - for (u8 render_mode = 0; render_mode < 4; render_mode++) + if (depth_test && !m_pgxp_depth_buffer) { - if (m_supports_framebuffer_fetch) + // Not used. + progress.Increment(5 * 5 * 9 * 2 * 2 * 2); + continue; + } + + for (u8 transparency_mode = 0; transparency_mode < 5; transparency_mode++) + { + for (u8 render_mode = 0; render_mode < 5; render_mode++) { - // Don't need multipass shaders. - if (render_mode != static_cast(BatchRenderMode::TransparencyDisabled) && - render_mode != static_cast(BatchRenderMode::TransparentAndOpaque)) + if ( + // Can't generate shader blending. + (render_mode == static_cast(BatchRenderMode::ShaderBlend) && !features.feedback_loops) || + // Don't need multipass shaders. + (m_supports_framebuffer_fetch && (render_mode == static_cast(BatchRenderMode::OnlyOpaque) || + render_mode == static_cast(BatchRenderMode::OnlyTransparent)))) { - progress.Increment(2 * 2 * 9 * 5); + progress.Increment(9 * 2 * 2 * 2); continue; } - } - for (u8 transparency_mode = 0; transparency_mode < 5; transparency_mode++) - { for (u8 texture_mode = 0; texture_mode < 9; texture_mode++) { for (u8 dithering = 0; dithering < 2; dithering++) { for (u8 interlacing = 0; interlacing < 2; interlacing++) { - static constexpr std::array depth_test_values = { - GPUPipeline::DepthFunc::Always, GPUPipeline::DepthFunc::GreaterEqual, - GPUPipeline::DepthFunc::LessEqual}; - const bool textured = (static_cast(texture_mode) != GPUTextureMode::Disabled); - const bool use_shader_blending = - (textured && NeedsShaderBlending(static_cast(transparency_mode))); - - plconfig.input_layout.vertex_attributes = - textured ? - (m_clamp_uvs ? std::span( - vertex_attributes, NUM_BATCH_TEXTURED_LIMITS_VERTEX_ATTRIBUTES) : - std::span(vertex_attributes, - NUM_BATCH_TEXTURED_VERTEX_ATTRIBUTES)) : - std::span(vertex_attributes, NUM_BATCH_VERTEX_ATTRIBUTES); - - plconfig.vertex_shader = batch_vertex_shaders[BoolToUInt8(textured)].get(); - plconfig.fragment_shader = - batch_fragment_shaders[render_mode] - [use_shader_blending ? transparency_mode : - static_cast(GPUTransparencyMode::Disabled)] - [texture_mode][dithering][interlacing] - .get(); - - plconfig.depth.depth_test = depth_test_values[depth_test]; - plconfig.depth.depth_write = !m_pgxp_depth_buffer || depth_test != 0; - plconfig.blend = GPUPipeline::BlendState::GetNoBlendingState(); - - if (!use_shader_blending && - ((static_cast(transparency_mode) != GPUTransparencyMode::Disabled && - (static_cast(render_mode) != BatchRenderMode::TransparencyDisabled && - static_cast(render_mode) != BatchRenderMode::OnlyOpaque)) || - (textured && IsBlendedTextureFiltering(m_texture_filtering)))) + for (u8 check_mask = 0; check_mask < 2; check_mask++) { - plconfig.blend.enable = true; - plconfig.blend.src_alpha_blend = GPUPipeline::BlendFunc::One; - plconfig.blend.dst_alpha_blend = GPUPipeline::BlendFunc::Zero; - plconfig.blend.alpha_blend_op = GPUPipeline::BlendOp::Add; + const bool textured = (static_cast(texture_mode) != GPUTextureMode::Disabled); + const bool use_shader_blending = + (render_mode == static_cast(BatchRenderMode::ShaderBlend) && + ((textured && + NeedsShaderBlending(static_cast(transparency_mode), (check_mask != 0))) || + check_mask)); - if (m_supports_dual_source_blend) + plconfig.input_layout.vertex_attributes = + textured ? + (m_clamp_uvs ? std::span( + vertex_attributes, NUM_BATCH_TEXTURED_LIMITS_VERTEX_ATTRIBUTES) : + std::span( + vertex_attributes, NUM_BATCH_TEXTURED_VERTEX_ATTRIBUTES)) : + std::span(vertex_attributes, NUM_BATCH_VERTEX_ATTRIBUTES); + + plconfig.vertex_shader = batch_vertex_shaders[BoolToUInt8(textured)].get(); + plconfig.fragment_shader = + batch_fragment_shaders[render_mode] + [use_shader_blending ? transparency_mode : + static_cast(GPUTransparencyMode::Disabled)] + [texture_mode][use_shader_blending ? check_mask : 0][dithering][interlacing] + .get(); + Assert(plconfig.vertex_shader && plconfig.fragment_shader); + + if (needs_depth_buffer) { - plconfig.blend.src_blend = GPUPipeline::BlendFunc::One; - plconfig.blend.dst_blend = GPUPipeline::BlendFunc::SrcAlpha1; - plconfig.blend.blend_op = - (static_cast(transparency_mode) == - GPUTransparencyMode::BackgroundMinusForeground && - static_cast(render_mode) != BatchRenderMode::TransparencyDisabled && - static_cast(render_mode) != BatchRenderMode::OnlyOpaque) ? - GPUPipeline::BlendOp::ReverseSubtract : - GPUPipeline::BlendOp::Add; + plconfig.depth.depth_test = + m_pgxp_depth_buffer ? + (depth_test ? GPUPipeline::DepthFunc::LessEqual : GPUPipeline::DepthFunc::Always) : + (check_mask ? GPUPipeline::DepthFunc::GreaterEqual : GPUPipeline::DepthFunc::Always); + + // Don't write for transparent, but still test. + plconfig.depth.depth_write = + !m_pgxp_depth_buffer || + (depth_test && transparency_mode == static_cast(GPUTransparencyMode::Disabled)); } - else + + plconfig.blend = GPUPipeline::BlendState::GetNoBlendingState(); + + if (!use_shader_blending && + ((static_cast(transparency_mode) != GPUTransparencyMode::Disabled && + (static_cast(render_mode) != BatchRenderMode::TransparencyDisabled && + static_cast(render_mode) != BatchRenderMode::OnlyOpaque)) || + (textured && IsBlendedTextureFiltering(m_texture_filtering)))) { - // TODO: This isn't entirely accurate, 127.5 versus 128. - // But if we use fbfetch on Mali, it doesn't matter. - plconfig.blend.src_blend = GPUPipeline::BlendFunc::One; - plconfig.blend.dst_blend = GPUPipeline::BlendFunc::One; - if (static_cast(transparency_mode) == - GPUTransparencyMode::HalfBackgroundPlusHalfForeground) + plconfig.blend.enable = true; + plconfig.blend.src_alpha_blend = GPUPipeline::BlendFunc::One; + plconfig.blend.dst_alpha_blend = GPUPipeline::BlendFunc::Zero; + plconfig.blend.alpha_blend_op = GPUPipeline::BlendOp::Add; + + if (m_supports_dual_source_blend) { - plconfig.blend.dst_blend = GPUPipeline::BlendFunc::ConstantColor; - plconfig.blend.dst_alpha_blend = GPUPipeline::BlendFunc::ConstantColor; - plconfig.blend.constant = 0x00808080u; + plconfig.blend.src_blend = GPUPipeline::BlendFunc::One; + plconfig.blend.dst_blend = GPUPipeline::BlendFunc::SrcAlpha1; + plconfig.blend.blend_op = + (static_cast(transparency_mode) == + GPUTransparencyMode::BackgroundMinusForeground && + static_cast(render_mode) != BatchRenderMode::TransparencyDisabled && + static_cast(render_mode) != BatchRenderMode::OnlyOpaque) ? + GPUPipeline::BlendOp::ReverseSubtract : + GPUPipeline::BlendOp::Add; } + else + { + // TODO: This isn't entirely accurate, 127.5 versus 128. + // But if we use fbfetch on Mali, it doesn't matter. + plconfig.blend.src_blend = GPUPipeline::BlendFunc::One; + plconfig.blend.dst_blend = GPUPipeline::BlendFunc::One; + if (static_cast(transparency_mode) == + GPUTransparencyMode::HalfBackgroundPlusHalfForeground) + { + plconfig.blend.dst_blend = GPUPipeline::BlendFunc::ConstantColor; + plconfig.blend.dst_alpha_blend = GPUPipeline::BlendFunc::ConstantColor; + plconfig.blend.constant = 0x00808080u; + } - plconfig.blend.blend_op = - (static_cast(transparency_mode) == - GPUTransparencyMode::BackgroundMinusForeground && - static_cast(render_mode) != BatchRenderMode::TransparencyDisabled && - static_cast(render_mode) != BatchRenderMode::OnlyOpaque) ? - GPUPipeline::BlendOp::ReverseSubtract : - GPUPipeline::BlendOp::Add; + plconfig.blend.blend_op = + (static_cast(transparency_mode) == + GPUTransparencyMode::BackgroundMinusForeground && + static_cast(render_mode) != BatchRenderMode::TransparencyDisabled && + static_cast(render_mode) != BatchRenderMode::OnlyOpaque) ? + GPUPipeline::BlendOp::ReverseSubtract : + GPUPipeline::BlendOp::Add; + } } - } - if (!(m_batch_pipelines[depth_test][render_mode][texture_mode][transparency_mode][dithering] - [interlacing] = g_gpu_device->CreatePipeline(plconfig))) - { - return false; - } + if (!(m_batch_pipelines[depth_test][transparency_mode][render_mode][texture_mode][dithering] + [interlacing][check_mask] = g_gpu_device->CreatePipeline(plconfig))) + { + return false; + } - progress.Increment(); + progress.Increment(); + } } } } @@ -1073,7 +1120,10 @@ bool GPU_HW::CompilePipelines() return false; } + plconfig.render_pass_flags = GPUPipeline::NoRenderPassFlags; + // VRAM update depth + if (needs_depth_buffer) { std::unique_ptr fs = g_gpu_device->CreateShader(GPUShaderStage::Fragment, shadergen.GenerateVRAMUpdateDepthFragmentShader()); @@ -1323,7 +1373,7 @@ void GPU_HW::UpdateVRAMReadTexture(bool drawn, bool written) void GPU_HW::UpdateDepthBufferFromMaskBit() { - if (m_pgxp_depth_buffer) + if (m_pgxp_depth_buffer || !m_vram_depth_texture) return; // Viewport should already be set full, only need to fudge the scissor. @@ -1336,7 +1386,7 @@ void GPU_HW::UpdateDepthBufferFromMaskBit() // Restore. g_gpu_device->SetTextureSampler(0, m_vram_read_texture.get(), g_gpu_device->GetNearestSampler()); - g_gpu_device->SetRenderTarget(m_vram_texture.get(), m_vram_depth_texture.get()); + SetVRAMRenderTarget(); SetScissor(); } @@ -1389,13 +1439,18 @@ void GPU_HW::UnmapGPUBuffer(u32 used_vertices, u32 used_indices) ALWAYS_INLINE_RELEASE void GPU_HW::DrawBatchVertices(BatchRenderMode render_mode, u32 num_indices, u32 base_index, u32 base_vertex) { - // [depth_test][render_mode][texture_mode][transparency_mode][dithering][interlacing] - const u8 depth_test = m_batch.use_depth_buffer ? static_cast(2) : BoolToUInt8(m_batch.check_mask_before_draw); - g_gpu_device->SetPipeline( - m_batch_pipelines[depth_test][static_cast(render_mode)][static_cast(m_batch.texture_mode)][static_cast( - m_batch.transparency_mode)][BoolToUInt8(m_batch.dithering)][BoolToUInt8(m_batch.interlacing)] - .get()); - g_gpu_device->DrawIndexed(num_indices, base_index, base_vertex); + // [depth_test][transparency_mode][render_mode][texture_mode][dithering][interlacing][check_mask] + const u8 depth_test = BoolToUInt8(m_batch.use_depth_buffer); + const u8 check_mask = BoolToUInt8(m_batch.check_mask_before_draw); + g_gpu_device->SetPipeline(m_batch_pipelines[depth_test][static_cast(m_batch.transparency_mode)][static_cast( + render_mode)][static_cast(m_batch.texture_mode)][BoolToUInt8(m_batch.dithering)] + [BoolToUInt8(m_batch.interlacing)][check_mask] + .get()); + + if (render_mode != BatchRenderMode::ShaderBlend || m_supports_framebuffer_fetch) + g_gpu_device->DrawIndexed(num_indices, base_index, base_vertex); + else + g_gpu_device->DrawIndexedWithBarrier(num_indices, base_index, base_vertex, GPUDevice::DrawBarrier::Full); } ALWAYS_INLINE_RELEASE void GPU_HW::HandleFlippedQuadTextureCoordinates(BatchVertex* vertices) @@ -1873,10 +1928,8 @@ void GPU_HW::LoadVertices() } else if (m_pgxp_depth_buffer) { - const bool use_depth = (m_batch.transparency_mode == GPUTransparencyMode::Disabled); - SetBatchDepthBuffer(use_depth); - if (use_depth) - CheckForDepthClear(vertices.data(), num_vertices); + SetBatchDepthBuffer(true); + CheckForDepthClear(vertices.data(), num_vertices); } } @@ -2410,10 +2463,11 @@ ALWAYS_INLINE_RELEASE bool GPU_HW::NeedsTwoPassRendering() const (!m_supports_dual_source_blend && m_batch.transparency_mode != GPUTransparencyMode::Disabled))); } -ALWAYS_INLINE_RELEASE bool GPU_HW::NeedsShaderBlending(GPUTransparencyMode transparency) const +ALWAYS_INLINE_RELEASE bool GPU_HW::NeedsShaderBlending(GPUTransparencyMode transparency, bool check_mask) const { - return (m_supports_framebuffer_fetch && - (transparency == GPUTransparencyMode::BackgroundMinusForeground || + return (m_allow_shader_blend && + ((check_mask && (m_pgxp_depth_buffer || !m_vram_depth_texture)) || + transparency == GPUTransparencyMode::BackgroundMinusForeground || (!m_supports_dual_source_blend && (transparency != GPUTransparencyMode::Disabled || IsBlendedTextureFiltering(m_texture_filtering))))); } @@ -2472,7 +2526,7 @@ void GPU_HW::EnsureVertexBufferSpaceForCurrentCommand() void GPU_HW::ResetBatchVertexDepth() { - if (m_pgxp_depth_buffer) + if (m_pgxp_depth_buffer || !m_vram_depth_texture) return; Log_PerfPrint("Resetting batch vertex depth"); @@ -2945,7 +2999,7 @@ void GPU_HW::DispatchRenderCommand() rc.transparency_enable ? m_draw_mode.mode_reg.transparency_mode : GPUTransparencyMode::Disabled; const bool dithering_enable = (!m_true_color && rc.IsDitheringEnabled()) ? m_GPUSTAT.dither_enable : false; if (texture_mode != m_batch.texture_mode || transparency_mode != m_batch.transparency_mode || - (transparency_mode == GPUTransparencyMode::BackgroundMinusForeground && !m_supports_framebuffer_fetch) || + (transparency_mode == GPUTransparencyMode::BackgroundMinusForeground && !m_allow_shader_blend) || dithering_enable != m_batch.dithering) { FlushRender(); @@ -2956,8 +3010,9 @@ void GPU_HW::DispatchRenderCommand() if (m_batch_index_count == 0) { // transparency mode change + const bool check_mask_before_draw = m_GPUSTAT.check_mask_before_draw; if (transparency_mode != GPUTransparencyMode::Disabled && - (texture_mode == GPUTextureMode::Disabled || !NeedsShaderBlending(transparency_mode))) + (texture_mode == GPUTextureMode::Disabled || !NeedsShaderBlending(transparency_mode, check_mask_before_draw))) { static constexpr float transparent_alpha[4][2] = {{0.5f, 0.5f}, {1.0f, 1.0f}, {1.0f, 1.0f}, {0.25f, 1.0f}}; @@ -2969,7 +3024,6 @@ void GPU_HW::DispatchRenderCommand() m_batch_ubo_data.u_dst_alpha_factor = dst_alpha_factor; } - const bool check_mask_before_draw = m_GPUSTAT.check_mask_before_draw; const bool set_mask_while_drawing = m_GPUSTAT.set_mask_while_drawing; if (m_batch.check_mask_before_draw != check_mask_before_draw || m_batch.set_mask_while_drawing != set_mask_while_drawing) @@ -3052,7 +3106,11 @@ void GPU_HW::FlushRender() if (m_wireframe_mode != GPUWireframeMode::OnlyWireframe) { - if (NeedsTwoPassRendering()) + if (NeedsShaderBlending(m_batch.transparency_mode, m_batch.check_mask_before_draw)) + { + DrawBatchVertices(BatchRenderMode::ShaderBlend, index_count, base_index, base_vertex); + } + else if (NeedsTwoPassRendering()) { DrawBatchVertices(BatchRenderMode::OnlyOpaque, index_count, base_index, base_vertex); DrawBatchVertices(BatchRenderMode::OnlyTransparent, index_count, base_index, base_vertex); diff --git a/src/core/gpu_hw.h b/src/core/gpu_hw.h index 35a0aa484..22b41aaeb 100644 --- a/src/core/gpu_hw.h +++ b/src/core/gpu_hw.h @@ -29,7 +29,8 @@ public: TransparencyDisabled, TransparentAndOpaque, OnlyOpaque, - OnlyTransparent + OnlyTransparent, + ShaderBlend }; GPU_HW(); @@ -115,6 +116,9 @@ private: u32 num_uniform_buffer_updates; }; + /// Returns true if a depth buffer should be created. + bool NeedsDepthBuffer() const; + bool CreateBuffers(); void ClearFramebuffer(); void DestroyBuffers(); @@ -131,6 +135,7 @@ private: void UpdateDepthBufferFromMaskBit(); void ClearDepthBuffer(); void SetScissor(); + void SetVRAMRenderTarget(); void MapGPUBuffer(u32 required_vertices, u32 required_indices); void UnmapGPUBuffer(u32 used_vertices, u32 used_indices); void DrawBatchVertices(BatchRenderMode render_mode, u32 num_indices, u32 base_index, u32 base_vertex); @@ -158,7 +163,7 @@ private: bool NeedsTwoPassRendering() const; /// Returns true if the draw is going to use shader blending/framebuffer fetch. - bool NeedsShaderBlending(GPUTransparencyMode transparency) const; + bool NeedsShaderBlending(GPUTransparencyMode transparency, bool check_mask) const; void FillBackendCommandParameters(GPUBackendCommand* cmd) const; void FillDrawCommand(GPUBackendDrawCommand* cmd, GPURenderCommand rc) const; @@ -236,6 +241,8 @@ private: bool m_clamp_uvs : 1 = false; bool m_compute_uv_range : 1 = false; bool m_pgxp_depth_buffer : 1 = false; + bool m_allow_shader_blend : 1 = false; + bool m_prefer_shader_blend : 1 = false; u8 m_texpage_dirty = 0; BatchConfig m_batch; @@ -249,8 +256,8 @@ private: Common::Rectangle m_vram_dirty_write_rect; Common::Rectangle m_current_uv_range; - // [depth_test][render_mode][texture_mode][transparency_mode][dithering][interlacing] - DimensionalArray, 2, 2, 5, 9, 4, 3> m_batch_pipelines{}; + // [depth_test][transparency_mode][render_mode][texture_mode][dithering][interlacing][check_mask] + DimensionalArray, 2, 2, 2, 9, 5, 5, 2> m_batch_pipelines{}; std::unique_ptr m_wireframe_pipeline; // [wrapped][interlaced] diff --git a/src/core/gpu_hw_shadergen.cpp b/src/core/gpu_hw_shadergen.cpp index 0d4e6ad9c..874c13f62 100644 --- a/src/core/gpu_hw_shadergen.cpp +++ b/src/core/gpu_hw_shadergen.cpp @@ -632,19 +632,20 @@ void FilteredSampleFromVRAM(uint4 texpage, float2 coords, float4 uv_limits, std::string GPU_HW_ShaderGen::GenerateBatchFragmentShader(GPU_HW::BatchRenderMode render_mode, GPUTransparencyMode transparency, GPUTextureMode texture_mode, - bool dithering, bool interlacing) + bool dithering, bool interlacing, bool check_mask) { - // Shouldn't be using shader blending without fbfetch. - DebugAssert(m_supports_framebuffer_fetch || transparency == GPUTransparencyMode::Disabled); + // TODO: don't write depth for shader blend + DebugAssert(transparency == GPUTransparencyMode::Disabled || render_mode == GPU_HW::BatchRenderMode::ShaderBlend); const GPUTextureMode actual_texture_mode = texture_mode & ~GPUTextureMode::RawTextureBit; const bool raw_texture = (texture_mode & GPUTextureMode::RawTextureBit) == GPUTextureMode::RawTextureBit; const bool textured = (texture_mode != GPUTextureMode::Disabled); - const bool use_framebuffer_fetch = (m_supports_framebuffer_fetch && transparency != GPUTransparencyMode::Disabled); - const bool use_dual_source = !use_framebuffer_fetch && m_supports_dual_source_blend && - ((render_mode != GPU_HW::BatchRenderMode::TransparencyDisabled && - render_mode != GPU_HW::BatchRenderMode::OnlyOpaque) || - m_texture_filter != GPUTextureFilter::Nearest); + const bool shader_blending = (render_mode == GPU_HW::BatchRenderMode::ShaderBlend && + (transparency != GPUTransparencyMode::Disabled || check_mask)); + const bool use_dual_source = (!shader_blending && m_supports_dual_source_blend && + ((render_mode != GPU_HW::BatchRenderMode::TransparencyDisabled && + render_mode != GPU_HW::BatchRenderMode::OnlyOpaque) || + m_texture_filter != GPUTextureFilter::Nearest)); std::stringstream ss; WriteHeader(ss); @@ -652,7 +653,8 @@ std::string GPU_HW_ShaderGen::GenerateBatchFragmentShader(GPU_HW::BatchRenderMod DefineMacro(ss, "TRANSPARENCY_ONLY_OPAQUE", render_mode == GPU_HW::BatchRenderMode::OnlyOpaque); DefineMacro(ss, "TRANSPARENCY_ONLY_TRANSPARENT", render_mode == GPU_HW::BatchRenderMode::OnlyTransparent); DefineMacro(ss, "TRANSPARENCY_MODE", static_cast(transparency)); - DefineMacro(ss, "SHADER_BLENDING", use_framebuffer_fetch); + DefineMacro(ss, "SHADER_BLENDING", shader_blending); + DefineMacro(ss, "CHECK_MASK_BIT", check_mask); DefineMacro(ss, "TEXTURED", textured); DefineMacro(ss, "PALETTE", actual_texture_mode == GPUTextureMode::Palette4Bit || actual_texture_mode == GPUTextureMode::Palette8Bit); @@ -800,19 +802,19 @@ float3 ApplyDebanding(float2 frag_coord) DeclareFragmentEntryPoint(ss, 1, 1, {{"nointerpolation", "uint4 v_texpage"}, {"nointerpolation", "float4 v_uv_limits"}}, true, use_dual_source ? 2 : 1, !m_pgxp_depth, UsingMSAA(), UsingPerSampleShading(), - false, m_disable_color_perspective, use_framebuffer_fetch); + false, m_disable_color_perspective, shader_blending); } else { DeclareFragmentEntryPoint(ss, 1, 1, {{"nointerpolation", "uint4 v_texpage"}}, true, use_dual_source ? 2 : 1, !m_pgxp_depth, UsingMSAA(), UsingPerSampleShading(), false, m_disable_color_perspective, - use_framebuffer_fetch); + shader_blending); } } else { DeclareFragmentEntryPoint(ss, 1, 0, {}, true, use_dual_source ? 2 : 1, !m_pgxp_depth, UsingMSAA(), - UsingPerSampleShading(), false, m_disable_color_perspective, use_framebuffer_fetch); + UsingPerSampleShading(), false, m_disable_color_perspective, shader_blending); } ss << R"( @@ -930,6 +932,11 @@ float3 ApplyDebanding(float2 frag_coord) float4 bg_col = LAST_FRAG_COLOR; float4 fg_col = float4(color, oalpha); + #if CHECK_MASK_BIT + if (bg_col.a != 0.0) + discard; + #endif + #if TEXTURE_FILTERING #if TRANSPARENCY_MODE == 0 || TRANSPARENCY_MODE == 3 bg_col.rgb /= ialpha; diff --git a/src/core/gpu_hw_shadergen.h b/src/core/gpu_hw_shadergen.h index ab89f93a8..bd41aea08 100644 --- a/src/core/gpu_hw_shadergen.h +++ b/src/core/gpu_hw_shadergen.h @@ -16,7 +16,8 @@ public: std::string GenerateBatchVertexShader(bool textured); std::string GenerateBatchFragmentShader(GPU_HW::BatchRenderMode render_mode, GPUTransparencyMode transparency, - GPUTextureMode texture_mode, bool dithering, bool interlacing); + GPUTextureMode texture_mode, bool dithering, bool interlacing, + bool check_mask); std::string GenerateWireframeGeometryShader(); std::string GenerateWireframeFragmentShader(); std::string GenerateVRAMReadFragmentShader();