From 0d207a6c0d4754ed468a90a4a0ca93936a24f948 Mon Sep 17 00:00:00 2001 From: kd-11 Date: Wed, 21 Sep 2016 16:41:29 +0300 Subject: [PATCH] GL/Vulkan: Bug fixes and improvements; alphakill for vulkan (#2146) * vk: Zero-initialize some more structs * gl: Clean up fragment program generation code * vk: Enable alpha kill * vk: Fix surface clear; redirect output for surface_type:b * vk: Tie renderpass to program object to avoid incompatible passes --- rpcs3/Emu/RSX/GL/GLFragmentProgram.cpp | 63 +++++++++------------ rpcs3/Emu/RSX/VK/VKFragmentProgram.cpp | 75 +++++++++++++++++-------- rpcs3/Emu/RSX/VK/VKGSRender.cpp | 78 ++++++++++++++++---------- rpcs3/Emu/RSX/VK/VKHelpers.h | 4 +- rpcs3/Emu/RSX/VK/VKProgramBuffer.h | 2 + 5 files changed, 134 insertions(+), 88 deletions(-) diff --git a/rpcs3/Emu/RSX/GL/GLFragmentProgram.cpp b/rpcs3/Emu/RSX/GL/GLFragmentProgram.cpp index b8326498a3..99495b1253 100644 --- a/rpcs3/Emu/RSX/GL/GLFragmentProgram.cpp +++ b/rpcs3/Emu/RSX/GL/GLFragmentProgram.cpp @@ -88,10 +88,10 @@ void GLFragmentDecompilerThread::insertConstants(std::stringstream & OS) int index = atoi(&PI.name.data()[3]); OS << "uniform " << samplerType << " " << PI.name << ";" << std::endl; - OS << "uniform " << "vec4 " << "f" << PI.name << "_cm = vec4(1.0);" << std::endl; } } + OS << std::endl; OS << "layout(std140, binding = 2) uniform FragmentConstantsBuffer" << std::endl; OS << "{" << std::endl; @@ -147,41 +147,42 @@ namespace OS << " vec4 fogc = clamp(vec4(" << fog_func << ", 0., 0.), 0., 1.);\n"; } - void insert_texture_fetch(std::stringstream & OS, const RSXFragmentProgram& prog, const ParamArray& param) + void insert_texture_scale(std::stringstream & OS, const RSXFragmentProgram& prog, int index) { - OS << "vec4 texture_fetch(int index, vec4 coord)\n{\n"; - OS << " switch (index)\n\t{\n"; + std::string vec_type = "vec2"; - for (u8 id = 0; id < 16; id++) + switch (prog.get_texture_dimension(index)) { - if (prog.textures_alpha_kill[id]) - { - OS << " case " + 
std::to_string(id) + ": return "; - - switch (prog.get_texture_dimension(id)) - { - case rsx::texture_dimension_extended::texture_dimension_1d: OS << "texture(tex" + std::to_string(id) + ", coord.x)"; break; - case rsx::texture_dimension_extended::texture_dimension_2d: OS << "texture(tex" + std::to_string(id) + ", coord.xy)"; break; - case rsx::texture_dimension_extended::texture_dimension_3d: - case rsx::texture_dimension_extended::texture_dimension_cubemap: OS << "texture(tex" + std::to_string(id) + ", coord.xyz)"; break; - - default: OS << "vec4(0.0)"; - } - - OS << ";\n"; - } + case rsx::texture_dimension_extended::texture_dimension_1d: vec_type = "float"; break; + case rsx::texture_dimension_extended::texture_dimension_2d: vec_type = "vec2"; break; + case rsx::texture_dimension_extended::texture_dimension_3d: + case rsx::texture_dimension_extended::texture_dimension_cubemap: vec_type = "vec3"; } - OS << " default: return vec4(0.0);\n"; - OS << " }\n"; - OS << "}\n"; + if (prog.unnormalized_coords & (1 << index)) + OS << "\t" << vec_type << " tex" << index << "_coord_scale = 1. 
/ textureSize(tex" << index << ", 0);\n"; + else + OS << "\t" << vec_type << " tex" << index << "_coord_scale = " << vec_type << "(1.);\n"; + } + + std::string insert_texture_fetch(const RSXFragmentProgram& prog, int index) + { + std::string tex_name = "tex" + std::to_string(index); + std::string coord_name = "tc" + std::to_string(index); + + switch (prog.get_texture_dimension(index)) + { + case rsx::texture_dimension_extended::texture_dimension_1d: return "texture(" + tex_name + ", (" + coord_name + ".x * " + tex_name + "_coord_scale))"; + case rsx::texture_dimension_extended::texture_dimension_2d: return "texture(" + tex_name + ", (" + coord_name + ".xy * " + tex_name + "_coord_scale))"; + case rsx::texture_dimension_extended::texture_dimension_3d: + case rsx::texture_dimension_extended::texture_dimension_cubemap: return "texture(" + tex_name + ", (" + coord_name + ".xyz * " + tex_name + "_coord_scale))"; + } } } void GLFragmentDecompilerThread::insertMainStart(std::stringstream & OS) { insert_glsl_legacy_function(OS); - insert_texture_fetch(OS, m_prog, m_parr); OS << "void main ()" << std::endl; OS << "{" << std::endl; @@ -209,14 +210,7 @@ void GLFragmentDecompilerThread::insertMainStart(std::stringstream & OS) std::string samplerType = PT.type; int index = atoi(&PI.name.data()[3]); - if (m_prog.unnormalized_coords & (1 << index)) - { - OS << " vec2 tex" << index << "_coord_scale = 1. 
/ textureSize(" << PI.name << ", 0);\n"; - } - else - { - OS << " vec2 tex" << index << "_coord_scale = vec2(1.);\n"; - } + insert_texture_scale(OS, m_prog, index); } } @@ -292,8 +286,7 @@ void GLFragmentDecompilerThread::insertMainEnd(std::stringstream & OS) { if (m_prog.textures_alpha_kill[index]) { - std::string index_string = std::to_string(index); - std::string fetch_texture = "texture_fetch(" + index_string + ", tc" + index_string + " * ftex" + index_string + "_cm).a"; + std::string fetch_texture = insert_texture_fetch(m_prog, index) + ".a"; OS << make_comparison_test((rsx::comparison_function)m_prog.textures_zfunc[index], "", "0", fetch_texture); } } diff --git a/rpcs3/Emu/RSX/VK/VKFragmentProgram.cpp b/rpcs3/Emu/RSX/VK/VKFragmentProgram.cpp index f63c39c0b5..6bef080282 100644 --- a/rpcs3/Emu/RSX/VK/VKFragmentProgram.cpp +++ b/rpcs3/Emu/RSX/VK/VKFragmentProgram.cpp @@ -108,10 +108,18 @@ void VKFragmentDecompilerThread::insertOutputs(std::stringstream & OS) { "ocol3", m_ctrl & CELL_GCM_SHADER_CONTROL_32_BITS_EXPORTS ? 
"r4" : "h8" }, }; - for (int i = 0; i < sizeof(table) / sizeof(*table); ++i) + //We always bind the first usable image to index 0, even if surface type is surface_type::b + //If only surface 1 is being written to, redirect to output 0 + + if (m_parr.HasParam(PF_PARAM_NONE, "vec4", table[1].second) && !m_parr.HasParam(PF_PARAM_NONE, "vec4", table[0].second)) + OS << "layout(location=0) out vec4 " << table[1].first << ";" << std::endl; + else { - if (m_parr.HasParam(PF_PARAM_NONE, "vec4", table[i].second)) - OS << "layout(location=" << i << ") " << "out vec4 " << table[i].first << ";" << std::endl; + for (int i = 0; i < sizeof(table) / sizeof(*table); ++i) + { + if (m_parr.HasParam(PF_PARAM_NONE, "vec4", table[i].second)) + OS << "layout(location=" << i << ") " << "out vec4 " << table[i].first << ";" << std::endl; + } } } @@ -203,6 +211,20 @@ namespace vk return; } } + + std::string insert_texture_fetch(const RSXFragmentProgram& prog, int index) + { + std::string tex_name = "tex" + std::to_string(index); + std::string coord_name = "tc" + std::to_string(index); + + switch (prog.get_texture_dimension(index)) + { + case rsx::texture_dimension_extended::texture_dimension_1d: return "texture(" + tex_name + ", " + coord_name + ".x)"; + case rsx::texture_dimension_extended::texture_dimension_2d: return "texture(" + tex_name + ", " + coord_name + ".xy)"; + case rsx::texture_dimension_extended::texture_dimension_3d: + case rsx::texture_dimension_extended::texture_dimension_cubemap: return "texture(" + tex_name + ", " + coord_name + ".xyz)"; + } + } } void VKFragmentDecompilerThread::insertMainStart(std::stringstream & OS) @@ -314,27 +336,36 @@ void VKFragmentDecompilerThread::insertMainEnd(std::stringstream & OS) if (!first_output_name.empty()) { - switch (m_prog.alpha_func) + auto make_comparison_test = [](rsx::comparison_function compare_func, const std::string &test, const std::string &a, const std::string &b) -> std::string { - case rsx::comparison_function::equal: - OS 
<< " if (bool(alpha_test) && " << first_output_name << ".a != alpha_ref) discard;\n"; - break; - case rsx::comparison_function::not_equal: - OS << " if (bool(alpha_test) && " << first_output_name << ".a == alpha_ref) discard;\n"; - break; - case rsx::comparison_function::less_or_equal: - OS << " if (bool(alpha_test) && " << first_output_name << ".a > alpha_ref) discard;\n"; - break; - case rsx::comparison_function::less: - OS << " if (bool(alpha_test) && " << first_output_name << ".a >= alpha_ref) discard;\n"; - break; - case rsx::comparison_function::greater: - OS << " if (bool(alpha_test) && " << first_output_name << ".a <= alpha_ref) discard;\n"; - break; - case rsx::comparison_function::greater_or_equal: - OS << " if (bool(alpha_test) && " << first_output_name << ".a < alpha_ref) discard;\n"; - break; + if (compare_func == rsx::comparison_function::always) return{}; + + if (compare_func == rsx::comparison_function::never) return " discard;\n"; + + std::string compare; + switch (compare_func) + { + case rsx::comparison_function::equal: compare = " == "; break; + case rsx::comparison_function::not_equal: compare = " != "; break; + case rsx::comparison_function::less_or_equal: compare = " <= "; break; + case rsx::comparison_function::less: compare = " < "; break; + case rsx::comparison_function::greater: compare = " > "; break; + case rsx::comparison_function::greater_or_equal: compare = " >= "; break; + } + + return " if (" + test + "!(" + a + compare + b + ")) discard;\n"; + }; + + for (u8 index = 0; index < 16; ++index) + { + if (m_prog.textures_alpha_kill[index]) + { + std::string fetch_texture = vk::insert_texture_fetch(m_prog, index) + ".a"; + OS << make_comparison_test((rsx::comparison_function)m_prog.textures_zfunc[index], "", "0", fetch_texture); + } } + + OS << make_comparison_test(m_prog.alpha_func, "bool(alpha_test) && ", first_output_name + ".a", "alpha_ref"); } OS << "}" << std::endl; diff --git a/rpcs3/Emu/RSX/VK/VKGSRender.cpp 
b/rpcs3/Emu/RSX/VK/VKGSRender.cpp index df5a8c1f12..b95491b2d3 100644 --- a/rpcs3/Emu/RSX/VK/VKGSRender.cpp +++ b/rpcs3/Emu/RSX/VK/VKGSRender.cpp @@ -842,8 +842,7 @@ void VKGSRender::clear_surface(u32 mask) { // Ignore clear if surface target is set to CELL_GCM_SURFACE_TARGET_NONE if (rsx::method_registers.surface_color_target() == rsx::surface_target::none) return; - - //TODO: Build clear commands into current renderpass descriptor set + if (!(mask & 0xF3)) return; if (m_current_present_image == 0xFFFF) return; @@ -851,11 +850,22 @@ void VKGSRender::clear_surface(u32 mask) float depth_clear = 1.f; u32 stencil_clear = 0; + u32 depth_stencil_mask = 0; + + std::vector<VkClearAttachment> clear_descriptors; + std::vector<VkClearRect> clear_regions; VkClearValue depth_stencil_clear_values, color_clear_values; - VkImageSubresourceRange depth_range = vk::get_image_subresource_range(0, 0, 1, 1, 0); - rsx::surface_depth_format surface_depth_format = rsx::method_registers.surface_depth_fmt(); + u16 scissor_x = rsx::method_registers.scissor_origin_x(); + u16 scissor_w = rsx::method_registers.scissor_width(); + u16 scissor_y = rsx::method_registers.scissor_origin_y(); + u16 scissor_h = rsx::method_registers.scissor_height(); + + VkClearRect region = { { { scissor_x, scissor_y },{ scissor_w, scissor_h } }, 0, 1 }; + + auto targets = vk::get_draw_buffers(rsx::method_registers.surface_color_target()); + auto surface_depth_format = rsx::method_registers.surface_depth_fmt(); if (mask & 0x1) { @@ -864,19 +874,23 @@ void VKGSRender::clear_surface(u32 mask) u32 clear_depth = rsx::method_registers.z_clear_value(); float depth_clear = (float)clear_depth / max_depth_value; - depth_range.aspectMask |= VK_IMAGE_ASPECT_DEPTH_BIT; depth_stencil_clear_values.depthStencil.depth = depth_clear; depth_stencil_clear_values.depthStencil.stencil = stencil_clear; + + depth_stencil_mask |= VK_IMAGE_ASPECT_DEPTH_BIT; } if (mask & 0x2) { - u8 clear_stencil = rsx::method_registers.stencil_clear_value(); - u32 stencil_mask = 
rsx::method_registers.stencil_mask(); + if (surface_depth_format == rsx::surface_depth_format::z24s8) + { + u8 clear_stencil = rsx::method_registers.stencil_clear_value(); + u32 stencil_mask = rsx::method_registers.stencil_mask(); - //TODO set stencil mask - depth_range.aspectMask |= VK_IMAGE_ASPECT_STENCIL_BIT; - depth_stencil_clear_values.depthStencil.stencil = stencil_mask; + depth_stencil_clear_values.depthStencil.stencil = stencil_mask; + + depth_stencil_mask |= VK_IMAGE_ASPECT_STENCIL_BIT; + } } if (mask & 0xF0) @@ -886,38 +900,44 @@ void VKGSRender::clear_surface(u32 mask) u8 clear_g = rsx::method_registers.clear_color_g(); u8 clear_b = rsx::method_registers.clear_color_b(); - //TODO set color mask - /*VkBool32 clear_red = (VkBool32)!!(mask & 0x20); - VkBool32 clear_green = (VkBool32)!!(mask & 0x40); - VkBool32 clear_blue = (VkBool32)!!(mask & 0x80); - VkBool32 clear_alpha = (VkBool32)!!(mask & 0x10);*/ - color_clear_values.color.float32[0] = (float)clear_r / 255; color_clear_values.color.float32[1] = (float)clear_g / 255; color_clear_values.color.float32[2] = (float)clear_b / 255; color_clear_values.color.float32[3] = (float)clear_a / 255; - for (u32 i = 0; i < m_rtts.m_bound_render_targets.size(); ++i) + for (int index = 0; index < targets.size(); ++index) { - VkImageSubresourceRange range = vk::get_image_subresource_range(0, 0, 1, 1, VK_IMAGE_ASPECT_COLOR_BIT); - if (std::get<1>(m_rtts.m_bound_render_targets[i]) == nullptr) continue; - - VkImage color_image = std::get<1>(m_rtts.m_bound_render_targets[i])->value; - change_image_layout(m_command_buffer, color_image, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, VK_IMAGE_LAYOUT_GENERAL, range); - vkCmdClearColorImage(m_command_buffer, color_image, VK_IMAGE_LAYOUT_GENERAL, &color_clear_values.color, 1, &range); - change_image_layout(m_command_buffer, color_image, VK_IMAGE_LAYOUT_GENERAL, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, range); + clear_descriptors.push_back({ VK_IMAGE_ASPECT_COLOR_BIT, (uint32_t)index, 
color_clear_values }); + clear_regions.push_back(region); } } if (mask & 0x3) { - VkImageAspectFlags depth_stencil_aspect = (surface_depth_format == rsx::surface_depth_format::z24s8) ? (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT) : VK_IMAGE_ASPECT_DEPTH_BIT; - VkImage depth_stencil_image = std::get<1>(m_rtts.m_bound_depth_stencil)->value; - change_image_layout(m_command_buffer, depth_stencil_image, VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL, VK_IMAGE_LAYOUT_GENERAL, vk::get_image_subresource_range(0, 0, 1, 1, depth_stencil_aspect)); - vkCmdClearDepthStencilImage(m_command_buffer, std::get<1>(m_rtts.m_bound_depth_stencil)->value, VK_IMAGE_LAYOUT_GENERAL, &depth_stencil_clear_values.depthStencil, 1, &depth_range); - change_image_layout(m_command_buffer, depth_stencil_image, VK_IMAGE_LAYOUT_GENERAL, VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL, vk::get_image_subresource_range(0, 0, 1, 1, depth_stencil_aspect)); + clear_descriptors.push_back({ (VkImageAspectFlags)depth_stencil_mask, 0, depth_stencil_clear_values }); + clear_regions.push_back(region); } + size_t idx = vk::get_render_pass_location( + vk::get_compatible_surface_format(rsx::method_registers.surface_color()).first, + vk::get_compatible_depth_surface_format(m_optimal_tiling_supported_formats, surface_depth_format), + (u8)targets.size()); + VkRenderPass current_render_pass = m_render_passes[idx]; + + VkRenderPassBeginInfo rp_begin = {}; + rp_begin.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO; + rp_begin.renderPass = current_render_pass; + rp_begin.framebuffer = m_framebuffer_to_clean.back()->value; + rp_begin.renderArea.offset.x = 0; + rp_begin.renderArea.offset.y = 0; + rp_begin.renderArea.extent.width = m_framebuffer_to_clean.back()->width(); + rp_begin.renderArea.extent.height = m_framebuffer_to_clean.back()->height(); + + vkCmdBeginRenderPass(m_command_buffer, &rp_begin, VK_SUBPASS_CONTENTS_INLINE); + + vkCmdClearAttachments(m_command_buffer, clear_descriptors.size(), 
clear_descriptors.data(), clear_regions.size(), clear_regions.data()); + + vkCmdEndRenderPass(m_command_buffer); } void VKGSRender::sync_at_semaphore_release() diff --git a/rpcs3/Emu/RSX/VK/VKHelpers.h b/rpcs3/Emu/RSX/VK/VKHelpers.h index b9539fe5f8..bfdccbd97f 100644 --- a/rpcs3/Emu/RSX/VK/VKHelpers.h +++ b/rpcs3/Emu/RSX/VK/VKHelpers.h @@ -165,7 +165,7 @@ namespace vk float queue_priorities[1] = { 0.f }; pgpu = &pdev; - VkDeviceQueueCreateInfo queue; + VkDeviceQueueCreateInfo queue = {}; queue.sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO; queue.pNext = NULL; queue.queueFamilyIndex = graphics_queue_idx; @@ -183,7 +183,7 @@ namespace vk if (g_cfg_rsx_debug_output) layers.push_back("VK_LAYER_LUNARG_standard_validation"); - VkDeviceCreateInfo device; + VkDeviceCreateInfo device = {}; device.sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO; device.pNext = NULL; device.queueCreateInfoCount = 1; diff --git a/rpcs3/Emu/RSX/VK/VKProgramBuffer.h b/rpcs3/Emu/RSX/VK/VKProgramBuffer.h index a5cd609b14..6d17620489 100644 --- a/rpcs3/Emu/RSX/VK/VKProgramBuffer.h +++ b/rpcs3/Emu/RSX/VK/VKProgramBuffer.h @@ -29,6 +29,8 @@ namespace vk return false; if (memcmp(&rs, &other.rs, sizeof(VkPipelineRasterizationStateCreateInfo))) return false; + if (render_pass != other.render_pass) + return false; return num_targets == other.num_targets; }