From 4a5bbba2778efee7151ce5b5a876917ca10f1b49 Mon Sep 17 00:00:00 2001 From: kd-11 Date: Thu, 30 May 2019 18:38:18 +0300 Subject: [PATCH] rsx: Enable MSAA - vk: Enable depth buffer resolve+unresolve - vk: Add AMD stenciling extension support - rsx: Temporarily disables MSAA-compatible hacks such as transparency AA - TODO: Add paths to optionally disable MSAA --- rpcs3/Emu/RSX/Common/TextureUtils.h | 3 +- rpcs3/Emu/RSX/Common/surface_store.h | 4 +- rpcs3/Emu/RSX/Common/surface_utils.h | 41 ++- rpcs3/Emu/RSX/Common/texture_cache.h | 8 +- rpcs3/Emu/RSX/GL/GLRenderTargets.h | 5 +- rpcs3/Emu/RSX/RSXThread.cpp | 4 +- rpcs3/Emu/RSX/VK/VKCompute.h | 70 +++-- rpcs3/Emu/RSX/VK/VKGSRender.cpp | 46 ++- rpcs3/Emu/RSX/VK/VKHelpers.cpp | 37 +++ rpcs3/Emu/RSX/VK/VKHelpers.h | 46 ++- rpcs3/Emu/RSX/VK/VKOverlays.h | 11 +- rpcs3/Emu/RSX/VK/VKProgramBuffer.h | 50 +-- rpcs3/Emu/RSX/VK/VKProgramPipeline.cpp | 26 +- rpcs3/Emu/RSX/VK/VKRenderPass.cpp | 2 +- rpcs3/Emu/RSX/VK/VKRenderTargets.h | 413 ++++++++++++++++++++----- rpcs3/Emu/RSX/VK/VKResolveHelper.cpp | 181 +++++++++++ rpcs3/Emu/RSX/VK/VKResolveHelper.h | 406 ++++++++++++++++++++++++ rpcs3/Emu/RSX/rsx_utils.h | 26 +- rpcs3/VKGSRender.vcxproj | 6 +- rpcs3/VKGSRender.vcxproj.filters | 10 +- 20 files changed, 1214 insertions(+), 181 deletions(-) create mode 100644 rpcs3/Emu/RSX/VK/VKResolveHelper.cpp create mode 100644 rpcs3/Emu/RSX/VK/VKResolveHelper.h diff --git a/rpcs3/Emu/RSX/Common/TextureUtils.h b/rpcs3/Emu/RSX/Common/TextureUtils.h index 356c466d81..c82d8a8181 100644 --- a/rpcs3/Emu/RSX/Common/TextureUtils.h +++ b/rpcs3/Emu/RSX/Common/TextureUtils.h @@ -38,7 +38,8 @@ namespace rsx enum surface_access : u32 { read = 0, - write = 1 + write = 1, + transfer = 2 }; //Sampled image descriptor diff --git a/rpcs3/Emu/RSX/Common/surface_store.h b/rpcs3/Emu/RSX/Common/surface_store.h index 5e904f801a..8cde579508 100644 --- a/rpcs3/Emu/RSX/Common/surface_store.h +++ b/rpcs3/Emu/RSX/Common/surface_store.h @@ -677,11 +677,11 @@ namespace rsx { for (auto &tex_info : data) { - auto this_address = std::get<0>(tex_info); + const auto this_address = tex_info.first; if (this_address >= limit) continue; - auto surface = std::get<1>(tex_info).get(); + auto surface = tex_info.second.get(); const auto pitch = surface->get_rsx_pitch(); if (!rsx::pitch_compatible(surface, required_pitch, required_height)) continue; diff --git a/rpcs3/Emu/RSX/Common/surface_utils.h b/rpcs3/Emu/RSX/Common/surface_utils.h index f46c948010..f1e83f3667 100644 --- a/rpcs3/Emu/RSX/Common/surface_utils.h +++ b/rpcs3/Emu/RSX/Common/surface_utils.h @@ -11,7 +11,9 @@ namespace rsx enum surface_state_flags : u32 { ready = 0, - erase_bkgnd = 1 + erase_bkgnd = 1, + require_resolve = 2, + require_unresolve = 4 }; template @@ -119,8 +121,11 @@ namespace rsx u8 samples_x = 1; u8 samples_y = 1; + std::unique_ptr::type> resolve_surface; + flags32_t memory_usage_flags = surface_usage_flags::unknown; flags32_t state_flags = surface_state_flags::ready; + flags32_t msaa_flags = surface_state_flags::ready; union { @@ -410,7 +415,7 @@ namespace rsx } } - void on_write(u64 write_tag = 0) + void on_write(u64 write_tag = 0, rsx::surface_state_flags resolve_flags = surface_state_flags::require_resolve) { if (write_tag) { @@ -424,12 +429,22 @@ namespace rsx // HACK!! This should be cleared through memory barriers only state_flags = rsx::surface_state_flags::ready; + if (spp > 1) + { + msaa_flags = resolve_flags; + } + if (old_contents.source) { clear_rw_barrier(); } } + void on_write_copy(u64 write_tag = 0) + { + on_write(write_tag, rsx::surface_state_flags::require_unresolve); + } + // Returns the rect area occupied by this surface expressed as an 8bpp image with no AA areau get_normalized_memory_area() const { @@ -456,6 +471,17 @@ namespace rsx area.y2 /= samples_y; } + template + void transform_pixels_to_samples(area_base& area) + { + if (LIKELY(spp == 1)) return; + + area.x1 *= samples_x; + area.x2 *= samples_x; + area.y1 *= samples_y; + area.y2 *= samples_y; + } + template void transform_samples_to_pixels(T& x1, T& x2, T& y1, T& y2) { @@ -466,5 +492,16 @@ namespace rsx y1 /= samples_y; y2 /= samples_y; } + + template + void transform_pixels_to_samples(T& x1, T& x2, T& y1, T& y2) + { + if (LIKELY(spp == 1)) return; + + x1 *= samples_x; + x2 *= samples_x; + y1 *= samples_y; + y2 *= samples_y; + } }; } diff --git a/rpcs3/Emu/RSX/Common/texture_cache.h b/rpcs3/Emu/RSX/Common/texture_cache.h index ff76d8edd7..b1664c98a9 100644 --- a/rpcs3/Emu/RSX/Common/texture_cache.h +++ b/rpcs3/Emu/RSX/Common/texture_cache.h @@ -2564,7 +2564,7 @@ namespace rsx // Destination dimensions are relaxed (true) dst_area = dst_subres.get_src_area(); - dest_texture = dst_subres.surface->get_surface(rsx::surface_access::write); + dest_texture = dst_subres.surface->get_surface(rsx::surface_access::transfer); typeless_info.dst_context = texture_upload_context::framebuffer_storage; max_dst_width = (u16)(dst_subres.surface->get_surface_width(rsx::surface_metrics::samples) * typeless_info.dst_scaling_hint); @@ -2851,7 +2851,7 @@ namespace rsx } else { - dst_subres.surface->on_write(rsx::get_shared_tag()); + dst_subres.surface->on_write_copy(rsx::get_shared_tag()); m_rtts.notify_memory_structure_changed(); } @@ -2889,13 +2889,13 @@ namespace rsx } } - if (src_is_render_target) + if (0)//src_is_render_target) { // TODO: Specify typeless for high sample counts src_subres.surface->transform_samples_to_pixels(src_area); } - if (dst_is_render_target) + if (0)//dst_is_render_target) { // TODO: Specify typeless for high sample counts dst_subres.surface->transform_samples_to_pixels(dst_area); diff --git a/rpcs3/Emu/RSX/GL/GLRenderTargets.h b/rpcs3/Emu/RSX/GL/GLRenderTargets.h index 82df7c90b9..045d531805 100644 --- a/rpcs3/Emu/RSX/GL/GLRenderTargets.h +++ b/rpcs3/Emu/RSX/GL/GLRenderTargets.h @@ -94,7 +94,7 @@ namespace gl static_cast(t)->release(); } - texture* get_surface(rsx::surface_access access_type) override + texture* get_surface(rsx::surface_access /*access_type*/) override { // TODO return (gl::texture*)this; @@ -259,7 +259,6 @@ struct gl_render_target_traits void invalidate_surface_contents(gl::command_context&, gl::render_target *surface, u32 address, size_t pitch) { surface->set_rsx_pitch((u16)pitch); - surface->set_aa_mode(rsx::surface_antialiasing::center_1_sample); surface->queue_tag(address); surface->last_use_tag = 0; surface->memory_usage_flags = rsx::surface_usage_flags::unknown; @@ -278,7 +277,7 @@ struct gl_render_target_traits } static - void notify_surface_persist(const std::unique_ptr& surface) + void notify_surface_persist(const std::unique_ptr& /*surface*/) {} static diff --git a/rpcs3/Emu/RSX/RSXThread.cpp b/rpcs3/Emu/RSX/RSXThread.cpp index 50efb92aa8..fb18801480 100644 --- a/rpcs3/Emu/RSX/RSXThread.cpp +++ b/rpcs3/Emu/RSX/RSXThread.cpp @@ -658,8 +658,8 @@ namespace rsx auto alpha_ref = rsx::method_registers.alpha_ref() / 255.f; auto rop_control = rsx::method_registers.alpha_test_enabled()? 1u : 0u; - if (rsx::method_registers.msaa_alpha_to_coverage_enabled() && - rsx::method_registers.msaa_enabled() && + if (0 && + rsx::method_registers.msaa_alpha_to_coverage_enabled() && rsx::method_registers.surface_antialias() != rsx::surface_antialiasing::center_1_sample) { // Alpha values generate a coverage mask for order independent blending diff --git a/rpcs3/Emu/RSX/VK/VKCompute.h b/rpcs3/Emu/RSX/VK/VKCompute.h index b2e9105245..7097ba69ef 100644 --- a/rpcs3/Emu/RSX/VK/VKCompute.h +++ b/rpcs3/Emu/RSX/VK/VKCompute.h @@ -25,35 +25,49 @@ namespace vk u32 optimal_group_size = 1; u32 optimal_kernel_size = 1; + virtual std::vector> get_descriptor_layout() + { + std::vector> result; + result.push_back({ VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1 }); + + if (uniform_inputs) + { + result.push_back({ VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, 1 }); + } + + return result; + } + void init_descriptors() { - VkDescriptorPoolSize descriptor_pool_sizes[2] = + std::vector descriptor_pool_sizes; + std::vector bindings; + + const auto layout = get_descriptor_layout(); + for (const auto &e : layout) { - { VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_MAX_COMPUTE_TASKS }, - { VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, VK_MAX_COMPUTE_TASKS } - }; + descriptor_pool_sizes.push_back({e.first, u32(VK_MAX_COMPUTE_TASKS * e.second)}); + + for (unsigned n = 0; n < e.second; ++n) + { + bindings.push_back + ({ + uint32_t(bindings.size()), + e.first, + 1, + VK_SHADER_STAGE_COMPUTE_BIT, + nullptr + }); + } + } // Reserve descriptor pools - m_descriptor_pool.create(*get_current_renderer(), descriptor_pool_sizes, 2, VK_MAX_COMPUTE_TASKS, 2); - - std::vector bindings(2); - - bindings[0].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; - bindings[0].descriptorCount = 1; - bindings[0].stageFlags = VK_SHADER_STAGE_COMPUTE_BIT; - bindings[0].binding = 0; - bindings[0].pImmutableSamplers = nullptr; - - bindings[1].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; - bindings[1].descriptorCount = 1; - bindings[1].stageFlags = VK_SHADER_STAGE_COMPUTE_BIT; - bindings[1].binding = 1; - bindings[1].pImmutableSamplers = nullptr; + m_descriptor_pool.create(*get_current_renderer(), descriptor_pool_sizes.data(), (u32)descriptor_pool_sizes.size(), VK_MAX_COMPUTE_TASKS, 2); VkDescriptorSetLayoutCreateInfo infos = {}; infos.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO; infos.pBindings = bindings.data(); - infos.bindingCount = uniform_inputs? 2u : 1u; + infos.bindingCount = (u32)bindings.size(); CHECK_RESULT(vkCreateDescriptorSetLayout(*get_current_renderer(), &infos, nullptr, &m_descriptor_layout)); @@ -120,6 +134,9 @@ namespace vk virtual void bind_resources() {} + virtual void declare_inputs() + {} + void load_program(VkCommandBuffer cmd) { if (!m_program) @@ -143,8 +160,8 @@ namespace vk VkPipeline pipeline; vkCreateComputePipelines(*get_current_renderer(), nullptr, 1, &info, nullptr, &pipeline); - std::vector inputs; - m_program = std::make_unique(*get_current_renderer(), pipeline, inputs, inputs); + m_program = std::make_unique(*get_current_renderer(), pipeline); + declare_inputs(); } verify(HERE), m_used_descriptors < VK_MAX_COMPUTE_TASKS; @@ -164,10 +181,15 @@ namespace vk vkCmdBindDescriptorSets(cmd, VK_PIPELINE_BIND_POINT_COMPUTE, m_pipeline_layout, 0, 1, &m_descriptor_set, 0, nullptr); } - virtual void run(VkCommandBuffer cmd, u32 num_invocations) + virtual void run(VkCommandBuffer cmd, u32 invocations_x, u32 invocations_y) { load_program(cmd); - vkCmdDispatch(cmd, num_invocations, 1, 1); + vkCmdDispatch(cmd, invocations_x, invocations_y, 1); + } + + virtual void run(VkCommandBuffer cmd, u32 num_invocations) + { + run(cmd, num_invocations, 1); } }; diff --git a/rpcs3/Emu/RSX/VK/VKGSRender.cpp b/rpcs3/Emu/RSX/VK/VKGSRender.cpp index 3d9614570d..3ac335f95d 100644 --- a/rpcs3/Emu/RSX/VK/VKGSRender.cpp +++ b/rpcs3/Emu/RSX/VK/VKGSRender.cpp @@ -89,10 +89,10 @@ namespace vk return std::make_pair(VK_FORMAT_R32G32B32A32_SFLOAT, vk::default_component_map()); case rsx::surface_color_format::x1r5g5b5_o1r5g5b5: - return std::make_pair(VK_FORMAT_B8G8R8A8_UNORM, o_rgb); + return std::make_pair(VK_FORMAT_A1R5G5B5_UNORM_PACK16, o_rgb); case rsx::surface_color_format::x1r5g5b5_z1r5g5b5: - return std::make_pair(VK_FORMAT_B8G8R8A8_UNORM, z_rgb); + return std::make_pair(VK_FORMAT_A1R5G5B5_UNORM_PACK16, z_rgb); case rsx::surface_color_format::b8: { @@ -539,7 +539,7 @@ VKGSRender::VKGSRender() : GSRender() else m_vertex_cache = std::make_unique(); - m_shaders_cache = std::make_unique(*m_prog_buffer, "vulkan", "v1.7"); + m_shaders_cache = std::make_unique(*m_prog_buffer, "vulkan", "v1.8"); open_command_buffer(); @@ -1566,7 +1566,11 @@ void VKGSRender::end() if (!image_ptr) { LOG_ERROR(RSX, "Texture upload failed to vtexture index %d. Binding null sampler.", i); - m_program->bind_uniform({ vk::null_sampler(), vk::null_image_view(*m_current_command_buffer)->value, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL }, rsx::constants::vertex_texture_names[i], m_current_frame->descriptor_set); + m_program->bind_uniform({ vk::null_sampler(), vk::null_image_view(*m_current_command_buffer)->value, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL }, + i, + ::glsl::program_domain::glsl_vertex_program, + m_current_frame->descriptor_set); + continue; } @@ -1623,16 +1627,6 @@ void VKGSRender::end() m_current_command_buffer->flags |= vk::command_buffer::cb_has_occlusion_task; } - // Final heap check... - check_heap_status(VK_HEAP_CHECK_VERTEX_STORAGE | VK_HEAP_CHECK_VERTEX_LAYOUT_STORAGE); - - // While vertex upload is an interruptible process, if we made it this far, there's no need to sync anything that occurs past this point - // Only textures are synchronized tightly with the GPU and they have been read back above - vk::enter_uninterruptible(); - - vkCmdBindPipeline(*m_current_command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, m_program->pipeline); - update_draw_state(); - // Apply write memory barriers if (true)//g_cfg.video.strict_rendering_mode) { @@ -1682,6 +1676,16 @@ void VKGSRender::end() } } + // Final heap check... + check_heap_status(VK_HEAP_CHECK_VERTEX_STORAGE | VK_HEAP_CHECK_VERTEX_LAYOUT_STORAGE); + + // While vertex upload is an interruptible process, if we made it this far, there's no need to sync anything that occurs past this point + // Only textures are synchronized tightly with the GPU and they have been read back above + vk::enter_uninterruptible(); + + vkCmdBindPipeline(*m_current_command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, m_program->pipeline); + update_draw_state(); + u32 sub_index = 0; rsx::method_registers.current_draw_clause.begin(); do @@ -2238,7 +2242,7 @@ void VKGSRender::frame_context_cleanup(frame_context_t *ctx, bool free_resources m_overlay_manager->dispose(uids_to_dispose); } - vk::reset_compute_tasks(); + vk::reset_global_resources(); m_attachment_clear_pass->free_resources(); m_depth_converter->free_resources(); @@ -2472,8 +2476,18 @@ bool VKGSRender::load_program() } } + const auto rasterization_samples = u8((m_current_renderpass_key >> 16) & 0xF); + if (rasterization_samples > 1) + { + properties.state.set_multisample_state( + rasterization_samples, + rsx::method_registers.msaa_sample_mask(), + rsx::method_registers.msaa_enabled(), + rsx::method_registers.msaa_alpha_to_coverage_enabled(), + rsx::method_registers.msaa_alpha_to_one_enabled()); + } + properties.renderpass_key = m_current_renderpass_key; - properties.num_targets = (u32)m_draw_buffers.size(); vk::enter_uninterruptible(); diff --git a/rpcs3/Emu/RSX/VK/VKHelpers.cpp b/rpcs3/Emu/RSX/VK/VKHelpers.cpp index 40630eebc0..d1dcad6405 100644 --- a/rpcs3/Emu/RSX/VK/VKHelpers.cpp +++ b/rpcs3/Emu/RSX/VK/VKHelpers.cpp @@ -3,6 +3,7 @@ #include "VKCompute.h" #include "VKRenderPass.h" #include "VKFramebuffer.h" +#include "VKResolveHelper.h" #include "Utilities/mutex.h" namespace vk @@ -234,11 +235,18 @@ namespace vk } } + void reset_global_resources() + { + vk::reset_compute_tasks(); + vk::reset_resolve_resources(); + } + void destroy_global_resources() { VkDevice dev = *g_current_renderer; vk::clear_renderpass_cache(dev); vk::clear_framebuffer_cache(); + vk::clear_resolve_helpers(); g_null_texture.reset(); g_null_image_view.reset(); @@ -416,6 +424,27 @@ namespace vk vkCmdPipelineBarrier(cmd, src_stage, dst_stage, 0, 0, nullptr, 1, &barrier, 0, nullptr); } + void insert_image_memory_barrier( + VkCommandBuffer cmd, VkImage image, + VkImageLayout current_layout, VkImageLayout new_layout, + VkPipelineStageFlags src_stage, VkPipelineStageFlags dst_stage, + VkAccessFlags src_mask, VkAccessFlags dst_mask, + const VkImageSubresourceRange& range) + { + VkImageMemoryBarrier barrier = {}; + barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; + barrier.newLayout = new_layout; + barrier.oldLayout = current_layout; + barrier.image = image; + barrier.srcAccessMask = src_mask; + barrier.dstAccessMask = dst_mask; + barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + barrier.subresourceRange = range; + + vkCmdPipelineBarrier(cmd, src_stage, dst_stage, 0, 0, nullptr, 0, nullptr, 1, &barrier); + } + void change_image_layout(VkCommandBuffer cmd, VkImage image, VkImageLayout current_layout, VkImageLayout new_layout, const VkImageSubresourceRange& range) { //Prepare an image to match the new layout.. @@ -615,6 +644,12 @@ namespace vk void insert_texture_barrier(VkCommandBuffer cmd, vk::image *image, VkImageLayout new_layout) { + if (image->samples() > 1) + { + // This barrier is pointless for multisampled images as they require a resolve operation before access anyway + return; + } + insert_texture_barrier(cmd, image->value, image->current_layout, new_layout, { image->aspect(), 0, 1, 0, 1 }); image->current_layout = new_layout; } @@ -835,6 +870,8 @@ namespace vk { if (msgFlags & VK_DEBUG_REPORT_ERROR_BIT_EXT) { + if (strstr(pMsg, "IMAGE_VIEW_TYPE_1D")) return false; + LOG_ERROR(RSX, "ERROR: [%s] Code %d : %s", pLayerPrefix, msgCode, pMsg); } else if (msgFlags & VK_DEBUG_REPORT_WARNING_BIT_EXT) diff --git a/rpcs3/Emu/RSX/VK/VKHelpers.h b/rpcs3/Emu/RSX/VK/VKHelpers.h index 4792f15251..a1093cea4d 100644 --- a/rpcs3/Emu/RSX/VK/VKHelpers.h +++ b/rpcs3/Emu/RSX/VK/VKHelpers.h @@ -133,6 +133,7 @@ namespace vk void reset_compute_tasks(); void destroy_global_resources(); + void reset_global_resources(); /** * Allocate enough space in upload_buffer and write all mipmap/layer data into the subbuffer. @@ -171,6 +172,10 @@ namespace vk void insert_buffer_memory_barrier(VkCommandBuffer cmd, VkBuffer buffer, VkDeviceSize offset, VkDeviceSize length, VkPipelineStageFlags src_stage, VkPipelineStageFlags dst_stage, VkAccessFlags src_mask, VkAccessFlags dst_mask); + + void insert_image_memory_barrier(VkCommandBuffer cmd, VkImage image, VkImageLayout current_layout, VkImageLayout new_layout, + VkPipelineStageFlags src_stage, VkPipelineStageFlags dst_stage, VkAccessFlags src_mask, VkAccessFlags dst_mask, + const VkImageSubresourceRange& range); //Manage 'uininterruptible' state where secondary operations (e.g violation handlers) will have to wait void enter_uninterruptible(); @@ -1217,7 +1222,7 @@ namespace vk public: using image::image; - image_view* get_view(u32 remap_encoding, const std::pair, std::array>& remap, + virtual image_view* get_view(u32 remap_encoding, const std::pair, std::array>& remap, VkImageAspectFlags mask = VK_IMAGE_ASPECT_COLOR_BIT | VK_IMAGE_ASPECT_DEPTH_BIT) { auto found = views.equal_range(remap_encoding); @@ -2799,6 +2804,13 @@ public: VkPipelineColorBlendAttachmentState att_state[4]; VkPipelineColorBlendStateCreateInfo cs; VkPipelineRasterizationStateCreateInfo rs; + VkPipelineMultisampleStateCreateInfo ms; + + struct extra_parameters + { + VkSampleMask msaa_sample_mask; + } + temp_storage; graphics_pipeline_state() { @@ -2814,6 +2826,10 @@ public: rs.cullMode = VK_CULL_MODE_NONE; rs.frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE; rs.lineWidth = 1.f; + + ms.sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO; + ms.rasterizationSamples = VK_SAMPLE_COUNT_1_BIT; + temp_storage.msaa_sample_mask = 0xFFFFFFFF; } graphics_pipeline_state(const graphics_pipeline_state& other) @@ -2973,6 +2989,27 @@ public: cs.attachmentCount = count; cs.pAttachments = att_state; } + + void set_multisample_state(u8 sample_count, u32 sample_mask, bool msaa_enabled, bool alpha_to_coverage, bool alpha_to_one) + { + temp_storage.msaa_sample_mask = sample_mask; + + ms.rasterizationSamples = static_cast(sample_count); + ms.alphaToCoverageEnable = alpha_to_coverage; + ms.alphaToOneEnable = alpha_to_one; + + if (!msaa_enabled) + { + // This register is likely glMinSampleShading but in reverse; probably sets max sample shading rate of 1 + // I (kd-11) suspect its what the control panel setting affects when MSAA is set to disabled + } + } + + void set_multisample_shading_rate(float shading_rate) + { + ms.sampleShadingEnable = VK_TRUE; + ms.minSampleShading = shading_rate; + } }; namespace glsl @@ -3095,21 +3132,24 @@ public: std::array vs_texture_bindings; bool linked; + void create_impl(); + public: VkPipeline pipeline; u64 attribute_location_mask; u64 vertex_attributes_mask; program(VkDevice dev, VkPipeline p, const std::vector &vertex_input, const std::vector& fragment_inputs); + program(VkDevice dev, VkPipeline p); program(const program&) = delete; program(program&& other) = delete; ~program(); - program& load_uniforms(::glsl::program_domain domain, const std::vector& inputs); + program& load_uniforms(const std::vector& inputs); program& link(); bool has_uniform(program_input_type type, const std::string &uniform_name); - void bind_uniform(const VkDescriptorImageInfo &image_descriptor, const std::string &uniform_name, VkDescriptorSet &descriptor_set); + void bind_uniform(const VkDescriptorImageInfo &image_descriptor, const std::string &uniform_name, VkDescriptorType type, VkDescriptorSet &descriptor_set); void bind_uniform(const VkDescriptorImageInfo &image_descriptor, int texture_unit, ::glsl::program_domain domain, VkDescriptorSet &descriptor_set, bool is_stencil_mirror = false); void bind_uniform(const VkDescriptorBufferInfo &buffer_descriptor, uint32_t binding_point, VkDescriptorSet &descriptor_set); void bind_uniform(const VkBufferView &buffer_view, program_input_type type, const std::string &binding_name, VkDescriptorSet &descriptor_set); diff --git a/rpcs3/Emu/RSX/VK/VKOverlays.h b/rpcs3/Emu/RSX/VK/VKOverlays.h index b5efdabaf0..cf11294db1 100644 --- a/rpcs3/Emu/RSX/VK/VKOverlays.h +++ b/rpcs3/Emu/RSX/VK/VKOverlays.h @@ -80,7 +80,7 @@ namespace vk bindings[0].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; bindings[0].descriptorCount = 1; - bindings[0].stageFlags = VK_SHADER_STAGE_VERTEX_BIT; + bindings[0].stageFlags = VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT; bindings[0].binding = 0; bindings[0].pImmutableSamplers = nullptr; @@ -187,11 +187,6 @@ namespace vk vp.scissorCount = 1; vp.viewportCount = 1; - VkPipelineMultisampleStateCreateInfo ms = {}; - ms.sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO; - ms.pSampleMask = NULL; - ms.rasterizationSamples = VK_SAMPLE_COUNT_1_BIT; - VkPipeline pipeline; VkGraphicsPipelineCreateInfo info = {}; info.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO; @@ -199,7 +194,7 @@ namespace vk info.pInputAssemblyState = &renderpass_config.ia; info.pRasterizationState = &renderpass_config.rs; info.pColorBlendState = &renderpass_config.cs; - info.pMultisampleState = &ms; + info.pMultisampleState = &renderpass_config.ms; info.pViewportState = &vp; info.pDepthStencilState = &renderpass_config.ds; info.stageCount = 2; @@ -253,7 +248,7 @@ namespace vk for (int n = 0; n < src.size(); ++n) { VkDescriptorImageInfo info = { m_sampler->value, src[n]->value, src[n]->image()->current_layout }; - program->bind_uniform(info, "fs" + std::to_string(n), m_descriptor_set); + program->bind_uniform(info, "fs" + std::to_string(n), VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, m_descriptor_set); } vkCmdBindPipeline(cmd, VK_PIPELINE_BIND_POINT_GRAPHICS, program->pipeline); diff --git a/rpcs3/Emu/RSX/VK/VKProgramBuffer.h b/rpcs3/Emu/RSX/VK/VKProgramBuffer.h index 6e0b42c9fc..68d5ea1395 100644 --- a/rpcs3/Emu/RSX/VK/VKProgramBuffer.h +++ b/rpcs3/Emu/RSX/VK/VKProgramBuffer.h @@ -11,36 +11,38 @@ namespace vk struct pipeline_props { graphics_pipeline_state state; - int num_targets; u64 renderpass_key; bool operator==(const pipeline_props& other) const { - if (memcmp(&state.att_state[0], &other.state.att_state[0], sizeof(VkPipelineColorBlendAttachmentState))) - return false; - if (renderpass_key != other.renderpass_key) return false; - if (memcmp(&state.rs, &other.state.rs, sizeof(VkPipelineRasterizationStateCreateInfo))) - return false; - - //Cannot memcmp cs due to pAttachments being a pointer to memory - if (state.cs.attachmentCount != other.state.cs.attachmentCount || - state.cs.flags != other.state.cs.flags || - state.cs.logicOp != other.state.cs.logicOp || - state.cs.logicOpEnable != other.state.cs.logicOpEnable || - state.cs.sType != other.state.cs.sType || - memcmp(state.cs.blendConstants, other.state.cs.blendConstants, 4 * sizeof(f32))) - return false; - if (memcmp(&state.ia, &other.state.ia, sizeof(VkPipelineInputAssemblyStateCreateInfo))) return false; + if (memcmp(&state.att_state[0], &other.state.att_state[0], sizeof(VkPipelineColorBlendAttachmentState))) + return false; + + if (memcmp(&state.rs, &other.state.rs, sizeof(VkPipelineRasterizationStateCreateInfo))) + return false; + + // Cannot memcmp cs due to pAttachments being a pointer to memory + if (state.cs.logicOp != other.state.cs.logicOp || + state.cs.logicOpEnable != other.state.cs.logicOpEnable || + memcmp(state.cs.blendConstants, other.state.cs.blendConstants, 4 * sizeof(f32))) + return false; + if (memcmp(&state.ds, &other.state.ds, sizeof(VkPipelineDepthStencilStateCreateInfo))) return false; - return num_targets == other.num_targets; + if (state.ms.rasterizationSamples != VK_SAMPLE_COUNT_1_BIT) + { + if (memcmp(&state.ms, &other.state.ms, sizeof(VkPipelineMultisampleStateCreateInfo))) + return false; + } + + return true; } }; } @@ -50,10 +52,11 @@ namespace rpcs3 template <> size_t hash_struct(const vk::pipeline_props &pipelineProperties) { - size_t seed = hash_base(pipelineProperties.num_targets); + size_t seed = hash_base(pipelineProperties.renderpass_key); seed ^= hash_struct(pipelineProperties.state.ia); seed ^= hash_struct(pipelineProperties.state.ds); seed ^= hash_struct(pipelineProperties.state.rs); + seed ^= hash_struct(pipelineProperties.state.ms); // Do not compare pointers to memory! VkPipelineColorBlendStateCreateInfo tmp; @@ -134,10 +137,13 @@ struct VKTraits vp.viewportCount = 1; vp.scissorCount = 1; - VkPipelineMultisampleStateCreateInfo ms = {}; - ms.sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO; - ms.pSampleMask = NULL; - ms.rasterizationSamples = VK_SAMPLE_COUNT_1_BIT; + VkPipelineMultisampleStateCreateInfo ms = pipelineProperties.state.ms; + verify("Multisample state mismatch!" HERE), ms.rasterizationSamples == VkSampleCountFlagBits((pipelineProperties.renderpass_key >> 16) & 0xF); + if (ms.rasterizationSamples != VK_SAMPLE_COUNT_1_BIT) + { + // Update the sample mask pointer + ms.pSampleMask = &pipelineProperties.state.temp_storage.msaa_sample_mask; + } // Rebase pointers from pipeline structure in case it is moved/copied VkPipelineColorBlendStateCreateInfo cs = pipelineProperties.state.cs; diff --git a/rpcs3/Emu/RSX/VK/VKProgramPipeline.cpp b/rpcs3/Emu/RSX/VK/VKProgramPipeline.cpp index 731561e537..e83b1d31e0 100644 --- a/rpcs3/Emu/RSX/VK/VKProgramPipeline.cpp +++ b/rpcs3/Emu/RSX/VK/VKProgramPipeline.cpp @@ -9,13 +9,9 @@ namespace vk { using namespace ::glsl; - program::program(VkDevice dev, VkPipeline p, const std::vector &vertex_input, const std::vector& fragment_inputs) - : m_device(dev), pipeline(p) + void program::create_impl() { linked = false; - - load_uniforms(program_domain::glsl_vertex_program, vertex_input); - load_uniforms(program_domain::glsl_vertex_program, fragment_inputs); attribute_location_mask = 0; vertex_attributes_mask = 0; @@ -24,12 +20,26 @@ namespace vk vs_texture_bindings.fill(~0u); } + program::program(VkDevice dev, VkPipeline p, const std::vector &vertex_input, const std::vector& fragment_inputs) + : m_device(dev), pipeline(p) + { + create_impl(); + load_uniforms(vertex_input); + load_uniforms(fragment_inputs); + } + + program::program(VkDevice dev, VkPipeline p) + : m_device(dev), pipeline(p) + { + create_impl(); + } + program::~program() { vkDestroyPipeline(m_device, pipeline, nullptr); } - program& program::load_uniforms(program_domain domain, const std::vector& inputs) + program& program::load_uniforms(const std::vector& inputs) { verify("Cannot change uniforms in already linked program!" HERE), !linked; @@ -92,7 +102,7 @@ namespace vk return false; } - void program::bind_uniform(const VkDescriptorImageInfo &image_descriptor, const std::string& uniform_name, VkDescriptorSet &descriptor_set) + void program::bind_uniform(const VkDescriptorImageInfo &image_descriptor, const std::string& uniform_name, VkDescriptorType type, VkDescriptorSet &descriptor_set) { for (const auto &uniform : uniforms[program_input_type::input_type_texture]) { @@ -106,7 +116,7 @@ namespace vk uniform.location, // dstBinding 0, // dstArrayElement 1, // descriptorCount - VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, // descriptorType + type, // descriptorType &image_descriptor, // pImageInfo nullptr, // pBufferInfo nullptr // pTexelBufferView diff --git a/rpcs3/Emu/RSX/VK/VKRenderPass.cpp b/rpcs3/Emu/RSX/VK/VKRenderPass.cpp index efd5ccd95f..c7f1d8d624 100644 --- a/rpcs3/Emu/RSX/VK/VKRenderPass.cpp +++ b/rpcs3/Emu/RSX/VK/VKRenderPass.cpp @@ -119,7 +119,7 @@ namespace vk } // Decode - VkSampleCountFlagBits samples = VkSampleCountFlagBits((renderpass_key >> 16) & 0x1F); + VkSampleCountFlagBits samples = VkSampleCountFlagBits((renderpass_key >> 16) & 0xF); std::vector rtv_layouts; VkImageLayout dsv_layout; diff --git a/rpcs3/Emu/RSX/VK/VKRenderTargets.h b/rpcs3/Emu/RSX/VK/VKRenderTargets.h index 34e35c3fa8..c7465aef5c 100644 --- a/rpcs3/Emu/RSX/VK/VKRenderTargets.h +++ b/rpcs3/Emu/RSX/VK/VKRenderTargets.h @@ -11,6 +11,9 @@ namespace vk { + void resolve_image(vk::command_buffer& cmd, vk::viewable_image* dst, vk::viewable_image* src); + void unresolve_image(vk::command_buffer& cmd, vk::viewable_image* dst, vk::viewable_image* src); + struct render_target : public viewable_image, public rsx::ref_counted, public rsx::render_target_descriptor { u64 frame_tag = 0; // frame id when invalidated, 0 if not invalid @@ -19,8 +22,14 @@ namespace vk vk::viewable_image* get_surface(rsx::surface_access access_type) override { - // TODO - return (vk::viewable_image*)this; + if (spp == 1 || access_type == rsx::surface_access::write) + { + return this; + } + + // A read barrier should have been called before this! + verify("Read access without explicit barrier" HERE), resolve_surface, !(msaa_flags & rsx::surface_state_flags::require_resolve); + return resolve_surface.get(); } bool is_depth_surface() const override @@ -39,58 +48,282 @@ namespace vk return (rsx::apply_resolution_scale(_width, true) == width()) && (rsx::apply_resolution_scale(_height, true) == height()); } - void memory_barrier(vk::command_buffer& cmd, bool force_init = false) + image_view* get_view(u32 remap_encoding, const std::pair, std::array>& remap, + VkImageAspectFlags mask = VK_IMAGE_ASPECT_COLOR_BIT | VK_IMAGE_ASPECT_DEPTH_BIT) override + { + if (remap_encoding != 0xDEADBEEF && resolve_surface) + { + return resolve_surface->get_view(remap_encoding, remap, mask); + } + else + { + if (remap_encoding == 0xDEADBEEF) + { + // Special encoding to skip the resolve target fetch + remap_encoding = 0xAAE4; + } + + return vk::viewable_image::get_view(remap_encoding, remap, mask); + } + } + + void resolve(vk::command_buffer& cmd) + { + VkImageSubresourceRange range = { aspect(), 0, 1, 0, 1 }; + + // NOTE: This surface can only be in the ATTACHMENT_OPTIMAL layout + // The resolve surface can be in any type of access, but we have to assume it is likely in read-only mode like shader read-only + + if (LIKELY(!is_depth_surface())) + { + verify(HERE), current_layout == VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; + + // This is the source; finish writing before reading + vk::insert_image_memory_barrier( + cmd, this->value, + this->current_layout, VK_IMAGE_LAYOUT_GENERAL, + VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, + VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, + VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, + VK_ACCESS_SHADER_READ_BIT, + range); + + // This is the target; finish reading before writing + vk::insert_image_memory_barrier( + cmd, resolve_surface->value, + resolve_surface->current_layout, VK_IMAGE_LAYOUT_GENERAL, + VK_PIPELINE_STAGE_TRANSFER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, + VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, + VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_SHADER_READ_BIT, + VK_ACCESS_SHADER_WRITE_BIT, + range); + + this->current_layout = VK_IMAGE_LAYOUT_GENERAL; + resolve_surface->current_layout = VK_IMAGE_LAYOUT_GENERAL; + } + else + { + this->push_layout(cmd, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); + resolve_surface->change_layout(cmd, VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL); + } + + vk::resolve_image(cmd, resolve_surface.get(), this); + + if (LIKELY(!is_depth_surface())) + { + vk::insert_image_memory_barrier( + cmd, this->value, + this->current_layout, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, + VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, + VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, + VK_ACCESS_SHADER_READ_BIT, + VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, + range); + + vk::insert_image_memory_barrier( + cmd, resolve_surface->value, + VK_IMAGE_LAYOUT_GENERAL, VK_IMAGE_LAYOUT_GENERAL, + VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, + VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | VK_PIPELINE_STAGE_TRANSFER_BIT, + VK_ACCESS_SHADER_WRITE_BIT, + VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_TRANSFER_WRITE_BIT, + range); + + this->current_layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; + resolve_surface->current_layout = VK_IMAGE_LAYOUT_GENERAL; + } + else + { + this->pop_layout(cmd); + resolve_surface->change_layout(cmd, VK_IMAGE_LAYOUT_GENERAL); + } + + msaa_flags &= ~(rsx::surface_state_flags::require_resolve); + } + + void unresolve(vk::command_buffer& cmd) + { + verify(HERE), !(msaa_flags & rsx::surface_state_flags::require_resolve); + VkImageSubresourceRange range = { aspect(), 0, 1, 0, 1 }; + + if (LIKELY(!is_depth_surface())) + { + verify(HERE), current_layout == VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; + + // This is the dest; finish reading before writing + vk::insert_image_memory_barrier( + cmd, this->value, + this->current_layout, VK_IMAGE_LAYOUT_GENERAL, + VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, + VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, + VK_ACCESS_SHADER_READ_BIT, + VK_ACCESS_SHADER_WRITE_BIT, + range); + + // This is the source; finish writing before reading + vk::insert_image_memory_barrier( + cmd, resolve_surface->value, + resolve_surface->current_layout, VK_IMAGE_LAYOUT_GENERAL, + VK_PIPELINE_STAGE_TRANSFER_BIT, + VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, + VK_ACCESS_TRANSFER_WRITE_BIT, + VK_ACCESS_SHADER_READ_BIT, + range); + + this->current_layout = VK_IMAGE_LAYOUT_GENERAL; + resolve_surface->current_layout = VK_IMAGE_LAYOUT_GENERAL; + } + else + { + this->push_layout(cmd, VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL); + resolve_surface->change_layout(cmd, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); + } + + vk::unresolve_image(cmd, this, resolve_surface.get()); + + if (LIKELY(!is_depth_surface())) + { + vk::insert_image_memory_barrier( + cmd, this->value, + this->current_layout, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, + VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, + VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, + VK_ACCESS_SHADER_WRITE_BIT, + VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | VK_ACCESS_COLOR_ATTACHMENT_READ_BIT, + range); + + vk::insert_image_memory_barrier( + cmd, resolve_surface->value, + VK_IMAGE_LAYOUT_GENERAL, VK_IMAGE_LAYOUT_GENERAL, + VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, + VK_PIPELINE_STAGE_TRANSFER_BIT, + VK_ACCESS_SHADER_READ_BIT, + VK_ACCESS_TRANSFER_WRITE_BIT, + range); + + this->current_layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; + resolve_surface->current_layout = VK_IMAGE_LAYOUT_GENERAL; + } + else + { + this->pop_layout(cmd); + resolve_surface->change_layout(cmd, VK_IMAGE_LAYOUT_GENERAL); + } + + msaa_flags &= ~(rsx::surface_state_flags::require_unresolve); + } + + void memory_barrier(vk::command_buffer& cmd, rsx::surface_access access) { // Helper to optionally clear/initialize memory contents depending on barrier type - auto clear_surface_impl = [&]() + auto clear_surface_impl = [&cmd, this](vk::image* surface) { - push_layout(cmd, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); - VkImageSubresourceRange range{ aspect(), 0, 1, 0, 1 }; + const auto optimal_layout = (surface->current_layout == VK_IMAGE_LAYOUT_GENERAL) ? + VK_IMAGE_LAYOUT_GENERAL : + VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL; - if (aspect() & VK_IMAGE_ASPECT_COLOR_BIT) + surface->push_layout(cmd, optimal_layout); + + VkImageSubresourceRange range{ surface->aspect(), 0, 1, 0, 1 }; + if (surface->aspect() & VK_IMAGE_ASPECT_COLOR_BIT) { VkClearColorValue color{}; - vkCmdClearColorImage(cmd, value, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, &color, 1, &range); + vkCmdClearColorImage(cmd, surface->value, surface->current_layout, &color, 1, &range); } else { VkClearDepthStencilValue clear{ 1.f, 255 }; - vkCmdClearDepthStencilImage(cmd, value, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, &clear, 1, &range); + vkCmdClearDepthStencilImage(cmd, surface->value, surface->current_layout, &clear, 1, &range); } - pop_layout(cmd); - state_flags &= ~rsx::surface_state_flags::erase_bkgnd; - }; + surface->pop_layout(cmd); - auto null_transfer_impl = [&]() - { - if (dirty() && (force_init || state_flags & rsx::surface_state_flags::erase_bkgnd)) + if (surface == this) { - // Initialize memory contents if we did not find anything usable - // TODO: Properly sync with Cell - clear_surface_impl(); - on_write(); - } - else - { - verify(HERE), state_flags == rsx::surface_state_flags::ready; + state_flags &= ~rsx::surface_state_flags::erase_bkgnd; } }; - if (!old_contents) + auto get_resolve_target = [&]() { - null_transfer_impl(); + if (!resolve_surface) + { + // Create a resolve surface + auto pdev = vk::get_current_renderer(); + const auto resolve_w = width() * samples_x; + const auto resolve_h = height() * samples_y; + + VkImageUsageFlags usage = VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT; + usage |= (this->info.usage & (VK_IMAGE_USAGE_STORAGE_BIT | VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT)); + + resolve_surface.reset(new vk::viewable_image( + *pdev, + pdev->get_memory_mapping().device_local, + VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, + VK_IMAGE_TYPE_2D, + format(), + resolve_w, resolve_h, 1, 1, 1, + VK_SAMPLE_COUNT_1_BIT, + VK_IMAGE_LAYOUT_UNDEFINED, + VK_IMAGE_TILING_OPTIMAL, + usage, + 0)); + + resolve_surface->native_component_map = native_component_map; + resolve_surface->change_layout(cmd, VK_IMAGE_LAYOUT_GENERAL); + } + + return resolve_surface.get(); + }; + + const bool read_access = (access != rsx::surface_access::write); + if (spp > 1 && read_access) + { + get_resolve_target(); + } + + if (old_contents && !rsx::pitch_compatible(this, static_cast(old_contents.source))) + { + LOG_TRACE(RSX, "Pitch mismatch, could not transfer inherited memory"); + clear_rw_barrier(); + } + + if (LIKELY(!old_contents)) + { + if (state_flags & rsx::surface_state_flags::erase_bkgnd) + { + clear_surface_impl(this); + + if (resolve_surface && read_access) + { + // Only clear the resolve surface if reading from it, otherwise it's a waste + clear_surface_impl(resolve_surface.get()); + } + + on_write(rsx::get_shared_tag(), rsx::surface_state_flags::ready); + } + else if (msaa_flags & rsx::surface_state_flags::require_resolve) + { + if (read_access) + { + // Only do this step when read access is required + resolve(cmd); + } + } + else if (msaa_flags & rsx::surface_state_flags::require_unresolve) + { + if (!read_access) + { + // Only do this step when it is needed to start rendering + unresolve(cmd); + } + } + return; } auto src_texture = static_cast(old_contents.source); - if (!rsx::pitch_compatible(this, src_texture)) - { - LOG_TRACE(RSX, "Pitch mismatch, could not transfer inherited memory"); - - clear_rw_barrier(); - return; - } + src_texture->read_barrier(cmd); const auto src_bpp = src_texture->get_bpp(); const auto dst_bpp = get_bpp(); @@ -116,29 +349,52 @@ namespace vk vk::blitter hw_blitter; old_contents.init_transfer(this); - if (state_flags & rsx::surface_state_flags::erase_bkgnd) + auto src_area = old_contents.src_rect(); + auto dst_area = old_contents.dst_rect(); + + src_texture->transform_pixels_to_samples(src_area); + this->transform_pixels_to_samples(dst_area); + + vk::image *target_image = (spp > 1) ? get_resolve_target() : this; + if (dst_area.x1 == 0 && dst_area.y1 == 0 && + unsigned(dst_area.x2) == target_image->width() && unsigned(dst_area.y2) == target_image->height()) { - const auto area = old_contents.dst_rect(); - if (area.x1 > 0 || area.y1 > 0 || unsigned(area.x2) < width() || unsigned(area.y2) < height()) - { - clear_surface_impl(); - } - else - { - state_flags &= ~rsx::surface_state_flags::erase_bkgnd; - } + // Skip a bunch of useless work + state_flags &= ~(rsx::surface_state_flags::erase_bkgnd); + msaa_flags = rsx::surface_state_flags::ready; + } + else if (state_flags & rsx::surface_state_flags::erase_bkgnd) + { + clear_surface_impl(target_image); + + state_flags &= ~(rsx::surface_state_flags::erase_bkgnd); + msaa_flags = rsx::surface_state_flags::ready; + } + else if (msaa_flags & rsx::surface_state_flags::require_resolve) + { + // Need to forward resolve this + resolve(cmd); } - hw_blitter.scale_image(cmd, old_contents.source, this, - old_contents.src_rect(), - old_contents.dst_rect(), + hw_blitter.scale_image( + cmd, + src_texture->get_surface(rsx::surface_access::read), + this->get_surface(rsx::surface_access::transfer), + src_area, + dst_area, /*linear?*/false, /*depth?(unused)*/false, typeless_info); - on_write(); + on_write_copy(); + + if (!read_access && spp > 1) + { + // Write barrier, must initialize + unresolve(cmd); + } } - void read_barrier(vk::command_buffer& cmd) { memory_barrier(cmd, true); } - void write_barrier(vk::command_buffer& cmd) { memory_barrier(cmd, false); } + void read_barrier(vk::command_buffer& cmd) { memory_barrier(cmd, rsx::surface_access::read); } + void write_barrier(vk::command_buffer& cmd) { memory_barrier(cmd, rsx::surface_access::write); } }; static inline vk::render_target* as_rtt(vk::image* t) @@ -164,8 +420,19 @@ namespace rsx rsx::surface_antialiasing antialias, vk::render_device &device, vk::command_buffer& cmd) { - auto fmt = vk::get_compatible_surface_format(format); + const auto fmt = vk::get_compatible_surface_format(format); + const auto spp = get_format_sample_count(antialias); VkFormat requested_format = fmt.first; + VkImageUsageFlags usage_flags = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT; + + if (antialias == rsx::surface_antialiasing::center_1_sample) + { + usage_flags |= VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_SAMPLED_BIT; + } + else + { + usage_flags |= VK_IMAGE_USAGE_STORAGE_BIT; + } std::unique_ptr rtt; rtt = std::make_unique(device, device.get_memory_mapping().device_local, @@ -173,13 +440,13 @@ namespace rsx VK_IMAGE_TYPE_2D, requested_format, static_cast(rsx::apply_resolution_scale((u16)width, true)), static_cast(rsx::apply_resolution_scale((u16)height, true)), 1, 1, 1, - VK_SAMPLE_COUNT_1_BIT, + static_cast(spp), VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_TILING_OPTIMAL, - VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT|VK_IMAGE_USAGE_TRANSFER_SRC_BIT|VK_IMAGE_USAGE_TRANSFER_DST_BIT|VK_IMAGE_USAGE_SAMPLED_BIT, + usage_flags, 0); - change_image_layout(cmd, rtt.get(), VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, vk::get_image_subresource_range(0, 0, 1, 1, VK_IMAGE_ASPECT_COLOR_BIT)); + rtt->change_layout(cmd, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL); rtt->set_format(format); rtt->set_aa_mode(antialias); @@ -187,7 +454,7 @@ namespace rsx rtt->state_flags = rsx::surface_state_flags::erase_bkgnd; rtt->native_component_map = fmt.second; rtt->rsx_pitch = (u16)pitch; - rtt->native_pitch = (u16)width * get_format_block_size_in_bytes(format) * get_format_sample_count(antialias); + rtt->native_pitch = (u16)width * get_format_block_size_in_bytes(format) * rtt->samples_x; rtt->surface_width = (u16)width; rtt->surface_height = (u16)height; rtt->queue_tag(address); @@ -203,13 +470,14 @@ namespace rsx rsx::surface_antialiasing antialias, vk::render_device &device, vk::command_buffer& cmd) { - VkFormat requested_format = vk::get_compatible_depth_surface_format(device.get_formats_support(), format); - VkImageSubresourceRange range = vk::get_image_subresource_range(0, 0, 1, 1, VK_IMAGE_ASPECT_DEPTH_BIT); + const VkFormat requested_format = vk::get_compatible_depth_surface_format(device.get_formats_support(), format); + const auto spp = get_format_sample_count(antialias); + VkImageUsageFlags usage_flags = VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT; - if (requested_format != VK_FORMAT_D16_UNORM) - range.aspectMask |= VK_IMAGE_ASPECT_STENCIL_BIT; - - const auto scale = rsx::get_resolution_scale(); + if (antialias == rsx::surface_antialiasing::center_1_sample) + { + usage_flags |= VK_IMAGE_USAGE_TRANSFER_SRC_BIT; + } std::unique_ptr ds; ds = std::make_unique(device, device.get_memory_mapping().device_local, @@ -217,12 +485,13 @@ namespace rsx VK_IMAGE_TYPE_2D, requested_format, static_cast(rsx::apply_resolution_scale((u16)width, true)), static_cast(rsx::apply_resolution_scale((u16)height, true)), 1, 1, 1, - VK_SAMPLE_COUNT_1_BIT, + static_cast(spp), VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_TILING_OPTIMAL, - VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT| VK_IMAGE_USAGE_TRANSFER_SRC_BIT| VK_IMAGE_USAGE_TRANSFER_DST_BIT|VK_IMAGE_USAGE_SAMPLED_BIT, + usage_flags, 0); + ds->change_layout(cmd, VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL); ds->set_format(format); ds->set_aa_mode(antialias); @@ -230,9 +499,7 @@ namespace rsx ds->state_flags = rsx::surface_state_flags::erase_bkgnd; ds->native_component_map = { VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_R }; - change_image_layout(cmd, ds.get(), VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL, range); - - ds->native_pitch = (u16)width * 2 * get_format_sample_count(antialias); + ds->native_pitch = (u16)width * 2 * ds->samples_x; if (format == rsx::surface_depth_format::z24s8) ds->native_pitch *= 2; @@ -261,7 +528,7 @@ namespace rsx VK_IMAGE_TYPE_2D, ref->format(), new_w, new_h, 1, 1, 1, - (VkSampleCountFlagBits)ref->samples(), + static_cast(ref->samples()), VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_TILING_OPTIMAL, ref->info.usage, @@ -273,12 +540,16 @@ namespace rsx sink->memory_usage_flags = rsx::surface_usage_flags::storage; sink->state_flags = rsx::surface_state_flags::erase_bkgnd; sink->native_component_map = ref->native_component_map; - sink->native_pitch = u16(prev.width * ref->get_bpp() * ref->get_spp()); + sink->native_pitch = u16(prev.width * ref->get_bpp() * ref->samples_x); sink->surface_width = prev.width; sink->surface_height = prev.height; sink->queue_tag(address); - change_image_layout(cmd, sink.get(), VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); + const auto best_layout = (ref->info.usage & VK_IMAGE_USAGE_SAMPLED_BIT) ? + VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL : + ref->current_layout; + + sink->change_layout(cmd, best_layout); } prev.target = sink.get(); @@ -314,7 +585,10 @@ namespace rsx static void prepare_surface_for_sampling(vk::command_buffer& cmd, vk::render_target *surface) { - surface->change_layout(cmd, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); + if (surface->info.usage & VK_IMAGE_USAGE_SAMPLED_BIT) + { + surface->change_layout(cmd, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); + } } static bool surface_is_pitch_compatible(const std::unique_ptr &surface, size_t pitch) @@ -325,7 +599,6 @@ namespace rsx static void invalidate_surface_contents(vk::command_buffer& /*cmd*/, vk::render_target *surface, u32 address, size_t pitch) { surface->rsx_pitch = (u16)pitch; - surface->set_aa_mode(rsx::surface_antialiasing::center_1_sample); surface->queue_tag(address); surface->last_use_tag = 0; surface->memory_usage_flags = rsx::surface_usage_flags::unknown; @@ -345,7 +618,7 @@ namespace rsx surface->release(); } - static void notify_surface_persist(const std::unique_ptr &surface) + static void notify_surface_persist(const std::unique_ptr& /*surface*/) {} static void notify_surface_reused(const std::unique_ptr &surface) diff --git a/rpcs3/Emu/RSX/VK/VKResolveHelper.cpp b/rpcs3/Emu/RSX/VK/VKResolveHelper.cpp new file mode 100644 index 0000000000..94de236266 --- /dev/null +++ b/rpcs3/Emu/RSX/VK/VKResolveHelper.cpp @@ -0,0 +1,181 @@ +#include "stdafx.h" + +#include "VKResolveHelper.h" +#include "VKRenderPass.h" + +namespace +{ + const char *get_format_prefix(VkFormat format) + { + switch (format) + { + case VK_FORMAT_R5G6B5_UNORM_PACK16: + return "r16ui"; + case VK_FORMAT_B8G8R8A8_UNORM: + return "rgba8"; + case VK_FORMAT_R16G16B16A16_SFLOAT: + return "rgba16f"; + case VK_FORMAT_R32G32B32A32_SFLOAT: + return "rgba32f"; + case VK_FORMAT_A1R5G5B5_UNORM_PACK16: + return "r16ui"; + case VK_FORMAT_R8_UNORM: + return "r8"; + case VK_FORMAT_R8G8_UNORM: + return "rg8"; + case VK_FORMAT_R32_SFLOAT: + return "r32f"; + default: + fmt::throw_exception("Unhandled VkFormat 0x%x" HERE, u32(format)); + } + } +} + +namespace vk +{ + std::unordered_map> g_resolve_helpers; + std::unordered_map> g_unresolve_helpers; + std::unique_ptr g_depth_resolver; + std::unique_ptr g_depth_unresolver; + std::unique_ptr g_depthstencil_resolverAMD; + std::unique_ptr g_depthstencil_unresolverAMD; + + void resolve_image(vk::command_buffer& cmd, vk::viewable_image* dst, vk::viewable_image* src) + { + if (src->aspect() == VK_IMAGE_ASPECT_COLOR_BIT) + { + auto &job = g_resolve_helpers[src->format()]; + + if (!job) + { + job.reset(new vk::cs_resolve_task(get_format_prefix(src->format()))); + } + + job->run(cmd, src, dst); + } + else + { + std::vector surface = { dst }; + auto& dev = cmd.get_command_pool().get_owner(); + + const auto key = vk::get_renderpass_key(surface); + auto renderpass = vk::get_renderpass(dev, key); + + if (src->aspect() & VK_IMAGE_ASPECT_STENCIL_BIT) + { + if (!g_depthstencil_resolverAMD) + { + g_depthstencil_resolverAMD.reset(new vk::depthstencil_resolve_AMD()); + g_depthstencil_resolverAMD->create(dev); + } + + g_depthstencil_resolverAMD->run(cmd, src, dst, renderpass); + } + else + { + if (!g_depth_resolver) + { + g_depth_resolver.reset(new vk::depthonly_resolve()); + g_depth_resolver->create(dev); + } + + g_depth_resolver->run(cmd, src, dst, renderpass); + } + } + } + + void unresolve_image(vk::command_buffer& cmd, vk::viewable_image* dst, vk::viewable_image* src) + { + if (src->aspect() == VK_IMAGE_ASPECT_COLOR_BIT) + { + auto &job = g_unresolve_helpers[src->format()]; + + if (!job) + { + job.reset(new vk::cs_unresolve_task(get_format_prefix(src->format()))); + } + + job->run(cmd, dst, src); + } + else + { + std::vector surface = { dst }; + auto& dev = cmd.get_command_pool().get_owner(); + + const auto key = vk::get_renderpass_key(surface); + auto renderpass = vk::get_renderpass(dev, key); + + if (src->aspect() & VK_IMAGE_ASPECT_STENCIL_BIT) + { + if (!g_depthstencil_unresolverAMD) + { + g_depthstencil_unresolverAMD.reset(new vk::depthstencil_unresolve_AMD()); + g_depthstencil_unresolverAMD->create(dev); + } + + g_depthstencil_unresolverAMD->run(cmd, dst, src, renderpass); + } + else + { + if (!g_depth_unresolver) + { + g_depth_unresolver.reset(new vk::depthonly_unresolve()); + g_depth_unresolver->create(dev); + } + + g_depth_unresolver->run(cmd, dst, src, renderpass); + } + } + } + + void clear_resolve_helpers() + { + for (auto &task : g_resolve_helpers) + { + task.second->destroy(); + } + + for (auto &task : g_unresolve_helpers) + { + task.second->destroy(); + } + + g_resolve_helpers.clear(); + g_unresolve_helpers.clear(); + + if (g_depth_resolver) + { + g_depth_resolver->destroy(); + g_depth_resolver.reset(); + } + + if (g_depthstencil_resolverAMD) + { + g_depthstencil_resolverAMD->destroy(); + g_depthstencil_resolverAMD.reset(); + } + + if (g_depth_unresolver) + { + g_depth_unresolver->destroy(); + g_depth_unresolver.reset(); + } + + if (g_depthstencil_unresolverAMD) + { + g_depthstencil_unresolverAMD->destroy(); + g_depthstencil_unresolverAMD.reset(); + } + } + + void reset_resolve_resources() + { + for (auto &e : g_resolve_helpers) e.second->free_resources(); + for (auto &e : g_unresolve_helpers) e.second->free_resources(); + + if (g_depth_resolver) g_depth_resolver->free_resources(); + if (g_depth_unresolver) g_depth_unresolver->free_resources(); + if (g_depthstencil_resolverAMD) g_depthstencil_resolverAMD->free_resources(); + if (g_depthstencil_unresolverAMD) g_depthstencil_unresolverAMD->free_resources(); + } +} \ No newline at end of file diff --git a/rpcs3/Emu/RSX/VK/VKResolveHelper.h b/rpcs3/Emu/RSX/VK/VKResolveHelper.h new file mode 100644 index 0000000000..8839e43e89 --- /dev/null +++ b/rpcs3/Emu/RSX/VK/VKResolveHelper.h @@ -0,0 +1,406 @@ +#pragma once + +#include "VKHelpers.h" +#include "VKCompute.h" +#include "VKOverlays.h" + +namespace vk +{ + struct cs_resolve_base : compute_task + { + vk::viewable_image* multisampled; + vk::viewable_image* resolve; + + u32 cs_wave_x = 1; + u32 cs_wave_y = 1; + + cs_resolve_base() + {} + + virtual ~cs_resolve_base() + {} + + void build(const std::string& kernel, const std::string& format_prefix, int direction) + { + create(); + + // TODO: Tweak occupancy + switch (optimal_group_size) + { + default: + case 64: + cs_wave_x = 8; + cs_wave_y = 8; + break; + case 32: + cs_wave_x = 8; + cs_wave_y = 4; + break; + } + + const std::pair syntax_replace[] = + { + { "%wx", std::to_string(cs_wave_x) }, + { "%wy", std::to_string(cs_wave_y) }, + }; + + m_src = + "#version 430\n" + "layout(local_size_x=%wx, local_size_y=%wy, local_size_z=1) in;\n" + "\n"; + + m_src = fmt::replace_all(m_src, syntax_replace); + + if (direction == 0) + { + m_src += + "layout(set=0, binding=0, " + format_prefix + ") uniform readonly restrict image2DMS multisampled;\n" + "layout(set=0, binding=1) uniform writeonly restrict image2D resolve;\n"; + } + else + { + m_src += + "layout(set=0, binding=0) uniform writeonly restrict image2DMS multisampled;\n" + "layout(set=0, binding=1, " + format_prefix + ") uniform readonly restrict image2D resolve;\n"; + } + + m_src += + "\n" + "void main()\n" + "{\n" + " ivec2 resolve_size = imageSize(resolve);\n" + " ivec2 aa_size = imageSize(multisampled);\n" + " ivec2 sample_count = resolve_size / aa_size;\n" + "\n" + " if (any(greaterThanEqual(gl_GlobalInvocationID.xy, uvec2(resolve_size)))) return;" + "\n" + " ivec2 resolve_coords = ivec2(gl_GlobalInvocationID.xy);\n" + " ivec2 aa_coords = resolve_coords / sample_count;\n" + " ivec2 sample_loc = ivec2(resolve_coords % sample_count);\n" + " int sample_index = sample_loc.x + (sample_loc.y * sample_count.y);\n" + + kernel + + "}\n"; + + LOG_ERROR(RSX, "Compute shader:\n%s", m_src); + } + + std::vector> get_descriptor_layout() override + { + return + { + { VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, 2 } + }; + } + + void declare_inputs() override + { + std::vector inputs = + { + { + ::glsl::program_domain::glsl_compute_program, + vk::glsl::program_input_type::input_type_texture, + {}, {}, + 0, + "multisampled" + }, + { + ::glsl::program_domain::glsl_compute_program, + vk::glsl::program_input_type::input_type_texture, + {}, {}, + 1, + "resolve" + } + }; + + m_program->load_uniforms(inputs); + } + + void bind_resources() override + { + auto msaa_view = multisampled->get_view(0xDEADBEEF, rsx::default_remap_vector); + auto resolved_view = resolve->get_view(0xAAE4, rsx::default_remap_vector); + m_program->bind_uniform({ VK_NULL_HANDLE, msaa_view->value, multisampled->current_layout }, "multisampled", VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, m_descriptor_set); + m_program->bind_uniform({ VK_NULL_HANDLE, resolved_view->value, resolve->current_layout }, "resolve", VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, m_descriptor_set); + } + + void run(VkCommandBuffer cmd, vk::viewable_image* msaa_image, vk::viewable_image* resolve_image) + { + verify(HERE), msaa_image->samples() > 1, resolve_image->samples() == 1; + + multisampled = msaa_image; + resolve = resolve_image; + + const u32 invocations_x = align(resolve_image->width(), cs_wave_x) / cs_wave_x; + const u32 invocations_y = align(resolve_image->height(), cs_wave_y) / cs_wave_y; + + compute_task::run(cmd, invocations_x, invocations_y); + } + }; + + struct cs_resolve_task : cs_resolve_base + { + cs_resolve_task(const std::string& format_prefix) + { + std::string kernel = + " vec4 aa_sample = imageLoad(multisampled, aa_coords, sample_index);\n" + " imageStore(resolve, resolve_coords, aa_sample);\n"; + + build(kernel, format_prefix, 0); + } + }; + + struct cs_unresolve_task : cs_resolve_base + { + cs_unresolve_task(const std::string& format_prefix) + { + std::string kernel = + " vec4 resolved_sample = imageLoad(resolve, resolve_coords);\n" + " imageStore(multisampled, aa_coords, sample_index, resolved_sample);\n"; + + build(kernel, format_prefix, 1); + } + }; + + struct depth_resolve_base : public overlay_pass + { + u8 samples_x = 1; + u8 samples_y = 1; + + depth_resolve_base() + { + renderpass_config.set_depth_mask(true); + renderpass_config.enable_depth_test(VK_COMPARE_OP_ALWAYS); + } + + void build(const std::string& kernel, const std::string& extensions, bool stencil_texturing, bool input_is_multisampled) + { + vs_src = + "#version 450\n" + "#extension GL_ARB_separate_shader_objects : enable\n\n" + "\n" + "void main()\n" + "{\n" + " vec2 positions[] = {vec2(-1., -1.), vec2(1., -1.), vec2(-1., 1.), vec2(1., 1.)};\n" + " gl_Position = vec4(positions[gl_VertexIndex % 4], 0., 1.);\n" + "}\n"; + + fs_src = + "#version 420\n" + "#extension GL_ARB_separate_shader_objects : enable\n"; + fs_src += extensions + + "\n" + "layout(std140, set=0, binding=0) uniform static_data{ ivec4 regs[8]; };\n" + "layout(set=0, binding=1) uniform sampler2D fs0;\n"; + + if (stencil_texturing) + { + m_num_usable_samplers = 2; + + fs_src += + "layout(set=0, binding=2) uniform usampler2D fs1;\n"; + } + + fs_src += + "layout(pixel_center_integer) in vec4 gl_FragCoord;\n" + "\n" + "void main()\n" + "{\n"; + fs_src += kernel + + "}\n"; + + if (input_is_multisampled) + { + auto sampler_loc = fs_src.find("sampler2D fs0"); + fs_src.insert(sampler_loc + 9, "MS"); + + if (stencil_texturing) + { + sampler_loc = fs_src.find("sampler2D fs1"); + fs_src.insert(sampler_loc + 9, "MS"); + } + } + + LOG_ERROR(RSX, "Resolve shader:\n%s", fs_src); + } + + void update_uniforms(vk::glsl::program* /*program*/) override + { + m_ubo_offset = (u32)m_ubo.alloc<256>(8); + auto dst = (s32*)m_ubo.map(m_ubo_offset, 128); + dst[0] = samples_x; + dst[1] = samples_y; + m_ubo.unmap(); + } + + void update_sample_configuration(vk::image* msaa_image) + { + switch (msaa_image->samples()) + { + case 1: + fmt::throw_exception("MSAA input not multisampled!" HERE); + case 2: + samples_x = 2; + samples_y = 1; + break; + case 4: + samples_x = samples_y = 2; + break; + default: + fmt::throw_exception("Unsupported sample count %d" HERE, msaa_image->samples()); + } + } + }; + + struct depthonly_resolve : depth_resolve_base + { + depthonly_resolve() + { + build( + " ivec2 out_coord = ivec2(gl_FragCoord.xy);\n" + " ivec2 in_coord = (out_coord / regs[0].xy);\n" + " ivec2 sample_loc = out_coord % ivec2(regs[0].xy);\n" + " int sample_index = sample_loc.x + (sample_loc.y * regs[0].y);\n" + " float frag_depth = texelFetch(fs0, in_coord, sample_index).x;\n" + " gl_FragDepth = frag_depth;\n", + "", + false, + true); + } + + void run(vk::command_buffer& cmd, vk::viewable_image* msaa_image, vk::viewable_image* resolve_image, VkRenderPass render_pass) + { + update_sample_configuration(msaa_image); + auto src_view = msaa_image->get_view(0xDEADBEEF, rsx::default_remap_vector); + + overlay_pass::run( + cmd, + (u16)resolve_image->width(), (u16)resolve_image->height(), + resolve_image, src_view, + render_pass); + } + }; + + struct depthonly_unresolve : depth_resolve_base + { + depthonly_unresolve() + { + build( + " ivec2 pixel_coord = ivec2(gl_FragCoord.xy);\n" + " pixel_coord *= regs[0].xy;\n" + " pixel_coord.x += (gl_SampleID % regs[0].x);\n" + " pixel_coord.y += (gl_SampleID / regs[0].x);\n" + " float frag_depth = texelFetch(fs0, pixel_coord, 0).x;\n" + " gl_FragDepth = frag_depth;\n", + "", + false, + false); + } + + void run(vk::command_buffer& cmd, vk::viewable_image* msaa_image, vk::viewable_image* resolve_image, VkRenderPass render_pass) + { + renderpass_config.set_multisample_state(msaa_image->samples(), 0xFFFF, true, false, false); + renderpass_config.set_multisample_shading_rate(1.f); + update_sample_configuration(msaa_image); + + auto src_view = resolve_image->get_view(0xAAE4, rsx::default_remap_vector); + + overlay_pass::run( + cmd, + (u16)msaa_image->width(), (u16)msaa_image->height(), + msaa_image, src_view, + render_pass); + } + }; + + struct depthstencil_resolve_AMD : depth_resolve_base + { + depthstencil_resolve_AMD() + { + renderpass_config.enable_stencil_test( + VK_STENCIL_OP_REPLACE, VK_STENCIL_OP_REPLACE, VK_STENCIL_OP_REPLACE, // Always replace + VK_COMPARE_OP_ALWAYS, // Always pass + 0xFF, // Full write-through + 0); // Unused + + m_num_usable_samplers = 2; + + build( + " ivec2 out_coord = ivec2(gl_FragCoord.xy);\n" + " ivec2 in_coord = (out_coord / regs[0].xy);\n" + " ivec2 sample_loc = out_coord % ivec2(regs[0].xy);\n" + " int sample_index = sample_loc.x + (sample_loc.y * regs[0].y);\n" + " float frag_depth = texelFetch(fs0, in_coord, sample_index).x;\n" + " uint frag_stencil = texelFetch(fs1, in_coord, sample_index).x;\n" + " gl_FragDepth = frag_depth;\n" + " gl_FragStencilRefARB = int(frag_stencil);\n", + + "#extension GL_ARB_shader_stencil_export : enable\n", + + true, + true); + } + + void run(vk::command_buffer& cmd, vk::viewable_image* msaa_image, vk::viewable_image* resolve_image, VkRenderPass render_pass) + { + update_sample_configuration(msaa_image); + auto depth_view = msaa_image->get_view(0xDEADBEEF, rsx::default_remap_vector, VK_IMAGE_ASPECT_DEPTH_BIT); + auto stencil_view = msaa_image->get_view(0xDEADBEEF, rsx::default_remap_vector, VK_IMAGE_ASPECT_STENCIL_BIT); + + overlay_pass::run( + cmd, + (u16)resolve_image->width(), (u16)resolve_image->height(), + resolve_image, { depth_view, stencil_view }, + render_pass); + } + }; + + struct depthstencil_unresolve_AMD : depth_resolve_base + { + depthstencil_unresolve_AMD() + { + renderpass_config.enable_stencil_test( + VK_STENCIL_OP_REPLACE, VK_STENCIL_OP_REPLACE, VK_STENCIL_OP_REPLACE, // Always replace + VK_COMPARE_OP_ALWAYS, // Always pass + 0xFF, // Full write-through + 0); // Unused + + m_num_usable_samplers = 2; + + build( + " ivec2 pixel_coord = ivec2(gl_FragCoord.xy);\n" + " pixel_coord *= regs[0].xy;\n" + " pixel_coord.x += (gl_SampleID % regs[0].x);\n" + " pixel_coord.y += (gl_SampleID / regs[0].x);\n" + " float frag_depth = texelFetch(fs0, pixel_coord, 0).x;\n" + " uint frag_stencil = texelFetch(fs1, pixel_coord, 0).x;\n" + " gl_FragDepth = frag_depth;\n" + " gl_FragStencilRefARB = int(frag_stencil);\n", + + "#extension GL_ARB_shader_stencil_export : enable\n", + + true, + false); + } + + void run(vk::command_buffer& cmd, vk::viewable_image* msaa_image, vk::viewable_image* resolve_image, VkRenderPass render_pass) + { + renderpass_config.set_multisample_state(msaa_image->samples(), 0xFFFF, true, false, false); + renderpass_config.set_multisample_shading_rate(1.f); + update_sample_configuration(msaa_image); + + auto depth_view = resolve_image->get_view(0xAAE4, rsx::default_remap_vector, VK_IMAGE_ASPECT_DEPTH_BIT); + auto stencil_view = resolve_image->get_view(0xAAE4, rsx::default_remap_vector, VK_IMAGE_ASPECT_STENCIL_BIT); + + overlay_pass::run( + cmd, + (u16)msaa_image->width(), (u16)msaa_image->height(), + msaa_image, { depth_view, stencil_view }, + render_pass); + } + }; + + void resolve_image(vk::command_buffer& cmd, vk::viewable_image* dst, vk::viewable_image* src); + void unresolve_image(vk::command_buffer& cmd, vk::viewable_image* dst, vk::viewable_image* src); + void reset_resolve_resources(); + void clear_resolve_helpers(); +} diff --git a/rpcs3/Emu/RSX/rsx_utils.h b/rpcs3/Emu/RSX/rsx_utils.h index e1a4712e26..df208f7c28 100644 --- a/rpcs3/Emu/RSX/rsx_utils.h +++ b/rpcs3/Emu/RSX/rsx_utils.h @@ -577,20 +577,24 @@ namespace rsx template std::tuple get_transferable_region(const SurfaceType* surface) { - const u16 src_w = surface->old_contents.source->width(); - const u16 src_h = surface->old_contents.source->height(); - u16 dst_w = src_w; - u16 dst_h = src_h; - auto src = static_cast(surface->old_contents.source); - dst_w = (dst_w * src->samples_x) / surface->samples_x; - dst_h = (dst_h * src->samples_y) / surface->samples_y; + auto area1 = surface->get_normalized_memory_area(); + auto area2 = surface->get_normalized_memory_area(); - const f32 scale_x = (f32)dst_w / src_w; - const f32 scale_y = (f32)dst_h / src_h; + auto w = std::min(area1.x2, area2.x2); + auto h = std::min(area1.y2, area2.y2); - std::tie(std::ignore, std::ignore, dst_w, dst_h) = clip_region(dst_w, dst_h, 0, 0, surface->width(), surface->height(), true); - return std::make_tuple(u16(dst_w / scale_x), u16(dst_h / scale_y), dst_w, dst_h); + const auto src_scale_x = src->get_bpp() * src->samples_x; + const auto src_scale_y = src->samples_y; + const auto dst_scale_x = surface->get_bpp() * surface->samples_x; + const auto dst_scale_y = surface->samples_y; + + const u16 src_w = u16(w / src_scale_x); + const u16 src_h = u16(h / src_scale_y); + const u16 dst_w = u16(w / dst_scale_x); + const u16 dst_h = u16(h / dst_scale_y); + + return std::make_tuple(src_w, src_h, dst_w, dst_h); } template diff --git a/rpcs3/VKGSRender.vcxproj b/rpcs3/VKGSRender.vcxproj index 6c47594e9a..6f18249ee2 100644 --- a/rpcs3/VKGSRender.vcxproj +++ b/rpcs3/VKGSRender.vcxproj @@ -34,6 +34,7 @@ + @@ -45,9 +46,10 @@ - - + + + diff --git a/rpcs3/VKGSRender.vcxproj.filters b/rpcs3/VKGSRender.vcxproj.filters index c0440bbb31..27b5c369fe 100644 --- a/rpcs3/VKGSRender.vcxproj.filters +++ b/rpcs3/VKGSRender.vcxproj.filters @@ -45,9 +45,12 @@ Source Files - + Source Files + + + Source Files Source Files @@ -86,9 +89,12 @@ Source Files - + Source Files + + + Source Files Source Files