From 9609767c516b039761120fc866a29a8d17254b38 Mon Sep 17 00:00:00 2001 From: kd-11 Date: Sun, 25 Apr 2021 23:57:26 +0300 Subject: [PATCH] vk: Refactor render target code --- rpcs3/Emu/CMakeLists.txt | 1 + rpcs3/Emu/RSX/VK/VKRenderTargets.cpp | 565 ++++++++++++++++++++++++++ rpcs3/Emu/RSX/VK/VKRenderTargets.h | 580 +-------------------------- rpcs3/VKGSRender.vcxproj | 1 + rpcs3/VKGSRender.vcxproj.filters | 7 +- 5 files changed, 589 insertions(+), 565 deletions(-) create mode 100644 rpcs3/Emu/RSX/VK/VKRenderTargets.cpp diff --git a/rpcs3/Emu/CMakeLists.txt b/rpcs3/Emu/CMakeLists.txt index d6c669fc2a..e4a2ebe256 100644 --- a/rpcs3/Emu/CMakeLists.txt +++ b/rpcs3/Emu/CMakeLists.txt @@ -485,6 +485,7 @@ if(TARGET 3rdparty_vulkan) RSX/VK/VKProgramPipeline.cpp RSX/VK/VKQueryPool.cpp RSX/VK/VKRenderPass.cpp + RSX/VK/VKRenderTargets.cpp RSX/VK/VKResolveHelper.cpp RSX/VK/VKResourceManager.cpp RSX/VK/VKShaderInterpreter.cpp diff --git a/rpcs3/Emu/RSX/VK/VKRenderTargets.cpp b/rpcs3/Emu/RSX/VK/VKRenderTargets.cpp new file mode 100644 index 0000000000..4407083020 --- /dev/null +++ b/rpcs3/Emu/RSX/VK/VKRenderTargets.cpp @@ -0,0 +1,565 @@ +#include "VKRenderTargets.h" + +namespace vk +{ + // Get the linear resolve target bound to this surface. Initialize if none exists + vk::viewable_image* render_target::get_resolve_target_safe(vk::command_buffer& cmd) + { + if (!resolve_surface) + { + // Create a resolve surface + const auto resolve_w = width() * samples_x; + const auto resolve_h = height() * samples_y; + + VkImageUsageFlags usage = VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT; + usage |= (this->info.usage & (VK_IMAGE_USAGE_STORAGE_BIT | VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT)); + + resolve_surface.reset(new vk::viewable_image( + *g_render_device, + g_render_device->get_memory_mapping().device_local, + VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, + VK_IMAGE_TYPE_2D, + format(), + resolve_w, resolve_h, 1, 1, 1, + VK_SAMPLE_COUNT_1_BIT, + VK_IMAGE_LAYOUT_UNDEFINED, + VK_IMAGE_TILING_OPTIMAL, + usage, + 0, + format_class())); + + resolve_surface->native_component_map = native_component_map; + resolve_surface->change_layout(cmd, VK_IMAGE_LAYOUT_GENERAL); + } + + return resolve_surface.get(); + } + + // Resolve the planar MSAA data into a linear block + void render_target::resolve(vk::command_buffer& cmd) + { + VkImageSubresourceRange range = { aspect(), 0, 1, 0, 1 }; + + // NOTE: This surface can only be in the ATTACHMENT_OPTIMAL layout + // The resolve surface can be in any type of access, but we have to assume it is likely in read-only mode like shader read-only + + if (!is_depth_surface()) [[likely]] + { + ensure(current_layout == VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL); + + // This is the source; finish writing before reading + vk::insert_image_memory_barrier( + cmd, this->value, + this->current_layout, VK_IMAGE_LAYOUT_GENERAL, + VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, + VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, + VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, + VK_ACCESS_SHADER_READ_BIT, + range); + + // This is the target; finish reading before writing + vk::insert_image_memory_barrier( + cmd, resolve_surface->value, + resolve_surface->current_layout, VK_IMAGE_LAYOUT_GENERAL, + VK_PIPELINE_STAGE_TRANSFER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, + VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, + VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_SHADER_READ_BIT, + VK_ACCESS_SHADER_WRITE_BIT, + range); + + this->current_layout = VK_IMAGE_LAYOUT_GENERAL; + resolve_surface->current_layout = VK_IMAGE_LAYOUT_GENERAL; + } + else + { + this->push_layout(cmd, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); + resolve_surface->change_layout(cmd, VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL); + } + + vk::resolve_image(cmd, resolve_surface.get(), this); + + if (!is_depth_surface()) [[likely]] + { + vk::insert_image_memory_barrier( + cmd, this->value, + this->current_layout, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, + VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, + VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, + VK_ACCESS_SHADER_READ_BIT, + VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, + range); + + vk::insert_image_memory_barrier( + cmd, resolve_surface->value, + VK_IMAGE_LAYOUT_GENERAL, VK_IMAGE_LAYOUT_GENERAL, + VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, + VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | VK_PIPELINE_STAGE_TRANSFER_BIT, + VK_ACCESS_SHADER_WRITE_BIT, + VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_TRANSFER_WRITE_BIT, + range); + + this->current_layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; + resolve_surface->current_layout = VK_IMAGE_LAYOUT_GENERAL; + } + else + { + this->pop_layout(cmd); + resolve_surface->change_layout(cmd, VK_IMAGE_LAYOUT_GENERAL); + } + + msaa_flags &= ~(rsx::surface_state_flags::require_resolve); + } + + // Unresolve the linear data into planar MSAA data + void render_target::unresolve(vk::command_buffer& cmd) + { + ensure(!(msaa_flags & rsx::surface_state_flags::require_resolve)); + VkImageSubresourceRange range = { aspect(), 0, 1, 0, 1 }; + + if (!is_depth_surface()) [[likely]] + { + ensure(current_layout == VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL); + + // This is the dest; finish reading before writing + vk::insert_image_memory_barrier( + cmd, this->value, + this->current_layout, VK_IMAGE_LAYOUT_GENERAL, + VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, + VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, + VK_ACCESS_SHADER_READ_BIT, + VK_ACCESS_SHADER_WRITE_BIT, + range); + + // This is the source; finish writing before reading + vk::insert_image_memory_barrier( + cmd, resolve_surface->value, + resolve_surface->current_layout, VK_IMAGE_LAYOUT_GENERAL, + VK_PIPELINE_STAGE_TRANSFER_BIT, + VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, + VK_ACCESS_TRANSFER_WRITE_BIT, + VK_ACCESS_SHADER_READ_BIT, + range); + + this->current_layout = VK_IMAGE_LAYOUT_GENERAL; + resolve_surface->current_layout = VK_IMAGE_LAYOUT_GENERAL; + } + else + { + this->push_layout(cmd, VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL); + resolve_surface->change_layout(cmd, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); + } + + vk::unresolve_image(cmd, this, resolve_surface.get()); + + if (!is_depth_surface()) [[likely]] + { + vk::insert_image_memory_barrier( + cmd, this->value, + this->current_layout, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, + VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, + VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, + VK_ACCESS_SHADER_WRITE_BIT, + VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | VK_ACCESS_COLOR_ATTACHMENT_READ_BIT, + range); + + vk::insert_image_memory_barrier( + cmd, resolve_surface->value, + VK_IMAGE_LAYOUT_GENERAL, VK_IMAGE_LAYOUT_GENERAL, + VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, + VK_PIPELINE_STAGE_TRANSFER_BIT, + VK_ACCESS_SHADER_READ_BIT, + VK_ACCESS_TRANSFER_WRITE_BIT, + range); + + this->current_layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; + resolve_surface->current_layout = VK_IMAGE_LAYOUT_GENERAL; + } + else + { + this->pop_layout(cmd); + resolve_surface->change_layout(cmd, VK_IMAGE_LAYOUT_GENERAL); + } + + msaa_flags &= ~(rsx::surface_state_flags::require_unresolve); + } + + // Default-initialize memory without loading + void render_target::clear_memory(vk::command_buffer& cmd, vk::image* surface) + { + const auto optimal_layout = (surface->current_layout == VK_IMAGE_LAYOUT_GENERAL) ? + VK_IMAGE_LAYOUT_GENERAL : + VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL; + + surface->push_layout(cmd, optimal_layout); + + VkImageSubresourceRange range{ surface->aspect(), 0, 1, 0, 1 }; + if (surface->aspect() & VK_IMAGE_ASPECT_COLOR_BIT) + { + VkClearColorValue color = { {0.f, 0.f, 0.f, 1.f} }; + vkCmdClearColorImage(cmd, surface->value, surface->current_layout, &color, 1, &range); + } + else + { + VkClearDepthStencilValue clear{ 1.f, 255 }; + vkCmdClearDepthStencilImage(cmd, surface->value, surface->current_layout, &clear, 1, &range); + } + + surface->pop_layout(cmd); + + if (surface == this) + { + state_flags &= ~rsx::surface_state_flags::erase_bkgnd; + } + } + + // Load memory from cell and use to initialize the surface + void render_target::load_memory(vk::command_buffer& cmd) + { + auto& upload_heap = *vk::get_upload_heap(); + const bool is_swizzled = (raster_type == rsx::surface_raster_type::swizzle); + + rsx::subresource_layout subres{}; + subres.width_in_block = subres.width_in_texel = surface_width * samples_x; + subres.height_in_block = subres.height_in_texel = surface_height * samples_y; + subres.pitch_in_block = rsx_pitch / get_bpp(); + subres.depth = 1; + subres.data = { vm::get_super_ptr(base_addr), static_cast::index_type>(rsx_pitch * surface_height * samples_y) }; + + if (g_cfg.video.resolution_scale_percent == 100 && spp == 1) [[likely]] + { + push_layout(cmd, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); + vk::upload_image(cmd, this, { subres }, get_gcm_format(), is_swizzled, 1, aspect(), upload_heap, rsx_pitch, upload_contents_inline); + pop_layout(cmd); + } + else + { + vk::image* content = nullptr; + vk::image* final_dst = (samples() > 1) ? get_resolve_target_safe(cmd) : this; + + // Prepare dst image + final_dst->push_layout(cmd, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); + + if (final_dst->width() == subres.width_in_block && final_dst->height() == subres.height_in_block) + { + // Possible if MSAA is enabled with 100% resolution scale or + // surface dimensions are less than resolution scale threshold and no MSAA. + // Writethrough. + content = final_dst; + } + else + { + content = vk::get_typeless_helper(format(), format_class(), subres.width_in_block, subres.height_in_block); + content->change_layout(cmd, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); + } + + // Load Cell data into temp buffer + vk::upload_image(cmd, content, { subres }, get_gcm_format(), is_swizzled, 1, aspect(), upload_heap, rsx_pitch, upload_contents_inline); + + // Write into final image + if (content != final_dst) + { + // Avoid layout push/pop on scratch memory by setting explicit layout here + content->change_layout(cmd, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL); + + vk::copy_scaled_image(cmd, content, final_dst, + { 0, 0, subres.width_in_block, subres.height_in_block }, + { 0, 0, static_cast(final_dst->width()), static_cast(final_dst->height()) }, + 1, true, aspect() == VK_IMAGE_ASPECT_COLOR_BIT ? VK_FILTER_LINEAR : VK_FILTER_NEAREST); + } + + final_dst->pop_layout(cmd); + + if (samples() > 1) + { + // Trigger unresolve + msaa_flags = rsx::surface_state_flags::require_unresolve; + } + } + + state_flags &= ~rsx::surface_state_flags::erase_bkgnd; + } + + void render_target::initialize_memory(vk::command_buffer& cmd, bool read_access) + { + const bool is_depth = is_depth_surface(); + const bool should_read_buffers = is_depth ? !!g_cfg.video.read_depth_buffer : !!g_cfg.video.read_color_buffers; + + if (!should_read_buffers) + { + clear_memory(cmd, this); + + if (read_access && samples() > 1) + { + // Only clear the resolve surface if reading from it, otherwise it's a waste + clear_memory(cmd, get_resolve_target_safe(cmd)); + } + + msaa_flags = rsx::surface_state_flags::ready; + } + else + { + load_memory(cmd); + } + } + + vk::viewable_image* render_target::get_surface(rsx::surface_access access_type) + { + if (samples() == 1 || access_type == rsx::surface_access::write) + { + return this; + } + + // A read barrier should have been called before this! + ensure(resolve_surface); // "Read access without explicit barrier" + ensure(!(msaa_flags & rsx::surface_state_flags::require_resolve)); + return resolve_surface.get(); + } + + bool render_target::is_depth_surface() const + { + return !!(aspect() & VK_IMAGE_ASPECT_DEPTH_BIT); + } + + void render_target::release_ref(vk::viewable_image* t) const + { + static_cast(t)->release(); + } + + bool render_target::matches_dimensions(u16 _width, u16 _height) const + { + // Use forward scaling to account for rounding and clamping errors + const auto [scaled_w, scaled_h] = rsx::apply_resolution_scale(_width, _height); + return (scaled_w == width()) && (scaled_h == height()); + } + + void render_target::texture_barrier(vk::command_buffer& cmd) + { + if (samples() == 1) + { + if (!write_barrier_sync_tag) write_barrier_sync_tag++; // Activate barrier sync + cyclic_reference_sync_tag = write_barrier_sync_tag; // Match tags + } + + vk::insert_texture_barrier(cmd, this, VK_IMAGE_LAYOUT_GENERAL); + } + + void render_target::reset_surface_counters() + { + frame_tag = 0; + write_barrier_sync_tag = 0; + } + + image_view* render_target::get_view(u32 remap_encoding, const std::pair, std::array>& remap, VkImageAspectFlags mask) + { + if (remap_encoding == VK_REMAP_VIEW_MULTISAMPLED) + { + // Special remap flag, intercept here + return vk::viewable_image::get_view(VK_REMAP_IDENTITY, remap, mask); + } + + if (!resolve_surface) [[likely]] + { + return vk::viewable_image::get_view(remap_encoding, remap, mask); + } + else + { + return resolve_surface->get_view(remap_encoding, remap, mask); + } + } + + void render_target::memory_barrier(vk::command_buffer& cmd, rsx::surface_access access) + { + const bool read_access = (access != rsx::surface_access::write); + const bool is_depth = is_depth_surface(); + const bool should_read_buffers = is_depth ? !!g_cfg.video.read_depth_buffer : !!g_cfg.video.read_color_buffers; + + if (should_read_buffers) + { + // TODO: Decide what to do when memory loads are disabled but the underlying has memory changed + // NOTE: Assume test() is expensive when in a pinch + if (last_use_tag && state_flags == rsx::surface_state_flags::ready && !test()) + { + // TODO: Figure out why merely returning and failing the test does not work when reading (TLoU) + // The result should have been the same either way + state_flags |= rsx::surface_state_flags::erase_bkgnd; + } + } + + if (!read_access && write_barrier_sync_tag != 0) + { + if (current_layout == VK_IMAGE_LAYOUT_GENERAL) + { + if (write_barrier_sync_tag != cyclic_reference_sync_tag) + { + // This barrier catches a very specific case where 2 draw calls are executed with general layout (cyclic ref) but no texture barrier in between. + // This happens when a cyclic ref is broken. In this case previous draw must finish drawing before the new one renders to avoid current draw breaking previous one. + VkPipelineStageFlags src_stage, dst_stage; + VkAccessFlags src_access, dst_access; + + if (!is_depth_surface()) [[likely]] + { + src_stage = VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; + dst_stage = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; + src_access = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; + dst_access = VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; + } + else + { + src_stage = VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT; + dst_stage = VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT; + src_access = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT; + dst_access = VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT; + } + + vk::insert_image_memory_barrier(cmd, value, VK_IMAGE_LAYOUT_GENERAL, VK_IMAGE_LAYOUT_GENERAL, + src_stage, dst_stage, src_access, dst_access, { aspect(), 0, 1, 0, 1 }); + + write_barrier_sync_tag = 0; // Disable for next draw + } + else + { + // Synced externally for this draw + write_barrier_sync_tag++; + } + } + else + { + write_barrier_sync_tag = 0; // Disable + } + } + + if (old_contents.empty()) [[likely]] + { + if (state_flags & rsx::surface_state_flags::erase_bkgnd) + { + // NOTE: This step CAN introduce MSAA flags! + initialize_memory(cmd, read_access); + + ensure(state_flags == rsx::surface_state_flags::ready); + on_write(rsx::get_shared_tag(), static_cast(msaa_flags)); + } + + if (msaa_flags & rsx::surface_state_flags::require_resolve) + { + if (read_access) + { + // Only do this step when read access is required + get_resolve_target_safe(cmd); + resolve(cmd); + } + } + else if (msaa_flags & rsx::surface_state_flags::require_unresolve) + { + if (!read_access) + { + // Only do this step when it is needed to start rendering + ensure(resolve_surface); + unresolve(cmd); + } + } + + return; + } + + // Memory transfers + vk::image* target_image = (samples() > 1) ? get_resolve_target_safe(cmd) : this; + vk::blitter hw_blitter; + const auto dst_bpp = get_bpp(); + + unsigned first = prepare_rw_barrier_for_transfer(this); + const bool accept_all = (last_use_tag && test()); + bool optimize_copy = true; + u64 newest_tag = 0; + + for (auto i = first; i < old_contents.size(); ++i) + { + auto& section = old_contents[i]; + auto src_texture = static_cast(section.source); + src_texture->read_barrier(cmd); + + if (!accept_all && !src_texture->test()) [[likely]] + { + // If this surface is intact, accept all incoming data as it is guaranteed to be safe + // If this surface has not been initialized or is dirty, do not add more dirty data to it + continue; + } + + const auto src_bpp = src_texture->get_bpp(); + rsx::typeless_xfer typeless_info{}; + + if (src_texture->aspect() != aspect() || + !formats_are_bitcast_compatible(this, src_texture)) + { + typeless_info.src_is_typeless = true; + typeless_info.src_context = rsx::texture_upload_context::framebuffer_storage; + typeless_info.src_native_format_override = static_cast(info.format); + typeless_info.src_gcm_format = src_texture->get_gcm_format(); + typeless_info.src_scaling_hint = f32(src_bpp) / dst_bpp; + } + + section.init_transfer(this); + auto src_area = section.src_rect(); + auto dst_area = section.dst_rect(); + + if (g_cfg.video.antialiasing_level != msaa_level::none) + { + src_texture->transform_pixels_to_samples(src_area); + this->transform_pixels_to_samples(dst_area); + } + + bool memory_load = true; + if (dst_area.x1 == 0 && dst_area.y1 == 0 && + unsigned(dst_area.x2) == target_image->width() && unsigned(dst_area.y2) == target_image->height()) + { + // Skip a bunch of useless work + state_flags &= ~(rsx::surface_state_flags::erase_bkgnd); + msaa_flags = rsx::surface_state_flags::ready; + + memory_load = false; + stencil_init_flags = src_texture->stencil_init_flags; + } + else if (state_flags & rsx::surface_state_flags::erase_bkgnd) + { + // Might introduce MSAA flags + initialize_memory(cmd, false); + ensure(state_flags == rsx::surface_state_flags::ready); + } + + if (msaa_flags & rsx::surface_state_flags::require_resolve) + { + // Need to forward resolve this + resolve(cmd); + } + + hw_blitter.scale_image( + cmd, + src_texture->get_surface(rsx::surface_access::read), + this->get_surface(rsx::surface_access::transfer), + src_area, + dst_area, + /*linear?*/false, typeless_info); + + optimize_copy = optimize_copy && !memory_load; + newest_tag = src_texture->last_use_tag; + } + + if (!newest_tag) [[unlikely]] + { + // Underlying memory has been modified and we could not find valid data to fill it + clear_rw_barrier(); + + state_flags |= rsx::surface_state_flags::erase_bkgnd; + initialize_memory(cmd, read_access); + ensure(state_flags == rsx::surface_state_flags::ready); + } + + // NOTE: Optimize flag relates to stencil resolve/unresolve for NVIDIA. + on_write_copy(newest_tag, optimize_copy); + + if (!read_access && samples() > 1) + { + // Write barrier, must initialize + unresolve(cmd); + } + } +} diff --git a/rpcs3/Emu/RSX/VK/VKRenderTargets.h b/rpcs3/Emu/RSX/VK/VKRenderTargets.h index c0ab48ac50..69e1ca895a 100644 --- a/rpcs3/Emu/RSX/VK/VKRenderTargets.h +++ b/rpcs3/Emu/RSX/VK/VKRenderTargets.h @@ -21,582 +21,38 @@ namespace vk u64 cyclic_reference_sync_tag = 0; u64 write_barrier_sync_tag = 0; + // MSAA support: // Get the linear resolve target bound to this surface. Initialize if none exists - vk::viewable_image* get_resolve_target_safe(vk::command_buffer& cmd) - { - if (!resolve_surface) - { - // Create a resolve surface - const auto resolve_w = width() * samples_x; - const auto resolve_h = height() * samples_y; - - VkImageUsageFlags usage = VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT; - usage |= (this->info.usage & (VK_IMAGE_USAGE_STORAGE_BIT | VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT)); - - resolve_surface.reset(new vk::viewable_image( - *g_render_device, - g_render_device->get_memory_mapping().device_local, - VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, - VK_IMAGE_TYPE_2D, - format(), - resolve_w, resolve_h, 1, 1, 1, - VK_SAMPLE_COUNT_1_BIT, - VK_IMAGE_LAYOUT_UNDEFINED, - VK_IMAGE_TILING_OPTIMAL, - usage, - 0, - format_class())); - - resolve_surface->native_component_map = native_component_map; - resolve_surface->change_layout(cmd, VK_IMAGE_LAYOUT_GENERAL); - } - - return resolve_surface.get(); - } - + vk::viewable_image* get_resolve_target_safe(vk::command_buffer& cmd); // Resolve the planar MSAA data into a linear block - void resolve(vk::command_buffer& cmd) - { - VkImageSubresourceRange range = { aspect(), 0, 1, 0, 1 }; - - // NOTE: This surface can only be in the ATTACHMENT_OPTIMAL layout - // The resolve surface can be in any type of access, but we have to assume it is likely in read-only mode like shader read-only - - if (!is_depth_surface()) [[likely]] - { - ensure(current_layout == VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL); - - // This is the source; finish writing before reading - vk::insert_image_memory_barrier( - cmd, this->value, - this->current_layout, VK_IMAGE_LAYOUT_GENERAL, - VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, - VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, - VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, - VK_ACCESS_SHADER_READ_BIT, - range); - - // This is the target; finish reading before writing - vk::insert_image_memory_barrier( - cmd, resolve_surface->value, - resolve_surface->current_layout, VK_IMAGE_LAYOUT_GENERAL, - VK_PIPELINE_STAGE_TRANSFER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, - VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, - VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_SHADER_READ_BIT, - VK_ACCESS_SHADER_WRITE_BIT, - range); - - this->current_layout = VK_IMAGE_LAYOUT_GENERAL; - resolve_surface->current_layout = VK_IMAGE_LAYOUT_GENERAL; - } - else - { - this->push_layout(cmd, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); - resolve_surface->change_layout(cmd, VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL); - } - - vk::resolve_image(cmd, resolve_surface.get(), this); - - if (!is_depth_surface()) [[likely]] - { - vk::insert_image_memory_barrier( - cmd, this->value, - this->current_layout, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, - VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, - VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, - VK_ACCESS_SHADER_READ_BIT, - VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, - range); - - vk::insert_image_memory_barrier( - cmd, resolve_surface->value, - VK_IMAGE_LAYOUT_GENERAL, VK_IMAGE_LAYOUT_GENERAL, - VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, - VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | VK_PIPELINE_STAGE_TRANSFER_BIT, - VK_ACCESS_SHADER_WRITE_BIT, - VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_TRANSFER_WRITE_BIT, - range); - - this->current_layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; - resolve_surface->current_layout = VK_IMAGE_LAYOUT_GENERAL; - } - else - { - this->pop_layout(cmd); - resolve_surface->change_layout(cmd, VK_IMAGE_LAYOUT_GENERAL); - } - - msaa_flags &= ~(rsx::surface_state_flags::require_resolve); - } - + void resolve(vk::command_buffer& cmd); // Unresolve the linear data into planar MSAA data - void unresolve(vk::command_buffer& cmd) - { - ensure(!(msaa_flags & rsx::surface_state_flags::require_resolve)); - VkImageSubresourceRange range = { aspect(), 0, 1, 0, 1 }; - - if (!is_depth_surface()) [[likely]] - { - ensure(current_layout == VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL); - - // This is the dest; finish reading before writing - vk::insert_image_memory_barrier( - cmd, this->value, - this->current_layout, VK_IMAGE_LAYOUT_GENERAL, - VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, - VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, - VK_ACCESS_SHADER_READ_BIT, - VK_ACCESS_SHADER_WRITE_BIT, - range); - - // This is the source; finish writing before reading - vk::insert_image_memory_barrier( - cmd, resolve_surface->value, - resolve_surface->current_layout, VK_IMAGE_LAYOUT_GENERAL, - VK_PIPELINE_STAGE_TRANSFER_BIT, - VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, - VK_ACCESS_TRANSFER_WRITE_BIT, - VK_ACCESS_SHADER_READ_BIT, - range); - - this->current_layout = VK_IMAGE_LAYOUT_GENERAL; - resolve_surface->current_layout = VK_IMAGE_LAYOUT_GENERAL; - } - else - { - this->push_layout(cmd, VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL); - resolve_surface->change_layout(cmd, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); - } - - vk::unresolve_image(cmd, this, resolve_surface.get()); - - if (!is_depth_surface()) [[likely]] - { - vk::insert_image_memory_barrier( - cmd, this->value, - this->current_layout, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, - VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, - VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, - VK_ACCESS_SHADER_WRITE_BIT, - VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | VK_ACCESS_COLOR_ATTACHMENT_READ_BIT, - range); - - vk::insert_image_memory_barrier( - cmd, resolve_surface->value, - VK_IMAGE_LAYOUT_GENERAL, VK_IMAGE_LAYOUT_GENERAL, - VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, - VK_PIPELINE_STAGE_TRANSFER_BIT, - VK_ACCESS_SHADER_READ_BIT, - VK_ACCESS_TRANSFER_WRITE_BIT, - range); - - this->current_layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; - resolve_surface->current_layout = VK_IMAGE_LAYOUT_GENERAL; - } - else - { - this->pop_layout(cmd); - resolve_surface->change_layout(cmd, VK_IMAGE_LAYOUT_GENERAL); - } - - msaa_flags &= ~(rsx::surface_state_flags::require_unresolve); - } + void unresolve(vk::command_buffer& cmd); + // Memory management: // Default-initialize memory without loading - void clear_memory(vk::command_buffer& cmd, vk::image *surface) - { - const auto optimal_layout = (surface->current_layout == VK_IMAGE_LAYOUT_GENERAL) ? - VK_IMAGE_LAYOUT_GENERAL : - VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL; - - surface->push_layout(cmd, optimal_layout); - - VkImageSubresourceRange range{ surface->aspect(), 0, 1, 0, 1 }; - if (surface->aspect() & VK_IMAGE_ASPECT_COLOR_BIT) - { - VkClearColorValue color = {{0.f, 0.f, 0.f, 1.f}}; - vkCmdClearColorImage(cmd, surface->value, surface->current_layout, &color, 1, &range); - } - else - { - VkClearDepthStencilValue clear{ 1.f, 255 }; - vkCmdClearDepthStencilImage(cmd, surface->value, surface->current_layout, &clear, 1, &range); - } - - surface->pop_layout(cmd); - - if (surface == this) - { - state_flags &= ~rsx::surface_state_flags::erase_bkgnd; - } - } - + void clear_memory(vk::command_buffer& cmd, vk::image* surface); // Load memory from cell and use to initialize the surface - void load_memory(vk::command_buffer& cmd) - { - auto& upload_heap = *vk::get_upload_heap(); - const bool is_swizzled = (raster_type == rsx::surface_raster_type::swizzle); - - rsx::subresource_layout subres{}; - subres.width_in_block = subres.width_in_texel = surface_width * samples_x; - subres.height_in_block = subres.height_in_texel = surface_height * samples_y; - subres.pitch_in_block = rsx_pitch / get_bpp(); - subres.depth = 1; - subres.data = { vm::get_super_ptr(base_addr), static_cast::index_type>(rsx_pitch * surface_height * samples_y) }; - - if (g_cfg.video.resolution_scale_percent == 100 && spp == 1) [[likely]] - { - push_layout(cmd, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); - vk::upload_image(cmd, this, { subres }, get_gcm_format(), is_swizzled, 1, aspect(), upload_heap, rsx_pitch, upload_contents_inline); - pop_layout(cmd); - } - else - { - vk::image* content = nullptr; - vk::image* final_dst = (samples() > 1) ? get_resolve_target_safe(cmd) : this; - - // Prepare dst image - final_dst->push_layout(cmd, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); - - if (final_dst->width() == subres.width_in_block && final_dst->height() == subres.height_in_block) - { - // Possible if MSAA is enabled with 100% resolution scale or - // surface dimensions are less than resolution scale threshold and no MSAA. - // Writethrough. - content = final_dst; - } - else - { - content = vk::get_typeless_helper(format(), format_class(), subres.width_in_block, subres.height_in_block); - content->change_layout(cmd, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); - } - - // Load Cell data into temp buffer - vk::upload_image(cmd, content, { subres }, get_gcm_format(), is_swizzled, 1, aspect(), upload_heap, rsx_pitch, upload_contents_inline); - - // Write into final image - if (content != final_dst) - { - // Avoid layout push/pop on scratch memory by setting explicit layout here - content->change_layout(cmd, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL); - - vk::copy_scaled_image(cmd, content, final_dst, - { 0, 0, subres.width_in_block, subres.height_in_block }, - { 0, 0, static_cast(final_dst->width()), static_cast(final_dst->height()) }, - 1, true, aspect() == VK_IMAGE_ASPECT_COLOR_BIT ? VK_FILTER_LINEAR : VK_FILTER_NEAREST); - } - - final_dst->pop_layout(cmd); - - if (samples() > 1) - { - // Trigger unresolve - msaa_flags = rsx::surface_state_flags::require_unresolve; - } - } - - state_flags &= ~rsx::surface_state_flags::erase_bkgnd; - } - - void initialize_memory(vk::command_buffer& cmd, bool read_access) - { - const bool memory_load = is_depth_surface() ? - !!g_cfg.video.read_depth_buffer : - !!g_cfg.video.read_color_buffers; - - if (!memory_load) - { - clear_memory(cmd, this); - - if (read_access && samples() > 1) - { - // Only clear the resolve surface if reading from it, otherwise it's a waste - clear_memory(cmd, get_resolve_target_safe(cmd)); - } - - msaa_flags = rsx::surface_state_flags::ready; - } - else - { - load_memory(cmd); - } - } + void load_memory(vk::command_buffer& cmd); + // Generic - chooses whether to clear or load + void initialize_memory(vk::command_buffer& cmd, bool read_access); public: u64 frame_tag = 0; // frame id when invalidated, 0 if not invalid using viewable_image::viewable_image; - vk::viewable_image* get_surface(rsx::surface_access access_type) override - { - if (samples() == 1 || access_type == rsx::surface_access::write) - { - return this; - } - - // A read barrier should have been called before this! - ensure(resolve_surface); // "Read access without explicit barrier" - ensure(!(msaa_flags & rsx::surface_state_flags::require_resolve)); - return resolve_surface.get(); - } - - bool is_depth_surface() const override - { - return !!(aspect() & VK_IMAGE_ASPECT_DEPTH_BIT); - } - - void release_ref(vk::viewable_image* t) const override - { - static_cast(t)->release(); - } - - bool matches_dimensions(u16 _width, u16 _height) const - { - // Use forward scaling to account for rounding and clamping errors - const auto [scaled_w, scaled_h] = rsx::apply_resolution_scale(_width, _height); - return (scaled_w == width()) && (scaled_h == height()); - } - - void texture_barrier(vk::command_buffer& cmd) - { - if (samples() == 1) - { - if (!write_barrier_sync_tag) write_barrier_sync_tag++; // Activate barrier sync - cyclic_reference_sync_tag = write_barrier_sync_tag; // Match tags - } - - vk::insert_texture_barrier(cmd, this, VK_IMAGE_LAYOUT_GENERAL); - } - - void reset_surface_counters() - { - frame_tag = 0; - write_barrier_sync_tag = 0; - } + vk::viewable_image* get_surface(rsx::surface_access access_type) override; + bool is_depth_surface() const override; + void release_ref(vk::viewable_image* t) const override; + bool matches_dimensions(u16 _width, u16 _height) const; + void reset_surface_counters(); image_view* get_view(u32 remap_encoding, const std::pair, std::array>& remap, - VkImageAspectFlags mask = VK_IMAGE_ASPECT_COLOR_BIT | VK_IMAGE_ASPECT_DEPTH_BIT) override - { - if (remap_encoding == VK_REMAP_VIEW_MULTISAMPLED) - { - // Special remap flag, intercept here - return vk::viewable_image::get_view(VK_REMAP_IDENTITY, remap, mask); - } - - if (!resolve_surface) [[likely]] - { - return vk::viewable_image::get_view(remap_encoding, remap, mask); - } - else - { - return resolve_surface->get_view(remap_encoding, remap, mask); - } - } - - void memory_barrier(vk::command_buffer& cmd, rsx::surface_access access) - { - const bool read_access = (access != rsx::surface_access::write); - const bool is_depth = is_depth_surface(); - - if ((g_cfg.video.read_color_buffers && !is_depth) || - (g_cfg.video.read_depth_buffer && is_depth)) - { - // TODO: Decide what to do when memory loads are disabled but the underlying has memory changed - // NOTE: Assume test() is expensive when in a pinch - if (last_use_tag && state_flags == rsx::surface_state_flags::ready && !test()) - { - // TODO: Figure out why merely returning and failing the test does not work when reading (TLoU) - // The result should have been the same either way - state_flags |= rsx::surface_state_flags::erase_bkgnd; - } - } - - if (!read_access && write_barrier_sync_tag != 0) - { - if (current_layout == VK_IMAGE_LAYOUT_GENERAL) - { - if (write_barrier_sync_tag != cyclic_reference_sync_tag) - { - // This barrier catches a very specific case where 2 draw calls are executed with general layout (cyclic ref) but no texture barrier in between. - // This happens when a cyclic ref is broken. In this case previous draw must finish drawing before the new one renders to avoid current draw breaking previous one. - VkPipelineStageFlags src_stage, dst_stage; - VkAccessFlags src_access, dst_access; - - if (!is_depth_surface()) [[likely]] - { - src_stage = VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; - dst_stage = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; - src_access = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; - dst_access = VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; - } - else - { - src_stage = VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT; - dst_stage = VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT; - src_access = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT; - dst_access = VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT; - } - - vk::insert_image_memory_barrier(cmd, value, VK_IMAGE_LAYOUT_GENERAL, VK_IMAGE_LAYOUT_GENERAL, - src_stage, dst_stage, src_access, dst_access, { aspect(), 0, 1, 0, 1 }); - - write_barrier_sync_tag = 0; // Disable for next draw - } - else - { - // Synced externally for this draw - write_barrier_sync_tag++; - } - } - else - { - write_barrier_sync_tag = 0; // Disable - } - } - - if (old_contents.empty()) [[likely]] - { - if (state_flags & rsx::surface_state_flags::erase_bkgnd) - { - // NOTE: This step CAN introduce MSAA flags! - initialize_memory(cmd, read_access); - - ensure(state_flags == rsx::surface_state_flags::ready); - on_write(rsx::get_shared_tag(), static_cast(msaa_flags)); - } - - if (msaa_flags & rsx::surface_state_flags::require_resolve) - { - if (read_access) - { - // Only do this step when read access is required - get_resolve_target_safe(cmd); - resolve(cmd); - } - } - else if (msaa_flags & rsx::surface_state_flags::require_unresolve) - { - if (!read_access) - { - // Only do this step when it is needed to start rendering - ensure(resolve_surface); - unresolve(cmd); - } - } - - return; - } - - // Memory transfers - vk::image *target_image = (samples() > 1) ? get_resolve_target_safe(cmd) : this; - vk::blitter hw_blitter; - const auto dst_bpp = get_bpp(); - - unsigned first = prepare_rw_barrier_for_transfer(this); - bool optimize_copy = true; - bool any_valid_writes = false; - u64 newest_tag = 0; - - for (auto i = first; i < old_contents.size(); ++i) - { - auto §ion = old_contents[i]; - auto src_texture = static_cast(section.source); - src_texture->read_barrier(cmd); - - if (src_texture->test()) [[likely]] - { - any_valid_writes = true; - } - else - { - continue; - } - - const auto src_bpp = src_texture->get_bpp(); - rsx::typeless_xfer typeless_info{}; - - if (src_texture->aspect() != aspect() || - !formats_are_bitcast_compatible(this, src_texture)) - { - typeless_info.src_is_typeless = true; - typeless_info.src_context = rsx::texture_upload_context::framebuffer_storage; - typeless_info.src_native_format_override = static_cast(info.format); - typeless_info.src_gcm_format = src_texture->get_gcm_format(); - typeless_info.src_scaling_hint = f32(src_bpp) / dst_bpp; - } - - section.init_transfer(this); - auto src_area = section.src_rect(); - auto dst_area = section.dst_rect(); - - if (g_cfg.video.antialiasing_level != msaa_level::none) - { - src_texture->transform_pixels_to_samples(src_area); - this->transform_pixels_to_samples(dst_area); - } - - bool memory_load = true; - if (dst_area.x1 == 0 && dst_area.y1 == 0 && - unsigned(dst_area.x2) == target_image->width() && unsigned(dst_area.y2) == target_image->height()) - { - // Skip a bunch of useless work - state_flags &= ~(rsx::surface_state_flags::erase_bkgnd); - msaa_flags = rsx::surface_state_flags::ready; - - memory_load = false; - stencil_init_flags = src_texture->stencil_init_flags; - } - else if (state_flags & rsx::surface_state_flags::erase_bkgnd) - { - // Might introduce MSAA flags - initialize_memory(cmd, false); - ensure(state_flags == rsx::surface_state_flags::ready); - } - - if (msaa_flags & rsx::surface_state_flags::require_resolve) - { - // Need to forward resolve this - resolve(cmd); - } - - hw_blitter.scale_image( - cmd, - src_texture->get_surface(rsx::surface_access::read), - this->get_surface(rsx::surface_access::transfer), - src_area, - dst_area, - /*linear?*/false, typeless_info); - - optimize_copy = optimize_copy && !memory_load; - newest_tag = src_texture->last_use_tag; - } - - if (!any_valid_writes) [[unlikely]] - { - rsx_log.warning("Surface at 0x%x inherited stale references", base_addr); - - clear_rw_barrier(); - shuffle_tag(); - - if (!read_access) - { - // This will be modified either way - state_flags |= rsx::surface_state_flags::erase_bkgnd; - memory_barrier(cmd, access); - } - - return; - } - - // NOTE: Optimize flag relates to stencil resolve/unresolve for NVIDIA. - on_write_copy(newest_tag, optimize_copy); - - if (!read_access && samples() > 1) - { - // Write barrier, must initialize - unresolve(cmd); - } - } + VkImageAspectFlags mask = VK_IMAGE_ASPECT_COLOR_BIT | VK_IMAGE_ASPECT_DEPTH_BIT) override; + // Synchronization + void texture_barrier(vk::command_buffer& cmd); + void memory_barrier(vk::command_buffer& cmd, rsx::surface_access access); void read_barrier(vk::command_buffer& cmd) { memory_barrier(cmd, rsx::surface_access::read); } void write_barrier(vk::command_buffer& cmd) { memory_barrier(cmd, rsx::surface_access::write); } }; diff --git a/rpcs3/VKGSRender.vcxproj b/rpcs3/VKGSRender.vcxproj index 4b3dd242ee..2f19e91565 100644 --- a/rpcs3/VKGSRender.vcxproj +++ b/rpcs3/VKGSRender.vcxproj @@ -74,6 +74,7 @@ + diff --git a/rpcs3/VKGSRender.vcxproj.filters b/rpcs3/VKGSRender.vcxproj.filters index d3cd6cbff3..905d0ac2bc 100644 --- a/rpcs3/VKGSRender.vcxproj.filters +++ b/rpcs3/VKGSRender.vcxproj.filters @@ -61,10 +61,11 @@ vkutils - + - - + + +