diff --git a/rpcs3/Emu/RSX/Common/surface_store.h b/rpcs3/Emu/RSX/Common/surface_store.h index d7e92ea768..b74cda1d63 100644 --- a/rpcs3/Emu/RSX/Common/surface_store.h +++ b/rpcs3/Emu/RSX/Common/surface_store.h @@ -2,6 +2,7 @@ #include "Utilities/GSL.h" #include "Emu/Memory/vm.h" +#include "TextureUtils.h" #include "../GCM.h" #include "../rsx_utils.h" #include @@ -59,24 +60,65 @@ namespace rsx u8 bpp; }; - template - struct surface_hierachy_info + template + struct deferred_clipped_region { - struct memory_overlap_t + u16 src_x, src_y, dst_x, dst_y, width, height; + f32 transfer_scale_x, transfer_scale_y; + surface_type target; + surface_type source; + + template + deferred_clipped_region cast() const { - image_storage_type _ref; - u32 memory_address; - u32 x; - u32 y; - u32 w; - u32 h; - }; + deferred_clipped_region ret; + ret.src_x = src_x; + ret.src_y = src_y; + ret.dst_x = dst_x; + ret.dst_y = dst_y; + ret.width = width; + ret.height = height; + ret.transfer_scale_x = transfer_scale_x; + ret.transfer_scale_y = transfer_scale_y; + ret.target = (T)(target); + ret.source = (T)(source); - u32 memory_address; - u32 memory_range; - image_storage_type memory_contents; + return ret; + } - std::vector overlapping_set; + operator bool() const + { + return (source != nullptr); + } + + template + void init_transfer(T target_surface) + { + if (!width) + { + // Perform intersection here + const auto region = rsx::get_transferable_region(target_surface); + width = std::get<0>(region); + height = std::get<1>(region); + + transfer_scale_x = f32(std::get<2>(region)) / width; + transfer_scale_y = f32(std::get<3>(region)) / height; + + target = target_surface; + } + } + + areai src_rect() const + { + verify(HERE), width; + return { src_x, src_y, src_x + width, src_y + height }; + } + + areai dst_rect() const + { + verify(HERE), width; + return { dst_x, dst_y, dst_x + u16(width * transfer_scale_x + 0.5f), dst_y + u16(height * transfer_scale_y + 0.5f) }; + } }; template @@ -86,7 +128,7 @@ namespace rsx std::array, 5> memory_tag_samples; bool dirty = false; - image_storage_type old_contents = nullptr; + deferred_clipped_region old_contents{}; rsx::surface_antialiasing read_aa_mode = rsx::surface_antialiasing::center_1_sample; GcmTileInfo *tile = nullptr; @@ -142,11 +184,60 @@ namespace rsx { if (!other || other->get_rsx_pitch() != this->get_rsx_pitch()) { - old_contents = nullptr; + old_contents = {}; return; } - old_contents = other; + old_contents = {}; + old_contents.source = other; + } + + template + void set_old_contents_region(const T& region, bool normalized) + { + // NOTE: This method will not perform pitch verification! + verify(HERE), region.source; + old_contents = region.template cast(); + + // Reverse normalization process if needed + if (normalized) + { + const u16 bytes_to_texels_x = region.source->get_bpp() * (region.source->write_aa_mode == rsx::surface_antialiasing::center_1_sample? 1 : 2); + const u16 rows_to_texels_y = (region.source->write_aa_mode > rsx::surface_antialiasing::diagonal_centered_2_samples? 2 : 1); + old_contents.src_x /= bytes_to_texels_x; + old_contents.src_y /= rows_to_texels_y; + old_contents.width /= bytes_to_texels_x; + old_contents.height /= rows_to_texels_y; + + const u16 bytes_to_texels_x2 = (get_bpp() * (write_aa_mode == rsx::surface_antialiasing::center_1_sample? 1 : 2)); + const u16 rows_to_texels_y2 = (write_aa_mode > rsx::surface_antialiasing::diagonal_centered_2_samples)? 2 : 1; + old_contents.dst_x /= bytes_to_texels_x2; + old_contents.dst_y /= rows_to_texels_y2; + + old_contents.transfer_scale_x = f32(bytes_to_texels_x2) / bytes_to_texels_x; + old_contents.transfer_scale_y = f32(rows_to_texels_y2) / rows_to_texels_y; + } + + // Apply resolution scale if needed + if (g_cfg.video.resolution_scale_percent != 100) + { + auto src_width = rsx::apply_resolution_scale(old_contents.width, true, old_contents.source->width()); + auto src_height = rsx::apply_resolution_scale(old_contents.height, true, old_contents.source->height()); + + auto dst_width = rsx::apply_resolution_scale(old_contents.width, true, old_contents.target->width()); + auto dst_height = rsx::apply_resolution_scale(old_contents.height, true, old_contents.target->height()); + + old_contents.transfer_scale_x *= f32(dst_width) / src_width; + old_contents.transfer_scale_y *= f32(dst_height) / src_height; + + old_contents.width = src_width; + old_contents.height = src_height; + + old_contents.src_x = rsx::apply_resolution_scale(old_contents.src_x, false, old_contents.source->width()); + old_contents.src_y = rsx::apply_resolution_scale(old_contents.src_y, false, old_contents.source->height()); + old_contents.dst_x = rsx::apply_resolution_scale(old_contents.dst_x, false, old_contents.target->width()); + old_contents.dst_y = rsx::apply_resolution_scale(old_contents.dst_y, false, old_contents.target->height()); + } } void queue_tag(u32 address) @@ -207,7 +298,22 @@ namespace rsx read_aa_mode = write_aa_mode; dirty = false; - old_contents = nullptr; + old_contents = {}; + } + + // Returns the rect area occupied by this surface expressed as an 8bpp image with no AA + areau get_normalized_memory_area() const + { + const u16 internal_width = get_native_pitch() * (write_aa_mode > rsx::surface_antialiasing::center_1_sample? 2: 1); + const u16 internal_height = get_surface_height() * (write_aa_mode > rsx::surface_antialiasing::diagonal_centered_2_samples? 2: 1); + + return { 0, 0, internal_width, internal_height }; + } + + rsx::address_range get_memory_range() const + { + const u32 internal_height = get_surface_height() * (write_aa_mode > rsx::surface_antialiasing::diagonal_centered_2_samples? 2: 1); + return rsx::address_range::start_length(memory_tag_samples[0].first, internal_height * get_rsx_pitch()); } }; @@ -257,6 +363,11 @@ namespace rsx } } + constexpr u32 get_aa_factor_u(surface_antialiasing aa_mode) + { + return (aa_mode == surface_antialiasing::center_1_sample)? 1 : 2; + } + constexpr u32 get_aa_factor_v(surface_antialiasing aa_mode) { switch (aa_mode) @@ -284,100 +395,251 @@ namespace rsx rsx::address_range m_depth_stencil_memory_range; public: - std::array, 4> m_bound_render_targets = {}; - std::tuple m_bound_depth_stencil = {}; + std::array, 4> m_bound_render_targets = {}; + std::pair m_bound_depth_stencil = {}; std::list invalidated_resources; - std::vector> m_memory_tree; u64 cache_tag = 0ull; u64 write_tag = 0ull; - u64 memory_tag = 0ull; surface_store() = default; ~surface_store() = default; surface_store(const surface_store&) = delete; private: - void generate_render_target_memory_tree() + template + void split_surface_region(command_list_type cmd, u32 address, surface_type prev_surface, u16 width, u16 height, u8 bpp, rsx::surface_antialiasing aa) { - auto process_entry = [](surface_hierachy_info& block_info, - const surface_format_info& info, - u32 memory_address, u32 memory_end, - u32 address, surface_type surface) +#ifndef INCOMPLETE_SURFACE_CACHE_IMPL + auto insert_new_surface = [&]( + u32 new_address, + deferred_clipped_region& region, + std::unordered_map& data) { - if (address <= memory_address) // also intentionally fails on self-test - return; - - if (address >= memory_end) - return; - - surface_format_info info2{}; - Traits::get_surface_info(surface, &info2); - const auto offset = (address - memory_address); - const auto offset_y = (offset / info.rsx_pitch); - const auto offset_x = (offset % info.rsx_pitch) / info.bpp; - const auto pitch2 = info2.bpp * info2.surface_width; - - const bool fits_w = ((offset % info.rsx_pitch) + pitch2) <= info.rsx_pitch; - const bool fits_h = ((offset_y + info2.surface_height) * info.rsx_pitch) <= (memory_end - memory_address); - - if (fits_w && fits_h) + verify(HERE), prev_surface; + if (prev_surface->read_barrier(cmd); !prev_surface->test()) { - typename surface_hierachy_info::memory_overlap_t overlap{}; - overlap._ref = surface; - overlap.memory_address = address; - overlap.x = offset_x; - overlap.y = offset_y; - overlap.w = info2.surface_width; - overlap.h = info2.surface_height; + return; + } - block_info.overlapping_set.push_back(overlap); + surface_storage_type sink; + if (const auto found = data.find(new_address); + found != data.end()) + { + if (Traits::is_compatible_surface(Traits::get(found->second), region.source, region.width, region.height, 1)) + { + // There is no need to erase due to the reinsertion below + sink = std::move(found->second); + } + else + { + // TODO: Merge the 2 regions + invalidated_resources.push_back(std::move(found->second)); + data.erase(new_address); + + auto &old = invalidated_resources.back(); + Traits::notify_surface_invalidated(old); + } + } + + Traits::clone_surface(cmd, sink, region.source, new_address, region); + verify(HERE), region.target == Traits::get(sink); + data[new_address] = std::move(sink); + }; + + // Define incoming region + size2u old, _new; + + const auto prev_area = prev_surface->get_normalized_memory_area(); + old.width = prev_area.x2; + old.height = prev_area.y2; + + _new.width = width * bpp * get_aa_factor_u(aa); + _new.height = height * get_aa_factor_v(aa); + + if (old.width > _new.width) + { + // Split in X + const u32 baseaddr = address + _new.width; + const u32 bytes_to_texels_x = (bpp * get_aa_factor_u(prev_surface->write_aa_mode)); + + deferred_clipped_region copy; + copy.src_x = _new.width / bytes_to_texels_x; + copy.src_y = 0; + copy.dst_x = 0; + copy.dst_y = 0; + copy.width = (old.width - _new.width) / bytes_to_texels_x; + copy.height = prev_surface->get_surface_height(); + copy.transfer_scale_x = 1.f; + copy.transfer_scale_y = 1.f; + copy.target = nullptr; + copy.source = prev_surface; + + if constexpr (is_depth_surface) + { + insert_new_surface(baseaddr, copy, m_depth_stencil_storage); } else { - // TODO - } - }; - - auto process_block = [this, process_entry](u32 memory_address, surface_type surface) - { - surface_hierachy_info block_info; - surface_format_info info{}; - Traits::get_surface_info(surface, &info); - const auto memory_end = memory_address + (info.rsx_pitch * info.surface_height); - - for (const auto &rtt : m_render_targets_storage) - { - process_entry(block_info, info, memory_address, memory_end, rtt.first, Traits::get(rtt.second)); - } - - for (const auto &ds : m_depth_stencil_storage) - { - process_entry(block_info, info, memory_address, memory_end, ds.first, Traits::get(ds.second)); - } - - if (!block_info.overlapping_set.empty()) - { - block_info.memory_address = memory_address; - block_info.memory_range = (memory_end - memory_address); - block_info.memory_contents = surface; - - m_memory_tree.push_back(block_info); - } - }; - - for (auto &rtt : m_bound_render_targets) - { - if (const auto address = std::get<0>(rtt)) - { - process_block(address, std::get<1>(rtt)); + insert_new_surface(baseaddr, copy, m_render_targets_storage); } } - if (const auto address = std::get<0>(m_bound_depth_stencil)) + if (old.height > _new.height) { - process_block(address, std::get<1>(m_bound_depth_stencil)); + // Split in Y + const u32 baseaddr = address + (_new.height * prev_surface->get_rsx_pitch()); + const u32 bytes_to_texels_x = (bpp * get_aa_factor_u(prev_surface->write_aa_mode)); + + deferred_clipped_region copy; + copy.src_x = 0; + copy.src_y = _new.height / get_aa_factor_v(prev_surface->write_aa_mode); + copy.dst_x = 0; + copy.dst_y = 0; + copy.width = std::min(_new.width, old.width) / bytes_to_texels_x; + copy.height = (old.height - _new.height) / get_aa_factor_v(prev_surface->write_aa_mode); + copy.transfer_scale_x = 1.f; + copy.transfer_scale_y = 1.f; + copy.target = nullptr; + copy.source = prev_surface; + + if constexpr (is_depth_surface) + { + insert_new_surface(baseaddr, copy, m_depth_stencil_storage); + } + else + { + insert_new_surface(baseaddr, copy, m_render_targets_storage); + } } +#endif + } + + template + void intersect_surface_region(command_list_type cmd, u32 address, surface_type new_surface) + { +#ifndef INCOMPLETE_SURFACE_CACHE_IMPL + auto scan_list = [&new_surface](const rsx::address_range& mem_range, + std::unordered_map& data) -> std::vector> + { + std::vector> result; + for (const auto &e : data) + { + auto surface = Traits::get(e.second); + + if (new_surface == surface || e.second->dirty || e.second->last_use_tag <= new_surface->last_use_tag) + { + // Do not bother synchronizing with uninitialized data + continue; + } + + // Memory partition check + if (mem_range.start >= 0xc0000000) + { + if (e.first < 0xc0000000) continue; + } + else + { + if (e.first >= 0xc0000000) continue; + } + + // Pitch check + if (!rsx::pitch_compatible(surface, new_surface)) + { + continue; + } + + // Range check + const rsx::address_range this_range = surface->get_memory_range(); + if (!this_range.overlaps(mem_range)) + { + continue; + } + + result.push_back({ e.first, surface }); + } + + return result; + }; + + const rsx::address_range mem_range = new_surface->get_memory_range(); + const auto list1 = scan_list(mem_range, m_render_targets_storage); + const auto list2 = scan_list(mem_range, m_depth_stencil_storage); + + if (list1.empty() && list2.empty()) + { + return; + } + + std::vector> surface_info; + if (list1.empty()) + { + surface_info = std::move(list2); + } + else if (list2.empty()) + { + surface_info = std::move(list1); + } + else + { + surface_info = std::move(list1); + surface_info.reserve(list1.size() + list2.size()); + + for (const auto& e : list2) surface_info.push_back(e); + } + + if (UNLIKELY(surface_info.size() > 1)) + { + // Sort with newest first for early exit + std::sort(surface_info.begin(), surface_info.end(), [](const auto& a, const auto& b) + { + return (a.second->last_use_tag > b.second->last_use_tag); + }); + } + + // TODO: Modify deferred_clip_region::direct_copy() to take a few more things into account! + const areau child_region = new_surface->get_normalized_memory_area(); + const auto child_w = child_region.width(); + const auto child_h = child_region.height(); + + const auto pitch = new_surface->get_rsx_pitch(); + for (const auto &e: surface_info) + { + const auto parent_region = e.second->get_normalized_memory_area(); + const auto parent_w = parent_region.width(); + const auto parent_h = parent_region.height(); + const auto rect = rsx::intersect_region(e.first, parent_w, parent_h, 1, address, child_w, child_h, 1, pitch); + + const auto src_offset = std::get<0>(rect); + const auto dst_offset = std::get<1>(rect); + const auto size = std::get<2>(rect); + + if (src_offset.x >= parent_w || src_offset.y >= parent_h) + { + continue; + } + + if (dst_offset.x >= child_w || dst_offset.y >= child_h) + { + continue; + } + + // TODO: Eventually need to stack all the overlapping regions, but for now just do the latest rect in the space + deferred_clipped_region region; + region.src_x = src_offset.x; + region.src_y = src_offset.y; + region.dst_x = dst_offset.x; + region.dst_y = dst_offset.y; + region.width = size.width; + region.height = size.height; + region.source = e.second; + region.target = new_surface; + + new_surface->set_old_contents_region(region, true); + new_surface->dirty = true; + break; + } +#endif } protected: @@ -401,87 +663,124 @@ namespace rsx surface_storage_type new_surface_storage; surface_type old_surface = nullptr; surface_type new_surface = nullptr; - surface_type convert_surface = nullptr; - - // Remove any depth surfaces occupying this memory address (TODO: Discard all overlapping range) - auto aliased_depth_surface = m_depth_stencil_storage.find(address); - if (aliased_depth_surface != m_depth_stencil_storage.end()) - { - Traits::notify_surface_invalidated(aliased_depth_surface->second); - convert_surface = Traits::get(aliased_depth_surface->second); - invalidated_resources.push_back(std::move(aliased_depth_surface->second)); - m_depth_stencil_storage.erase(aliased_depth_surface); - } + bool store = true; + // Check if render target already exists auto It = m_render_targets_storage.find(address); if (It != m_render_targets_storage.end()) { surface_storage_type &rtt = It->second; + const bool pitch_compatible = Traits::surface_is_pitch_compatible(rtt, pitch); + + if (pitch_compatible) + { + // Preserve memory outside the area to be inherited if needed + const u8 bpp = get_format_block_size_in_bytes(color_format); + split_surface_region(command_list, address, Traits::get(rtt), (u16)width, (u16)height, bpp, antialias); + } + if (Traits::rtt_has_format_width_height(rtt, color_format, width, height)) { - if (Traits::surface_is_pitch_compatible(rtt, pitch)) + if (pitch_compatible) Traits::notify_surface_persist(rtt); else - Traits::invalidate_surface_contents(command_list, Traits::get(rtt), nullptr, address, pitch); + Traits::invalidate_surface_contents(command_list, Traits::get(rtt), address, pitch); Traits::prepare_rtt_for_drawing(command_list, Traits::get(rtt)); - return Traits::get(rtt); + new_surface = Traits::get(rtt); + store = false; } - - old_surface = Traits::get(rtt); - old_surface_storage = std::move(rtt); - m_render_targets_storage.erase(address); - } - - // Range test - const auto aa_factor_v = get_aa_factor_v(antialias); - rsx::address_range range = rsx::address_range::start_length(address, u32(pitch * height * aa_factor_v)); - m_render_targets_memory_range = range.get_min_max(m_render_targets_memory_range); - - // Select source of original data if any - auto contents_to_copy = old_surface == nullptr ? convert_surface : old_surface; - - // Search invalidated resources for a suitable surface - for (auto It = invalidated_resources.begin(); It != invalidated_resources.end(); It++) - { - auto &rtt = *It; - if (Traits::rtt_has_format_width_height(rtt, color_format, width, height, true)) + else { - new_surface_storage = std::move(rtt); - - if (old_surface) - { - //Exchange this surface with the invalidated one - Traits::notify_surface_invalidated(old_surface_storage); - rtt = std::move(old_surface_storage); - } - else - //rtt is now empty - erase it - invalidated_resources.erase(It); - - new_surface = Traits::get(new_surface_storage); - Traits::invalidate_surface_contents(command_list, new_surface, contents_to_copy, address, pitch); - Traits::prepare_rtt_for_drawing(command_list, new_surface); - break; + old_surface = Traits::get(rtt); + old_surface_storage = std::move(rtt); + m_render_targets_storage.erase(address); } } + if (!new_surface) + { + // Range test + const auto aa_factor_v = get_aa_factor_v(antialias); + rsx::address_range range = rsx::address_range::start_length(address, u32(pitch * height * aa_factor_v)); + m_render_targets_memory_range = range.get_min_max(m_render_targets_memory_range); + + // Search invalidated resources for a suitable surface + for (auto It = invalidated_resources.begin(); It != invalidated_resources.end(); It++) + { + auto &rtt = *It; + if (Traits::rtt_has_format_width_height(rtt, color_format, width, height, true)) + { + new_surface_storage = std::move(rtt); + + if (old_surface) + { + // Exchange this surface with the invalidated one + Traits::notify_surface_invalidated(old_surface_storage); + rtt = std::move(old_surface_storage); + } + else + { + // rtt is now empty - erase it + invalidated_resources.erase(It); + } + + new_surface = Traits::get(new_surface_storage); + Traits::invalidate_surface_contents(command_list, new_surface, address, pitch); + Traits::prepare_rtt_for_drawing(command_list, new_surface); + break; + } + } + } + + // Check for stale storage if (old_surface != nullptr && new_surface == nullptr) { - //This was already determined to be invalid and is excluded from testing above + // This was already determined to be invalid and is excluded from testing above Traits::notify_surface_invalidated(old_surface_storage); invalidated_resources.push_back(std::move(old_surface_storage)); } - if (new_surface != nullptr) + if (!new_surface) { - //New surface was found among existing surfaces + m_render_targets_storage[address] = Traits::create_new_surface(address, color_format, width, height, pitch, std::forward(extra_params)...); + new_surface = Traits::get(m_render_targets_storage[address]); + } + else if (store) + { + // New surface was found among invalidated surfaces m_render_targets_storage[address] = std::move(new_surface_storage); - return new_surface; } - m_render_targets_storage[address] = Traits::create_new_surface(address, color_format, width, height, pitch, contents_to_copy, std::forward(extra_params)...); - return Traits::get(m_render_targets_storage[address]); +#ifndef INCOMPLETE_SURFACE_CACHE_IMPL + // Check if old_surface is 'new' and avoid intersection + if (old_surface && old_surface->last_use_tag == write_tag) + { + new_surface->set_old_contents(old_surface); + } + else +#endif + { + intersect_surface_region(command_list, address, new_surface); + } + + // Remove and preserve if possible any overlapping/replaced depth surface + auto aliased_depth_surface = m_depth_stencil_storage.find(address); + if (aliased_depth_surface != m_depth_stencil_storage.end()) + { + if (Traits::surface_is_pitch_compatible(aliased_depth_surface->second, pitch)) + { + // Preserve memory outside the area to be inherited if needed + const u8 bpp = get_format_block_size_in_bytes(color_format); + split_surface_region(command_list, address, Traits::get(aliased_depth_surface->second), (u16)width, (u16)height, bpp, antialias); + } + + Traits::notify_surface_invalidated(aliased_depth_surface->second); + invalidated_resources.push_back(std::move(aliased_depth_surface->second)); + m_depth_stencil_storage.erase(aliased_depth_surface); + } + + return new_surface; } template @@ -497,67 +796,68 @@ namespace rsx surface_storage_type new_surface_storage; surface_type old_surface = nullptr; surface_type new_surface = nullptr; - surface_type convert_surface = nullptr; - - // Remove any color surfaces occupying this memory range (TODO: Discard all overlapping surfaces) - auto aliased_rtt_surface = m_render_targets_storage.find(address); - if (aliased_rtt_surface != m_render_targets_storage.end()) - { - Traits::notify_surface_invalidated(aliased_rtt_surface->second); - convert_surface = Traits::get(aliased_rtt_surface->second); - invalidated_resources.push_back(std::move(aliased_rtt_surface->second)); - m_render_targets_storage.erase(aliased_rtt_surface); - } + bool store = true; auto It = m_depth_stencil_storage.find(address); if (It != m_depth_stencil_storage.end()) { surface_storage_type &ds = It->second; - if (Traits::ds_has_format_width_height(ds, depth_format, width, height)) - { - if (Traits::surface_is_pitch_compatible(ds, pitch)) - Traits::notify_surface_persist(ds); - else - Traits::invalidate_surface_contents(command_list, Traits::get(ds), nullptr, address, pitch); + const bool pitch_compatible = Traits::surface_is_pitch_compatible(ds, pitch); - Traits::prepare_ds_for_drawing(command_list, Traits::get(ds)); - return Traits::get(ds); + if (pitch_compatible) + { + const u8 bpp = (depth_format == rsx::surface_depth_format::z16)? 2 : 4; + split_surface_region(command_list, address, Traits::get(ds), (u16)width, (u16)height, bpp, antialias); } - old_surface = Traits::get(ds); - old_surface_storage = std::move(ds); - m_depth_stencil_storage.erase(address); + if (Traits::ds_has_format_width_height(ds, depth_format, width, height)) + { + if (pitch_compatible) + Traits::notify_surface_persist(ds); + else + Traits::invalidate_surface_contents(command_list, Traits::get(ds), address, pitch); + + Traits::prepare_ds_for_drawing(command_list, Traits::get(ds)); + new_surface = Traits::get(ds); + store = false; + } + else + { + old_surface = Traits::get(ds); + old_surface_storage = std::move(ds); + m_depth_stencil_storage.erase(address); + } } - // Range test - const auto aa_factor_v = get_aa_factor_v(antialias); - rsx::address_range range = rsx::address_range::start_length(address, u32(pitch * height * aa_factor_v)); - m_depth_stencil_memory_range = range.get_min_max(m_depth_stencil_memory_range); - - // Select source of original data if any - auto contents_to_copy = old_surface == nullptr ? convert_surface : old_surface; - - //Search invalidated resources for a suitable surface - for (auto It = invalidated_resources.begin(); It != invalidated_resources.end(); It++) + if (!new_surface) { - auto &ds = *It; - if (Traits::ds_has_format_width_height(ds, depth_format, width, height, true)) + // Range test + const auto aa_factor_v = get_aa_factor_v(antialias); + rsx::address_range range = rsx::address_range::start_length(address, u32(pitch * height * aa_factor_v)); + m_depth_stencil_memory_range = range.get_min_max(m_depth_stencil_memory_range); + + //Search invalidated resources for a suitable surface + for (auto It = invalidated_resources.begin(); It != invalidated_resources.end(); It++) { - new_surface_storage = std::move(ds); - - if (old_surface) + auto &ds = *It; + if (Traits::ds_has_format_width_height(ds, depth_format, width, height, true)) { - //Exchange this surface with the invalidated one - Traits::notify_surface_invalidated(old_surface_storage); - ds = std::move(old_surface_storage); - } - else - invalidated_resources.erase(It); + new_surface_storage = std::move(ds); - new_surface = Traits::get(new_surface_storage); - Traits::prepare_ds_for_drawing(command_list, new_surface); - Traits::invalidate_surface_contents(command_list, new_surface, contents_to_copy, address, pitch); - break; + if (old_surface) + { + //Exchange this surface with the invalidated one + Traits::notify_surface_invalidated(old_surface_storage); + ds = std::move(old_surface_storage); + } + else + invalidated_resources.erase(It); + + new_surface = Traits::get(new_surface_storage); + Traits::prepare_ds_for_drawing(command_list, new_surface); + Traits::invalidate_surface_contents(command_list, new_surface, address, pitch); + break; + } } } @@ -568,15 +868,45 @@ namespace rsx invalidated_resources.push_back(std::move(old_surface_storage)); } - if (new_surface != nullptr) + if (!new_surface) { - //New surface was found among existing surfaces + m_depth_stencil_storage[address] = Traits::create_new_surface(address, depth_format, width, height, pitch, std::forward(extra_params)...); + new_surface = Traits::get(m_depth_stencil_storage[address]); + } + else if (store) + { + // New surface was found among invalidated surfaces m_depth_stencil_storage[address] = std::move(new_surface_storage); - return new_surface; } - m_depth_stencil_storage[address] = Traits::create_new_surface(address, depth_format, width, height, pitch, contents_to_copy, std::forward(extra_params)...); - return Traits::get(m_depth_stencil_storage[address]); +#ifndef INCOMPLETE_SURFACE_CACHE_IMPL + // Check if old_surface is 'new' and avoid intersection + if (old_surface && old_surface->last_use_tag == write_tag) + { + new_surface->set_old_contents(old_surface); + } + else +#endif + { + intersect_surface_region(command_list, address, new_surface); + } + + // Remove and preserve if possible any overlapping/replaced color surface + auto aliased_rtt_surface = m_render_targets_storage.find(address); + if (aliased_rtt_surface != m_render_targets_storage.end()) + { + if (Traits::surface_is_pitch_compatible(aliased_rtt_surface->second, pitch)) + { + const u8 bpp = (depth_format == rsx::surface_depth_format::z16) ? 2 : 4; + split_surface_region(command_list, address, Traits::get(aliased_rtt_surface->second), (u16)width, (u16)height, bpp, antialias); + } + + Traits::notify_surface_invalidated(aliased_rtt_surface->second); + invalidated_resources.push_back(std::move(aliased_rtt_surface->second)); + m_render_targets_storage.erase(aliased_rtt_surface); + } + + return new_surface; } public: /** @@ -600,14 +930,13 @@ namespace rsx // u32 clip_y = clip_vertical_reg; cache_tag = rsx::get_shared_tag(); - m_memory_tree.clear(); // Make previous RTTs sampleable - for (std::tuple &rtt : m_bound_render_targets) + for (auto &rtt : m_bound_render_targets) { if (std::get<1>(rtt) != nullptr) Traits::prepare_rtt_for_sampling(command_list, std::get<1>(rtt)); - rtt = std::make_tuple(0, nullptr); + rtt = std::make_pair(0, nullptr); } // Create/Reuse requested rtts @@ -616,7 +945,7 @@ namespace rsx if (surface_addresses[surface_index] == 0) continue; - m_bound_render_targets[surface_index] = std::make_tuple(surface_addresses[surface_index], + m_bound_render_targets[surface_index] = std::make_pair(surface_addresses[surface_index], bind_address_as_render_targets(command_list, surface_addresses[surface_index], color_format, antialias, clip_width, clip_height, surface_pitch[surface_index], std::forward(extra_params)...)); } @@ -625,12 +954,12 @@ namespace rsx if (std::get<1>(m_bound_depth_stencil) != nullptr) Traits::prepare_ds_for_sampling(command_list, std::get<1>(m_bound_depth_stencil)); - m_bound_depth_stencil = std::make_tuple(0, nullptr); + m_bound_depth_stencil = std::make_pair(0, nullptr); if (!address_z) return; - m_bound_depth_stencil = std::make_tuple(address_z, + m_bound_depth_stencil = std::make_pair(address_z, bind_address_as_depth_stencil(command_list, address_z, depth_format, antialias, clip_width, clip_height, zeta_pitch, std::forward(extra_params)...)); } @@ -1054,49 +1383,48 @@ namespace rsx { write_tag = cache_tag; } - } - if (memory_tag != cache_tag) - { - generate_render_target_memory_tree(); - memory_tag = cache_tag; - } - - if (!m_memory_tree.empty()) - { - for (auto &e : m_memory_tree) + // Tag all available surfaces + for (int i = 0; i < m_bound_render_targets.size(); ++i) { - if (address && e.memory_address != address) + // Usually only 1 or 2 buffers are bound anyway + if (LIKELY(!m_bound_render_targets[i].first)) + { + if (i) break; + + // B-surface binding + continue; + } + + m_bound_render_targets[i].second->on_write(write_tag); + } + + if (m_bound_depth_stencil.first) + { + m_bound_depth_stencil.second->on_write(write_tag); + } + } + else + { + for (int i = 0; i < m_bound_render_targets.size(); ++i) + { + if (LIKELY(!m_bound_render_targets[i].first)) + { + if (i) break; + continue; + } + + if (m_bound_render_targets[i].first != address) { continue; } - for (auto &entry : e.overlapping_set) - { - // GPU-side contents changed - entry._ref->dirty = true; - } - } - } - - for (auto &rtt : m_bound_render_targets) - { - if (address && std::get<0>(rtt) != address) - { - continue; + m_bound_render_targets[i].second->on_write(write_tag); } - if (auto surface = std::get<1>(rtt)) + if (m_bound_depth_stencil.first == address) { - surface->on_write(write_tag); - } - } - - if (auto ds = std::get<1>(m_bound_depth_stencil)) - { - if (!address || std::get<0>(m_bound_depth_stencil) == address) - { - ds->on_write(write_tag); + m_bound_depth_stencil.second->on_write(write_tag); } } } diff --git a/rpcs3/Emu/RSX/Common/texture_cache.h b/rpcs3/Emu/RSX/Common/texture_cache.h index 66fc91d5cf..3db9be7fda 100644 --- a/rpcs3/Emu/RSX/Common/texture_cache.h +++ b/rpcs3/Emu/RSX/Common/texture_cache.h @@ -1750,8 +1750,11 @@ namespace rsx // Intersect this resource with the original one const auto section_bpp = get_format_block_size_in_bytes(section->get_gcm_format()); const auto normalized_width = (section->get_width() * section_bpp) / bpp; - const auto clipped = rsx::intersect_region(address, slice_w, slice_h, bpp, - section->get_section_base(), normalized_width, section->get_height(), section_bpp, pitch); + + const auto clipped = rsx::intersect_region( + section->get_section_base(), normalized_width, section->get_height(), section_bpp, /* parent region (extractee) */ + address, slice_w, slice_h, bpp, /* child region (extracted) */ + pitch); // Rect intersection test // TODO: Make the intersection code cleaner with proper 2D regions diff --git a/rpcs3/Emu/RSX/D3D12/D3D12RenderTargetSets.h b/rpcs3/Emu/RSX/D3D12/D3D12RenderTargetSets.h index 07ed7a9b79..e54558d870 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12RenderTargetSets.h +++ b/rpcs3/Emu/RSX/D3D12/D3D12RenderTargetSets.h @@ -1,5 +1,7 @@ #pragma once +#define INCOMPLETE_SURFACE_CACHE_IMPL + #include #include #include "d3dx12.h" @@ -25,7 +27,6 @@ struct render_target_traits ComPtr create_new_surface( u32 address, surface_color_format color_format, size_t width, size_t height, size_t /*pitch*/, - ID3D12Resource* /*old*/, ID3D12Device* device, const std::array &clear_color, float, u8) { DXGI_FORMAT dxgi_format = get_color_surface_format(color_format); @@ -86,7 +87,6 @@ struct render_target_traits ComPtr create_new_surface( u32 address, surface_depth_format surfaceDepthFormat, size_t width, size_t height, size_t /*pitch*/, - ID3D12Resource* /*old*/, ID3D12Device* device, const std::array& , float clear_depth, u8 clear_stencil) { D3D12_CLEAR_VALUE clear_depth_value = {}; @@ -131,7 +131,7 @@ struct render_target_traits static void invalidate_surface_contents( ID3D12GraphicsCommandList*, - ID3D12Resource*, ID3D12Resource*, + ID3D12Resource*, u32, size_t) {} diff --git a/rpcs3/Emu/RSX/GL/GLGSRender.cpp b/rpcs3/Emu/RSX/GL/GLGSRender.cpp index b80d5c9a56..e09c79c6a6 100644 --- a/rpcs3/Emu/RSX/GL/GLGSRender.cpp +++ b/rpcs3/Emu/RSX/GL/GLGSRender.cpp @@ -214,19 +214,19 @@ void GLGSRender::end() gl::render_target *ds = std::get<1>(m_rtts.m_bound_depth_stencil); // Handle special memory barrier for ARGB8->D24S8 in an active DSV - if (ds && ds->old_contents != nullptr && - ds->old_contents->get_internal_format() == gl::texture::internal_format::rgba8 && - rsx::pitch_compatible(ds, static_cast(ds->old_contents))) + if (ds && ds->old_contents && + ds->old_contents.source->get_internal_format() == gl::texture::internal_format::rgba8 && + rsx::pitch_compatible(ds, gl::as_rtt(ds->old_contents.source))) { gl_state.enable(GL_FALSE, GL_SCISSOR_TEST); // TODO: Stencil transfer gl::g_hw_blitter->fast_clear_image(cmd, ds, 1.f, 0xFF); + ds->old_contents.init_transfer(ds); - const auto region = rsx::get_transferable_region(ds); - m_depth_converter.run({0, 0, std::get<0>(region), std::get<1>(region)}, - {0, 0, std::get<2>(region), std::get<3>(region)}, - ds->old_contents, ds); + m_depth_converter.run(ds->old_contents.src_rect(), + ds->old_contents.dst_rect(), + ds->old_contents.source, ds); ds->on_write(); } @@ -400,7 +400,7 @@ void GLGSRender::end() std::chrono::time_point draw_start = textures_end; // Optionally do memory synchronization if the texture stage has not yet triggered this - if (g_cfg.video.strict_rendering_mode) + if (1)//g_cfg.video.strict_rendering_mode) { gl_state.enable(GL_FALSE, GL_SCISSOR_TEST); diff --git a/rpcs3/Emu/RSX/GL/GLRenderTargets.cpp b/rpcs3/Emu/RSX/GL/GLRenderTargets.cpp index c363857487..9a83ece1fe 100644 --- a/rpcs3/Emu/RSX/GL/GLRenderTargets.cpp +++ b/rpcs3/Emu/RSX/GL/GLRenderTargets.cpp @@ -215,7 +215,8 @@ void GLGSRender::init_buffers(rsx::framebuffer_creation_context context, bool sk return; } - m_rtts.prepare_render_target(nullptr, + gl::command_context cmd{ gl_state }; + m_rtts.prepare_render_target(cmd, layout.color_format, layout.depth_format, layout.width, layout.height, layout.target, layout.aa_mode, @@ -234,8 +235,6 @@ void GLGSRender::init_buffers(rsx::framebuffer_creation_context context, bool sk const u8 color_bpp = get_format_block_size_in_bytes(layout.color_format); const u8 depth_bpp = (layout.depth_format == rsx::surface_depth_format::z16 ? 2 : 4); - gl::command_context cmd{ gl_state }; - for (int i = 0; i < rsx::limits::color_buffers_count; ++i) { if (m_surface_info[i].pitch && g_cfg.video.write_color_buffers) @@ -606,7 +605,7 @@ void gl::render_target::memory_barrier(gl::command_context& cmd, bool force_init return; } - auto src_texture = static_cast(old_contents); + auto src_texture = gl::as_rtt(old_contents.source); if (!rsx::pitch_compatible(this, src_texture)) { LOG_TRACE(RSX, "Pitch mismatch, could not transfer inherited memory"); @@ -617,8 +616,6 @@ void gl::render_target::memory_barrier(gl::command_context& cmd, bool force_init const auto dst_bpp = get_bpp(); rsx::typeless_xfer typeless_info{}; - const auto region = rsx::get_transferable_region(this); - if (get_internal_format() == src_texture->get_internal_format()) { // Copy data from old contents onto this one @@ -639,9 +636,11 @@ void gl::render_target::memory_barrier(gl::command_context& cmd, bool force_init } const bool dst_is_depth = !!(aspect() & gl::image_aspect::depth); - gl::g_hw_blitter->scale_image(cmd, old_contents, this, - { 0, 0, std::get<0>(region), std::get<1>(region) }, - { 0, 0, std::get<2>(region) , std::get<3>(region) }, + old_contents.init_transfer(this); + + gl::g_hw_blitter->scale_image(cmd, old_contents.source, this, + old_contents.src_rect(), + old_contents.dst_rect(), !dst_is_depth, dst_is_depth, typeless_info); // Memory has been transferred, discard old contents and update memory flags diff --git a/rpcs3/Emu/RSX/GL/GLRenderTargets.h b/rpcs3/Emu/RSX/GL/GLRenderTargets.h index c2bd0291d4..73a9899917 100644 --- a/rpcs3/Emu/RSX/GL/GLRenderTargets.h +++ b/rpcs3/Emu/RSX/GL/GLRenderTargets.h @@ -149,21 +149,26 @@ namespace gl { using gl::fbo::fbo; }; + + static inline gl::render_target* as_rtt(gl::texture* t) + { + return reinterpret_cast(t); + } } struct gl_render_target_traits { using surface_storage_type = std::unique_ptr; using surface_type = gl::render_target*; - using command_list_type = void*; + using command_list_type = gl::command_context&; using download_buffer_object = std::vector; + using barrier_descriptor_t = rsx::deferred_clipped_region; static std::unique_ptr create_new_surface( u32 address, rsx::surface_color_format surface_color_format, - size_t width, size_t height, size_t pitch, - gl::render_target* old_surface + size_t width, size_t height, size_t pitch ) { auto format = rsx::internals::surface_color_format_to_gl(surface_color_format); @@ -176,7 +181,6 @@ struct gl_render_target_traits std::array native_layout = { (GLenum)format.swizzle.a, (GLenum)format.swizzle.r, (GLenum)format.swizzle.g, (GLenum)format.swizzle.b }; result->set_native_component_layout(native_layout); - result->set_old_contents(old_surface); result->set_cleared(false); result->queue_tag(address); @@ -187,8 +191,7 @@ struct gl_render_target_traits std::unique_ptr create_new_surface( u32 address, rsx::surface_depth_format surface_depth_format, - size_t width, size_t height, size_t pitch, - gl::render_target* old_surface + size_t width, size_t height, size_t pitch ) { auto format = rsx::internals::surface_depth_format_to_gl(surface_depth_format); @@ -203,13 +206,47 @@ struct gl_render_target_traits result->set_native_pitch(native_pitch); result->set_surface_dimensions((u16)width, (u16)height, (u16)pitch); result->set_native_component_layout(native_layout); - result->set_old_contents(old_surface); result->set_cleared(false); result->queue_tag(address); return result; } + static + void clone_surface( + gl::command_context&, + std::unique_ptr& sink, gl::render_target* ref, + u32 address, barrier_descriptor_t& prev) + { + if (!sink) + { + auto internal_format = (GLenum)ref->get_internal_format(); + const auto new_w = rsx::apply_resolution_scale(prev.width, true, ref->get_surface_width()); + const auto new_h = rsx::apply_resolution_scale(prev.height, true, ref->get_surface_height()); + + sink.reset(new gl::render_target(new_w, new_h, internal_format)); + } + + prev.target = sink.get(); + + sink->set_native_pitch(prev.width * ref->get_bpp()); + sink->set_surface_dimensions(prev.width, prev.height, ref->get_rsx_pitch()); + sink->set_native_component_layout(ref->get_native_component_layout()); + sink->queue_tag(address); + sink->sync_tag(); + sink->set_old_contents_region(prev, false); + sink->set_cleared(false); + sink->last_use_tag = ref->last_use_tag; + } + + static + bool is_compatible_surface(const gl::render_target* surface, const gl::render_target* ref, u16 width, u16 height, u8 /*sample_count*/) + { + return (surface->get_internal_format() == ref->get_internal_format() && + surface->get_surface_width() == width && + surface->get_surface_height() == height); + } + static void get_surface_info(gl::render_target *surface, rsx::surface_format_info *info) { @@ -220,11 +257,11 @@ struct gl_render_target_traits info->bpp = surface->get_bpp(); } - static void prepare_rtt_for_drawing(void *, gl::render_target *rtt) { rtt->reset_refs(); } - static void prepare_rtt_for_sampling(void *, gl::render_target*) {} + static void prepare_rtt_for_drawing(gl::command_context&, gl::render_target *rtt) { rtt->reset_refs(); } + static void prepare_rtt_for_sampling(gl::command_context&, gl::render_target*) {} - static void prepare_ds_for_drawing(void *, gl::render_target *ds) { ds->reset_refs(); } - static void prepare_ds_for_sampling(void *, gl::render_target*) {} + static void prepare_ds_for_drawing(gl::command_context&, gl::render_target *ds) { ds->reset_refs(); } + static void prepare_ds_for_sampling(gl::command_context&, gl::render_target*) {} static bool surface_is_pitch_compatible(const std::unique_ptr &surface, size_t pitch) @@ -233,10 +270,9 @@ struct gl_render_target_traits } static - void invalidate_surface_contents(void *, gl::render_target *surface, gl::render_target* old_surface, u32 address, size_t pitch) + void invalidate_surface_contents(gl::command_context&, gl::render_target *surface, u32 address, size_t pitch) { surface->set_rsx_pitch((u16)pitch); - surface->set_old_contents(old_surface); surface->reset_aa_mode(); surface->queue_tag(address); surface->set_cleared(false); diff --git a/rpcs3/Emu/RSX/VK/VKGSRender.cpp b/rpcs3/Emu/RSX/VK/VKGSRender.cpp index a2d2149395..7037002ec6 100644 --- a/rpcs3/Emu/RSX/VK/VKGSRender.cpp +++ b/rpcs3/Emu/RSX/VK/VKGSRender.cpp @@ -1459,8 +1459,8 @@ void VKGSRender::end() // Check for data casts auto ds = std::get<1>(m_rtts.m_bound_depth_stencil); if (ds && ds->old_contents && - ds->old_contents->info.format == VK_FORMAT_B8G8R8A8_UNORM && - rsx::pitch_compatible(ds, static_cast(ds->old_contents))) + ds->old_contents.source->info.format == VK_FORMAT_B8G8R8A8_UNORM && + rsx::pitch_compatible(ds, vk::as_rtt(ds->old_contents.source))) { auto rp = vk::get_render_pass_location(VK_FORMAT_UNDEFINED, ds->info.format, 0); auto render_pass = m_render_passes[rp]; @@ -1475,11 +1475,11 @@ void VKGSRender::end() vk::change_image_layout(*m_current_command_buffer, ds, VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL, range); // TODO: Stencil transfer - const auto region = rsx::get_transferable_region(ds); + ds->old_contents.init_transfer(ds); m_depth_converter->run(*m_current_command_buffer, - { 0, 0, std::get<0>(region), std::get<1>(region) }, - { 0, 0, std::get<2>(region), std::get<3>(region) }, - static_cast(ds->old_contents)->get_view(0xAAE4, rsx::default_remap_vector), + ds->old_contents.src_rect(), + ds->old_contents.dst_rect(), + vk::as_rtt(ds->old_contents.source)->get_view(0xAAE4, rsx::default_remap_vector), ds, render_pass, m_framebuffers_to_clean); // TODO: Flush management to avoid pass running out of ubo space (very unlikely) @@ -1827,7 +1827,7 @@ void VKGSRender::end() } // Apply write memory barriers - if (g_cfg.video.strict_rendering_mode) + if (1)//g_cfg.video.strict_rendering_mode) { if (ds) ds->write_barrier(*m_current_command_buffer); @@ -2976,13 +2976,13 @@ void VKGSRender::prepare_rtts(rsx::framebuffer_creation_context context) return; } - m_rtts.prepare_render_target(&*m_current_command_buffer, + m_rtts.prepare_render_target(*m_current_command_buffer, layout.color_format, layout.depth_format, layout.width, layout.height, layout.target, layout.aa_mode, layout.color_addresses, layout.zeta_address, layout.actual_color_pitch, layout.actual_zeta_pitch, - (*m_device), &*m_current_command_buffer); + (*m_device), *m_current_command_buffer); // Reset framebuffer information VkFormat old_format = VK_FORMAT_UNDEFINED; diff --git a/rpcs3/Emu/RSX/VK/VKRenderTargets.h b/rpcs3/Emu/RSX/VK/VKRenderTargets.h index 7288e7d540..d58454ab0f 100644 --- a/rpcs3/Emu/RSX/VK/VKRenderTargets.h +++ b/rpcs3/Emu/RSX/VK/VKRenderTargets.h @@ -99,7 +99,7 @@ namespace vk return; } - auto src_texture = static_cast(old_contents); + auto src_texture = static_cast(old_contents.source); if (!rsx::pitch_compatible(this, src_texture)) { LOG_TRACE(RSX, "Pitch mismatch, could not transfer inherited memory"); @@ -110,8 +110,6 @@ namespace vk const auto dst_bpp = get_bpp(); rsx::typeless_xfer typeless_info{}; - const auto region = rsx::get_transferable_region(this); - if (src_texture->info.format == info.format) { verify(HERE), src_bpp == dst_bpp; @@ -130,9 +128,11 @@ namespace vk } vk::blitter hw_blitter; - hw_blitter.scale_image(cmd, old_contents, this, - { 0, 0, std::get<0>(region), std::get<1>(region) }, - { 0, 0, std::get<2>(region) , std::get<3>(region) }, + old_contents.init_transfer(this); + + hw_blitter.scale_image(cmd, old_contents.source, this, + old_contents.src_rect(), + old_contents.dst_rect(), /*linear?*/false, /*depth?(unused)*/false, typeless_info); on_write(); @@ -152,6 +152,11 @@ namespace vk : framebuffer(dev, pass, width, height, std::move(atts)) {} }; + + static inline vk::render_target* as_rtt(vk::image* t) + { + return static_cast(t); + } } namespace rsx @@ -160,15 +165,15 @@ namespace rsx { using surface_storage_type = std::unique_ptr; using surface_type = vk::render_target*; - using command_list_type = vk::command_buffer*; + using command_list_type = vk::command_buffer&; using download_buffer_object = void*; + using barrier_descriptor_t = rsx::deferred_clipped_region; static std::unique_ptr create_new_surface( u32 address, surface_color_format format, size_t width, size_t height, size_t pitch, - vk::render_target* old_surface, - vk::render_device &device, vk::command_buffer *cmd) + vk::render_device &device, vk::command_buffer& cmd) { auto fmt = vk::get_compatible_surface_format(format); VkFormat requested_format = fmt.first; @@ -185,14 +190,13 @@ namespace rsx VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT|VK_IMAGE_USAGE_TRANSFER_SRC_BIT|VK_IMAGE_USAGE_TRANSFER_DST_BIT|VK_IMAGE_USAGE_SAMPLED_BIT, 0)); - change_image_layout(*cmd, rtt.get(), VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, vk::get_image_subresource_range(0, 0, 1, 1, VK_IMAGE_ASPECT_COLOR_BIT)); + change_image_layout(cmd, rtt.get(), VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, vk::get_image_subresource_range(0, 0, 1, 1, VK_IMAGE_ASPECT_COLOR_BIT)); rtt->native_component_map = fmt.second; rtt->rsx_pitch = (u16)pitch; rtt->native_pitch = (u16)width * get_format_block_size_in_bytes(format); rtt->surface_width = (u16)width; rtt->surface_height = (u16)height; - rtt->set_old_contents(old_surface); rtt->queue_tag(address); rtt->dirty = true; @@ -203,8 +207,7 @@ namespace rsx u32 address, surface_depth_format format, size_t width, size_t height, size_t pitch, - vk::render_target* old_surface, - vk::render_device &device, vk::command_buffer *cmd) + vk::render_device &device, vk::command_buffer& cmd) { VkFormat requested_format = vk::get_compatible_depth_surface_format(device.get_formats_support(), format); VkImageSubresourceRange range = vk::get_image_subresource_range(0, 0, 1, 1, VK_IMAGE_ASPECT_DEPTH_BIT); @@ -227,7 +230,7 @@ namespace rsx 0)); ds->native_component_map = { VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_R }; - change_image_layout(*cmd, ds.get(), VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL, range); + change_image_layout(cmd, ds.get(), VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL, range); ds->native_pitch = (u16)width * 2; if (format == rsx::surface_depth_format::z24s8) @@ -237,13 +240,58 @@ namespace rsx ds->rsx_pitch = (u16)pitch; ds->surface_width = (u16)width; ds->surface_height = (u16)height; - ds->set_old_contents(old_surface); ds->queue_tag(address); ds->dirty = true; return ds; } + static void clone_surface( + vk::command_buffer& cmd, + std::unique_ptr& sink, vk::render_target* ref, + u32 address, barrier_descriptor_t& prev) + { + if (!sink) + { + const auto new_w = rsx::apply_resolution_scale(prev.width, true, ref->get_surface_width()); + const auto new_h = rsx::apply_resolution_scale(prev.height, true, ref->get_surface_height()); + + auto& dev = cmd.get_command_pool().get_owner(); + sink.reset(new vk::render_target(dev, dev.get_memory_mapping().device_local, + VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, + VK_IMAGE_TYPE_2D, + ref->format(), + new_w, new_h, 1, 1, 1, + VK_SAMPLE_COUNT_1_BIT, + VK_IMAGE_LAYOUT_UNDEFINED, + VK_IMAGE_TILING_OPTIMAL, + ref->info.usage, + ref->info.flags)); + } + + prev.target = sink.get(); + + sink->native_component_map = ref->native_component_map; + sink->rsx_pitch = ref->get_rsx_pitch(); + sink->native_pitch = u16(prev.width * ref->get_bpp()); + sink->surface_width = prev.width; + sink->surface_height = prev.height; + sink->queue_tag(address); + sink->sync_tag(); + sink->set_old_contents_region(prev, false); + sink->dirty = true; + sink->last_use_tag = ref->last_use_tag; + + change_image_layout(cmd, sink.get(), VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); + } + + static bool is_compatible_surface(const vk::render_target* surface, const vk::render_target* ref, u16 width, u16 height, u8 /*sample_count*/) + { + return (surface->format() == ref->format() && + surface->get_surface_width() == width && + surface->get_surface_height() == height); + } + static void get_surface_info(vk::render_target *surface, rsx::surface_format_info *info) { info->rsx_pitch = surface->rsx_pitch; @@ -253,36 +301,36 @@ namespace rsx info->bpp = surface->get_bpp(); } - static void prepare_rtt_for_drawing(vk::command_buffer* pcmd, vk::render_target *surface) + static void prepare_rtt_for_drawing(vk::command_buffer& cmd, vk::render_target *surface) { VkImageSubresourceRange range = vk::get_image_subresource_range(0, 0, 1, 1, surface->attachment_aspect_flag); - change_image_layout(*pcmd, surface, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, range); + change_image_layout(cmd, surface, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, range); //Reset deref count surface->deref_count = 0; surface->frame_tag = 0; } - static void prepare_rtt_for_sampling(vk::command_buffer* pcmd, vk::render_target *surface) + static void prepare_rtt_for_sampling(vk::command_buffer& cmd, vk::render_target *surface) { VkImageSubresourceRange range = vk::get_image_subresource_range(0, 0, 1, 1, surface->attachment_aspect_flag); - change_image_layout(*pcmd, surface, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, range); + change_image_layout(cmd, surface, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, range); } - static void prepare_ds_for_drawing(vk::command_buffer* pcmd, vk::render_target *surface) + static void prepare_ds_for_drawing(vk::command_buffer& cmd, vk::render_target *surface) { VkImageSubresourceRange range = vk::get_image_subresource_range(0, 0, 1, 1, surface->attachment_aspect_flag); - change_image_layout(*pcmd, surface, VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL, range); + change_image_layout(cmd, surface, VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL, range); //Reset deref count surface->deref_count = 0; surface->frame_tag = 0; } - static void prepare_ds_for_sampling(vk::command_buffer* pcmd, vk::render_target *surface) + static void prepare_ds_for_sampling(vk::command_buffer& cmd, vk::render_target *surface) { VkImageSubresourceRange range = vk::get_image_subresource_range(0, 0, 1, 1, surface->attachment_aspect_flag); - change_image_layout(*pcmd, surface, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, range); + change_image_layout(cmd, surface, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, range); } static bool surface_is_pitch_compatible(const std::unique_ptr &surface, size_t pitch) @@ -290,10 +338,9 @@ namespace rsx return surface->rsx_pitch == pitch; } - static void invalidate_surface_contents(vk::command_buffer* /*pcmd*/, vk::render_target *surface, vk::render_target *old_surface, u32 address, size_t pitch) + static void invalidate_surface_contents(vk::command_buffer& /*cmd*/, vk::render_target *surface, u32 address, size_t pitch) { surface->rsx_pitch = (u16)pitch; - surface->set_old_contents(old_surface); surface->reset_aa_mode(); surface->queue_tag(address); surface->dirty = true; diff --git a/rpcs3/Emu/RSX/rsx_utils.h b/rpcs3/Emu/RSX/rsx_utils.h index e1198a22a3..a95858c1bc 100644 --- a/rpcs3/Emu/RSX/rsx_utils.h +++ b/rpcs3/Emu/RSX/rsx_utils.h @@ -470,32 +470,35 @@ namespace rsx return std::make_tuple(x, y, width, height); } + /** + * Extracts from 'parent' a region that fits in 'child' + */ static inline std::tuple intersect_region( - u32 dst_address, u16 dst_w, u16 dst_h, u16 dst_bpp, - u32 src_address, u16 src_w, u16 src_h, u32 src_bpp, + u32 parent_address, u16 parent_w, u16 parent_h, u16 parent_bpp, + u32 child_address, u16 child_w, u16 child_h, u32 child_bpp, u32 pitch) { - if (src_address < dst_address) + if (child_address < parent_address) { - const auto offset = dst_address - src_address; - const auto src_y = (offset / pitch); - const auto src_x = (offset % pitch) / src_bpp; - const auto dst_x = 0u; - const auto dst_y = 0u; - const auto w = std::min(dst_w, src_w - src_x); - const auto h = std::min(dst_h, src_h - src_y); + const auto offset = parent_address - child_address; + const auto src_x = 0u; + const auto src_y = 0u; + const auto dst_y = (offset / pitch); + const auto dst_x = (offset % pitch) / child_bpp; + const auto w = std::min(parent_w, child_w - dst_x); + const auto h = std::min(parent_h, child_h - dst_y); return std::make_tuple({ src_x, src_y }, { dst_x, dst_y }, { w, h }); } else { - const auto offset = src_address - dst_address; - const auto src_x = 0u; - const auto src_y = 0u; - const auto dst_y = (offset / pitch); - const auto dst_x = (offset % pitch) / dst_bpp; - const auto w = std::min(src_w, dst_w - dst_x); - const auto h = std::min(src_h, dst_h - dst_y); + const auto offset = child_address - parent_address; + const auto src_y = (offset / pitch); + const auto src_x = (offset % pitch) / parent_bpp; + const auto dst_x = 0u; + const auto dst_y = 0u; + const auto w = std::min(child_w, parent_w - src_x); + const auto h = std::min(child_h, parent_h - src_y); return std::make_tuple({ src_x, src_y }, { dst_x, dst_y }, { w, h }); } @@ -511,10 +514,14 @@ namespace rsx return g_cfg.video.strict_rendering_mode ? 100 : g_cfg.video.resolution_scale_percent; } - static inline const u16 apply_resolution_scale(u16 value, bool clamp) + static inline const u16 apply_resolution_scale(u16 value, bool clamp, u16 ref = 0) { - if (value <= g_cfg.video.min_scalable_dimension) + if (ref == 0) + ref = value; + + if (ref <= g_cfg.video.min_scalable_dimension) return value; + else if (clamp) return (u16)std::max((get_resolution_scale_percent() * value) / 100, 1); else @@ -541,14 +548,14 @@ namespace rsx * Returns */ template - std::tuple get_transferable_region(SurfaceType* surface) + std::tuple get_transferable_region(const SurfaceType* surface) { - const u16 src_w = surface->old_contents->width(); - const u16 src_h = surface->old_contents->height(); + const u16 src_w = surface->old_contents.source->width(); + const u16 src_h = surface->old_contents.source->height(); u16 dst_w = src_w; u16 dst_h = src_h; - switch (static_cast(surface->old_contents)->read_aa_mode) + switch (static_cast(surface->old_contents.source)->read_aa_mode) { case rsx::surface_antialiasing::center_1_sample: break; @@ -584,7 +591,7 @@ namespace rsx } template - inline bool pitch_compatible(SurfaceType* a, SurfaceType* b) + inline bool pitch_compatible(const SurfaceType* a, const SurfaceType* b) { if (a->get_surface_height() == 1 || b->get_surface_height() == 1) return true; @@ -593,7 +600,7 @@ namespace rsx } template - inline bool pitch_compatible(SurfaceType* surface, u16 pitch_required, u16 height_required) + inline bool pitch_compatible(const SurfaceType* surface, u16 pitch_required, u16 height_required) { if constexpr (__is_surface) {