diff --git a/rpcs3/Emu/RSX/Common/TextureUtils.h b/rpcs3/Emu/RSX/Common/TextureUtils.h index ec853a23c2..55842dd5bf 100644 --- a/rpcs3/Emu/RSX/Common/TextureUtils.h +++ b/rpcs3/Emu/RSX/Common/TextureUtils.h @@ -7,7 +7,7 @@ namespace rsx { - enum texture_upload_context + enum texture_upload_context : u32 { shader_read = 1, blit_engine_src = 2, @@ -15,7 +15,7 @@ namespace rsx framebuffer_storage = 8 }; - enum texture_colorspace + enum texture_colorspace : u32 { rgb_linear = 0, srgb_nonlinear = 1 diff --git a/rpcs3/Emu/RSX/Common/surface_store.h b/rpcs3/Emu/RSX/Common/surface_store.h index 45f09a78d7..c377c0c9bd 100644 --- a/rpcs3/Emu/RSX/Common/surface_store.h +++ b/rpcs3/Emu/RSX/Common/surface_store.h @@ -108,6 +108,7 @@ namespace rsx virtual u16 get_surface_height() const = 0; virtual u16 get_rsx_pitch() const = 0; virtual u16 get_native_pitch() const = 0; + virtual bool is_depth_surface() const = 0; void save_aa_mode() { @@ -216,6 +217,9 @@ namespace rsx std::unordered_map m_render_targets_storage = {}; std::unordered_map m_depth_stencil_storage = {}; + rsx::address_range m_render_targets_memory_range; + rsx::address_range m_depth_stencil_memory_range; + public: std::array, 4> m_bound_render_targets = {}; std::tuple m_bound_depth_stencil = {}; @@ -323,7 +327,8 @@ namespace rsx surface_type bind_address_as_render_targets( command_list_type command_list, u32 address, - surface_color_format color_format, size_t width, size_t height, + surface_color_format color_format, + size_t width, size_t height, size_t pitch, Args&&... extra_params) { // TODO: Fix corner cases @@ -360,6 +365,10 @@ namespace rsx m_render_targets_storage.erase(address); } + // Range test + rsx::address_range range = rsx::address_range::start_length(address, u32(pitch * height)); + m_render_targets_memory_range = range.get_min_max(m_render_targets_memory_range); + // Select source of original data if any auto contents_to_copy = old_surface == nullptr ? 
convert_surface : old_surface; @@ -410,7 +419,8 @@ namespace rsx surface_type bind_address_as_depth_stencil( command_list_type command_list, u32 address, - surface_depth_format depth_format, size_t width, size_t height, + surface_depth_format depth_format, + size_t width, size_t height, size_t pitch, Args&&... extra_params) { surface_storage_type old_surface_storage; @@ -445,6 +455,10 @@ namespace rsx m_depth_stencil_storage.erase(address); } + // Range test + rsx::address_range range = rsx::address_range::start_length(address, u32(pitch * height)); + m_depth_stencil_memory_range = range.get_min_max(m_depth_stencil_memory_range); + // Select source of original data if any auto contents_to_copy = old_surface == nullptr ? convert_surface : old_surface; @@ -525,8 +539,9 @@ namespace rsx if (surface_addresses[surface_index] == 0) continue; + const auto pitch = clip_width * 4; // TODO m_bound_render_targets[surface_index] = std::make_tuple(surface_addresses[surface_index], - bind_address_as_render_targets(command_list, surface_addresses[surface_index], color_format, clip_width, clip_height, std::forward(extra_params)...)); + bind_address_as_render_targets(command_list, surface_addresses[surface_index], color_format, clip_width, clip_height, pitch, std::forward(extra_params)...)); } // Same for depth buffer @@ -538,8 +553,10 @@ namespace rsx if (!address_z) return; + // TODO + const auto pitch = (depth_format == rsx::surface_depth_format::z16) ? 
clip_width * 2 : clip_width * 4; m_bound_depth_stencil = std::make_tuple(address_z, - bind_address_as_depth_stencil(command_list, address_z, depth_format, clip_width, clip_height, std::forward(extra_params)...)); + bind_address_as_depth_stencil(command_list, address_z, depth_format, clip_width, clip_height, pitch, std::forward(extra_params)...)); } /** @@ -566,6 +583,19 @@ namespace rsx return surface_type(); } + surface_type get_surface_at(u32 address) + { + auto It = m_render_targets_storage.find(address); + if (It != m_render_targets_storage.end()) + return Traits::get(It->second); + + auto _It = m_depth_stencil_storage.find(address); + if (_It != m_depth_stencil_storage.end()) + return Traits::get(_It->second); + + fmt::throw_exception("Unreachable" HERE); + } + /** * Get bound color surface raw data. */ @@ -749,7 +779,7 @@ namespace rsx */ void invalidate_surface_address(u32 addr, bool depth) { - if (address_is_bound(addr, depth)) + if (address_is_bound(addr)) { LOG_ERROR(RSX, "Cannot invalidate a currently bound render target!"); return; @@ -862,14 +892,8 @@ namespace rsx return (offset < range); } - bool address_is_bound(u32 address, bool is_depth) const + bool address_is_bound(u32 address) const { - if (is_depth) - { - const u32 bound_depth_address = std::get<0>(m_bound_depth_stencil); - return (bound_depth_address == address); - } - for (auto &surface : m_bound_render_targets) { const u32 bound_address = std::get<0>(surface); @@ -877,6 +901,9 @@ namespace rsx return true; } + if (std::get<0>(m_bound_depth_stencil) == address) + return true; + return false; } @@ -966,7 +993,7 @@ namespace rsx } if (test_surface(surface, this_address, x_offset, y_offset, w, h, clipped)) - return{ this_address, surface, x_offset, y_offset, w, h, address_is_bound(this_address, false), false, clipped }; + return{ this_address, surface, x_offset, y_offset, w, h, address_is_bound(this_address), false, clipped }; } } @@ -992,16 +1019,18 @@ namespace rsx } if (test_surface(surface, 
this_address, x_offset, y_offset, w, h, clipped)) - return{ this_address, surface, x_offset, y_offset, w, h, address_is_bound(this_address, true), true, clipped }; + return{ this_address, surface, x_offset, y_offset, w, h, address_is_bound(this_address), true, clipped }; } } return{}; } - std::vector get_merged_texture_memory_region(u32 texaddr, u32 required_width, u32 required_height, u32 required_pitch, u32 bpp) + template + std::vector get_merged_texture_memory_region(commandbuffer_type& cmd, u32 texaddr, u32 required_width, u32 required_height, u32 required_pitch, u32 bpp) { std::vector result; + std::vector> dirty; const u32 limit = texaddr + (required_pitch * required_height); auto process_list_function = [&](std::unordered_map& data, bool is_depth) @@ -1021,6 +1050,12 @@ namespace rsx if ((this_address + texture_size) <= texaddr) continue; + if (surface->read_barrier(cmd); !surface->test()) + { + dirty.emplace_back(this_address, is_depth); + continue; + } + surface_overlap_info info; info.surface = surface; info.is_depth = is_depth; @@ -1050,8 +1085,27 @@ namespace rsx } }; - process_list_function(m_render_targets_storage, false); - process_list_function(m_depth_stencil_storage, true); + // Range test helper to quickly discard blocks + // Fortunately, render targets tend to be clustered anyway + rsx::address_range test = rsx::address_range::start_end(texaddr, limit-1); + + if (test.overlaps(m_render_targets_memory_range)) + { + process_list_function(m_render_targets_storage, false); + } + + if (test.overlaps(m_depth_stencil_memory_range)) + { + process_list_function(m_depth_stencil_storage, true); + } + + if (!dirty.empty()) + { + for (const auto& p : dirty) + { + invalidate_surface_address(p.first, p.second); + } + } if (result.size() > 1) { diff --git a/rpcs3/Emu/RSX/Common/texture_cache.h b/rpcs3/Emu/RSX/Common/texture_cache.h index 576cd76d4b..0216a639b1 100644 --- a/rpcs3/Emu/RSX/Common/texture_cache.h +++ b/rpcs3/Emu/RSX/Common/texture_cache.h @@ 
-154,6 +154,7 @@ namespace rsx enum deferred_request_command : u32 { + nop = 0, copy_image_static, copy_image_dynamic, cubemap_gather, @@ -169,7 +170,7 @@ namespace rsx image_resource_type external_handle = 0; std::vector sections_to_copy; texture_channel_remap_t remap; - deferred_request_command op; + deferred_request_command op = deferred_request_command::nop; u32 base_address = 0; u32 gcm_format = 0; u16 x = 0; @@ -235,6 +236,26 @@ namespace rsx image_type = type; } + void simplify() + { + // Optimizations in the straightforward methods copy_image_static and copy_image_dynamic make them preferred over the atlas method + if (external_subresource_desc.op == deferred_request_command::atlas_gather && + external_subresource_desc.sections_to_copy.size() == 1) + { + // Check if the subresource fills the target, if so, change the command to copy_image_static + const auto &cpy = external_subresource_desc.sections_to_copy.front(); + if (cpy.dst_x == 0 && cpy.dst_y == 0 && + cpy.dst_w == external_subresource_desc.width && cpy.dst_h == external_subresource_desc.height && + cpy.src_w == cpy.dst_w && cpy.src_h == cpy.dst_h) + { + external_subresource_desc.external_handle = cpy.src; + external_subresource_desc.x = cpy.src_x; + external_subresource_desc.y = cpy.src_y; + external_subresource_desc.op = deferred_request_command::copy_image_static; + } + } + } + u32 encoded_component_map() const override { if (image_handle) @@ -244,6 +265,11 @@ namespace rsx return 0; } + + bool validate() const + { + return (image_handle || external_subresource_desc.op != deferred_request_command::nop); + } }; @@ -881,6 +907,43 @@ namespace rsx } } + inline u32 get_compatible_depth_format(u32 gcm_format) const + { + switch (gcm_format) + { + case CELL_GCM_TEXTURE_DEPTH24_D8: + case CELL_GCM_TEXTURE_DEPTH24_D8_FLOAT: + case CELL_GCM_TEXTURE_A8R8G8B8: + return CELL_GCM_TEXTURE_DEPTH24_D8; + case CELL_GCM_TEXTURE_DEPTH16: + case CELL_GCM_TEXTURE_DEPTH16_FLOAT: + case CELL_GCM_TEXTURE_X16: + case 
CELL_GCM_TEXTURE_A4R4G4B4: + case CELL_GCM_TEXTURE_G8B8: + case CELL_GCM_TEXTURE_A1R5G5B5: + case CELL_GCM_TEXTURE_R5G5B5A1: + case CELL_GCM_TEXTURE_R5G6B5: + case CELL_GCM_TEXTURE_R6G5B5: + return CELL_GCM_TEXTURE_DEPTH16; + } + + LOG_ERROR(RSX, "Unsupported depth conversion (0x%X)", gcm_format); + return gcm_format; + } + + inline bool is_compressed_gcm_format(u32 format) + { + switch (format) + { + default: + return false; + case CELL_GCM_TEXTURE_COMPRESSED_DXT1: + case CELL_GCM_TEXTURE_COMPRESSED_DXT23: + case CELL_GCM_TEXTURE_COMPRESSED_DXT45: + return true; + } + } + /** * Scaling helpers * - get_native_dimensions() returns w and h for the native texture given rsx dimensions @@ -973,8 +1036,8 @@ namespace rsx reset_frame_statistics(); } - - std::vector find_texture_from_range(const address_range &test_range, u32 context_mask=0xFF) + template + std::vector find_texture_from_range(const address_range &test_range, u16 required_pitch = 0, u32 context_mask=0xFF) { std::vector results; @@ -988,6 +1051,17 @@ namespace rsx if (!tex.is_dirty() && (context_mask & (u32)tex.get_context())) { + if constexpr (check_unlocked) + { + if (!tex.is_locked()) + continue; + } + + if (required_pitch && tex.get_rsx_pitch() != required_pitch) + { + continue; + } + results.push_back(&tex); } } @@ -1179,6 +1253,18 @@ namespace rsx #endif // TEXTURE_CACHE_DEBUG } + template + void commit_framebuffer_memory_region(commandbuffer_type& cmd, const address_range &rsx_range, Args&&... 
extras) + { + AUDIT(!g_cfg.video.write_color_buffers && !g_cfg.video.write_depth_buffer); + + if (!region_intersects_cache(rsx_range, true)) + return; + + std::lock_guard lock(m_cache_mutex); + invalidate_range_impl_base(cmd, rsx_range, invalidation_cause::write, std::forward(extras)...); + } + void set_memory_read_flags(const address_range &memory_range, memory_read_flags flags) { std::lock_guard lock(m_cache_mutex); @@ -1457,50 +1543,231 @@ namespace rsx m_temporary_subresource_cache.erase(base_address); } - template - std::vector gather_texture_slices_from_framebuffers(commandbuffer_type& cmd, - u32 texaddr, u16 slice_w, u16 slice_h, u16 pitch, u16 count, u8 bpp, surface_store_type& m_rtts) + template + std::vector gather_texture_slices(commandbuffer_type& cmd, + const surface_store_list_type& fbos, const std::vector& local, + u32 texaddr, u16 slice_w, u16 slice_h, u16 src_padding, u16 pitch, u16 count, u8 bpp, bool is_depth) { - std::vector surfaces; - u32 current_address = texaddr; - u32 slice_size = (pitch * slice_h); - bool unsafe = false; - - for (u16 slice = 0; slice < count; ++slice) + // Need to preserve sorting order + struct sort_helper { - auto overlapping = m_rtts.get_merged_texture_memory_region(current_address, slice_w, slice_h, pitch, bpp); - current_address += (pitch * slice_h); + u64 tag; // Timestamp + u32 list; // List source, 0 = fbo, 1 = local + u32 index; // Index in list + }; - if (overlapping.empty()) + std::vector surfaces; + std::vector sort_list; + const u16 src_slice_h = slice_h + src_padding; + + if (!fbos.empty() && !local.empty()) + { + // Generate sorting tree if both resources are available and overlapping + sort_list.reserve(fbos.size() + local.size()); + + for (u32 index = 0; index < fbos.size(); ++index) { - unsafe = true; - surfaces.push_back({}); + sort_list.push_back({ fbos[index].surface->last_use_tag, 0, index }); + } + + for (u32 index = 0; index < local.size(); ++index) + { + if (local[index]->get_rsx_pitch() != pitch 
|| + local[index]->get_context() != rsx::texture_upload_context::blit_engine_dst) + continue; + + sort_list.push_back({ local[index]->last_write_tag, 1, index }); + } + + std::sort(sort_list.begin(), sort_list.end(), [](const auto &a, const auto &b) + { + return (a.tag < b.tag); + }); + } + + auto add_rtt_resource = [&](auto& section, u16 slice) + { + if (section.is_depth != is_depth) + { + // TODO + return; + } + + const auto slice_begin = (slice * src_slice_h); + const auto slice_end = (slice_begin + slice_h); + + const auto section_end = section.dst_y + section.height; + if (section.dst_y >= slice_end || section_end <= slice_begin) + { + // Belongs to a different slice + return; + } + + section.surface->read_barrier(cmd); + + // How much of this slice to read? + int rebased = int(section.dst_y) - slice_begin; + const auto src_x = section.src_x; + const auto dst_x = section.dst_x; + auto src_y = section.src_y; + auto dst_y = section.dst_y; + + if (rebased < 0) + { + const u16 delta = u16(-rebased); + src_y += delta; + dst_y += delta; + } + + verify(HERE), dst_y >= slice_begin; + dst_y = (dst_y - slice_begin); + + const auto scale_x = 1.f / get_internal_scaling_x(section.surface); + const auto scale_y = 1.f / get_internal_scaling_y(section.surface); + + const auto h = std::min(section_end, slice_end) - section.dst_y; + auto src_width = rsx::apply_resolution_scale(section.width, true); + auto src_height = rsx::apply_resolution_scale(h, true); + auto dst_width = src_width; + auto dst_height = src_height; + + if (scale_x > 1.f) + { + // Clipping + const auto limit_x = dst_x + dst_width; + const auto limit_y = dst_x + dst_height; + + if (limit_x > slice_w) + { + dst_width = (limit_x - dst_x); + src_width = dst_width / scale_x; + } + + if (limit_y > slice_h) + { + dst_height = (limit_y - dst_y); + src_height = dst_height / scale_y; + } + } + + surfaces.push_back + ({ + section.surface->get_surface(), + surface_transform::identity, + rsx::apply_resolution_scale(src_x, 
true), + rsx::apply_resolution_scale(src_y, true), + rsx::apply_resolution_scale(dst_x, true), + rsx::apply_resolution_scale(dst_y, true), + slice, + src_width, src_height, + dst_width, dst_height + }); + }; + + auto add_local_resource = [&](auto& section, u32 address, u16 slice, bool scaling = true) + { + if (section->is_depth_texture() != is_depth) + { + // TODO + return; + } + + // Intersect this resource with the original one + const auto section_bpp = get_format_block_size_in_bytes(section->get_gcm_format()); + const auto clipped = rsx::intersect_region(address, slice_w, slice_h, bpp, + section->get_section_base(), section->get_width(), section->get_height(), section_bpp, pitch); + + if (scaling) + { + // Since output is upscaled, also upscale on dst + surfaces.push_back + ({ + section->get_raw_texture(), + is_depth ? surface_transform::identity : surface_transform::argb_to_bgra, + (u16)std::get<0>(clipped).x, + (u16)std::get<0>(clipped).y, + rsx::apply_resolution_scale((u16)std::get<1>(clipped).x, true), + rsx::apply_resolution_scale((u16)std::get<1>(clipped).y, true), + slice, + (u16)std::get<2>(clipped).width, + (u16)std::get<2>(clipped).height, + rsx::apply_resolution_scale((u16)std::get<2>(clipped).width, true), + rsx::apply_resolution_scale((u16)std::get<2>(clipped).height, true), + }); } else { - for (auto §ion : overlapping) - { - section.surface->read_barrier(cmd); + const auto src_width = (u16)std::get<2>(clipped).width, dst_width = src_width; + const auto src_height = (u16)std::get<2>(clipped).height, dst_height = src_height; + surfaces.push_back + ({ + section->get_raw_texture(), + is_depth ? 
surface_transform::identity : surface_transform::argb_to_bgra, + (u16)std::get<0>(clipped).x, + (u16)std::get<0>(clipped).y, + (u16)std::get<1>(clipped).x, + (u16)std::get<1>(clipped).y, + 0, + src_width, + src_height, + dst_width, + dst_height, + }); + } + }; - const auto src_width = rsx::apply_resolution_scale(section.width, true), dst_width = src_width; - const auto src_height = rsx::apply_resolution_scale(section.height, true), dst_height = src_height; - surfaces.push_back - ({ - section.surface->get_surface(), - surface_transform::identity, - rsx::apply_resolution_scale(section.src_x, true), - rsx::apply_resolution_scale(section.src_y, true), - rsx::apply_resolution_scale(section.dst_x, true), - rsx::apply_resolution_scale(section.dst_y, true), - slice, - src_width, src_height, - dst_width, dst_height - }); + u32 current_address = texaddr; + u16 current_src_offset = 0; + u16 current_dst_offset = 0; + u32 slice_size = (pitch * src_slice_h); + + surfaces.reserve(count); + u16 found_slices = 0; + + for (u16 slice = 0; slice < count; ++slice) + { + auto num_surface = surfaces.size(); + + if (LIKELY(local.empty())) + { + for (auto §ion : fbos) + { + add_rtt_resource(section, slice); } } + else if (fbos.empty()) + { + for (auto §ion : local) + { + if (section->get_rsx_pitch() != pitch) + continue; + + add_local_resource(section, current_address, slice, false); + } + } + else + { + for (const auto &e : sort_list) + { + if (e.list == 0) + { + add_rtt_resource(fbos[e.index], slice); + } + else + { + add_local_resource(local[e.index], current_address, slice); + } + } + } + + current_address += slice_size; + if (surfaces.size() != num_surface) + { + found_slices++; + } } - if (unsafe) + if (found_slices < count) { //TODO: Gather remaining sides from the texture cache or upload from cpu (too slow?) 
LOG_ERROR(RSX, "Could not gather all required slices for cubemap/3d generation"); @@ -1509,11 +1776,16 @@ namespace rsx return surfaces; } - template - sampled_image_descriptor process_framebuffer_resource(commandbuffer_type& cmd, render_target_type texptr, u32 texaddr, u32 gcm_format, surface_store_type& m_rtts, - u16 tex_width, u16 tex_height, u16 tex_depth, u16 tex_pitch, rsx::texture_dimension_extended extended_dimension, bool is_depth, bool is_bound, u32 encoded_remap, const texture_channel_remap_t& decoded_remap) + template + bool check_framebuffer_resource(commandbuffer_type& cmd, render_target_type texptr, + u16 tex_width, u16 tex_height, u16 tex_depth, u16 tex_pitch, + rsx::texture_dimension_extended extended_dimension) { - const u32 format = gcm_format & ~(CELL_GCM_TEXTURE_UN | CELL_GCM_TEXTURE_LN); + if (texptr->get_rsx_pitch() != tex_pitch) + { + return false; + } + const auto surface_width = texptr->get_surface_width(); const auto surface_height = texptr->get_surface_height(); @@ -1521,254 +1793,159 @@ namespace rsx u32 internal_height = tex_height; get_native_dimensions(internal_width, internal_height, texptr); + switch (extended_dimension) + { + case rsx::texture_dimension_extended::texture_dimension_1d: + return (surface_width >= internal_width); + case rsx::texture_dimension_extended::texture_dimension_2d: + return (surface_width >= internal_width && surface_height >= internal_height); + case rsx::texture_dimension_extended::texture_dimension_3d: + return (surface_width >= internal_width && surface_height >= (internal_height * tex_depth)); + case rsx::texture_dimension_extended::texture_dimension_cubemap: + return (surface_width == internal_height && surface_width >= internal_width && surface_height >= (internal_height * 6)); + } + + return false; + } + + template + sampled_image_descriptor process_framebuffer_resource_fast(commandbuffer_type& cmd, render_target_type texptr, + u32 texaddr, u32 gcm_format, u16 tex_width, u16 tex_height, u16 
tex_depth, + rsx::texture_dimension_extended extended_dimension, u32 encoded_remap, const texture_channel_remap_t& decoded_remap, + bool assume_bound = true) + { texptr->read_barrier(cmd); - if (extended_dimension != rsx::texture_dimension_extended::texture_dimension_2d && - extended_dimension != rsx::texture_dimension_extended::texture_dimension_1d) + const u32 format = gcm_format & ~(CELL_GCM_TEXTURE_UN | CELL_GCM_TEXTURE_LN); + const bool unnormalized = (gcm_format & CELL_GCM_TEXTURE_UN) != 0; + const bool is_depth = texptr->is_depth_surface(); + f32 scale_x = (unnormalized) ? (1.f / tex_width) : 1.f; + f32 scale_y = (unnormalized) ? (1.f / tex_height) : 1.f; + + const auto surface_width = texptr->get_surface_width(); + const auto surface_height = texptr->get_surface_height(); + + u32 internal_width = tex_width; + u32 internal_height = tex_height; + get_native_dimensions(internal_width, internal_height, texptr); + + if (LIKELY(extended_dimension == rsx::texture_dimension_extended::texture_dimension_2d || + extended_dimension == rsx::texture_dimension_extended::texture_dimension_1d)) { - if (extended_dimension == rsx::texture_dimension_extended::texture_dimension_cubemap) + if (extended_dimension == rsx::texture_dimension_extended::texture_dimension_1d) { - const auto scaled_size = rsx::apply_resolution_scale(internal_width, true); - if (surface_height == (surface_width * 6)) - { - return{ texptr->get_surface(), deferred_request_command::cubemap_unwrap, texaddr, format, 0, 0, - scaled_size, scaled_size, 1, - texture_upload_context::framebuffer_storage, is_depth, 1.f, 1.f, - rsx::texture_dimension_extended::texture_dimension_cubemap, decoded_remap }; - } - - sampled_image_descriptor desc = { texptr->get_surface(), deferred_request_command::cubemap_gather, texaddr, format, 0, 0, - scaled_size, scaled_size, 1, - texture_upload_context::framebuffer_storage, is_depth, 1.f, 1.f, - rsx::texture_dimension_extended::texture_dimension_cubemap, decoded_remap }; - - auto bpp 
= get_format_block_size_in_bytes(format); - desc.external_subresource_desc.sections_to_copy = std::move(gather_texture_slices_from_framebuffers(cmd, texaddr, tex_width, tex_height, tex_pitch, 6, bpp, m_rtts)); - return desc; + internal_height = 1; + scale_y = 0.f; } - else if (extended_dimension == rsx::texture_dimension_extended::texture_dimension_3d && tex_depth > 1) - { - auto minimum_height = (tex_height * tex_depth); - auto scaled_w = rsx::apply_resolution_scale(internal_width, true); - auto scaled_h = rsx::apply_resolution_scale(internal_height, true); - if (surface_height >= minimum_height && surface_width >= tex_width) - { - return{ texptr->get_surface(), deferred_request_command::_3d_unwrap, texaddr, format, 0, 0, - scaled_w, scaled_h, tex_depth, - texture_upload_context::framebuffer_storage, is_depth, 1.f, 1.f, - rsx::texture_dimension_extended::texture_dimension_3d, decoded_remap }; - } - sampled_image_descriptor desc = { texptr->get_surface(), deferred_request_command::_3d_gather, texaddr, format, 0, 0, + if ((assume_bound && g_cfg.video.strict_rendering_mode) || + internal_width < surface_width || + internal_height < surface_height || + !render_target_format_is_compatible(texptr, format)) + { + const auto scaled_w = rsx::apply_resolution_scale(internal_width, true); + const auto scaled_h = rsx::apply_resolution_scale(internal_height, true); + + auto command = assume_bound ? 
deferred_request_command::copy_image_dynamic : deferred_request_command::copy_image_static; + return { texptr->get_surface(), command, texaddr, format, 0, 0, scaled_w, scaled_h, 1, + texture_upload_context::framebuffer_storage, is_depth, scale_x, scale_y, + extended_dimension, decoded_remap }; + } + + if (assume_bound) + { + insert_texture_barrier(cmd, texptr); + } + + return{ texptr->get_view(encoded_remap, decoded_remap), texture_upload_context::framebuffer_storage, + is_depth, scale_x, scale_y, rsx::texture_dimension_extended::texture_dimension_2d }; + } + + const auto scaled_w = rsx::apply_resolution_scale(internal_width, true); + const auto scaled_h = rsx::apply_resolution_scale(internal_height, true); + + if (extended_dimension == rsx::texture_dimension_extended::texture_dimension_3d) + { + return{ texptr->get_surface(), deferred_request_command::_3d_unwrap, texaddr, format, 0, 0, scaled_w, scaled_h, tex_depth, texture_upload_context::framebuffer_storage, is_depth, 1.f, 1.f, rsx::texture_dimension_extended::texture_dimension_3d, decoded_remap }; + } - const auto bpp = get_format_block_size_in_bytes(format); - desc.external_subresource_desc.sections_to_copy = std::move(gather_texture_slices_from_framebuffers(cmd, texaddr, tex_width, tex_height, tex_pitch, tex_depth, bpp, m_rtts)); - return desc; + verify(HERE), extended_dimension == rsx::texture_dimension_extended::texture_dimension_cubemap; + return{ texptr->get_surface(), deferred_request_command::cubemap_unwrap, texaddr, format, 0, 0, + scaled_w, scaled_h, 1, + texture_upload_context::framebuffer_storage, is_depth, 1.f, 1.f, + rsx::texture_dimension_extended::texture_dimension_cubemap, decoded_remap }; + } + + template + sampled_image_descriptor merge_cache_resources(commandbuffer_type& cmd, const surface_store_list_type& fbos, const std::vector& local, + u32 texaddr, u32 gcm_format, u16 tex_width, u16 tex_height, u16 tex_depth, u16 tex_pitch, u16 slice_h, + rsx::texture_dimension_extended 
extended_dimension, u32 encoded_remap, const texture_channel_remap_t& decoded_remap, int select_hint = -1) + { + u32 format = gcm_format & ~(CELL_GCM_TEXTURE_UN | CELL_GCM_TEXTURE_LN); + bool is_depth = false; + + const auto bpp = get_format_block_size_in_bytes(format); + + if (LIKELY(!fbos.empty())) + { + verify(HERE), (select_hint & 0x1) == select_hint; + if (select_hint == 0 && fbos.back().is_depth) + { + is_depth = true; + format = get_compatible_depth_format(format); } } + // If this method was called, there is no easy solution, likely means atlas gather is needed + auto scaled_w = rsx::apply_resolution_scale(tex_width, true); + auto scaled_h = rsx::apply_resolution_scale(tex_height, true); + + if (extended_dimension == rsx::texture_dimension_extended::texture_dimension_cubemap) + { + sampled_image_descriptor desc = { nullptr, deferred_request_command::cubemap_gather, texaddr, format, 0, 0, + scaled_w, scaled_w, 1, + texture_upload_context::framebuffer_storage, is_depth, 1.f, 1.f, + rsx::texture_dimension_extended::texture_dimension_cubemap, decoded_remap }; + + u16 padding = u16(slice_h - tex_width); + desc.external_subresource_desc.sections_to_copy = std::move(gather_texture_slices(cmd, fbos, local, texaddr, tex_width, tex_height, padding, tex_pitch, 6, bpp, is_depth)); + return desc; + } + else if (extended_dimension == rsx::texture_dimension_extended::texture_dimension_3d && tex_depth > 1) + { + sampled_image_descriptor desc = { nullptr, deferred_request_command::_3d_gather, texaddr, format, 0, 0, + scaled_w, scaled_h, tex_depth, + texture_upload_context::framebuffer_storage, is_depth, 1.f, 1.f, + rsx::texture_dimension_extended::texture_dimension_3d, decoded_remap }; + + u16 padding = u16(slice_h - tex_height); + desc.external_subresource_desc.sections_to_copy = std::move(gather_texture_slices(cmd, fbos, local, texaddr, tex_width, tex_height, padding, tex_pitch, 6, bpp, is_depth)); + return desc; + } + const bool unnormalized = (gcm_format & 
CELL_GCM_TEXTURE_UN) != 0; f32 scale_x = (unnormalized)? (1.f / tex_width) : 1.f; f32 scale_y = (unnormalized)? (1.f / tex_height) : 1.f; if (extended_dimension == rsx::texture_dimension_extended::texture_dimension_1d) { - internal_height = 1; + verify(HERE), tex_height == 1; scale_y = 0.f; } - auto bpp = get_format_block_size_in_bytes(format); - auto overlapping = m_rtts.get_merged_texture_memory_region(texaddr, tex_width, tex_height, tex_pitch, bpp); - bool requires_merging = false; + const auto w = fbos.empty()? tex_width : rsx::apply_resolution_scale(tex_width, true); + const auto h = fbos.empty()? tex_height : rsx::apply_resolution_scale(tex_height, true); - verify(HERE), !overlapping.empty(); - if (LIKELY(overlapping.back().surface == texptr)) - { - // The texture 'proposed' by the previous lookup is the newest one - // If it occupies the entire requested region, just use it as-is - requires_merging = (internal_width > surface_width || internal_height > surface_height); - } - else - { - verify(HERE), overlapping.size() > 1; - requires_merging = true; - } + sampled_image_descriptor result = { nullptr, deferred_request_command::atlas_gather, + texaddr, format, 0, 0, w, h, 1, texture_upload_context::framebuffer_storage, is_depth, + scale_x, scale_y, rsx::texture_dimension_extended::texture_dimension_2d, decoded_remap }; - if (requires_merging) - { - // TODO: For now we're only testing against blit engine dst, should add other types as wel - const auto range = rsx::address_range::start_length(texaddr, tex_pitch * tex_height); - auto local_resources = find_texture_from_range(range, rsx::texture_upload_context::blit_engine_dst); - - if (local_resources.empty() && overlapping.size() == 1) - { - // TODO: Fall back to full upload and merge - } - else - { - const auto w = rsx::apply_resolution_scale(internal_width, true); - const auto h = rsx::apply_resolution_scale(internal_height, true); - - sampled_image_descriptor result = { texptr->get_surface(), 
deferred_request_command::atlas_gather, - texaddr, format, 0, 0, w, h, 1, texture_upload_context::framebuffer_storage, is_depth, - scale_x, scale_y, rsx::texture_dimension_extended::texture_dimension_2d, decoded_remap }; - - result.external_subresource_desc.sections_to_copy.reserve(overlapping.size() + local_resources.size()); - - auto add_rtt_resource = [&](auto& section) - { - section.surface->read_barrier(cmd); - - const auto src_width = rsx::apply_resolution_scale(section.width, true), dst_width = src_width; - const auto src_height = rsx::apply_resolution_scale(section.height, true), dst_height = src_height; - result.external_subresource_desc.sections_to_copy.push_back - ({ - section.surface->get_surface(), - surface_transform::identity, - rsx::apply_resolution_scale(section.src_x, true), - rsx::apply_resolution_scale(section.src_y, true), - rsx::apply_resolution_scale(section.dst_x, true), - rsx::apply_resolution_scale(section.dst_y, true), - 0, - src_width, src_height, - dst_width, dst_height - }); - }; - - auto add_local_resource = [&](auto& section) - { - // Intersect this resource with the original one - const auto section_bpp = get_format_block_size_in_bytes(section->get_gcm_format()); - const auto clipped = rsx::intersect_region(texaddr, tex_width, tex_height, bpp, - section->get_section_base(), section->get_width(), section->get_height(), section_bpp, tex_pitch); - - // Since output is upscaled, also upscale on dst - result.external_subresource_desc.sections_to_copy.push_back - ({ - section->get_raw_texture(), - is_depth ? 
surface_transform::identity : surface_transform::argb_to_bgra, - (u16)std::get<0>(clipped).x, - (u16)std::get<0>(clipped).y, - rsx::apply_resolution_scale((u16)std::get<1>(clipped).x, true), - rsx::apply_resolution_scale((u16)std::get<1>(clipped).y, true), - 0, - (u16)std::get<2>(clipped).width, - (u16)std::get<2>(clipped).height, - rsx::apply_resolution_scale((u16)std::get<2>(clipped).width, true), - rsx::apply_resolution_scale((u16)std::get<2>(clipped).height, true), - }); - }; - - if (LIKELY(local_resources.empty())) - { - for (auto §ion : overlapping) - { - add_rtt_resource(section); - } - } - else - { - // Need to preserve sorting order - struct sort_helper - { - u64 tag; // Timestamp - u32 list; // List source, 0 = fbo, 1 = local - u32 index; // Index in list - }; - - std::vector sort_list; - sort_list.reserve(overlapping.size() + local_resources.size()); - - for (u32 index = 0; index < overlapping.size(); ++index) - { - sort_list.push_back({ overlapping[index].surface->last_use_tag, 0, index }); - } - - for (u32 index = 0; index < local_resources.size(); ++index) - { - if (local_resources[index]->get_rsx_pitch() != tex_pitch) - continue; - - // TODO: Typeless transfers - if (local_resources[index]->is_depth_texture() != is_depth) - continue; - - sort_list.push_back({ local_resources[index]->last_write_tag, 1, index }); - } - - std::sort(sort_list.begin(), sort_list.end(), [](const auto &a, const auto &b) - { - return (a.tag < b.tag); - }); - - for (const auto &e : sort_list) - { - if (e.list == 0) - { - add_rtt_resource(overlapping[e.index]); - } - else - { - add_local_resource(local_resources[e.index]); - } - } - } - - return result; - } - } - - bool requires_processing = surface_width > internal_width || surface_height > internal_height; - bool update_subresource_cache = false; - if (!requires_processing) - { - //NOTE: The scale also accounts for sampling outside the RTT region, e.g render to one quadrant but send whole texture for sampling - //In these 
cases, internal dimensions will exceed available surface dimensions. Account for the missing information using scaling (missing data will result in border color) - //TODO: Proper gather and stitching without performance loss - if (internal_width > surface_width) - scale_x *= ((f32)internal_width / surface_width); - - if (internal_height > surface_height) - scale_y *= ((f32)internal_height / surface_height); - - if (is_bound) - { - if (g_cfg.video.strict_rendering_mode) - { - LOG_TRACE(RSX, "Attempting to sample a currently bound %s target @ 0x%x", is_depth? "depth" : "color", texaddr); - requires_processing = true; - update_subresource_cache = true; - } - else - { - // Issue a texture barrier to ensure previous writes are visible - insert_texture_barrier(cmd, texptr); - } - } - } - - if (!requires_processing) - { - //Check if we need to do anything about the formats - requires_processing = !render_target_format_is_compatible(texptr, format); - } - - if (requires_processing) - { - const auto w = rsx::apply_resolution_scale(std::min(internal_width, surface_width), true); - const auto h = rsx::apply_resolution_scale(std::min(internal_height, surface_height), true); - - auto command = update_subresource_cache ? 
deferred_request_command::copy_image_dynamic : deferred_request_command::copy_image_static; - return { texptr->get_surface(), command, texaddr, format, 0, 0, w, h, 1, - texture_upload_context::framebuffer_storage, is_depth, scale_x, scale_y, - rsx::texture_dimension_extended::texture_dimension_2d, decoded_remap }; - } - - return{ texptr->get_view(encoded_remap, decoded_remap), texture_upload_context::framebuffer_storage, - is_depth, scale_x, scale_y, rsx::texture_dimension_extended::texture_dimension_2d }; + result.external_subresource_desc.sections_to_copy = gather_texture_slices(cmd, fbos, local, texaddr, tex_width, tex_height, 0, tex_pitch, 1, bpp, is_depth); + result.simplify(); + return result; } template @@ -1778,13 +1955,12 @@ namespace rsx const u32 tex_size = (u32)get_texture_size(tex); const address_range tex_range = address_range::start_length(texaddr, tex_size); const u32 format = tex.format() & ~(CELL_GCM_TEXTURE_LN | CELL_GCM_TEXTURE_UN); - const bool is_compressed_format = (format == CELL_GCM_TEXTURE_COMPRESSED_DXT1 || format == CELL_GCM_TEXTURE_COMPRESSED_DXT23 || format == CELL_GCM_TEXTURE_COMPRESSED_DXT45); + const bool is_compressed_format = is_compressed_gcm_format(format); const auto extended_dimension = tex.get_extended_texture_dimension(); const u16 tex_width = tex.width(); u16 tex_height = tex.height(); - u16 tex_pitch = (u16)tex.pitch(); - if (tex_pitch == 0) tex_pitch = get_format_packed_pitch(format, tex_width); + u16 tex_pitch = (tex.format() & CELL_GCM_TEXTURE_LN)? (u16)tex.pitch() : get_format_packed_pitch(format, tex_width); u16 depth; switch (extended_dimension) @@ -1803,42 +1979,13 @@ namespace rsx break; } - if (!is_compressed_format) + if (UNLIKELY(m_rtts.address_is_bound(texaddr))) { - // Check for sampleable rtts from previous render passes - // TODO: When framebuffer Y compression is properly handled, this section can be removed. 
A more accurate framebuffer storage check exists below this block - if (auto texptr = m_rtts.get_texture_from_render_target_if_applicable(texaddr); - texptr && texptr->get_rsx_pitch() == tex_pitch) + if (auto texptr = m_rtts.get_surface_at(texaddr); + check_framebuffer_resource(cmd, texptr, tex_width, tex_height, depth, tex_pitch, extended_dimension)) { - if (const bool is_active = m_rtts.address_is_bound(texaddr, false); - is_active || texptr->test()) - { - return process_framebuffer_resource(cmd, texptr, texaddr, tex.format(), m_rtts, - tex_width, tex_height, depth, tex_pitch, extended_dimension, false, is_active, - tex.remap(), tex.decoded_remap()); - } - else - { - m_rtts.invalidate_surface_address(texaddr, false); - invalidate_address(cmd, texaddr, invalidation_cause::read, std::forward(extras)...); - } - } - - if (auto texptr = m_rtts.get_texture_from_depth_stencil_if_applicable(texaddr); - texptr && texptr->get_rsx_pitch() == tex_pitch) - { - if (const bool is_active = m_rtts.address_is_bound(texaddr, true); - is_active || texptr->test()) - { - return process_framebuffer_resource(cmd, texptr, texaddr, tex.format(), m_rtts, - tex_width, tex_height, depth, tex_pitch, extended_dimension, true, is_active, - tex.remap(), tex.decoded_remap()); - } - else - { - m_rtts.invalidate_surface_address(texaddr, true); - invalidate_address(cmd, texaddr, invalidation_cause::read, std::forward(extras)...); - } + return process_framebuffer_resource_fast(cmd, texptr, texaddr, tex.format(), + tex_width, tex_height, depth, extended_dimension, tex.remap(), tex.decoded_remap()); } } @@ -1849,149 +1996,109 @@ namespace rsx if (extended_dimension == rsx::texture_dimension_extended::texture_dimension_1d) scale_y = 0.f; - if (!is_compressed_format) + reader_lock lock(m_cache_mutex); + + // Check shader_read storage. In a given scene, reads from local memory far outnumber reads from the surface cache + const u32 lookup_mask = (is_compressed_format)? 
rsx::texture_upload_context::shader_read : + rsx::texture_upload_context::shader_read | rsx::texture_upload_context::blit_engine_dst | rsx::texture_upload_context::blit_engine_src; + const auto overlapping_locals = find_texture_from_range(tex_range, tex_pitch, lookup_mask); + + for (auto& cached_texture : overlapping_locals) { - // Check if we are re-sampling a subresource of an RTV/DSV texture, bound or otherwise - - const auto rsc = m_rtts.get_surface_subresource_if_applicable(texaddr, tex_width, tex_height, tex_pitch); - if (rsc.surface) + if (cached_texture->matches(texaddr, tex_width, tex_height, depth, 0)) { - if (!rsc.surface->test() && !m_rtts.address_is_bound(rsc.base_address, rsc.is_depth_surface)) - { - m_rtts.invalidate_surface_address(rsc.base_address, rsc.is_depth_surface); - invalidate_address(cmd, rsc.base_address, invalidation_cause::read, std::forward(extras)...); - } - else if (extended_dimension != rsx::texture_dimension_extended::texture_dimension_2d && - extended_dimension != rsx::texture_dimension_extended::texture_dimension_1d) - { - LOG_ERROR(RSX, "Sampling of RTT region as non-2D texture! addr=0x%x, Type=%d, dims=%dx%d", - texaddr, (u8)tex.get_extended_texture_dimension(), tex.width(), tex.height()); - } - else - { - u16 internal_width = tex_width; - u16 internal_height = tex_height; - - get_native_dimensions(internal_width, internal_height, rsc.surface); - if (!rsc.x && !rsc.y && rsc.w == internal_width && rsc.h == internal_height) - { - //Full sized hit from the surface cache. This should have been already found before getting here - fmt::throw_exception("Unreachable" HERE); - } - - internal_width = rsx::apply_resolution_scale(internal_width, true); - internal_height = (extended_dimension == rsx::texture_dimension_extended::texture_dimension_1d)? 
1: rsx::apply_resolution_scale(internal_height, true); - - return{ rsc.surface->get_surface(), deferred_request_command::copy_image_static, rsc.base_address, format, - rsx::apply_resolution_scale(rsc.x, false), rsx::apply_resolution_scale(rsc.y, false), - internal_width, internal_height, 1, texture_upload_context::framebuffer_storage, rsc.is_depth_surface, scale_x, scale_y, - rsx::texture_dimension_extended::texture_dimension_2d, tex.decoded_remap() }; - } + return{ cached_texture->get_view(tex.remap(), tex.decoded_remap()), cached_texture->get_context(), cached_texture->is_depth_texture(), scale_x, scale_y, cached_texture->get_image_type() }; } } + if (!is_compressed_format) { - //Search in cache and upload/bind - reader_lock lock(m_cache_mutex); - - auto cached_texture = find_texture_from_dimensions(texaddr, tex_width, tex_height, depth); - if (cached_texture) + // Next, attempt to merge blit engine and surface store + // Blit sources contain info from any shader-read stuff in range + // NOTE: Compressed formats require a reupload, facilitated by blit synchronization and/or WCB and are not handled here + u32 required_surface_height, slice_h; + switch (extended_dimension) { - // TODO: Handle invalidated framebuffer textures better. 
This is awful - while (cached_texture->get_context() == rsx::texture_upload_context::framebuffer_storage) - { - if (!cached_texture->is_locked()) - { - lock.upgrade(); - cached_texture->set_dirty(true); - - // Check again for another match if possible - cached_texture = find_texture_from_dimensions(texaddr, tex_width, tex_height, depth); - if (!cached_texture) break; - } - else - { - // Let it play out (will be unlocked and flushed before a shader_read is uploaded) - break; - } - } - - if (cached_texture && cached_texture->get_context() != rsx::texture_upload_context::framebuffer_storage) - { - if (cached_texture->get_image_type() == rsx::texture_dimension_extended::texture_dimension_1d) - scale_y = 0.f; - - return{ cached_texture->get_view(tex.remap(), tex.decoded_remap()), cached_texture->get_context(), cached_texture->is_depth_texture(), scale_x, scale_y, cached_texture->get_image_type() }; - } - } - - if (is_hw_blit_engine_compatible(format)) - { - //Find based on range instead - auto overlapping_surfaces = find_texture_from_range(tex_range, rsx::texture_upload_context::blit_engine_dst); - if (!overlapping_surfaces.empty()) - { - for (const auto &surface : overlapping_surfaces) - { - if (!surface->overlaps(tex_range, rsx::section_bounds::confirmed_range)) - continue; - - if (surface->get_width() >= tex_width && surface->get_height() >= tex_height) - { - u16 offset_x = 0, offset_y = 0; - if (const u32 address_offset = texaddr - surface->get_section_base()) - { - const auto bpp = get_format_block_size_in_bytes(format); - offset_y = address_offset / tex_pitch; - offset_x = (address_offset % tex_pitch) / bpp; - } - - if ((offset_x + tex_width) <= surface->get_width() && - (offset_y + tex_height) <= surface->get_height()) - { - if (extended_dimension != rsx::texture_dimension_extended::texture_dimension_2d && - extended_dimension != rsx::texture_dimension_extended::texture_dimension_1d) - { - LOG_ERROR(RSX, "Texture resides in blit engine memory, but requested type 
is not 2D (%d)", (u32)extended_dimension); - break; - } - - auto src_image = surface->get_raw_texture(); - return{ src_image, deferred_request_command::copy_image_static, surface->get_section_base(), format, offset_x, offset_y, tex_width, tex_height, 1, - texture_upload_context::blit_engine_dst, surface->is_depth_texture(), scale_x, scale_y, rsx::texture_dimension_extended::texture_dimension_2d, - rsx::default_remap_vector }; - } - } - } - } - } - - //Do direct upload from CPU as the last resort - const bool is_swizzled = !(tex.format() & CELL_GCM_TEXTURE_LN); - auto subresources_layout = get_subresources_layout(tex); - - bool is_depth_format = false; - switch (format) - { - case CELL_GCM_TEXTURE_DEPTH16: - case CELL_GCM_TEXTURE_DEPTH16_FLOAT: - case CELL_GCM_TEXTURE_DEPTH24_D8: - case CELL_GCM_TEXTURE_DEPTH24_D8_FLOAT: - is_depth_format = true; + case rsx::texture_dimension_extended::texture_dimension_3d: + case rsx::texture_dimension_extended::texture_dimension_cubemap: + // Account for padding between mipmaps for all layers + required_surface_height = tex_range.length() / tex_pitch; + slice_h = required_surface_height / depth; + break; + default: + // Ignore mipmaps and search for LOD0 + required_surface_height = slice_h = tex_height; break; } - // Upgrade lock - lock.upgrade(); + auto bpp = get_format_block_size_in_bytes(format); + const auto overlapping_fbos = m_rtts.get_merged_texture_memory_region(cmd, texaddr, tex_width, required_surface_height, tex_pitch, bpp); - //Invalidate - invalidate_range_impl_base(cmd, tex_range, invalidation_cause::read, std::forward(extras)...); + if (!overlapping_fbos.empty() || !overlapping_locals.empty()) + { + int _pool = -1; + if (LIKELY(overlapping_locals.empty())) + { + _pool = 0; + } + else if (overlapping_fbos.empty()) + { + _pool = 1; + } + else + { + _pool = (overlapping_locals.back()->last_write_tag < overlapping_fbos.back().surface->last_use_tag) ? 
0 : 1; + } - //NOTE: SRGB correction is to be handled in the fragment shader; upload as linear RGB - return{ upload_image_from_cpu(cmd, tex_range, tex_width, tex_height, depth, tex.get_exact_mipmap_count(), tex_pitch, format, - texture_upload_context::shader_read, subresources_layout, extended_dimension, is_swizzled)->get_view(tex.remap(), tex.decoded_remap()), - texture_upload_context::shader_read, is_depth_format, scale_x, scale_y, extended_dimension }; + if (_pool == 0) + { + // Surface cache data is newer, check if this thing fits our search parameters + const auto& last = overlapping_fbos.back(); + if (last.src_x == 0 && last.src_y == 0) + { + u16 internal_width = tex_width; + u16 internal_height = required_surface_height; + get_native_dimensions(internal_width, internal_height, last.surface); + + if (last.width == internal_width && last.height == internal_height) + { + verify(HERE), last.surface->test(); + return process_framebuffer_resource_fast(cmd, last.surface, texaddr, tex.format(), tex_width, tex_height, depth, + extended_dimension, tex.remap(), tex.decoded_remap(), false); + } + } + } + + return merge_cache_resources(cmd, overlapping_fbos, overlapping_locals, + texaddr, tex.format(), tex_width, tex_height, depth, tex_pitch, slice_h, + extended_dimension, tex.remap(), tex.decoded_remap(), _pool); + } } + + // Do direct upload from CPU as the last resort + const bool is_swizzled = !(tex.format() & CELL_GCM_TEXTURE_LN); + auto subresources_layout = get_subresources_layout(tex); + + bool is_depth_format = false; + switch (format) + { + case CELL_GCM_TEXTURE_DEPTH16: + case CELL_GCM_TEXTURE_DEPTH16_FLOAT: + case CELL_GCM_TEXTURE_DEPTH24_D8: + case CELL_GCM_TEXTURE_DEPTH24_D8_FLOAT: + is_depth_format = true; + break; + } + + lock.upgrade(); + + //Invalidate + invalidate_range_impl_base(cmd, tex_range, invalidation_cause::read, std::forward(extras)...); + + //NOTE: SRGB correction is to be handled in the fragment shader; upload as linear RGB + return{ 
upload_image_from_cpu(cmd, tex_range, tex_width, tex_height, depth, tex.get_exact_mipmap_count(), tex_pitch, format, + texture_upload_context::shader_read, subresources_layout, extended_dimension, is_swizzled)->get_view(tex.remap(), tex.decoded_remap()), + texture_upload_context::shader_read, is_depth_format, scale_x, scale_y, extended_dimension }; } template @@ -2046,14 +2153,14 @@ namespace rsx src_is_render_target = false; } - if (src_is_render_target && !src_subres.surface->test() && !m_rtts.address_is_bound(src_subres.base_address, src_subres.is_depth_surface)) + if (src_is_render_target && !src_subres.surface->test() && !m_rtts.address_is_bound(src_subres.base_address)) { m_rtts.invalidate_surface_address(src_subres.base_address, src_subres.is_depth_surface); invalidate_address(cmd, src_subres.base_address, invalidation_cause::read, std::forward(extras)...); src_is_render_target = false; } - if (dst_is_render_target && !dst_subres.surface->test() && !m_rtts.address_is_bound(dst_subres.base_address, dst_subres.is_depth_surface)) + if (dst_is_render_target && !dst_subres.surface->test() && !m_rtts.address_is_bound(dst_subres.base_address)) { m_rtts.invalidate_surface_address(dst_subres.base_address, dst_subres.is_depth_surface); invalidate_address(cmd, dst_subres.base_address, invalidation_cause::read, std::forward(extras)...); @@ -2157,13 +2264,10 @@ namespace rsx if (!dst_is_render_target) { // Check for any available region that will fit this one - auto overlapping_surfaces = find_texture_from_range(address_range::start_length(dst_address, dst.pitch * dst.clip_height), rsx::texture_upload_context::blit_engine_dst); + auto overlapping_surfaces = find_texture_from_range(address_range::start_length(dst_address, dst.pitch * dst.clip_height), dst.pitch, rsx::texture_upload_context::blit_engine_dst); for (const auto &surface : overlapping_surfaces) { - if (surface->get_rsx_pitch() != dst.pitch) - continue; - const auto old_dst_area = dst_area; if (const u32 
address_offset = dst_address - surface->get_section_base()) { @@ -2214,22 +2318,15 @@ namespace rsx max_dst_height = dst_subres.surface->get_surface_height(); } - //Create source texture if does not exist + // Create source texture if does not exist if (!src_is_render_target) { - auto overlapping_surfaces = find_texture_from_range(address_range::start_length(src_address, src.pitch * src.height)); + const u32 lookup_mask = rsx::texture_upload_context::blit_engine_src | rsx::texture_upload_context::blit_engine_dst; + auto overlapping_surfaces = find_texture_from_range(address_range::start_length(src_address, src.pitch * src.height), src.pitch, lookup_mask); auto old_src_area = src_area; for (const auto &surface : overlapping_surfaces) { - //look for any that will fit, unless its a shader read surface or framebuffer_storage - if (surface->get_context() == rsx::texture_upload_context::shader_read || - surface->get_context() == rsx::texture_upload_context::framebuffer_storage) - continue; - - if (surface->get_rsx_pitch() != src.pitch) - continue; - if (const u32 address_offset = src_address - surface->get_section_base()) { const u16 bpp = src_is_argb8 ? 
4 : 2; diff --git a/rpcs3/Emu/RSX/Common/texture_cache_utils.h b/rpcs3/Emu/RSX/Common/texture_cache_utils.h index 76ca912b06..3393b23c94 100644 --- a/rpcs3/Emu/RSX/Common/texture_cache_utils.h +++ b/rpcs3/Emu/RSX/Common/texture_cache_utils.h @@ -99,6 +99,17 @@ namespace rsx fmt::throw_exception("Unreachable " HERE); } + constexpr invalidation_cause defer() const + { + AUDIT(!deferred_flush()); + if (cause == read) + return deferred_read; + else if (cause == write) + return deferred_write; + else + fmt::throw_exception("Unreachable " HERE); + } + constexpr invalidation_cause() : cause(invalid) {} constexpr invalidation_cause(enum_type _cause) : cause(_cause) {} operator enum_type&() { return cause; } diff --git a/rpcs3/Emu/RSX/GL/GLGSRender.cpp b/rpcs3/Emu/RSX/GL/GLGSRender.cpp index c767629411..bd99ce0bcc 100644 --- a/rpcs3/Emu/RSX/GL/GLGSRender.cpp +++ b/rpcs3/Emu/RSX/GL/GLGSRender.cpp @@ -330,12 +330,12 @@ void GLGSRender::end() _SelectTexture(GL_FRAGMENT_TEXTURES_START + i); gl::texture_view* view = nullptr; - if (rsx::method_registers.fragment_textures[i].enabled()) - { - auto sampler_state = static_cast(fs_sampler_state[i].get()); - view = sampler_state->image_handle; + auto sampler_state = static_cast(fs_sampler_state[i].get()); - if (!view && sampler_state->external_subresource_desc.external_handle) + if (rsx::method_registers.fragment_textures[i].enabled() && + sampler_state->validate()) + { + if (view = sampler_state->image_handle; UNLIKELY(!view)) { view = m_gl_texture_cache.create_temporary_subresource(cmd, sampler_state->external_subresource_desc); } @@ -375,13 +375,17 @@ void GLGSRender::end() auto sampler_state = static_cast(vs_sampler_state[i].get()); _SelectTexture(GL_VERTEX_TEXTURES_START + i); - if (sampler_state->image_handle) + if (rsx::method_registers.vertex_textures[i].enabled() && + sampler_state->validate()) { - sampler_state->image_handle->bind(); - } - else if (sampler_state->external_subresource_desc.external_handle) - { - 
m_gl_texture_cache.create_temporary_subresource(cmd, sampler_state->external_subresource_desc)->bind(); + if (LIKELY(sampler_state->image_handle)) + { + sampler_state->image_handle->bind(); + } + else + { + m_gl_texture_cache.create_temporary_subresource(cmd, sampler_state->external_subresource_desc)->bind(); + } } else { @@ -1640,7 +1644,8 @@ void GLGSRender::flip(int buffer) } else { - const auto overlap_info = m_rtts.get_merged_texture_memory_region(absolute_address, buffer_width, buffer_height, buffer_pitch, 4); + gl::command_context cmd = { gl_state }; + const auto overlap_info = m_rtts.get_merged_texture_memory_region(cmd, absolute_address, buffer_width, buffer_height, buffer_pitch, 4); verify(HERE), !overlap_info.empty(); if (overlap_info.back().surface == render_target_texture) @@ -1825,8 +1830,8 @@ bool GLGSRender::on_access_violation(u32 address, bool is_writing) is_writing ? (can_flush ? rsx::invalidation_cause::write : rsx::invalidation_cause::deferred_write) : (can_flush ? rsx::invalidation_cause::read : rsx::invalidation_cause::deferred_read); - gl::command_context null_cmd; - auto result = m_gl_texture_cache.invalidate_address(null_cmd, address, cause); + auto cmd = can_flush ? 
gl::command_context{ gl_state } : gl::command_context{}; + auto result = m_gl_texture_cache.invalidate_address(cmd, address, cause); if (!result.violation_handled) return false; diff --git a/rpcs3/Emu/RSX/GL/GLRenderTargets.cpp b/rpcs3/Emu/RSX/GL/GLRenderTargets.cpp index d80e168662..48ea2a03a4 100644 --- a/rpcs3/Emu/RSX/GL/GLRenderTargets.cpp +++ b/rpcs3/Emu/RSX/GL/GLRenderTargets.cpp @@ -375,30 +375,37 @@ void GLGSRender::init_buffers(rsx::framebuffer_creation_context context, bool sk m_gl_texture_cache.clear_ro_tex_invalidate_intr(); - //Mark buffer regions as NO_ACCESS on Cell visible side - if (g_cfg.video.write_color_buffers) + const auto color_format = rsx::internals::surface_color_format_to_gl(layout.color_format); + for (u8 i = 0; i < rsx::limits::color_buffers_count; ++i) { - auto color_format = rsx::internals::surface_color_format_to_gl(layout.color_format); + if (!m_surface_info[i].address || !m_surface_info[i].pitch) continue; - for (u8 i = 0; i < rsx::limits::color_buffers_count; ++i) + const auto surface_range = m_surface_info[i].get_memory_range(layout.aa_factors[1]); + if (g_cfg.video.write_color_buffers) { - if (!m_surface_info[i].address || !m_surface_info[i].pitch) continue; - - const utils::address_range surface_range = m_surface_info[i].get_memory_range(layout.aa_factors[1]); + // Mark buffer regions as NO_ACCESS on Cell-visible side m_gl_texture_cache.lock_memory_region(cmd, std::get<1>(m_rtts.m_bound_render_targets[i]), surface_range, m_surface_info[i].width, m_surface_info[i].height, m_surface_info[i].pitch, std::tuple<>{}, color_format.format, color_format.type, color_format.swap_bytes); } + else + { + m_gl_texture_cache.commit_framebuffer_memory_region(cmd, surface_range); + } } - if (g_cfg.video.write_depth_buffer) + if (m_depth_surface_info.address && m_depth_surface_info.pitch) { - if (m_depth_surface_info.address && m_depth_surface_info.pitch) + const auto surface_range = m_depth_surface_info.get_memory_range(layout.aa_factors[1]); + 
if (g_cfg.video.write_depth_buffer) { const auto depth_format_gl = rsx::internals::surface_depth_format_to_gl(layout.depth_format); - const utils::address_range surface_range = m_depth_surface_info.get_memory_range(layout.aa_factors[1]); m_gl_texture_cache.lock_memory_region(cmd, std::get<1>(m_rtts.m_bound_depth_stencil), surface_range, m_depth_surface_info.width, m_depth_surface_info.height, m_depth_surface_info.pitch, std::tuple<>{}, depth_format_gl.format, depth_format_gl.type, true); } + else + { + m_gl_texture_cache.commit_framebuffer_memory_region(cmd, surface_range); + } } if (m_gl_texture_cache.get_ro_tex_invalidate_intr()) @@ -585,6 +592,8 @@ void gl::render_target::memory_barrier(gl::command_context& cmd, bool force_init { gl::g_hw_blitter->fast_clear_image(cmd, this, {}); } + + on_write(); } return; diff --git a/rpcs3/Emu/RSX/GL/GLRenderTargets.h b/rpcs3/Emu/RSX/GL/GLRenderTargets.h index 18ef60eb8c..6858d29f96 100644 --- a/rpcs3/Emu/RSX/GL/GLRenderTargets.h +++ b/rpcs3/Emu/RSX/GL/GLRenderTargets.h @@ -107,6 +107,19 @@ namespace gl return surface_height; } + bool is_depth_surface() const override + { + switch (get_internal_format()) + { + case gl::texture::internal_format::depth16: + case gl::texture::internal_format::depth24_stencil8: + case gl::texture::internal_format::depth32f_stencil8: + return true; + default: + return false; + } + } + texture* get_surface() override { return (gl::texture*)this; diff --git a/rpcs3/Emu/RSX/GL/GLTextureCache.h b/rpcs3/Emu/RSX/GL/GLTextureCache.h index 989834453d..ec89b4a9ba 100644 --- a/rpcs3/Emu/RSX/GL/GLTextureCache.h +++ b/rpcs3/Emu/RSX/GL/GLTextureCache.h @@ -387,6 +387,7 @@ namespace gl if (synchronized) return; + verify(HERE), cmd.drv; copy_texture(cmd, blocking); if (blocking) @@ -700,7 +701,7 @@ namespace gl return result; } - std::array get_component_mapping(u32 gcm_format, rsx::texture_create_flags flags) + std::array get_component_mapping(u32 gcm_format, rsx::texture_create_flags flags) const { switch 
(gcm_format) { @@ -733,7 +734,7 @@ namespace gl } } - void copy_transfer_regions_impl(gl::command_context& cmd, gl::texture* dst_image, const std::vector& sources) + void copy_transfer_regions_impl(gl::command_context& cmd, gl::texture* dst_image, const std::vector& sources) const { for (const auto &slice : sources) { @@ -759,6 +760,38 @@ namespace gl } } + gl::texture* get_template_from_collection_impl(const std::vector& sections_to_transfer) const + { + gl::texture* result = nullptr; + for (const auto §ion : sections_to_transfer) + { + if (!section.src) + continue; + + if (!result) + { + result = section.src; + } + else + { + const auto set1 = result->get_native_component_layout(); + const auto set2 = section.src->get_native_component_layout(); + + if (set1[0] != set2[0] || + set1[1] != set2[1] || + set1[2] != set2[2] || + set1[3] != set2[3]) + { + // TODO + // This requires a far more complex setup as its not always possible to mix and match without compute assistance + return nullptr; + } + } + } + + return result; + } + protected: gl::texture_view* create_temporary_subresource_view(gl::command_context&, gl::texture** src, u32 gcm_format, u16 x, u16 y, u16 w, u16 h, @@ -909,6 +942,7 @@ namespace gl gl::upload_texture(section->get_raw_texture()->id(), gcm_format, width, height, depth, mipmaps, input_swizzled, type, subresource_layout); + section->last_write_tag = rsx::get_shared_tag(); return section; } @@ -966,6 +1000,7 @@ namespace gl return (ifmt == gl::texture::internal_format::depth24_stencil8 || ifmt == gl::texture::internal_format::depth32f_stencil8 || ifmt == gl::texture::internal_format::depth_stencil); + case CELL_GCM_TEXTURE_X16: case CELL_GCM_TEXTURE_DEPTH16: case CELL_GCM_TEXTURE_DEPTH16_FLOAT: return (ifmt == gl::texture::internal_format::depth16 || diff --git a/rpcs3/Emu/RSX/VK/VKGSRender.cpp b/rpcs3/Emu/RSX/VK/VKGSRender.cpp index 8946c822c2..dd9fca8969 100644 --- a/rpcs3/Emu/RSX/VK/VKGSRender.cpp +++ b/rpcs3/Emu/RSX/VK/VKGSRender.cpp @@ -1626,12 
+1626,12 @@ void VKGSRender::end() if (current_fp_metadata.referenced_textures_mask & (1 << i)) { vk::image_view* view = nullptr; - if (rsx::method_registers.fragment_textures[i].enabled()) - { - auto sampler_state = static_cast(fs_sampler_state[i].get()); - view = sampler_state->image_handle; + auto sampler_state = static_cast(fs_sampler_state[i].get()); - if (!view && sampler_state->external_subresource_desc.external_handle) + if (rsx::method_registers.fragment_textures[i].enabled() && + sampler_state->validate()) + { + if (view = sampler_state->image_handle; !view) { //Requires update, copy subresource view = m_texture_cache.create_temporary_subresource(*m_current_command_buffer, sampler_state->external_subresource_desc); @@ -1705,7 +1705,7 @@ void VKGSRender::end() auto sampler_state = static_cast(vs_sampler_state[i].get()); auto image_ptr = sampler_state->image_handle; - if (!image_ptr && sampler_state->external_subresource_desc.external_handle) + if (!image_ptr && sampler_state->validate()) { image_ptr = m_texture_cache.create_temporary_subresource(*m_current_command_buffer, sampler_state->external_subresource_desc); m_vertex_textures_dirty[i] = true; @@ -2970,28 +2970,36 @@ void VKGSRender::prepare_rtts(rsx::framebuffer_creation_context context) m_texture_cache.notify_surface_changed(layout.zeta_address); } - if (g_cfg.video.write_color_buffers) + const auto color_fmt_info = vk::get_compatible_gcm_format(layout.color_format); + for (u8 index : m_draw_buffers) { - const auto color_fmt_info = vk::get_compatible_gcm_format(layout.color_format); - for (u8 index : m_draw_buffers) - { - if (!m_surface_info[index].address || !m_surface_info[index].pitch) continue; + if (!m_surface_info[index].address || !m_surface_info[index].pitch) continue; - const utils::address_range surface_range = m_surface_info[index].get_memory_range(layout.aa_factors[1]); + const utils::address_range surface_range = m_surface_info[index].get_memory_range(layout.aa_factors[1]); + if 
(g_cfg.video.write_color_buffers) + { m_texture_cache.lock_memory_region(*m_current_command_buffer, std::get<1>(m_rtts.m_bound_render_targets[index]), surface_range, m_surface_info[index].width, m_surface_info[index].height, layout.actual_color_pitch[index], std::tuple{ m_swapchain->get_graphics_queue() }, color_fmt_info.first, color_fmt_info.second); } + else + { + m_texture_cache.commit_framebuffer_memory_region(*m_current_command_buffer, surface_range, m_swapchain->get_graphics_queue()); + } } - if (g_cfg.video.write_depth_buffer) + if (m_depth_surface_info.address && m_depth_surface_info.pitch) { - if (m_depth_surface_info.address && m_depth_surface_info.pitch) + const utils::address_range surface_range = m_depth_surface_info.get_memory_range(layout.aa_factors[1]); + if (g_cfg.video.write_depth_buffer) { - const u32 gcm_format = (m_depth_surface_info.depth_format != rsx::surface_depth_format::z16)? CELL_GCM_TEXTURE_DEPTH16 : CELL_GCM_TEXTURE_DEPTH24_D8; - const utils::address_range surface_range = m_depth_surface_info.get_memory_range(layout.aa_factors[1]); + const u32 gcm_format = (m_depth_surface_info.depth_format != rsx::surface_depth_format::z16) ? CELL_GCM_TEXTURE_DEPTH16 : CELL_GCM_TEXTURE_DEPTH24_D8; m_texture_cache.lock_memory_region(*m_current_command_buffer, std::get<1>(m_rtts.m_bound_depth_stencil), surface_range, m_depth_surface_info.width, m_depth_surface_info.height, layout.actual_zeta_pitch, std::tuple{ m_swapchain->get_graphics_queue() }, gcm_format, false); } + else + { + m_texture_cache.commit_framebuffer_memory_region(*m_current_command_buffer, surface_range, m_swapchain->get_graphics_queue()); + } } auto vk_depth_format = (layout.zeta_address == 0) ? 
VK_FORMAT_UNDEFINED : vk::get_compatible_depth_surface_format(m_device->get_formats_support(), layout.depth_format); @@ -3301,7 +3309,7 @@ void VKGSRender::flip(int buffer) } else { - const auto overlap_info = m_rtts.get_merged_texture_memory_region(absolute_address, buffer_width, buffer_height, buffer_pitch, 4); + const auto overlap_info = m_rtts.get_merged_texture_memory_region(*m_current_command_buffer, absolute_address, buffer_width, buffer_height, buffer_pitch, 4); verify(HERE), !overlap_info.empty(); if (overlap_info.back().surface == render_target_texture) @@ -3341,16 +3349,14 @@ void VKGSRender::flip(int buffer) { // Read from cell const auto range = utils::address_range::start_length(absolute_address, buffer_pitch * buffer_height); - const auto overlap = m_texture_cache.find_texture_from_range(range); + const u32 lookup_mask = rsx::texture_upload_context::blit_engine_dst | rsx::texture_upload_context::framebuffer_storage; + const auto overlap = m_texture_cache.find_texture_from_range(range, 0, lookup_mask); bool flush_queue = false; for (const auto & section : overlap) { - if (section->get_protection() == utils::protection::no) - { - section->copy_texture(*m_current_command_buffer, false, m_swapchain->get_graphics_queue()); - flush_queue = true; - } + section->copy_texture(*m_current_command_buffer, false, m_swapchain->get_graphics_queue()); + flush_queue = true; } if (flush_queue) diff --git a/rpcs3/Emu/RSX/VK/VKRenderTargets.h b/rpcs3/Emu/RSX/VK/VKRenderTargets.h index d1b8f167cb..e7c3837d18 100644 --- a/rpcs3/Emu/RSX/VK/VKRenderTargets.h +++ b/rpcs3/Emu/RSX/VK/VKRenderTargets.h @@ -51,6 +51,11 @@ namespace vk return native_pitch; } + bool is_depth_surface() const override + { + return !!(attachment_aspect_flag & VK_IMAGE_ASPECT_DEPTH_BIT); + } + bool matches_dimensions(u16 _width, u16 _height) const { //Use forward scaling to account for rounding and clamping errors diff --git a/rpcs3/Emu/RSX/VK/VKTexture.cpp b/rpcs3/Emu/RSX/VK/VKTexture.cpp index 
b346898c6e..76bb3e530f 100644 --- a/rpcs3/Emu/RSX/VK/VKTexture.cpp +++ b/rpcs3/Emu/RSX/VK/VKTexture.cpp @@ -123,7 +123,7 @@ namespace vk if (src->current_layout != preferred_src_format) change_image_layout(cmd, src->value, src_layout, preferred_src_format, vk::get_image_subresource_range(0, 0, 1, 1, src_aspect)); - if (dst->current_layout != preferred_dst_format) + if (dst->current_layout != preferred_dst_format && src != dst) change_image_layout(cmd, dst->value, dst_layout, preferred_dst_format, vk::get_image_subresource_range(0, 0, 1, 1, dst_aspect)); auto scratch_buf = vk::get_scratch_buffer(); @@ -196,7 +196,7 @@ namespace vk if (src_layout != preferred_src_format) change_image_layout(cmd, src->value, preferred_src_format, src_layout, vk::get_image_subresource_range(0, 0, 1, 1, src_aspect)); - if (dst_layout != preferred_dst_format) + if (dst_layout != preferred_dst_format && src != dst) change_image_layout(cmd, dst->value, preferred_dst_format, dst_layout, vk::get_image_subresource_range(0, 0, 1, 1, dst_aspect)); } @@ -231,7 +231,7 @@ namespace vk if (srcLayout != preferred_src_format) change_image_layout(cmd, src, srcLayout, preferred_src_format, vk::get_image_subresource_range(0, 0, 1, 1, src_aspect)); - if (dstLayout != preferred_dst_format) + if (dstLayout != preferred_dst_format && src != dst) change_image_layout(cmd, dst, dstLayout, preferred_dst_format, vk::get_image_subresource_range(0, 0, 1, 1, dst_aspect)); for (u32 mip_level = 0; mip_level < mipmaps; ++mip_level) @@ -245,7 +245,7 @@ namespace vk if (srcLayout != preferred_src_format) change_image_layout(cmd, src, preferred_src_format, srcLayout, vk::get_image_subresource_range(0, 0, 1, 1, src_aspect)); - if (dstLayout != preferred_dst_format) + if (dstLayout != preferred_dst_format && src != dst) change_image_layout(cmd, dst, preferred_dst_format, dstLayout, vk::get_image_subresource_range(0, 0, 1, 1, dst_aspect)); } @@ -272,7 +272,7 @@ namespace vk if (srcLayout != preferred_src_format) 
change_image_layout(cmd, src, srcLayout, preferred_src_format, vk::get_image_subresource_range(0, 0, 1, 1, aspect)); - if (dstLayout != preferred_dst_format) + if (dstLayout != preferred_dst_format && src != dst) change_image_layout(cmd, dst, dstLayout, preferred_dst_format, vk::get_image_subresource_range(0, 0, 1, 1, aspect)); if (compatible_formats && src_width == dst_width && src_height == dst_height) @@ -296,7 +296,7 @@ namespace vk } else { - auto stretch_image_typeless_unsafe = [&cmd, preferred_src_format, preferred_dst_format](VkImage src, VkImage dst, VkImage typeless, + auto stretch_image_typeless_unsafe = [&cmd, preferred_src_format, preferred_dst_format, filter](VkImage src, VkImage dst, VkImage typeless, const areai& src_rect, const areai& dst_rect, VkImageAspectFlags aspect, VkImageAspectFlags transfer_flags = 0xFF) { const u32 src_w = u32(src_rect.x2 - src_rect.x1); @@ -314,14 +314,14 @@ namespace vk //2. Blit typeless surface to self copy_scaled_image(cmd, typeless, typeless, VK_IMAGE_LAYOUT_GENERAL, VK_IMAGE_LAYOUT_GENERAL, - 0, 0, src_w, src_h, 0, src_h, dst_w, dst_h, 1, VK_IMAGE_ASPECT_COLOR_BIT, VK_IMAGE_ASPECT_COLOR_BIT, VK_FILTER_NEAREST); + 0, 0, src_w, src_h, 0, src_h, dst_w, dst_h, 1, VK_IMAGE_ASPECT_COLOR_BIT, VK_IMAGE_ASPECT_COLOR_BIT, filter); //3. 
Copy back the aspect bits copy_image(cmd, typeless, dst, VK_IMAGE_LAYOUT_GENERAL, preferred_dst_format, {0, (s32)src_h, (s32)dst_w, s32(src_h + dst_h) }, dst_rect, 1, VK_IMAGE_ASPECT_COLOR_BIT, aspect, 0xFF, transfer_flags); }; - auto stretch_image_typeless_safe = [&cmd, preferred_src_format, preferred_dst_format](VkImage src, VkImage dst, VkImage typeless, + auto stretch_image_typeless_safe = [&cmd, preferred_src_format, preferred_dst_format, filter](VkImage src, VkImage dst, VkImage typeless, const areai& src_rect, const areai& dst_rect, VkImageAspectFlags aspect, VkImageAspectFlags transfer_flags = 0xFF) { const u32 src_w = u32(src_rect.x2 - src_rect.x1); @@ -345,7 +345,7 @@ namespace vk //2. Blit typeless surface to self copy_scaled_image(cmd, typeless, typeless, VK_IMAGE_LAYOUT_GENERAL, VK_IMAGE_LAYOUT_GENERAL, - 0, 0, src_w, src_h, 0, src_h, dst_w, dst_h, 1, VK_IMAGE_ASPECT_COLOR_BIT, VK_IMAGE_ASPECT_COLOR_BIT, VK_FILTER_NEAREST); + 0, 0, src_w, src_h, 0, src_h, dst_w, dst_h, 1, VK_IMAGE_ASPECT_COLOR_BIT, VK_IMAGE_ASPECT_COLOR_BIT, filter); //3. 
Copy back the aspect bits info.imageExtent = { dst_w, dst_h, 1 }; @@ -423,7 +423,7 @@ namespace vk if (srcLayout != preferred_src_format) change_image_layout(cmd, src, preferred_src_format, srcLayout, vk::get_image_subresource_range(0, 0, 1, 1, aspect)); - if (dstLayout != preferred_dst_format) + if (dstLayout != preferred_dst_format && src != dst) change_image_layout(cmd, dst, preferred_dst_format, dstLayout, vk::get_image_subresource_range(0, 0, 1, 1, aspect)); } diff --git a/rpcs3/Emu/RSX/VK/VKTextureCache.h b/rpcs3/Emu/RSX/VK/VKTextureCache.h index 87ebef33b1..d577eb216e 100644 --- a/rpcs3/Emu/RSX/VK/VKTextureCache.h +++ b/rpcs3/Emu/RSX/VK/VKTextureCache.h @@ -457,7 +457,7 @@ namespace vk m_discarded_memory_size = 0; } - VkComponentMapping apply_component_mapping_flags(u32 gcm_format, rsx::texture_create_flags flags, const texture_channel_remap_t& remap_vector) + VkComponentMapping apply_component_mapping_flags(u32 gcm_format, rsx::texture_create_flags flags, const texture_channel_remap_t& remap_vector) const { switch (gcm_format) { @@ -496,7 +496,7 @@ namespace vk return mapping; } - void copy_transfer_regions_impl(vk::command_buffer& cmd, vk::image* dst, const std::vector& sections_to_transfer) + void copy_transfer_regions_impl(vk::command_buffer& cmd, vk::image* dst, const std::vector& sections_to_transfer) const { for (const auto &section : sections_to_transfer) { @@ -515,10 +515,19 @@ namespace vk copy_rgn.srcOffset = { section.src_x, section.src_y, 0 }; copy_rgn.dstOffset = { section.dst_x, section.dst_y, 0 }; - copy_rgn.dstSubresource = { dst_aspect & ~(VK_IMAGE_ASPECT_STENCIL_BIT), 0, section.dst_z, 1 }; + copy_rgn.dstSubresource = { dst_aspect & ~(VK_IMAGE_ASPECT_STENCIL_BIT), 0, 0, 1 }; copy_rgn.srcSubresource = { src_aspect & ~(VK_IMAGE_ASPECT_STENCIL_BIT), 0, 0, 1 }; copy_rgn.extent = { section.src_w, section.src_h, 1 }; + if (dst->info.imageType == VK_IMAGE_TYPE_3D) + { + copy_rgn.dstOffset.z = section.dst_z; + } + else + { +
copy_rgn.dstSubresource.baseArrayLayer = section.dst_z; + } + vk::change_image_layout(cmd, section.src, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, src_range); vkCmdCopyImage(cmd, section.src->value, section.src->current_layout, dst->value, dst->current_layout, 1, &copy_rgn); vk::change_image_layout(cmd, section.src, old_src_layout, src_range); @@ -579,6 +588,35 @@ namespace vk } } + vk::image* get_template_from_collection_impl(const std::vector& sections_to_transfer) const + { + vk::image* result = nullptr; + for (const auto &section : sections_to_transfer) + { + if (!section.src) + continue; + + if (!result) + { + result = section.src; + } + else + { + if (section.src->native_component_map.a != result->native_component_map.a || + section.src->native_component_map.r != result->native_component_map.r || + section.src->native_component_map.g != result->native_component_map.g || + section.src->native_component_map.b != result->native_component_map.b) + { + // TODO + // This requires a far more complex setup as its not always possible to mix and match without compute assistance + return nullptr; + } + } + } + + return result; + } + protected: vk::image_view* create_temporary_subresource_view_impl(vk::command_buffer& cmd, vk::image* source, VkImageType image_type, VkImageViewType view_type, u32 gcm_format, u16 x, u16 y, u16 w, u16 h, const texture_channel_remap_t& remap_vector, bool copy) @@ -587,7 +625,7 @@ namespace vk std::unique_ptr view; VkImageAspectFlags aspect; - VkImageCreateFlags image_flags; + VkImageCreateFlags image_flags = (view_type == VK_IMAGE_VIEW_TYPE_CUBE) ? VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT : 0; VkFormat dst_format = vk::get_compatible_sampler_format(m_formats_support, gcm_format); if (source) @@ -599,13 +637,10 @@ namespace vk //HACK! Should use typeless transfer dst_format = source->info.format; } - - image_flags = source->info.flags; } else { aspect = vk::get_aspect_flags(dst_format); - image_flags = (view_type == VK_IMAGE_VIEW_TYPE_CUBE)?
VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT : 0; } image.reset(new vk::viewable_image(*vk::get_current_renderer(), m_memory_types.device_local, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, @@ -772,13 +807,25 @@ namespace vk auto result = create_temporary_subresource_view_impl(cmd, nullptr, VK_IMAGE_TYPE_2D, VK_IMAGE_VIEW_TYPE_2D, gcm_format, 0, 0, width, height, remap_vector, false); + const auto image = result->image(); VkImageAspectFlags dst_aspect = vk::get_aspect_flags(result->info.format); VkImageSubresourceRange dst_range = { dst_aspect, 0, 1, 0, 1 }; - vk::change_image_layout(cmd, result->image(), VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, dst_range); + vk::change_image_layout(cmd, image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, dst_range); - copy_transfer_regions_impl(cmd, result->image(), sections_to_copy); + if (!(dst_aspect & VK_IMAGE_ASPECT_DEPTH_BIT)) + { + VkClearColorValue clear = {}; + vkCmdClearColorImage(cmd, image->value, image->current_layout, &clear, 1, &dst_range); + } + else + { + VkClearDepthStencilValue clear = { 1.f, 0 }; + vkCmdClearDepthStencilImage(cmd, image->value, image->current_layout, &clear, 1, &dst_range); + } - vk::change_image_layout(cmd, result->image(), VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, dst_range); + copy_transfer_regions_impl(cmd, image, sections_to_copy); + + vk::change_image_layout(cmd, image, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, dst_range); return result; } @@ -940,6 +987,7 @@ namespace vk change_image_layout(cmd, image, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, subres_range); + section->last_write_tag = rsx::get_shared_tag(); return section; } @@ -988,6 +1036,7 @@ namespace vk case CELL_GCM_TEXTURE_DEPTH24_D8: case CELL_GCM_TEXTURE_DEPTH24_D8_FLOAT: return (vk_format == VK_FORMAT_D24_UNORM_S8_UINT || vk_format == VK_FORMAT_D32_SFLOAT_S8_UINT); + case CELL_GCM_TEXTURE_X16: case CELL_GCM_TEXTURE_DEPTH16: case CELL_GCM_TEXTURE_DEPTH16_FLOAT: return (vk_format == VK_FORMAT_D16_UNORM);