From 571dbfb7b1111d8a3fbf9ba56ddb3ce66390c543 Mon Sep 17 00:00:00 2001 From: kd-11 Date: Fri, 15 Sep 2017 01:32:23 +0300 Subject: [PATCH] rsx: Texture cache improvements - Limits buffer size to min 720 in the Y axis (1024 section causes conflicts in some cases - TODO) rsx: Fixups to allow large textures for blit operation - Also includes checks for both leaking sections and blit regions for vulkan hotfix for hanging when using WCB addendum - unlock both ro and no blocks before attempting to copy memory blocks gl: Fixups for ARB_explicit_uniform_location - Forces glsl v 430 to make use of the extension rsx/vk: Rework texture cache to minimize recursive access violations - Also modifies the vulkan commandbuffer begin/end/submit mechanism gl: Fix cached_texture_section::is_flushable to take memory protection into account rsx: Fix blit dst offset calculation --- rpcs3/Emu/RSX/Common/surface_store.h | 12 +-- rpcs3/Emu/RSX/Common/texture_cache.h | 113 ++++++++++++++++--------- rpcs3/Emu/RSX/GL/GLFragmentProgram.cpp | 2 +- rpcs3/Emu/RSX/GL/GLTextureCache.h | 2 +- rpcs3/Emu/RSX/GL/GLVertexProgram.cpp | 1 - rpcs3/Emu/RSX/RSXThread.cpp | 4 +- rpcs3/Emu/RSX/VK/VKGSRender.cpp | 29 ++----- rpcs3/Emu/RSX/VK/VKHelpers.h | 50 +++++++++++ rpcs3/Emu/RSX/VK/VKTextureCache.h | 46 +++++----- rpcs3/Emu/RSX/rsx_cache.h | 1 + rpcs3/Emu/RSX/rsx_methods.cpp | 2 + 11 files changed, 168 insertions(+), 94 deletions(-) diff --git a/rpcs3/Emu/RSX/Common/surface_store.h b/rpcs3/Emu/RSX/Common/surface_store.h index 0d400cc39d..2090e7f093 100644 --- a/rpcs3/Emu/RSX/Common/surface_store.h +++ b/rpcs3/Emu/RSX/Common/surface_store.h @@ -523,7 +523,7 @@ namespace rsx * address_is_bound - returns true if the surface at a given address is actively bound * get_surface_subresource_if_available - returns a sectiion descriptor that allows to crop surfaces stored in memory */ - bool surface_overlaps_address(surface_type surface, u32 surface_address, u32 texaddr, u16 *x, u16 *y, bool scale_to_fit) + bool surface_overlaps_address(surface_type surface, u32 surface_address, u32 texaddr, u16 *x, u16 *y, bool scale_to_fit, bool double_height) { bool is_subslice = false; u16 x_offset = 0; @@ -535,7 +535,6 @@ namespace rsx u32 offset = texaddr - surface_address; if (texaddr >= surface_address) { - if (offset == 0) { is_subslice = true; @@ -546,6 +545,8 @@ namespace rsx Traits::get_surface_info(surface, &info); u32 range = info.rsx_pitch * info.surface_height; + if (double_height) range *= 2; + if (offset < range) { const u32 y = (offset / info.rsx_pitch); @@ -560,6 +561,7 @@ namespace rsx x_offset = x; y_offset = y; + if (double_height) y_offset /= 2; is_subslice = true; } } @@ -602,11 +604,11 @@ namespace rsx return true; } - surface_subresource get_surface_subresource_if_applicable(u32 texaddr, u16 requested_width, u16 requested_height, u16 requested_pitch, bool scale_to_fit = false, bool crop = false, bool ignore_depth_formats = false) + surface_subresource get_surface_subresource_if_applicable(u32 texaddr, u16 requested_width, u16 requested_height, u16 requested_pitch, bool scale_to_fit = false, bool crop = false, bool ignore_depth_formats = false, bool double_height = false) { auto test_surface = [&](surface_type surface, u32 this_address, u16 &x_offset, u16 &y_offset, u16 &w, u16 &h, bool &clipped) { - if (surface_overlaps_address(surface, this_address, texaddr, &x_offset, &y_offset, scale_to_fit)) + if (surface_overlaps_address(surface, this_address, texaddr, &x_offset, &y_offset, scale_to_fit, double_height)) { surface_format_info info; Traits::get_surface_info(surface, &info); @@ -625,7 +627,7 @@ namespace rsx if (region_fits(info.surface_width, info.surface_height, x_offset, y_offset, real_width, requested_height)) { w = real_width; - h = info.surface_height; + h = requested_height; clipped = false; return true; diff --git a/rpcs3/Emu/RSX/Common/texture_cache.h b/rpcs3/Emu/RSX/Common/texture_cache.h index ef4f544e6e..be0ad061e1 100644 --- a/rpcs3/Emu/RSX/Common/texture_cache.h +++ b/rpcs3/Emu/RSX/Common/texture_cache.h @@ -53,6 +53,12 @@ namespace rsx data.push_back(std::move(section)); } + + void remove_one() + { + verify(HERE), valid_count > 0; + valid_count--; + } }; // Keep track of cache misses to pre-emptively flush some addresses @@ -104,8 +110,12 @@ namespace rsx if (base == last_dirty_block && range_data.valid_count == 0) continue; - if (trampled_range.first >= (base + get_block_size()) || base >= trampled_range.second) - continue; + if (trampled_range.first < trampled_range.second) + { + //Only if a valid range, ignore empty sets + if (trampled_range.first >= (base + range_data.max_range + get_block_size()) || base >= trampled_range.second) + continue; + } for (int i = 0; i < range_data.data.size(); i++) { @@ -138,7 +148,7 @@ namespace rsx } m_unreleased_texture_objects++; - range_data.valid_count--; + range_data.remove_one(); response = true; } } @@ -159,6 +169,7 @@ namespace rsx bool response = false; u32 last_dirty_block = 0; std::pair trampled_range = std::make_pair(0xffffffff, 0x0); + std::vector sections_to_flush; for (auto It = m_cache.begin(); It != m_cache.end(); It++) { @@ -169,8 +180,12 @@ namespace rsx if (base == last_dirty_block && range_data.valid_count == 0) continue; - if (trampled_range.first >= (base + get_block_size()) || base >= trampled_range.second) - continue; + if (trampled_range.first < trampled_range.second) + { + //Only if a valid range, ignore empty sets + if (trampled_range.first >= (base + range_data.max_range + get_block_size()) || base >= trampled_range.second) + continue; + } for (int i = 0; i < range_data.data.size(); i++) { @@ -192,16 +207,12 @@ namespace rsx range_reset = true; } - //TODO: Map basic host_visible memory without coherent constraint - if (!tex.flush(std::forward(extras)...)) - { - //Missed address, note this - //TODO: Lower severity when successful to keep the cache from overworking - record_cache_miss(tex); - } + //Defer actual flush operation until all affected regions are cleared to prevent recursion + tex.unprotect(); + sections_to_flush.push_back(&tex); response = true; - range_data.valid_count--; + range_data.remove_one(); } } @@ -211,6 +222,16 @@ namespace rsx } } + for (auto tex : sections_to_flush) + { + if (!tex->flush(std::forward(extras)...)) + { + //Missed address, note this + //TODO: Lower severity when successful to keep the cache from overworking + record_cache_miss(*tex); + } + } + return response; } @@ -334,7 +355,7 @@ namespace rsx void lock_memory_region(image_storage_type* image, const u32 memory_address, const u32 memory_size, const u32 width, const u32 height, const u32 pitch, Args&&... extras) { writer_lock lock(m_cache_mutex); - section_storage_type& region = find_cached_texture(memory_address, memory_size, true, width, height, 1); + section_storage_type& region = find_cached_texture(memory_address, memory_size, false); if (!region.is_locked()) { @@ -389,7 +410,7 @@ namespace rsx address > no_access_range.second) return std::make_tuple(false, nullptr); - reader_lock lock(m_cache_mutex); + rsx::conditional_lock lock(in_access_violation_handler, m_cache_mutex); auto found = m_cache.find(get_block_address(address)); if (found != m_cache.end()) @@ -730,11 +751,11 @@ namespace rsx const u32 dst_address = (u32)((u64)dst.pixels - (u64)vm::base(0)); //Check if src/dst are parts of render targets - auto dst_subres = m_rtts.get_surface_subresource_if_applicable(dst.rsx_address, dst.width, dst.clip_height, dst.pitch, true, true, false); + auto dst_subres = m_rtts.get_surface_subresource_if_applicable(dst.rsx_address, dst.width, dst.clip_height, dst.pitch, true, true, false, dst.compressed_y); dst_is_render_target = dst_subres.surface != nullptr; //TODO: Handle cases where src or dst can be a depth texture while the other is a color texture - requires a render pass to emulate - auto src_subres = m_rtts.get_surface_subresource_if_applicable(src.rsx_address, src.width, src.height, src.pitch, true, true, false); + auto src_subres = m_rtts.get_surface_subresource_if_applicable(src.rsx_address, src.width, src.slice_h, src.pitch, true, true, false, src.compressed_y); src_is_render_target = src_subres.surface != nullptr; //Always use GPU blit if src or dst is in the surface store @@ -747,15 +768,37 @@ namespace rsx float scale_x = dst.scale_x; float scale_y = dst.scale_y; - size2i clip_dimensions = { dst.clip_width, dst.clip_height }; + //TODO: Investigate effects of compression in X axis + if (dst.compressed_y) + { + scale_y *= 0.5f; + } - //Dimensions passed are restricted to powers of 2; get real height from clip_height and width from pitch - size2i dst_dimensions = { dst.pitch / (dst_is_argb8 ? 4 : 2), dst.clip_height }; + if (src.compressed_y) + { + scale_y *= 2.f; + } + + //1024 height is a hack (for ~720p buffers) + //It is possible to have a large buffer that goes up to around 4kx4k but anything above 1280x720 is rare + //RSX only handles 512x512 tiles so texture 'stitching' will eventually be needed to be completely accurate + //Sections will be submitted as (512x512 + 512x512 + 256x512 + 512x208 + 512x208 + 256x208) to blit a 720p surface to the backbuffer for example + + int practical_height; + if (dst.max_tile_h < dst.height || !src_is_render_target) + practical_height = (s32)dst.height; + else + { + //Hack + practical_height = std::min((s32)dst.max_tile_h, 1024); + } + + size2i dst_dimensions = { dst.pitch / (dst_is_argb8 ? 4 : 2), practical_height }; //Offset in x and y for src is 0 (it is already accounted for when getting pixels_src) //Reproject final clip onto source... - const u16 src_w = (const u16)((f32)clip_dimensions.width / dst.scale_x); - const u16 src_h = (const u16)((f32)clip_dimensions.height / dst.scale_y); + const u16 src_w = (const u16)((f32)dst.clip_width / scale_x); + const u16 src_h = (const u16)((f32)dst.clip_height / scale_y); areai src_area = { 0, 0, src_w, src_h }; areai dst_area = { 0, 0, dst.clip_width, dst.clip_height }; @@ -794,7 +837,7 @@ namespace rsx enforce_surface_creation_type(*cached_dest, dst.swizzled ? rsx::texture_create_flags::swapped_native_component_order : rsx::texture_create_flags::native_component_order); const auto old_dst_area = dst_area; - if (const u32 address_offset = dst.rsx_address - cached_dest->get_section_base()) + if (const u32 address_offset = dst_address - cached_dest->get_section_base()) { const u16 bpp = dst_is_argb8 ? 4 : 2; const u16 offset_y = address_offset / dst.pitch; @@ -826,6 +869,7 @@ namespace rsx if (!cached_dest && is_memcpy) { lock.upgrade(); + flush_address_impl(src_address, std::forward(extras)...); invalidate_range_impl(dst_address, memcpy_bytes_length, true); memcpy(dst.pixels, src.pixels, memcpy_bytes_length); return true; @@ -853,6 +897,7 @@ namespace rsx if (rsx_pitch <= 64 && native_pitch != rsx_pitch) { lock.upgrade(); + flush_address_impl(src_address, std::forward(extras)...); invalidate_range_impl(dst_address, memcpy_bytes_length, true); memcpy(dst.pixels, src.pixels, memcpy_bytes_length); return true; @@ -892,13 +937,13 @@ namespace rsx } else { - if (src_subres.w != clip_dimensions.width || - src_subres.h != clip_dimensions.height) + if (src_subres.w != dst.clip_width || + src_subres.h != dst.clip_height) { f32 subres_scaling_x = (f32)src.pitch / src_subres.surface->get_native_pitch(); - const int dst_width = (int)(src_subres.w * dst.scale_x * subres_scaling_x); - const int dst_height = (int)(src_subres.h * dst.scale_y); + const int dst_width = (int)(src_subres.w * scale_x * subres_scaling_x); + const int dst_height = (int)(src_subres.h * scale_y); dst_area.x2 = dst_area.x1 + dst_width; dst_area.y2 = dst_area.y1 + dst_height; @@ -912,14 +957,6 @@ namespace rsx src_area.y1 += src_subres.y; src_area.y2 += src_subres.y; - if (src.compressed_y) - { - dst_area.y1 *= 2; - dst_area.y2 *= 2; - - dst_dimensions.height *= 2; - } - vram_texture = src_subres.surface->get_surface(); } @@ -959,8 +996,8 @@ namespace rsx //Reproject clip offsets onto source to simplify blit if (dst.clip_x || dst.clip_y) { - const u16 scaled_clip_offset_x = (const u16)((f32)dst.clip_x / dst.scale_x); - const u16 scaled_clip_offset_y = (const u16)((f32)dst.clip_y / dst.scale_y); + const u16 scaled_clip_offset_x = (const u16)((f32)dst.clip_x / scale_x); + const u16 scaled_clip_offset_y = (const u16)((f32)dst.clip_y / scale_y); src_area.x1 += scaled_clip_offset_x; src_area.x2 += scaled_clip_offset_x; @@ -978,7 +1015,7 @@ namespace rsx lock.upgrade(); - dest_texture = create_new_texture(cmd, dst.rsx_address, dst.pitch * dst.clip_height, + dest_texture = create_new_texture(cmd, dst.rsx_address, dst.pitch * dst_dimensions.height, dst_dimensions.width, dst_dimensions.height, 1, 1, gcm_format, rsx::texture_dimension_extended::texture_dimension_2d, dst.swizzled? rsx::texture_create_flags::swapped_native_component_order : rsx::texture_create_flags::native_component_order, diff --git a/rpcs3/Emu/RSX/GL/GLFragmentProgram.cpp b/rpcs3/Emu/RSX/GL/GLFragmentProgram.cpp index 4097d7893d..e0e6ec5708 100644 --- a/rpcs3/Emu/RSX/GL/GLFragmentProgram.cpp +++ b/rpcs3/Emu/RSX/GL/GLFragmentProgram.cpp @@ -30,7 +30,7 @@ std::string GLFragmentDecompilerThread::compareFunction(COMPARE f, const std::st void GLFragmentDecompilerThread::insertHeader(std::stringstream & OS) { - OS << "#version 420\n"; + OS << "#version 430\n"; } void GLFragmentDecompilerThread::insertIntputs(std::stringstream & OS) diff --git a/rpcs3/Emu/RSX/GL/GLTextureCache.h b/rpcs3/Emu/RSX/GL/GLTextureCache.h index efdc55a2b8..83735eb48d 100644 --- a/rpcs3/Emu/RSX/GL/GLTextureCache.h +++ b/rpcs3/Emu/RSX/GL/GLTextureCache.h @@ -355,7 +355,7 @@ namespace gl bool is_flushable() const { - return pbo_id != 0; + return (locked && pbo_id != 0); } bool is_flushed() const diff --git a/rpcs3/Emu/RSX/GL/GLVertexProgram.cpp b/rpcs3/Emu/RSX/GL/GLVertexProgram.cpp index ff6c69deda..b49fd4bf63 100644 --- a/rpcs3/Emu/RSX/GL/GLVertexProgram.cpp +++ b/rpcs3/Emu/RSX/GL/GLVertexProgram.cpp @@ -32,7 +32,6 @@ std::string GLVertexDecompilerThread::compareFunction(COMPARE f, const std::stri void GLVertexDecompilerThread::insertHeader(std::stringstream &OS) { OS << "#version 430\n"; - OS << "#extension GL_ARB_separate_program_objects: enable\n\n"; OS << "layout(std140, binding = 0) uniform VertexContextBuffer\n"; OS << "{\n"; OS << " mat4 scale_offset_mat;\n"; diff --git a/rpcs3/Emu/RSX/RSXThread.cpp b/rpcs3/Emu/RSX/RSXThread.cpp index 276146a3c8..b1fc064d04 100644 --- a/rpcs3/Emu/RSX/RSXThread.cpp +++ b/rpcs3/Emu/RSX/RSXThread.cpp @@ -399,7 +399,7 @@ namespace rsx auto flush_command_queue = [&]() { - const auto num_draws = method_registers.current_draw_clause.first_count_commands.size(); + const auto num_draws = (u32)method_registers.current_draw_clause.first_count_commands.size(); bool emit_begin = false; bool emit_end = true; @@ -411,7 +411,7 @@ namespace rsx u32 last = first_counts.front().first; u32 last_index = 0; - for (size_t draw = 0; draw < num_draws; draw++) + for (u32 draw = 0; draw < num_draws; draw++) { if (first_counts[draw].first != last) { diff --git a/rpcs3/Emu/RSX/VK/VKGSRender.cpp b/rpcs3/Emu/RSX/VK/VKGSRender.cpp index bf11cfd08b..545c4d5fb3 100644 --- a/rpcs3/Emu/RSX/VK/VKGSRender.cpp +++ b/rpcs3/Emu/RSX/VK/VKGSRender.cpp @@ -2013,32 +2013,14 @@ void VKGSRender::write_buffers() void VKGSRender::close_and_submit_command_buffer(const std::vector &semaphores, VkFence fence, VkPipelineStageFlags pipeline_stage_flags) { - CHECK_RESULT(vkEndCommandBuffer(*m_current_command_buffer)); - - VkCommandBuffer cmd = *m_current_command_buffer; - - VkSubmitInfo infos = {}; - infos.commandBufferCount = 1; - infos.pCommandBuffers = &cmd; - infos.pWaitDstStageMask = &pipeline_stage_flags; - infos.pWaitSemaphores = semaphores.data(); - infos.waitSemaphoreCount = static_cast(semaphores.size()); - infos.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO; - + m_current_command_buffer->end(); m_current_command_buffer->tag(); - CHECK_RESULT(vkQueueSubmit(m_swap_chain->get_present_queue(), 1, &infos, fence)); + m_current_command_buffer->submit(m_swap_chain->get_present_queue(), semaphores, fence, pipeline_stage_flags); } void VKGSRender::open_command_buffer() { - VkCommandBufferInheritanceInfo inheritance_info = {}; - inheritance_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_INHERITANCE_INFO; - - VkCommandBufferBeginInfo begin_infos = {}; - begin_infos.pInheritanceInfo = &inheritance_info; - begin_infos.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO; - begin_infos.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT; - CHECK_RESULT(vkBeginCommandBuffer(*m_current_command_buffer, &begin_infos)); + m_current_command_buffer->begin(); } @@ -2587,5 +2569,8 @@ bool VKGSRender::scaled_image_from_memory(rsx::blit_src_info& src, rsx::blit_dst { close_render_pass(); - return m_texture_cache.blit(src, dst, interpolate, m_rtts, *m_current_command_buffer); + auto result = m_texture_cache.blit(src, dst, interpolate, m_rtts, *m_current_command_buffer); + m_current_command_buffer->begin(); + + return result; } diff --git a/rpcs3/Emu/RSX/VK/VKHelpers.h b/rpcs3/Emu/RSX/VK/VKHelpers.h index 5c9ee4d27b..d5a810ec79 100644 --- a/rpcs3/Emu/RSX/VK/VKHelpers.h +++ b/rpcs3/Emu/RSX/VK/VKHelpers.h @@ -1040,6 +1040,9 @@ namespace vk class command_buffer { + private: + bool is_open = false; + protected: vk::command_pool *pool = nullptr; VkCommandBuffer commands = nullptr; @@ -1074,6 +1077,53 @@ namespace vk { return commands; } + + void begin() + { + if (is_open) + return; + + VkCommandBufferInheritanceInfo inheritance_info = {}; + inheritance_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_INHERITANCE_INFO; + + VkCommandBufferBeginInfo begin_infos = {}; + begin_infos.pInheritanceInfo = &inheritance_info; + begin_infos.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO; + begin_infos.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT; + CHECK_RESULT(vkBeginCommandBuffer(commands, &begin_infos)); + is_open = true; + } + + void end() + { + if (!is_open) + { + LOG_ERROR(RSX, "commandbuffer->end was called but commandbuffer is not in a recording state"); + return; + } + + CHECK_RESULT(vkEndCommandBuffer(commands)); + is_open = false; + } + + void submit(VkQueue queue, const std::vector &semaphores, VkFence fence, VkPipelineStageFlags pipeline_stage_flags) + { + if (is_open) + { + LOG_ERROR(RSX, "commandbuffer->submit was called whilst the command buffer is in a recording state"); + return; + } + + VkSubmitInfo infos = {}; + infos.commandBufferCount = 1; + infos.pCommandBuffers = &commands; + infos.pWaitDstStageMask = &pipeline_stage_flags; + infos.pWaitSemaphores = semaphores.data(); + infos.waitSemaphoreCount = static_cast(semaphores.size()); + infos.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO; + + CHECK_RESULT(vkQueueSubmit(queue, 1, &infos, fence)); + } }; class context diff --git a/rpcs3/Emu/RSX/VK/VKTextureCache.h b/rpcs3/Emu/RSX/VK/VKTextureCache.h index c7395d3f01..d7be128f8c 100644 --- a/rpcs3/Emu/RSX/VK/VKTextureCache.h +++ b/rpcs3/Emu/RSX/VK/VKTextureCache.h @@ -183,16 +183,7 @@ namespace vk if (manage_cb_lifetime) { - //cb has to be guaranteed to be in a closed state - //This function can be called asynchronously - VkCommandBufferInheritanceInfo inheritance_info = {}; - inheritance_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_INHERITANCE_INFO; - - VkCommandBufferBeginInfo begin_infos = {}; - begin_infos.pInheritanceInfo = &inheritance_info; - begin_infos.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO; - begin_infos.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT; - CHECK_RESULT(vkBeginCommandBuffer(cmd, &begin_infos)); + cmd.begin(); } VkBufferImageCopy copyRegion = {}; @@ -212,20 +203,8 @@ namespace vk if (manage_cb_lifetime) { - CHECK_RESULT(vkEndCommandBuffer(cmd)); - - VkPipelineStageFlags pipe_stage_flags = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT; - VkCommandBuffer command_buffer = cmd; - - VkSubmitInfo infos = {}; - infos.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO; - infos.commandBufferCount = 1; - infos.pCommandBuffers = &command_buffer; - infos.pWaitDstStageMask = &pipe_stage_flags; - infos.pWaitSemaphores = nullptr; - infos.waitSemaphoreCount = 0; - - CHECK_RESULT(vkQueueSubmit(submit_queue, 1, &infos, dma_fence)); + cmd.end(); + cmd.submit(submit_queue, {}, dma_fence, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT); //Now we need to restart the command-buffer to restore it to the way it was before... CHECK_RESULT(vkWaitForFences(*m_device, 1, &dma_fence, VK_TRUE, UINT64_MAX)); @@ -699,6 +678,25 @@ namespace vk VkImageAspectFlagBits aspect = VK_IMAGE_ASPECT_COLOR_BIT; if (is_depth) aspect = (VkImageAspectFlagBits)(src->info.format == VK_FORMAT_D16_UNORM ? VK_IMAGE_ASPECT_DEPTH_BIT : VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT); + //Checks + if (src_area.x2 <= src_area.x1 || src_area.y2 <= src_area.y1 || dst_area.x2 <= dst_area.x1 || dst_area.y2 <= dst_area.y1) + { + LOG_ERROR(RSX, "Blit request consists of an empty region descriptor!"); + return; + } + + if (src_area.x1 < 0 || src_area.x2 > (s32)src->width() || src_area.y1 < 0 || src_area.y2 > (s32)src->height()) + { + LOG_ERROR(RSX, "Blit request denied because the source region does not fit!"); + return; + } + + if (dst_area.x1 < 0 || dst_area.x2 > (s32)dst->width() || dst_area.y1 < 0 || dst_area.y2 > (s32)dst->height()) + { + LOG_ERROR(RSX, "Blit request denied because the destination region does not fit!"); + return; + } + copy_scaled_image(*commands, src->value, dst->value, src->current_layout, dst->current_layout, src_area.x1, src_area.y1, src_area.x2 - src_area.x1, src_area.y2 - src_area.y1, dst_area.x1, dst_area.y1, dst_area.x2 - dst_area.x1, dst_area.y2 - dst_area.y1, 1, aspect); diff --git a/rpcs3/Emu/RSX/rsx_cache.h b/rpcs3/Emu/RSX/rsx_cache.h index df14d41f10..e449c2d558 100644 --- a/rpcs3/Emu/RSX/rsx_cache.h +++ b/rpcs3/Emu/RSX/rsx_cache.h @@ -38,6 +38,7 @@ namespace rsx u16 clip_y; u16 clip_width; u16 clip_height; + u16 max_tile_h; f32 scale_x; f32 scale_y; diff --git a/rpcs3/Emu/RSX/rsx_methods.cpp b/rpcs3/Emu/RSX/rsx_methods.cpp index ba2a2888fa..3c45b7b1fe 100644 --- a/rpcs3/Emu/RSX/rsx_methods.cpp +++ b/rpcs3/Emu/RSX/rsx_methods.cpp @@ -661,6 +661,8 @@ namespace rsx dst_info.compressed_x = true; break; } + + dst_info.max_tile_h = static_cast((dst_region.tile->size - dst_region.base) / out_pitch); } if (dst_dma == CELL_GCM_CONTEXT_DMA_MEMORY_FRAME_BUFFER)