From e37a2a8f7d0e629da4835c005dc435b40e02bee7 Mon Sep 17 00:00:00 2001 From: kd-11 Date: Fri, 8 Sep 2017 17:52:13 +0300 Subject: [PATCH] rsx: Texture cache fixes and improvements gl/vk/rsx: Refactoring; unify texture cache code gl: Fixups - Removes rsx::gl::texture class and leave gl::texture intact - Simplify texture create and upload mechanisms - Re-enable texture uploads with the new texture cache mechanism rsx: texture cache - check if bit region fits into dst texture before attempting to copy gl/vk: Cleanup - Set initial texture layout to DST_OPTIMAL since it has no data in it anyway at the start - Move structs outside of classes to avoid clutter --- rpcs3/Emu/RSX/Common/TextureUtils.cpp | 2 +- rpcs3/Emu/RSX/Common/TextureUtils.h | 1 + rpcs3/Emu/RSX/Common/surface_store.h | 18 +- rpcs3/Emu/RSX/Common/texture_cache.h | 972 +++++++++++++++ rpcs3/Emu/RSX/GL/GLGSRender.cpp | 80 +- rpcs3/Emu/RSX/GL/GLGSRender.h | 594 ++++----- rpcs3/Emu/RSX/GL/GLHelpers.h | 5 + rpcs3/Emu/RSX/GL/GLRenderTargets.cpp | 37 +- rpcs3/Emu/RSX/GL/GLRenderTargets.h | 42 +- rpcs3/Emu/RSX/GL/GLTexture.cpp | 584 ++++----- rpcs3/Emu/RSX/GL/GLTexture.h | 45 +- rpcs3/Emu/RSX/GL/GLTextureCache.cpp | 63 - rpcs3/Emu/RSX/GL/GLTextureCache.h | 1648 ++++++++----------------- rpcs3/Emu/RSX/RSXTexture.cpp | 9 + rpcs3/Emu/RSX/RSXTexture.h | 2 + rpcs3/Emu/RSX/VK/VKGSRender.cpp | 38 +- rpcs3/Emu/RSX/VK/VKHelpers.h | 2 +- rpcs3/Emu/RSX/VK/VKRenderTargets.h | 27 +- rpcs3/Emu/RSX/VK/VKTexture.cpp | 4 +- rpcs3/Emu/RSX/VK/VKTextureCache.h | 1291 ++++--------------- rpcs3/Emu/RSX/rsx_cache.h | 6 + rpcs3/Emu/RSX/rsx_methods.cpp | 31 +- rpcs3/GLGSRender.vcxproj | 6 +- rpcs3/GLGSRender.vcxproj.filters | 1 - rpcs3/emucore.vcxproj | 7 +- rpcs3/emucore.vcxproj.filters | 6 + 26 files changed, 2438 insertions(+), 3083 deletions(-) create mode 100644 rpcs3/Emu/RSX/Common/texture_cache.h delete mode 100644 rpcs3/Emu/RSX/GL/GLTextureCache.cpp diff --git a/rpcs3/Emu/RSX/Common/TextureUtils.cpp 
b/rpcs3/Emu/RSX/Common/TextureUtils.cpp index d7dddfd1cb..3301cb45bf 100644 --- a/rpcs3/Emu/RSX/Common/TextureUtils.cpp +++ b/rpcs3/Emu/RSX/Common/TextureUtils.cpp @@ -393,7 +393,7 @@ u8 get_format_block_size_in_bytes(rsx::surface_color_format format) } } -static size_t get_placed_texture_storage_size(u16 width, u16 height, u32 depth, u8 format, u16 mipmap, bool cubemap, size_t row_pitch_alignement, size_t mipmap_alignment) +size_t get_placed_texture_storage_size(u16 width, u16 height, u32 depth, u8 format, u16 mipmap, bool cubemap, size_t row_pitch_alignement, size_t mipmap_alignment) { size_t w = width; size_t h = std::max(height, 1); diff --git a/rpcs3/Emu/RSX/Common/TextureUtils.h b/rpcs3/Emu/RSX/Common/TextureUtils.h index 5de0dbc2f9..ca44dd7afb 100644 --- a/rpcs3/Emu/RSX/Common/TextureUtils.h +++ b/rpcs3/Emu/RSX/Common/TextureUtils.h @@ -18,6 +18,7 @@ struct rsx_subresource_layout * Get size to store texture in a linear fashion. * Storage is assumed to use a rowPitchAlignement boundary for every row of texture. 
*/ +size_t get_placed_texture_storage_size(u16 width, u16 height, u32 depth, u8 format, u16 mipmap, bool cubemap, size_t row_pitch_alignement, size_t mipmap_alignment); size_t get_placed_texture_storage_size(const rsx::fragment_texture &texture, size_t row_pitch_alignement, size_t mipmap_alignment = 0x200); size_t get_placed_texture_storage_size(const rsx::vertex_texture &texture, size_t row_pitch_alignement, size_t mipmap_alignment = 0x200); diff --git a/rpcs3/Emu/RSX/Common/surface_store.h b/rpcs3/Emu/RSX/Common/surface_store.h index 42397e6037..0d400cc39d 100644 --- a/rpcs3/Emu/RSX/Common/surface_store.h +++ b/rpcs3/Emu/RSX/Common/surface_store.h @@ -43,6 +43,16 @@ namespace rsx u8 bpp; }; + template + struct render_target_descriptor + { + virtual image_storage_type get_surface() const = 0; + virtual u16 get_surface_width() const = 0; + virtual u16 get_surface_height() const = 0; + virtual u16 get_rsx_pitch() const = 0; + virtual u16 get_native_pitch() const = 0; + }; + /** * Helper for surface (ie color and depth stencil render target) management. * It handles surface creation and storage. Backend should only retrieve pointer to surface. 
@@ -614,7 +624,7 @@ namespace rsx if (region_fits(info.surface_width, info.surface_height, x_offset, y_offset, real_width, requested_height)) { - w = info.surface_width; + w = real_width; h = info.surface_height; clipped = false; @@ -627,8 +637,8 @@ namespace rsx u16 remaining_width = info.surface_width - x_offset; u16 remaining_height = info.surface_height - y_offset; - w = remaining_width; - h = remaining_height; + w = std::min(real_width, remaining_width); + h = std::min(requested_height, remaining_height); clipped = true; return true; @@ -637,7 +647,7 @@ namespace rsx if (info.surface_width >= requested_width && info.surface_height >= requested_height) { LOG_WARNING(RSX, "Overlapping surface exceeds bounds; returning full surface region"); - w = requested_width; + w = real_width; h = requested_height; clipped = true; diff --git a/rpcs3/Emu/RSX/Common/texture_cache.h b/rpcs3/Emu/RSX/Common/texture_cache.h new file mode 100644 index 0000000000..97878ad4b6 --- /dev/null +++ b/rpcs3/Emu/RSX/Common/texture_cache.h @@ -0,0 +1,972 @@ +#pragma once + +#include "../rsx_cache.h" +#include "../rsx_utils.h" + +#include + +namespace rsx +{ + enum texture_create_flags + { + default_component_order = 0, + native_component_order = 1, + swapped_native_component_order = 2, + }; + + template + class texture_cache + { + private: + std::pair, std::array> default_remap_vector = + { + { CELL_GCM_TEXTURE_REMAP_FROM_A, CELL_GCM_TEXTURE_REMAP_FROM_R, CELL_GCM_TEXTURE_REMAP_FROM_G, CELL_GCM_TEXTURE_REMAP_FROM_B }, + { CELL_GCM_TEXTURE_REMAP_REMAP, CELL_GCM_TEXTURE_REMAP_REMAP, CELL_GCM_TEXTURE_REMAP_REMAP, CELL_GCM_TEXTURE_REMAP_REMAP } + }; + + protected: + + struct ranged_storage + { + std::vector data; //Stored data + std::atomic_int valid_count = { 0 }; //Number of usable (non-dirty) blocks + u32 max_range = 0; //Largest stored block + + void notify(u32 data_size) + { + max_range = std::max(data_size, max_range); + valid_count++; + } + + void add(section_storage_type& section, u32 
data_size) + { + max_range = std::max(data_size, max_range); + valid_count++; + + data.push_back(std::move(section)); + } + }; + + // Keep track of cache misses to pre-emptively flush some addresses + struct framebuffer_memory_characteristics + { + u32 misses; + u32 block_size; + texture_format format; + }; + + std::atomic_bool in_access_violation_handler = { false }; + shared_mutex m_cache_mutex; + std::unordered_map m_cache; + + std::pair read_only_range = std::make_pair(0xFFFFFFFF, 0); + std::pair no_access_range = std::make_pair(0xFFFFFFFF, 0); + + std::unordered_map m_cache_miss_statistics_table; + + //Memory usage + const s32 m_max_zombie_objects = 32; //Limit on how many texture objects to keep around for reuse after they are invalidated + s32 m_unreleased_texture_objects = 0; //Number of invalidated objects not yet freed from memory + + /* Helpers */ + virtual void free_texture_section(section_storage_type&) = 0; + virtual image_view_type create_temporary_subresource_view(commandbuffer_type&, image_resource_type* src, u32 gcm_format, u16 x, u16 y, u16 w, u16 h) = 0; + virtual image_view_type create_temporary_subresource_view(commandbuffer_type&, image_storage_type* src, u32 gcm_format, u16 x, u16 y, u16 w, u16 h) = 0; + virtual section_storage_type* create_new_texture(commandbuffer_type&, u32 rsx_address, u32 rsx_size, u16 width, u16 height, u16 depth, u16 mipmaps, const u32 gcm_format, + const rsx::texture_dimension_extended type, const texture_create_flags flags, std::pair, std::array>& remap_vector) = 0; + virtual section_storage_type* upload_image_from_cpu(commandbuffer_type&, u32 rsx_address, u16 width, u16 height, u16 depth, u16 mipmaps, u16 pitch, const u32 gcm_format, + std::vector& subresource_layout, const rsx::texture_dimension_extended type, const bool swizzled, std::pair, std::array>& remap_vector) = 0; + virtual void enforce_surface_creation_type(section_storage_type& section, const texture_create_flags expected) = 0; + virtual void 
insert_texture_barrier() = 0; + + public: + + texture_cache() {} + ~texture_cache() {} + + virtual void destroy() = 0; + virtual bool is_depth_texture(const u32) = 0; + virtual void on_frame_end() = 0; + + section_storage_type *find_texture_from_range(u32 rsx_address, u32 range) + { + auto test = std::make_pair(rsx_address, range); + for (auto &address_range : m_cache) + { + auto &range_data = address_range.second; + for (auto &tex : range_data.data) + { + if (tex.get_section_base() > rsx_address) + continue; + + if (!tex.is_dirty() && tex.overlaps(test, true)) + return &tex; + } + } + + return nullptr; + } + + section_storage_type *find_texture_from_dimensions(u32 rsx_address, u16 width = 0, u16 height = 0, u16 mipmaps = 0) + { + auto found = m_cache.find(rsx_address); + if (found != m_cache.end()) + { + auto &range_data = found->second; + for (auto &tex : range_data.data) + { + if (tex.matches(rsx_address, width, height, mipmaps) && !tex.is_dirty()) + { + return &tex; + } + } + } + + return nullptr; + } + + section_storage_type& find_cached_texture(u32 rsx_address, u32 rsx_size, bool confirm_dimensions = false, u16 width = 0, u16 height = 0, u16 mipmaps = 0) + { + { + reader_lock lock(m_cache_mutex); + + auto found = m_cache.find(rsx_address); + if (found != m_cache.end()) + { + auto &range_data = found->second; + + for (auto &tex : range_data.data) + { + if (tex.matches(rsx_address, rsx_size) && !tex.is_dirty()) + { + if (!confirm_dimensions) return tex; + + if (tex.matches(rsx_address, width, height, mipmaps)) + return tex; + else + { + LOG_ERROR(RSX, "Cached object for address 0x%X was found, but it does not match stored parameters.", rsx_address); + LOG_ERROR(RSX, "%d x %d vs %d x %d", width, height, tex.get_width(), tex.get_height()); + } + } + } + + for (auto &tex : range_data.data) + { + if (tex.is_dirty()) + { + if (tex.exists()) + { + m_unreleased_texture_objects--; + free_texture_section(tex); + } + + range_data.notify(rsx_size); + return tex; + } + } + 
} + } + + writer_lock lock(m_cache_mutex); + + section_storage_type tmp; + m_cache[rsx_address].add(tmp, rsx_size); + return m_cache[rsx_address].data.back(); + } + + section_storage_type* find_flushable_section(const u32 address, const u32 range) + { + reader_lock lock(m_cache_mutex); + + auto found = m_cache.find(address); + if (found != m_cache.end()) + { + auto &range_data = found->second; + for (auto &tex : range_data.data) + { + if (tex.is_dirty()) continue; + if (!tex.is_flushable() && !tex.is_flushed()) continue; + + if (tex.matches(address, range)) + return &tex; + } + } + + return nullptr; + } + + template + void lock_memory_region(image_storage_type* image, const u32 memory_address, const u32 memory_size, const u32 width, const u32 height, const u32 pitch, Args&&... extras) + { + section_storage_type& region = find_cached_texture(memory_address, memory_size, true, width, height, 1); + + writer_lock lock(m_cache_mutex); + + if (!region.is_locked()) + { + region.reset(memory_address, memory_size); + region.set_dirty(false); + no_access_range = region.get_min_max(no_access_range); + } + + region.protect(utils::protection::no); + region.create(width, height, 1, 1, nullptr, image, pitch, false, std::forward(extras)...); + } + + template + bool flush_memory_to_cache(const u32 memory_address, const u32 memory_size, bool skip_synchronized, Args&&... extra) + { + section_storage_type* region = find_flushable_section(memory_address, memory_size); + + //TODO: Make this an assertion + if (region == nullptr) + { + LOG_ERROR(RSX, "Failed to find section for render target 0x%X + 0x%X", memory_address, memory_size); + return false; + } + + if (skip_synchronized && region->is_synchronized()) + return false; + + region->copy_texture(false, std::forward(extra)...); + return true; + } + + template + bool load_memory_from_cache(const u32 memory_address, const u32 memory_size, Args&&... 
extras) + { + section_storage_type *region = find_flushable_section(memory_address, memory_size); + + if (region && !region->is_dirty()) + { + region->fill_texture(std::forward(extras)...); + return true; + } + + //No valid object found in cache + return false; + } + + std::tuple address_is_flushable(u32 address) + { + if (address < no_access_range.first || + address > no_access_range.second) + return std::make_tuple(false, nullptr); + + reader_lock lock(m_cache_mutex); + + auto found = m_cache.find(address); + if (found != m_cache.end()) + { + auto &range_data = found->second; + for (auto &tex : range_data.data) + { + if (tex.is_dirty()) continue; + if (!tex.is_flushable()) continue; + + if (tex.overlaps(address)) + return std::make_tuple(true, &tex); + } + } + + for (auto &address_range : m_cache) + { + if (address_range.first == address) + continue; + + auto &range_data = address_range.second; + + //Quickly discard range + const u32 lock_base = address_range.first & ~0xfff; + const u32 lock_limit = align(range_data.max_range + address_range.first, 4096); + + if (address < lock_base || address >= lock_limit) + continue; + + for (auto &tex : range_data.data) + { + if (tex.is_dirty()) continue; + if (!tex.is_flushable()) continue; + + if (tex.overlaps(address)) + return std::make_tuple(true, &tex); + } + } + + return std::make_tuple(false, nullptr); + } + + template + bool flush_address(u32 address, Args&&... 
extras) + { + if (address < no_access_range.first || + address > no_access_range.second) + return false; + + bool response = false; + std::pair trampled_range = std::make_pair(0xffffffff, 0x0); + std::unordered_map processed_ranges; + + rsx::conditional_lock lock(in_access_violation_handler, m_cache_mutex); + + for (auto It = m_cache.begin(); It != m_cache.end(); It++) + { + auto &range_data = It->second; + const u32 base = It->first; + bool range_reset = false; + + if (processed_ranges[base] || range_data.valid_count == 0) + continue; + + //Quickly discard range + const u32 lock_base = base & ~0xfff; + const u32 lock_limit = align(range_data.max_range + base, 4096); + + if ((trampled_range.first >= lock_limit || lock_base >= trampled_range.second) && + (lock_base > address || lock_limit <= address)) + { + processed_ranges[base] = true; + continue; + } + + for (int i = 0; i < range_data.data.size(); i++) + { + auto &tex = range_data.data[i]; + + if (tex.is_dirty()) continue; + if (!tex.is_flushable()) continue; + + auto overlapped = tex.overlaps_page(trampled_range, address); + if (std::get<0>(overlapped)) + { + auto &new_range = std::get<1>(overlapped); + + if (new_range.first != trampled_range.first || + new_range.second != trampled_range.second) + { + i = 0; + trampled_range = new_range; + range_reset = true; + } + + //TODO: Map basic host_visible memory without coherent constraint + if (!tex.flush(std::forward(extras)...)) + { + //Missed address, note this + //TODO: Lower severity when successful to keep the cache from overworking + record_cache_miss(tex); + } + + response = true; + } + } + + if (range_reset) + { + processed_ranges.clear(); + It = m_cache.begin(); + } + + processed_ranges[base] = true; + } + + return response; + } + + bool invalidate_address(u32 address) + { + return invalidate_range(address, 4096 - (address & 4095)); + } + + bool invalidate_range(u32 address, u32 range, bool unprotect = true) + { + std::pair trampled_range = 
std::make_pair(address, address + range); + + if (trampled_range.second < read_only_range.first || + trampled_range.first > read_only_range.second) + { + //Doesnt fall in the read_only textures range; check render targets + if (trampled_range.second < no_access_range.first || + trampled_range.first > no_access_range.second) + return false; + } + + bool response = false; + std::unordered_map processed_ranges; + + rsx::conditional_lock lock(in_access_violation_handler, m_cache_mutex); + + for (auto It = m_cache.begin(); It != m_cache.end(); It++) + { + auto &range_data = It->second; + const u32 base = It->first; + bool range_reset = false; + + if (processed_ranges[base] || range_data.valid_count == 0) + continue; + + //Quickly discard range + const u32 lock_base = base & ~0xfff; + const u32 lock_limit = align(range_data.max_range + base, 4096); + + if (trampled_range.first >= lock_limit || lock_base >= trampled_range.second) + { + processed_ranges[base] = true; + continue; + } + + for (int i = 0; i < range_data.data.size(); i++) + { + auto &tex = range_data.data[i]; + + if (tex.is_dirty()) continue; + if (!tex.is_locked()) continue; //flushable sections can be 'clean' but unlocked. 
TODO: Handle this better + + auto overlapped = tex.overlaps_page(trampled_range, address); + if (std::get<0>(overlapped)) + { + auto &new_range = std::get<1>(overlapped); + + if (new_range.first != trampled_range.first || + new_range.second != trampled_range.second) + { + i = 0; + trampled_range = new_range; + range_reset = true; + } + + if (unprotect) + { + tex.set_dirty(true); + tex.unprotect(); + } + else + { + tex.discard(); + } + + m_unreleased_texture_objects++; + range_data.valid_count--; + response = true; + } + } + + if (range_reset) + { + processed_ranges.clear(); + It = m_cache.begin(); + } + + processed_ranges[base] = true; + } + + return response; + } + + void record_cache_miss(section_storage_type &tex) + { + const u32 memory_address = tex.get_section_base(); + const u32 memory_size = tex.get_section_size(); + const auto fmt = tex.get_format(); + + auto It = m_cache_miss_statistics_table.find(memory_address); + if (It == m_cache_miss_statistics_table.end()) + { + m_cache_miss_statistics_table[memory_address] = { 1, memory_size, fmt }; + return; + } + + auto &value = It->second; + if (value.format != fmt || value.block_size != memory_size) + { + m_cache_miss_statistics_table[memory_address] = { 1, memory_size, fmt }; + return; + } + + value.misses++; + } + + template + void flush_if_cache_miss_likely(const texture_format fmt, const u32 memory_address, const u32 memory_size, Args&&... 
extras) + { + auto It = m_cache_miss_statistics_table.find(memory_address); + if (It == m_cache_miss_statistics_table.end()) + { + m_cache_miss_statistics_table[memory_address] = { 0, memory_size, fmt }; + return; + } + + auto &value = It->second; + + if (value.format != fmt || value.block_size != memory_size) + { + //Reset since the data has changed + //TODO: Keep track of all this information together + m_cache_miss_statistics_table[memory_address] = { 0, memory_size, fmt }; + return; + } + + //Properly synchronized - no miss + if (!value.misses) return; + + //Auto flush if this address keeps missing (not properly synchronized) + if (value.misses > 16) + { + //TODO: Determine better way of setting threshold + if (!flush_memory_to_cache(memory_address, memory_size, true, std::forward(extras)...)) + value.misses--; + } + } + + void purge_dirty() + { + //Reclaims all graphics memory consumed by dirty textures + std::vector empty_addresses; + empty_addresses.resize(32); + + for (auto &address_range : m_cache) + { + auto &range_data = address_range.second; + + if (range_data.valid_count == 0) + empty_addresses.push_back(address_range.first); + + for (auto &tex : range_data.data) + { + if (!tex.is_dirty()) + continue; + + free_texture_section(tex); + } + } + + //Free descriptor objects as well + for (const auto &address : empty_addresses) + { + m_cache.erase(address); + } + + m_unreleased_texture_objects = 0; + } + + template + image_view_type upload_texture(commandbuffer_type& cmd, RsxTextureType& tex, surface_store_type& m_rtts) + { + const u32 texaddr = rsx::get_address(tex.offset(), tex.location()); + const u32 range = (u32)get_texture_size(tex); + + const u32 format = tex.format() & ~(CELL_GCM_TEXTURE_LN | CELL_GCM_TEXTURE_UN); + const u32 tex_width = tex.width(); + const u32 tex_height = tex.height(); + const u32 native_pitch = (tex_width * get_format_block_size_in_bytes(format)); + const u32 tex_pitch = (tex.pitch() == 0) ? 
native_pitch : tex.pitch(); + + if (!texaddr || !range) + { + LOG_ERROR(RSX, "Texture upload requested but texture not found, (address=0x%X, size=0x%X)", texaddr, range); + return 0; + } + + //Check for sampleable rtts from previous render passes + if (auto texptr = m_rtts.get_texture_from_render_target_if_applicable(texaddr)) + { + for (const auto& tex : m_rtts.m_bound_render_targets) + { + if (std::get<0>(tex) == texaddr) + { + if (g_cfg.video.strict_rendering_mode) + { + LOG_WARNING(RSX, "Attempting to sample a currently bound render target @ 0x%x", texaddr); + return create_temporary_subresource_view(cmd, texptr, format, 0, 0, texptr->width(), texptr->height()); + } + else + { + //issue a texture barrier to ensure previous writes are visible + insert_texture_barrier(); + break; + } + } + } + + return texptr->get_view(); + } + + if (auto texptr = m_rtts.get_texture_from_depth_stencil_if_applicable(texaddr)) + { + if (texaddr == std::get<0>(m_rtts.m_bound_depth_stencil)) + { + if (g_cfg.video.strict_rendering_mode) + { + LOG_WARNING(RSX, "Attempting to sample a currently bound depth surface @ 0x%x", texaddr); + return create_temporary_subresource_view(cmd, texptr, format, 0, 0, texptr->width(), texptr->height()); + } + else + { + //issue a texture barrier to ensure previous writes are visible + insert_texture_barrier(); + } + } + + return texptr->get_view(); + } + + /* Check if we are re-sampling a subresource of an RTV/DSV texture, bound or otherwise + * (Turbo: Super Stunt Squad does this; bypassing the need for a sync object) + * The engine does not read back the texture resource through cell, but specifies a texture location that is + * a bound render target. 
We can bypass the expensive download in this case + */ + + const f32 internal_scale = (f32)tex_pitch / native_pitch; + const u32 internal_width = (const u32)(tex_width * internal_scale); + + const auto rsc = m_rtts.get_surface_subresource_if_applicable(texaddr, internal_width, tex_height, tex_pitch, true); + if (rsc.surface) + { + //TODO: Check that this region is not cpu-dirty before doing a copy + if (tex.get_extended_texture_dimension() != rsx::texture_dimension_extended::texture_dimension_2d) + { + LOG_ERROR(RSX, "Sampling of RTT region as non-2D texture! addr=0x%x, Type=%d, dims=%dx%d", + texaddr, (u8)tex.get_extended_texture_dimension(), tex.width(), tex.height()); + } + else + { + image_view_type bound_surface = 0; + + if (format == CELL_GCM_TEXTURE_COMPRESSED_DXT1 || format == CELL_GCM_TEXTURE_COMPRESSED_DXT23 || format == CELL_GCM_TEXTURE_COMPRESSED_DXT45) + { + LOG_WARNING(RSX, "Performing an RTT blit but request is for a compressed texture"); + } + + if (!rsc.is_bound || !g_cfg.video.strict_rendering_mode) + { + if (rsc.w == tex_width && rsc.h == tex_height) + { + if (rsc.is_bound) + { + LOG_WARNING(RSX, "Sampling from a currently bound render target @ 0x%x", texaddr); + insert_texture_barrier(); + } + + return rsc.surface->get_view(); + } + else + bound_surface = create_temporary_subresource_view(cmd, rsc.surface, format, rsc.x, rsc.y, rsc.w, rsc.h); + } + else + { + LOG_WARNING(RSX, "Attempting to sample a currently bound render target @ 0x%x", texaddr); + bound_surface = create_temporary_subresource_view(cmd, rsc.surface, format, rsc.x, rsc.y, rsc.w, rsc.h); + } + + if (bound_surface) + return bound_surface; + } + } + + //If all the above failed, then its probably a generic texture. 
+ //Search in cache and upload/bind + + auto cached_texture = find_texture_from_dimensions(texaddr, tex_width, tex_height); + if (cached_texture) + { + return cached_texture->get_raw_view(); + } + + //Do direct upload from CPU as the last resort + const auto extended_dimension = tex.get_extended_texture_dimension(); + u16 height = 0; + u16 depth = 0; + + switch (extended_dimension) + { + case rsx::texture_dimension_extended::texture_dimension_1d: + height = 1; + depth = 1; + break; + case rsx::texture_dimension_extended::texture_dimension_2d: + height = tex_height; + depth = 1; + break; + case rsx::texture_dimension_extended::texture_dimension_cubemap: + height = tex_height; + depth = 1; + break; + case rsx::texture_dimension_extended::texture_dimension_3d: + height = tex_height; + depth = tex.depth(); + break; + } + + const bool is_swizzled = !(tex.format() & CELL_GCM_TEXTURE_LN); + auto subresources_layout = get_subresources_layout(tex); + auto remap_vector = tex.decoded_remap(); + + return upload_image_from_cpu(cmd, texaddr, tex_width, height, depth, tex.get_exact_mipmap_count(), tex_pitch, format, + subresources_layout, extended_dimension, is_swizzled, remap_vector)->get_raw_view(); + } + + template + bool upload_scaled_image(rsx::blit_src_info& src, rsx::blit_dst_info& dst, bool interpolate, commandbuffer_type& cmd, surface_store_type& m_rtts, blitter_type& blitter, Args&&... 
extras) + { + //Since we will have dst in vram, we can 'safely' ignore the swizzle flag + //TODO: Verify correct behavior + bool is_depth_blit = false; + bool src_is_render_target = false; + bool dst_is_render_target = false; + bool dst_is_argb8 = (dst.format == rsx::blit_engine::transfer_destination_format::a8r8g8b8); + bool src_is_argb8 = (src.format == rsx::blit_engine::transfer_source_format::a8r8g8b8); + + image_resource_type vram_texture = 0; + image_resource_type dest_texture = 0; + + const u32 src_address = (u32)((u64)src.pixels - (u64)vm::base(0)); + const u32 dst_address = (u32)((u64)dst.pixels - (u64)vm::base(0)); + + //Check if src/dst are parts of render targets + auto dst_subres = m_rtts.get_surface_subresource_if_applicable(dst.rsx_address, dst.width, dst.clip_height, dst.pitch, true, true, false); + dst_is_render_target = dst_subres.surface != nullptr; + + //TODO: Handle cases where src or dst can be a depth texture while the other is a color texture - requires a render pass to emulate + auto src_subres = m_rtts.get_surface_subresource_if_applicable(src.rsx_address, src.width, src.height, src.pitch, true, true, false); + src_is_render_target = src_subres.surface != nullptr; + + //Always use GPU blit if src or dst is in the surface store + if (!g_cfg.video.use_gpu_texture_scaling && !(src_is_render_target || dst_is_render_target)) + return false; + + u16 max_dst_width = dst.width; + u16 max_dst_height = dst.height; + + float scale_x = dst.scale_x; + float scale_y = dst.scale_y; + + size2i clip_dimensions = { dst.clip_width, dst.clip_height }; + + //Dimensions passed are restricted to powers of 2; get real height from clip_height and width from pitch + size2i dst_dimensions = { dst.pitch / (dst_is_argb8 ? 4 : 2), dst.clip_height }; + + //Offset in x and y for src is 0 (it is already accounted for when getting pixels_src) + //Reproject final clip onto source... 
+ const u16 src_w = (const u16)((f32)clip_dimensions.width / dst.scale_x); + const u16 src_h = (const u16)((f32)clip_dimensions.height / dst.scale_y); + + areai src_area = { 0, 0, src_w, src_h }; + areai dst_area = { 0, 0, dst.clip_width, dst.clip_height }; + + //Check if trivial memcpy can perform the same task + //Used to copy programs to the GPU in some cases + bool is_memcpy = false; + u32 memcpy_bytes_length = 0; + + if (dst_is_argb8 == src_is_argb8 && !dst.swizzled) + { + if ((src.slice_h == 1 && dst.clip_height == 1) || + (dst.clip_width == src.width && dst.clip_height == src.slice_h && src.pitch == dst.pitch)) + { + const u8 bpp = dst_is_argb8 ? 4 : 2; + is_memcpy = true; + memcpy_bytes_length = dst.clip_width * bpp * dst.clip_height; + } + } + + section_storage_type* cached_dest = nullptr; + if (!dst_is_render_target) + { + //First check if this surface exists in VRAM with exact dimensions + //Since scaled GPU resources are not invalidated by the CPU, we need to reuse older surfaces if possible + cached_dest = find_texture_from_dimensions(dst.rsx_address, dst_dimensions.width, dst_dimensions.height); + + //Check for any available region that will fit this one + if (!cached_dest) cached_dest = find_texture_from_range(dst_address, dst.pitch * dst.clip_height); + + if (cached_dest) + { + //Prep surface + enforce_surface_creation_type(*cached_dest, dst.swizzled ? rsx::texture_create_flags::swapped_native_component_order : rsx::texture_create_flags::native_component_order); + + //TODO: Move this code into utils since it is used alot + if (const u32 address_offset = dst.rsx_address - cached_dest->get_section_base()) + { + const u16 bpp = dst_is_argb8 ? 
4 : 2; + const u16 offset_y = address_offset / dst.pitch; + const u16 offset_x = address_offset % dst.pitch; + const u16 offset_x_in_block = offset_x / bpp; + + dst_area.x1 += offset_x_in_block; + dst_area.x2 += offset_x_in_block; + dst_area.y1 += offset_y; + dst_area.y2 += offset_y; + } + + //Validate clipping region + if ((unsigned)dst_area.x2 <= cached_dest->get_width() && + (unsigned)dst_area.y2 <= cached_dest->get_height()) + { + dest_texture = cached_dest->get_raw_texture(); + + max_dst_width = cached_dest->get_width(); + max_dst_height = cached_dest->get_height(); + } + else + cached_dest = nullptr; + } + + if (!cached_dest && is_memcpy) + { + memcpy(dst.pixels, src.pixels, memcpy_bytes_length); + return true; + } + } + else + { + dst_area.x1 += dst_subres.x; + dst_area.x2 += dst_subres.x; + dst_area.y1 += dst_subres.y; + dst_area.y2 += dst_subres.y; + + dest_texture = dst_subres.surface->get_surface(); + + max_dst_width = dst_subres.surface->get_surface_width(); + max_dst_height = dst_subres.surface->get_surface_height(); + + if (is_memcpy) + { + //Some render target descriptions are actually invalid + //Confirm this is a flushable RTT + const auto rsx_pitch = dst_subres.surface->get_rsx_pitch(); + const auto native_pitch = dst_subres.surface->get_native_pitch(); + + if (rsx_pitch <= 64 && native_pitch != rsx_pitch) + { + memcpy(dst.pixels, src.pixels, memcpy_bytes_length); + return true; + } + } + } + + //Create source texture if does not exist + if (!src_is_render_target) + { + auto preloaded_texture = find_texture_from_dimensions(src_address, src.width, src.slice_h); + + if (preloaded_texture != nullptr) + { + vram_texture = preloaded_texture->get_raw_texture(); + } + else + { + flush_address(src.rsx_address, std::forward(extras)...); + + const u16 pitch_in_block = src_is_argb8 ? 
src.pitch >> 2 : src.pitch >> 1; + std::vector subresource_layout; + rsx_subresource_layout subres = {}; + subres.width_in_block = src.width; + subres.height_in_block = src.slice_h; + subres.pitch_in_bytes = pitch_in_block; + subres.depth = 1; + subres.data = { (const gsl::byte*)src.pixels, src.pitch * src.slice_h }; + subresource_layout.push_back(subres); + + const u32 gcm_format = src_is_argb8 ? CELL_GCM_TEXTURE_A8R8G8B8 : CELL_GCM_TEXTURE_R5G6B5; + vram_texture = upload_image_from_cpu(cmd, src_address, src.width, src.slice_h, 1, 1, src.pitch, gcm_format, + subresource_layout, rsx::texture_dimension_extended::texture_dimension_2d, dst.swizzled, default_remap_vector)->get_raw_texture(); + } + } + else + { + if (src_subres.w != clip_dimensions.width || + src_subres.h != clip_dimensions.height) + { + f32 subres_scaling_x = (f32)src.pitch / src_subres.surface->get_native_pitch(); + + const int dst_width = (int)(src_subres.w * dst.scale_x * subres_scaling_x); + const int dst_height = (int)(src_subres.h * dst.scale_y); + + dst_area.x2 = dst_area.x1 + dst_width; + dst_area.y2 = dst_area.y1 + dst_height; + } + + src_area.x2 = src_subres.w; + src_area.y2 = src_subres.h; + + src_area.x1 += src_subres.x; + src_area.x2 += src_subres.x; + src_area.y1 += src_subres.y; + src_area.y2 += src_subres.y; + + if (src.compressed_y) + { + dst_area.y1 *= 2; + dst_area.y2 *= 2; + + dst_dimensions.height *= 2; + } + + vram_texture = src_subres.surface->get_surface(); + } + + bool format_mismatch = false; + + if (src_subres.is_depth_surface) + { + if (dest_texture) + { + if (dst_is_render_target && !dst_subres.is_depth_surface) + { + LOG_ERROR(RSX, "Depth->RGBA blit requested but not supported"); + return true; + } + + if (!cached_dest->has_compatible_format(src_subres.surface)) + format_mismatch = true; + } + + is_depth_blit = true; + } + + //TODO: Check for other types of format mismatch + if (format_mismatch) + { + invalidate_range(cached_dest->get_section_base(), 
cached_dest->get_section_size()); + + dest_texture = 0; + cached_dest = nullptr; + } + + //Validate clipping region + if ((dst.offset_x + dst.clip_x + dst.clip_width) > max_dst_width) dst.clip_x = 0; + if ((dst.offset_y + dst.clip_y + dst.clip_height) > max_dst_height) dst.clip_y = 0; + + //Reproject clip offsets onto source to simplify blit + if (dst.clip_x || dst.clip_y) + { + const u16 scaled_clip_offset_x = (const u16)((f32)dst.clip_x / dst.scale_x); + const u16 scaled_clip_offset_y = (const u16)((f32)dst.clip_y / dst.scale_y); + + src_area.x1 += scaled_clip_offset_x; + src_area.x2 += scaled_clip_offset_x; + src_area.y1 += scaled_clip_offset_y; + src_area.y2 += scaled_clip_offset_y; + } + + if (dest_texture == 0) + { + u32 gcm_format; + if (is_depth_blit) + gcm_format = (dst_is_argb8) ? CELL_GCM_TEXTURE_DEPTH24_D8 : CELL_GCM_TEXTURE_DEPTH16; + else + gcm_format = (dst_is_argb8) ? CELL_GCM_TEXTURE_A8R8G8B8 : CELL_GCM_TEXTURE_R5G6B5; + + dest_texture = create_new_texture(cmd, dst.rsx_address, dst.pitch * dst.clip_height, + dst_dimensions.width, dst_dimensions.height, 1, 1, + gcm_format, rsx::texture_dimension_extended::texture_dimension_2d, + dst.swizzled? 
rsx::texture_create_flags::swapped_native_component_order : rsx::texture_create_flags::native_component_order, + default_remap_vector)->get_raw_texture(); + } + + blitter.scale_image(vram_texture, dest_texture, src_area, dst_area, interpolate, is_depth_blit); + return true; + } + }; +} diff --git a/rpcs3/Emu/RSX/GL/GLGSRender.cpp b/rpcs3/Emu/RSX/GL/GLGSRender.cpp index 95c70dd0ac..c9acf61565 100644 --- a/rpcs3/Emu/RSX/GL/GLGSRender.cpp +++ b/rpcs3/Emu/RSX/GL/GLGSRender.cpp @@ -463,22 +463,12 @@ void GLGSRender::end() { int location; if (!rsx::method_registers.fragment_textures[i].enabled()) - { - if (m_textures_dirty[i]) - { - glActiveTexture(GL_TEXTURE0 + i); - glBindTexture(GL_TEXTURE_2D, 0); - - m_textures_dirty[i] = false; - } continue; - } if (m_program->uniforms.has_location("tex" + std::to_string(i), &location)) { - m_gl_textures[i].set_target(get_gl_target_for_texture(rsx::method_registers.fragment_textures[i])); - __glcheck m_gl_texture_cache.upload_texture(i, rsx::method_registers.fragment_textures[i], m_gl_textures[i], m_rtts); - __glcheck m_gl_sampler_states[i].apply(rsx::method_registers.fragment_textures[i]); + m_gl_texture_cache.upload_and_bind_texture(i, get_gl_target_for_texture(rsx::method_registers.fragment_textures[i]), rsx::method_registers.fragment_textures[i], m_rtts); + m_gl_sampler_states[i].apply(rsx::method_registers.fragment_textures[i]); } } @@ -489,16 +479,11 @@ void GLGSRender::end() int location; if (!rsx::method_registers.vertex_textures[i].enabled()) - { - //glActiveTexture(GL_TEXTURE0 + texture_index); - //glBindTexture(GL_TEXTURE_2D, 0); continue; - } if (m_program->uniforms.has_location("vtex" + std::to_string(i), &location)) { - m_gl_vertex_textures[i].set_target(get_gl_target_for_texture(rsx::method_registers.vertex_textures[i])); - __glcheck m_gl_texture_cache.upload_texture(texture_index, rsx::method_registers.vertex_textures[i], m_gl_vertex_textures[i], m_rtts); + m_gl_texture_cache.upload_and_bind_texture(texture_index, 
GL_TEXTURE_2D, rsx::method_registers.vertex_textures[i], m_rtts); } } @@ -766,7 +751,8 @@ void GLGSRender::on_init_thread() glEnable(GL_CLIP_DISTANCE0 + 4); glEnable(GL_CLIP_DISTANCE0 + 5); - m_gl_texture_cache.initialize(this); + m_gl_texture_cache.initialize(); + m_thread_id = std::this_thread::get_id(); m_shaders_cache->load(); } @@ -831,7 +817,7 @@ void GLGSRender::on_exit() } m_text_printer.close(); - m_gl_texture_cache.close(); + m_gl_texture_cache.destroy(); for (u32 i = 0; i < occlusion_query_count; ++i) { @@ -1107,15 +1093,10 @@ void GLGSRender::flip(int buffer) //Check the texture cache for a blitted copy const u32 size = buffer_pitch * buffer_height; auto surface = m_gl_texture_cache.find_texture_from_range(absolute_address, size); - bool ignore_scaling = false; if (surface != nullptr) { - auto dims = surface->get_dimensions(); - buffer_width = std::get<0>(dims); - buffer_height = std::get<1>(dims); - - m_flip_fbo.color = surface->id(); + m_flip_fbo.color = surface->get_raw_view(); m_flip_fbo.read_buffer(m_flip_fbo.color); } else if (auto render_target_texture = m_rtts.get_texture_from_render_target_if_applicable(absolute_address)) @@ -1125,7 +1106,6 @@ void GLGSRender::flip(int buffer) m_flip_fbo.color = *render_target_texture; m_flip_fbo.read_buffer(m_flip_fbo.color); - ignore_scaling = true; } else { @@ -1156,20 +1136,6 @@ void GLGSRender::flip(int buffer) m_flip_fbo.color = m_flip_tex_color; m_flip_fbo.read_buffer(m_flip_fbo.color); - ignore_scaling = true; - } - - if (!ignore_scaling && buffer_region.tile && buffer_region.tile->comp != CELL_GCM_COMPMODE_DISABLED) - { - LOG_ERROR(RSX, "Output buffer compression mode = 0x%X", buffer_region.tile->comp); - - switch (buffer_region.tile->comp) - { - case CELL_GCM_COMPMODE_C32_2X2: - case CELL_GCM_COMPMODE_C32_2X1: - buffer_height = display_buffers[buffer].height / 2; - break; - } } // Blit source image to the screen @@ -1196,7 +1162,7 @@ void GLGSRender::flip(int buffer) rsx::thread::flip(buffer); // 
Cleanup - m_gl_texture_cache.clear_temporary_surfaces(); + m_gl_texture_cache.on_frame_end(); for (auto &tex : m_rtts.invalidated_resources) tex->remove(); @@ -1229,9 +1195,31 @@ u64 GLGSRender::timestamp() const bool GLGSRender::on_access_violation(u32 address, bool is_writing) { if (is_writing) - return m_gl_texture_cache.mark_as_dirty(address); + return m_gl_texture_cache.invalidate_address(address); else - return m_gl_texture_cache.flush_section(address); + { + if (std::this_thread::get_id() != m_thread_id) + { + bool flushable; + gl::cached_texture_section* section_to_post; + + std::tie(flushable, section_to_post) = m_gl_texture_cache.address_is_flushable(address); + if (!flushable) return false; + + work_item &task = post_flush_request(address, section_to_post); + + vm::temporary_unlock(); + { + std::unique_lock lock(task.guard_mutex); + task.cv.wait(lock, [&task] { return task.processed; }); + } + + task.received = true; + return task.result; + } + + return m_gl_texture_cache.flush_address(address); + } } void GLGSRender::on_notify_memory_unmapped(u32 address_base, u32 size) @@ -1273,7 +1261,7 @@ void GLGSRender::do_local_task() } } -work_item& GLGSRender::post_flush_request(u32 address, gl::texture_cache::cached_texture_section *section) +work_item& GLGSRender::post_flush_request(u32 address, gl::cached_texture_section *section) { std::lock_guard lock(queue_guard); @@ -1295,7 +1283,7 @@ void GLGSRender::synchronize_buffers() bool GLGSRender::scaled_image_from_memory(rsx::blit_src_info& src, rsx::blit_dst_info& dst, bool interpolate) { - return m_gl_texture_cache.upload_scaled_image(src, dst, interpolate, m_rtts); + return m_gl_texture_cache.blit(src, dst, interpolate, m_rtts); } void GLGSRender::check_zcull_status(bool framebuffer_swap, bool force_read) diff --git a/rpcs3/Emu/RSX/GL/GLGSRender.h b/rpcs3/Emu/RSX/GL/GLGSRender.h index 259783bebe..47db38f700 100644 --- a/rpcs3/Emu/RSX/GL/GLGSRender.h +++ b/rpcs3/Emu/RSX/GL/GLGSRender.h @@ -29,21 +29,321 @@ 
struct work_item std::mutex guard_mutex; u32 address_to_flush = 0; - gl::texture_cache::cached_texture_section *section_to_flush = nullptr; + gl::cached_texture_section *section_to_flush = nullptr; volatile bool processed = false; volatile bool result = false; volatile bool received = false; }; +struct occlusion_query_info +{ + GLuint handle; + GLint result; + GLint num_draws; + bool pending; + bool active; +}; + +struct zcull_statistics +{ + u32 zpass_pixel_cnt; + u32 zcull_stats; + u32 zcull_stats1; + u32 zcull_stats2; + u32 zcull_stats3; + + void clear() + { + zpass_pixel_cnt = zcull_stats = zcull_stats1 = zcull_stats2 = zcull_stats3 = 0; + } +}; + +struct occlusion_task +{ + std::vector task_stack; + occlusion_query_info* active_query = nullptr; + u32 pending = 0; + + //Add one query to the task + void add(occlusion_query_info* query) + { + active_query = query; + + if (task_stack.size() > 0 && pending == 0) + task_stack.resize(0); + + const auto empty_slots = task_stack.size() - pending; + if (empty_slots >= 4) + { + for (auto &_query : task_stack) + { + if (_query == nullptr) + { + _query = query; + pending++; + return; + } + } + } + + task_stack.push_back(query); + pending++; + } +}; + +struct driver_state +{ + const u32 DEPTH_BOUNDS_MIN = 0xFFFF0001; + const u32 DEPTH_BOUNDS_MAX = 0xFFFF0002; + const u32 DEPTH_RANGE_MIN = 0xFFFF0003; + const u32 DEPTH_RANGE_MAX = 0xFFFF0004; + + std::unordered_map properties = {}; + std::unordered_map> indexed_properties = {}; + + bool enable(u32 test, GLenum cap) + { + auto found = properties.find(cap); + if (found != properties.end() && found->second == test) + return !!test; + + properties[cap] = test; + + if (test) + glEnable(cap); + else + glDisable(cap); + + return !!test; + } + + bool enablei(u32 test, GLenum cap, u32 index) + { + auto found = indexed_properties.find(cap); + const bool exists = found != indexed_properties.end(); + + if (!exists) + { + indexed_properties[cap] = {}; + indexed_properties[cap][index] = 
test; + } + else + { + if (found->second[index] == test) + return !!test; + + found->second[index] = test; + } + + if (test) + glEnablei(cap, index); + else + glDisablei(cap, index); + + return !!test; + } + + const bool test_property(GLenum property, u32 test) const + { + auto found = properties.find(property); + if (found == properties.end()) + return false; + + return (found->second == test); + } + + void depth_func(GLenum func) + { + if (!test_property(GL_DEPTH_FUNC, func)) + { + glDepthFunc(func); + properties[GL_DEPTH_FUNC] = func; + } + } + + void depth_mask(GLboolean mask) + { + if (!test_property(GL_DEPTH_WRITEMASK, mask)) + { + glDepthMask(mask); + properties[GL_DEPTH_WRITEMASK] = mask; + } + } + + void clear_depth(GLfloat depth) + { + u32 value = (u32&)depth; + if (!test_property(GL_DEPTH_CLEAR_VALUE, value)) + { + glClearDepth(depth); + properties[GL_DEPTH_CLEAR_VALUE] = value; + } + } + + void stencil_mask(GLuint mask) + { + if (!test_property(GL_STENCIL_WRITEMASK, mask)) + { + glStencilMask(mask); + properties[GL_STENCIL_WRITEMASK] = mask; + } + } + + void clear_stencil(GLint stencil) + { + u32 value = (u32&)stencil; + if (!test_property(GL_STENCIL_CLEAR_VALUE, value)) + { + glClearStencil(stencil); + properties[GL_STENCIL_CLEAR_VALUE] = value; + } + } + + void color_mask(u32 mask) + { + if (!test_property(GL_COLOR_WRITEMASK, mask)) + { + glColorMask(((mask & 0x20) ? 1 : 0), ((mask & 0x40) ? 1 : 0), ((mask & 0x80) ? 1 : 0), ((mask & 0x10) ? 
1 : 0)); + properties[GL_COLOR_WRITEMASK] = mask; + } + } + + void color_mask(bool r, bool g, bool b, bool a) + { + u32 mask = 0; + if (r) mask |= 0x20; + if (g) mask |= 0x40; + if (b) mask |= 0x80; + if (a) mask |= 0x10; + + color_mask(mask); + } + + void clear_color(u8 r, u8 g, u8 b, u8 a) + { + u32 value = (u32)r | (u32)g << 8 | (u32)b << 16 | (u32)a << 24; + if (!test_property(GL_COLOR_CLEAR_VALUE, value)) + { + glClearColor(r / 255.f, g / 255.f, b / 255.f, a / 255.f); + properties[GL_COLOR_CLEAR_VALUE] = value; + } + } + + void depth_bounds(float min, float max) + { + u32 depth_min = (u32&)min; + u32 depth_max = (u32&)max; + + if (!test_property(DEPTH_BOUNDS_MIN, depth_min) || !test_property(DEPTH_BOUNDS_MAX, depth_max)) + { + glDepthBoundsEXT(min, max); + + properties[DEPTH_BOUNDS_MIN] = depth_min; + properties[DEPTH_BOUNDS_MAX] = depth_max; + } + } + + void depth_range(float min, float max) + { + u32 depth_min = (u32&)min; + u32 depth_max = (u32&)max; + + if (!test_property(DEPTH_RANGE_MIN, depth_min) || !test_property(DEPTH_RANGE_MAX, depth_max)) + { + glDepthRange(min, max); + + properties[DEPTH_RANGE_MIN] = depth_min; + properties[DEPTH_RANGE_MAX] = depth_max; + } + } + + void logic_op(GLenum op) + { + if (!test_property(GL_COLOR_LOGIC_OP, op)) + { + glLogicOp(op); + properties[GL_COLOR_LOGIC_OP] = op; + } + } + + void line_width(GLfloat width) + { + u32 value = (u32&)width; + + if (!test_property(GL_LINE_WIDTH, value)) + { + glLineWidth(width); + properties[GL_LINE_WIDTH] = value; + } + } + + void front_face(GLenum face) + { + if (!test_property(GL_FRONT_FACE, face)) + { + glFrontFace(face); + properties[GL_FRONT_FACE] = face; + } + } + + void cull_face(GLenum mode) + { + if (!test_property(GL_CULL_FACE_MODE, mode)) + { + glCullFace(mode); + properties[GL_CULL_FACE_MODE] = mode; + } + } + + void polygon_offset(float factor, float units) + { + u32 _units = (u32&)units; + u32 _factor = (u32&)factor; + + if (!test_property(GL_POLYGON_OFFSET_UNITS, _units) 
|| !test_property(GL_POLYGON_OFFSET_FACTOR, _factor)) + { + glPolygonOffset(factor, units); + + properties[GL_POLYGON_OFFSET_UNITS] = _units; + properties[GL_POLYGON_OFFSET_FACTOR] = _factor; + } + } +}; + +struct sw_ring_buffer +{ + std::vector data; + u32 ring_pos = 0; + u32 ring_length = 0; + + sw_ring_buffer(u32 size) + { + data.resize(size); + ring_length = size; + } + + void* get(u32 dwords) + { + const u32 required = (dwords << 2); + if ((ring_pos + required) > ring_length) + { + ring_pos = 0; + return data.data(); + } + + void *result = data.data() + ring_pos; + ring_pos += required; + return result; + } +}; + class GLGSRender : public GSRender { private: GLFragmentProgram m_fragment_prog; GLVertexProgram m_vertex_prog; - rsx::gl::texture m_gl_textures[rsx::limits::fragment_textures_count]; - rsx::gl::texture m_gl_vertex_textures[rsx::limits::vertex_textures_count]; gl::sampler_state m_gl_sampler_states[rsx::limits::fragment_textures_count]; gl::glsl::program *m_program; @@ -86,14 +386,12 @@ private: rsx::gcm_framebuffer_info depth_surface_info; bool flush_draw_buffers = false; + std::thread::id m_thread_id; -public: - gl::fbo draw_fbo; - -private: GLProgramBuffer m_prog_buffer; //buffer + gl::fbo draw_fbo; gl::fbo m_flip_fbo; gl::texture m_flip_tex_color; @@ -102,64 +400,8 @@ private: //occlusion query bool zcull_surface_active = false; - - struct occlusion_query_info - { - GLuint handle; - GLint result; - GLint num_draws; - bool pending; - bool active; - }; - - struct - { - u32 zpass_pixel_cnt; - u32 zcull_stats; - u32 zcull_stats1; - u32 zcull_stats2; - u32 zcull_stats3; - - void clear() - { - zpass_pixel_cnt = zcull_stats = zcull_stats1 = zcull_stats2 = zcull_stats3 = 0; - } - } - current_zcull_stats; - - struct occlusion_task - { - std::vector task_stack; - occlusion_query_info* active_query = nullptr; - u32 pending = 0; - - //Add one query to the task - void add(occlusion_query_info* query) - { - active_query = query; - - if (task_stack.size() > 0 && 
pending == 0) - task_stack.resize(0); - - const auto empty_slots = task_stack.size() - pending; - if (empty_slots >= 4) - { - for (auto &_query : task_stack) - { - if (_query == nullptr) - { - _query = query; - pending++; - return; - } - } - } - - task_stack.push_back(query); - pending++; - } - } - zcull_task_queue = {}; + zcull_statistics current_zcull_stats; + occlusion_task zcull_task_queue = {}; const u32 occlusion_query_count = 128; std::array occlusion_query_data = {}; @@ -169,225 +411,7 @@ public: private: - struct - { - const u32 DEPTH_BOUNDS_MIN = 0xFFFF0001; - const u32 DEPTH_BOUNDS_MAX = 0xFFFF0002; - const u32 DEPTH_RANGE_MIN = 0xFFFF0003; - const u32 DEPTH_RANGE_MAX = 0xFFFF0004; - - std::unordered_map properties = {}; - std::unordered_map> indexed_properties = {}; - - bool enable(u32 test, GLenum cap) - { - auto found = properties.find(cap); - if (found != properties.end() && found->second == test) - return !!test; - - properties[cap] = test; - - if (test) - glEnable(cap); - else - glDisable(cap); - - return !!test; - } - - bool enablei(u32 test, GLenum cap, u32 index) - { - auto found = indexed_properties.find(cap); - const bool exists = found != indexed_properties.end(); - - if (!exists) - { - indexed_properties[cap] = {}; - indexed_properties[cap][index] = test; - } - else - { - if (found->second[index] == test) - return !!test; - - found->second[index] = test; - } - - if (test) - glEnablei(cap, index); - else - glDisablei(cap, index); - - return !!test; - } - - const bool test_property(GLenum property, u32 test) const - { - auto found = properties.find(property); - if (found == properties.end()) - return false; - - return (found->second == test); - } - - void depth_func(GLenum func) - { - if (!test_property(GL_DEPTH_FUNC, func)) - { - glDepthFunc(func); - properties[GL_DEPTH_FUNC] = func; - } - } - - void depth_mask(GLboolean mask) - { - if (!test_property(GL_DEPTH_WRITEMASK, mask)) - { - glDepthMask(mask); - properties[GL_DEPTH_WRITEMASK] = mask; 
- } - } - - void clear_depth(GLfloat depth) - { - u32 value = (u32&)depth; - if (!test_property(GL_DEPTH_CLEAR_VALUE, value)) - { - glClearDepth(depth); - properties[GL_DEPTH_CLEAR_VALUE] = value; - } - } - - void stencil_mask(GLuint mask) - { - if (!test_property(GL_STENCIL_WRITEMASK, mask)) - { - glStencilMask(mask); - properties[GL_STENCIL_WRITEMASK] = mask; - } - } - - void clear_stencil(GLint stencil) - { - u32 value = (u32&)stencil; - if (!test_property(GL_STENCIL_CLEAR_VALUE, value)) - { - glClearStencil(stencil); - properties[GL_STENCIL_CLEAR_VALUE] = value; - } - } - - void color_mask(u32 mask) - { - if (!test_property(GL_COLOR_WRITEMASK, mask)) - { - glColorMask(((mask & 0x20) ? 1 : 0), ((mask & 0x40) ? 1 : 0), ((mask & 0x80) ? 1 : 0), ((mask & 0x10) ? 1 : 0)); - properties[GL_COLOR_WRITEMASK] = mask; - } - } - - void color_mask(bool r, bool g, bool b, bool a) - { - u32 mask = 0; - if (r) mask |= 0x20; - if (g) mask |= 0x40; - if (b) mask |= 0x80; - if (a) mask |= 0x10; - - color_mask(mask); - } - - void clear_color(u8 r, u8 g, u8 b, u8 a) - { - u32 value = (u32)r | (u32)g << 8 | (u32)b << 16 | (u32)a << 24; - if (!test_property(GL_COLOR_CLEAR_VALUE, value)) - { - glClearColor(r / 255.f, g / 255.f, b / 255.f, a / 255.f); - properties[GL_COLOR_CLEAR_VALUE] = value; - } - } - - void depth_bounds(float min, float max) - { - u32 depth_min = (u32&)min; - u32 depth_max = (u32&)max; - - if (!test_property(DEPTH_BOUNDS_MIN, depth_min) || !test_property(DEPTH_BOUNDS_MAX, depth_max)) - { - glDepthBoundsEXT(min, max); - - properties[DEPTH_BOUNDS_MIN] = depth_min; - properties[DEPTH_BOUNDS_MAX] = depth_max; - } - } - - void depth_range(float min, float max) - { - u32 depth_min = (u32&)min; - u32 depth_max = (u32&)max; - - if (!test_property(DEPTH_RANGE_MIN, depth_min) || !test_property(DEPTH_RANGE_MAX, depth_max)) - { - glDepthRange(min, max); - - properties[DEPTH_RANGE_MIN] = depth_min; - properties[DEPTH_RANGE_MAX] = depth_max; - } - } - - void logic_op(GLenum op) 
- { - if (!test_property(GL_COLOR_LOGIC_OP, op)) - { - glLogicOp(op); - properties[GL_COLOR_LOGIC_OP] = op; - } - } - - void line_width(GLfloat width) - { - u32 value = (u32&)width; - - if (!test_property(GL_LINE_WIDTH, value)) - { - glLineWidth(width); - properties[GL_LINE_WIDTH] = value; - } - } - - void front_face(GLenum face) - { - if (!test_property(GL_FRONT_FACE, face)) - { - glFrontFace(face); - properties[GL_FRONT_FACE] = face; - } - } - - void cull_face(GLenum mode) - { - if (!test_property(GL_CULL_FACE_MODE, mode)) - { - glCullFace(mode); - properties[GL_CULL_FACE_MODE] = mode; - } - } - - void polygon_offset(float factor, float units) - { - u32 _units = (u32&)units; - u32 _factor = (u32&)factor; - - if (!test_property(GL_POLYGON_OFFSET_UNITS, _units) || !test_property(GL_POLYGON_OFFSET_FACTOR, _factor)) - { - glPolygonOffset(factor, units); - - properties[GL_POLYGON_OFFSET_UNITS] = _units; - properties[GL_POLYGON_OFFSET_FACTOR] = _factor; - } - } - } - gl_state; + driver_state gl_state; // Return element to draw and in case of indexed draw index type and offset in index buffer std::tuple > > set_vertex_buffer(); @@ -405,7 +429,7 @@ public: void set_viewport(); void synchronize_buffers(); - work_item& post_flush_request(u32 address, gl::texture_cache::cached_texture_section *section); + work_item& post_flush_request(u32 address, gl::cached_texture_section *section); bool scaled_image_from_memory(rsx::blit_src_info& src_info, rsx::blit_dst_info& dst_info, bool interpolate) override; diff --git a/rpcs3/Emu/RSX/GL/GLHelpers.h b/rpcs3/Emu/RSX/GL/GLHelpers.h index 65addd71cd..00914d89a4 100644 --- a/rpcs3/Emu/RSX/GL/GLHelpers.h +++ b/rpcs3/Emu/RSX/GL/GLHelpers.h @@ -1512,6 +1512,11 @@ namespace gl return (texture::format)result; } + virtual texture::internal_format get_compatible_internal_format() const + { + return (texture::internal_format)get_internal_format(); + } + texture::channel_type get_channel_type(texture::channel_name channel) const { 
save_binding_state save(*this); diff --git a/rpcs3/Emu/RSX/GL/GLRenderTargets.cpp b/rpcs3/Emu/RSX/GL/GLRenderTargets.cpp index 5caf11f4a0..dbad2b3b2e 100644 --- a/rpcs3/Emu/RSX/GL/GLRenderTargets.cpp +++ b/rpcs3/Emu/RSX/GL/GLRenderTargets.cpp @@ -291,8 +291,8 @@ void GLGSRender::init_buffers(bool skip_reading) if (!surface_info[i].address || !surface_info[i].pitch) continue; const u32 range = surface_info[i].pitch * surface_info[i].height; - m_gl_texture_cache.lock_rtt_region(surface_info[i].address, range, surface_info[i].width, surface_info[i].height, surface_info[i].pitch, - color_format.format, color_format.type, color_format.swap_bytes, *std::get<1>(m_rtts.m_bound_render_targets[i])); + m_gl_texture_cache.lock_memory_region(std::get<1>(m_rtts.m_bound_render_targets[i]), surface_info[i].address, range, surface_info[i].width, surface_info[i].height, surface_info[i].pitch, + color_format.format, color_format.type, color_format.swap_bytes); } } @@ -311,8 +311,8 @@ void GLGSRender::init_buffers(bool skip_reading) if (pitch != depth_surface_info.pitch) LOG_WARNING(RSX, "Depth surface pitch does not match computed pitch, %d vs %d", depth_surface_info.pitch, pitch); - m_gl_texture_cache.lock_rtt_region(depth_surface_info.address, range, depth_surface_info.width, depth_surface_info.height, pitch, - depth_format_gl.format, depth_format_gl.type, true, *std::get<1>(m_rtts.m_bound_depth_stencil)); + m_gl_texture_cache.lock_memory_region(std::get<1>(m_rtts.m_bound_depth_stencil), depth_surface_info.address, range, depth_surface_info.width, depth_surface_info.height, pitch, + depth_format_gl.format, depth_format_gl.type, true); } } } @@ -344,8 +344,8 @@ void GLGSRender::read_buffers() auto read_color_buffers = [&](int index, int count) { - u32 width = rsx::method_registers.surface_clip_width(); - u32 height = rsx::method_registers.surface_clip_height(); + const u32 width = rsx::method_registers.surface_clip_width(); + const u32 height = 
rsx::method_registers.surface_clip_height(); const std::array offsets = get_offsets(); const std::array locations = get_locations(); @@ -353,17 +353,19 @@ void GLGSRender::read_buffers() for (int i = index; i < index + count; ++i) { - u32 offset = offsets[i]; - u32 location = locations[i]; - u32 pitch = pitchs[i]; + const u32 offset = offsets[i]; + const u32 location = locations[i]; + const u32 pitch = pitchs[i]; if (!surface_info[i].pitch) continue; + + const u32 range = pitch * height; rsx::tiled_region color_buffer = get_tiled_address(offset, location & 0xf); u32 texaddr = (u32)((u64)color_buffer.ptr - (u64)vm::base(0)); - bool success = m_gl_texture_cache.load_rtt((*std::get<1>(m_rtts.m_bound_render_targets[i])), texaddr, pitch); + bool success = m_gl_texture_cache.load_memory_from_cache(texaddr, pitch * height, std::get<1>(m_rtts.m_bound_render_targets[i])); //Fall back to slower methods if the image could not be fetched from cache. if (!success) @@ -374,7 +376,6 @@ void GLGSRender::read_buffers() } else { - u32 range = pitch * height; m_gl_texture_cache.invalidate_range(texaddr, range); std::unique_ptr buffer(new u8[pitch * height]); @@ -416,13 +417,15 @@ void GLGSRender::read_buffers() if (g_cfg.video.read_depth_buffer) { //TODO: use pitch - u32 pitch = depth_surface_info.pitch; + const u32 pitch = depth_surface_info.pitch; + const u32 width = rsx::method_registers.surface_clip_width(); + const u32 height = rsx::method_registers.surface_clip_height(); if (!pitch) return; u32 depth_address = rsx::get_address(rsx::method_registers.surface_z_offset(), rsx::method_registers.surface_z_dma()); - bool in_cache = m_gl_texture_cache.load_rtt((*std::get<1>(m_rtts.m_bound_depth_stencil)), depth_address, pitch); + bool in_cache = m_gl_texture_cache.load_memory_from_cache(depth_address, pitch * height, std::get<1>(m_rtts.m_bound_depth_stencil)); if (in_cache) return; @@ -433,7 +436,7 @@ void GLGSRender::read_buffers() int pixel_size = 
rsx::internals::get_pixel_size(rsx::method_registers.surface_depth_fmt()); gl::buffer pbo_depth; - __glcheck pbo_depth.create(rsx::method_registers.surface_clip_width() * rsx::method_registers.surface_clip_height() * pixel_size); + __glcheck pbo_depth.create(width * height * pixel_size); __glcheck pbo_depth.map([&](GLubyte* pixels) { u32 depth_address = rsx::get_address(rsx::method_registers.surface_z_offset(), rsx::method_registers.surface_z_dma()); @@ -482,7 +485,7 @@ void GLGSRender::write_buffers() */ const u32 range = surface_info[i].pitch * surface_info[i].height; - __glcheck m_gl_texture_cache.save_rtt(surface_info[i].address, range); + __glcheck m_gl_texture_cache.flush_memory_to_cache(surface_info[i].address, range, true); } }; @@ -497,6 +500,6 @@ void GLGSRender::write_buffers() u32 range = depth_surface_info.width * depth_surface_info.height * 2; if (depth_surface_info.depth_format != rsx::surface_depth_format::z16) range *= 2; - m_gl_texture_cache.save_rtt(depth_surface_info.address, range); + m_gl_texture_cache.flush_memory_to_cache(depth_surface_info.address, range, true); } -} \ No newline at end of file +} diff --git a/rpcs3/Emu/RSX/GL/GLRenderTargets.h b/rpcs3/Emu/RSX/GL/GLRenderTargets.h index 5e012fcf75..c69897173b 100644 --- a/rpcs3/Emu/RSX/GL/GLRenderTargets.h +++ b/rpcs3/Emu/RSX/GL/GLRenderTargets.h @@ -48,7 +48,7 @@ namespace rsx namespace gl { - class render_target : public texture + class render_target : public texture, public rsx::render_target_descriptor { bool is_cleared = false; @@ -82,7 +82,7 @@ namespace gl native_pitch = pitch; } - u16 get_native_pitch() const + u16 get_native_pitch() const override { return native_pitch; } @@ -93,17 +93,29 @@ namespace gl rsx_pitch = pitch; } - u16 get_rsx_pitch() const + u16 get_rsx_pitch() const override { return rsx_pitch; } - std::pair get_dimensions() + u16 get_surface_width() const override { - if (!surface_height) surface_height = height(); - if (!surface_width) surface_width = width(); + 
return surface_width; + } - return std::make_pair(surface_width, surface_height); + u16 get_surface_height() const override + { + return surface_height; + } + + u32 get_surface() const override + { + return id(); + } + + u32 get_view() const + { + return id(); } void set_compatible_format(texture::internal_format format) @@ -111,10 +123,16 @@ namespace gl compatible_internal_format = format; } - texture::internal_format get_compatible_internal_format() + texture::internal_format get_compatible_internal_format() const override { return compatible_internal_format; } + + void update_surface() + { + surface_width = width(); + surface_height = height(); + } }; } @@ -159,6 +177,7 @@ struct gl_render_target_traits result->old_contents = old_surface; result->set_cleared(); + result->update_surface(); return result; } @@ -198,18 +217,17 @@ struct gl_render_target_traits if (old_surface != nullptr && old_surface->get_compatible_internal_format() == format.internal_format) result->old_contents = old_surface; + result->update_surface(); return result; } static void get_surface_info(gl::render_target *surface, rsx::surface_format_info *info) { - const auto dims = surface->get_dimensions(); - info->rsx_pitch = surface->get_rsx_pitch(); info->native_pitch = surface->get_native_pitch(); - info->surface_width = std::get<0>(dims); - info->surface_height = std::get<1>(dims); + info->surface_width = surface->get_surface_width(); + info->surface_height = surface->get_surface_height(); info->bpp = static_cast(info->native_pitch / info->surface_width); } diff --git a/rpcs3/Emu/RSX/GL/GLTexture.cpp b/rpcs3/Emu/RSX/GL/GLTexture.cpp index 7d3bc460b1..2b2cb2a53a 100644 --- a/rpcs3/Emu/RSX/GL/GLTexture.cpp +++ b/rpcs3/Emu/RSX/GL/GLTexture.cpp @@ -5,7 +5,6 @@ #include "../RSXThread.h" #include "../RSXTexture.h" #include "../rsx_utils.h" -#include "../Common/TextureUtils.h" namespace gl { @@ -162,20 +161,20 @@ namespace gl } glSamplerParameteri(samplerHandle, GL_TEXTURE_MIN_FILTER, min_filter); 
- glSamplerParameterf(samplerHandle, GL_TEXTURE_LOD_BIAS, 0.f); - glSamplerParameterf(samplerHandle, GL_TEXTURE_MIN_LOD, -1000.f); - glSamplerParameterf(samplerHandle, GL_TEXTURE_MAX_LOD, 1000.f); + glSamplerParameterf(samplerHandle, GL_TEXTURE_LOD_BIAS, 0.f); + glSamplerParameterf(samplerHandle, GL_TEXTURE_MIN_LOD, -1000.f); + glSamplerParameterf(samplerHandle, GL_TEXTURE_MAX_LOD, 1000.f); } else { - glSamplerParameteri(samplerHandle, GL_TEXTURE_MIN_FILTER, tex_min_filter(tex.min_filter())); - glSamplerParameterf(samplerHandle, GL_TEXTURE_LOD_BIAS, tex.bias()); - glSamplerParameteri(samplerHandle, GL_TEXTURE_MIN_LOD, (tex.min_lod() >> 8)); - glSamplerParameteri(samplerHandle, GL_TEXTURE_MAX_LOD, (tex.max_lod() >> 8)); + glSamplerParameteri(samplerHandle, GL_TEXTURE_MIN_FILTER, tex_min_filter(tex.min_filter())); + glSamplerParameterf(samplerHandle, GL_TEXTURE_LOD_BIAS, tex.bias()); + glSamplerParameteri(samplerHandle, GL_TEXTURE_MIN_LOD, (tex.min_lod() >> 8)); + glSamplerParameteri(samplerHandle, GL_TEXTURE_MAX_LOD, (tex.max_lod() >> 8)); } - glSamplerParameteri(samplerHandle, GL_TEXTURE_MAG_FILTER, tex_mag_filter(tex.mag_filter())); - glSamplerParameterf(samplerHandle, GL_TEXTURE_MAX_ANISOTROPY_EXT, ::gl::max_aniso(tex.max_aniso())); + glSamplerParameteri(samplerHandle, GL_TEXTURE_MAG_FILTER, tex_mag_filter(tex.mag_filter())); + glSamplerParameterf(samplerHandle, GL_TEXTURE_MAX_ANISOTROPY_EXT, max_aniso(tex.max_aniso())); const u32 texture_format = tex.format() & ~(CELL_GCM_TEXTURE_UN | CELL_GCM_TEXTURE_LN); if (texture_format == CELL_GCM_TEXTURE_DEPTH16 || texture_format == CELL_GCM_TEXTURE_DEPTH24_D8) @@ -197,10 +196,7 @@ namespace gl else glSamplerParameteri(samplerHandle, GL_TEXTURE_COMPARE_MODE, GL_NONE); } -} -namespace -{ bool is_compressed_format(u32 texture_format) { switch (texture_format) @@ -234,23 +230,6 @@ namespace fmt::throw_exception("Unknown format 0x%x" HERE, texture_format); } - bool requires_unpack_byte(u32 texture_format) - { - switch 
(texture_format) - { - case CELL_GCM_TEXTURE_R5G6B5: - case CELL_GCM_TEXTURE_X16: - case CELL_GCM_TEXTURE_R5G5B5A1: - case CELL_GCM_TEXTURE_Y16_X16_FLOAT: - case CELL_GCM_TEXTURE_Y16_X16: - case CELL_GCM_TEXTURE_W32_Z32_Y32_X32_FLOAT: - case CELL_GCM_TEXTURE_D1R5G5B5: - case CELL_GCM_TEXTURE_W16_Z16_Y16_X16_FLOAT: - return true; - } - return false; - } - std::array get_swizzle_remap(u32 texture_format) { // NOTE: This must be in ARGB order in all forms below. @@ -270,307 +249,253 @@ namespace case CELL_GCM_TEXTURE_COMPRESSED_DXT45: case CELL_GCM_TEXTURE_COMPRESSED_B8R8_G8R8: case CELL_GCM_TEXTURE_COMPRESSED_R8B8_R8G8: - return { GL_ALPHA, GL_RED, GL_GREEN, GL_BLUE }; + return{ GL_ALPHA, GL_RED, GL_GREEN, GL_BLUE }; case CELL_GCM_TEXTURE_A4R4G4B4: - return { GL_BLUE, GL_GREEN, GL_RED, GL_ALPHA }; + return{ GL_BLUE, GL_GREEN, GL_RED, GL_ALPHA }; case CELL_GCM_TEXTURE_B8: case CELL_GCM_TEXTURE_X16: case CELL_GCM_TEXTURE_X32_FLOAT: - return { GL_RED, GL_RED, GL_RED, GL_RED }; + return{ GL_RED, GL_RED, GL_RED, GL_RED }; - case CELL_GCM_TEXTURE_G8B8: - return { GL_GREEN, GL_RED, GL_GREEN, GL_RED}; + case CELL_GCM_TEXTURE_G8B8: + return{ GL_GREEN, GL_RED, GL_GREEN, GL_RED }; case CELL_GCM_TEXTURE_Y16_X16: case CELL_GCM_TEXTURE_Y16_X16_FLOAT: - return { GL_RED, GL_GREEN, GL_RED, GL_GREEN}; + return{ GL_RED, GL_GREEN, GL_RED, GL_GREEN }; case CELL_GCM_TEXTURE_W16_Z16_Y16_X16_FLOAT: case CELL_GCM_TEXTURE_W32_Z32_Y32_X32_FLOAT: - return { GL_RED, GL_ALPHA, GL_BLUE, GL_GREEN }; + return{ GL_RED, GL_ALPHA, GL_BLUE, GL_GREEN }; case CELL_GCM_TEXTURE_D1R5G5B5: - case CELL_GCM_TEXTURE_D8R8G8B8: - return { GL_ONE, GL_RED, GL_GREEN, GL_BLUE }; + case CELL_GCM_TEXTURE_D8R8G8B8: + return{ GL_ONE, GL_RED, GL_GREEN, GL_BLUE }; case CELL_GCM_TEXTURE_COMPRESSED_HILO8: - case CELL_GCM_TEXTURE_COMPRESSED_HILO_S8: - return { GL_RED, GL_GREEN, GL_RED, GL_GREEN }; + case CELL_GCM_TEXTURE_COMPRESSED_HILO_S8: + return{ GL_RED, GL_GREEN, GL_RED, GL_GREEN }; case ~(CELL_GCM_TEXTURE_LN | 
CELL_GCM_TEXTURE_UN) & CELL_GCM_TEXTURE_COMPRESSED_B8R8_G8R8: case ~(CELL_GCM_TEXTURE_LN | CELL_GCM_TEXTURE_UN) & CELL_GCM_TEXTURE_COMPRESSED_R8B8_R8G8: - return { GL_ZERO, GL_GREEN, GL_BLUE, GL_RED }; + return{ GL_ZERO, GL_GREEN, GL_BLUE, GL_RED }; } fmt::throw_exception("Unknown format 0x%x" HERE, texture_format); } -} -namespace rsx -{ - namespace gl + GLuint create_texture(u32 gcm_format, u16 width, u16 height, u16 depth, u16 mipmaps, rsx::texture_dimension_extended type) { - static const int gl_tex_zfunc[] = + if (is_compressed_format(gcm_format)) { - GL_NEVER, - GL_LESS, - GL_EQUAL, - GL_LEQUAL, - GL_GREATER, - GL_NOTEQUAL, - GL_GEQUAL, - GL_ALWAYS, - }; - - void texture::create() - { - if (m_id) - { - remove(); - } - - glGenTextures(1, &m_id); + //Compressed formats have a 4-byte alignment + //TODO: Verify that samplers are not affected by the padding + width = align(width, 4); + height = align(height, 4); } - u16 texture::get_pitch_modifier(u32 format) + GLuint id = 0; + GLenum target; + GLenum internal_format = get_sized_internal_format(gcm_format); + + glGenTextures(1, &id); + + switch (type) { - switch (format) - { - case CELL_GCM_TEXTURE_COMPRESSED_HILO8: - case CELL_GCM_TEXTURE_COMPRESSED_HILO_S8: - default: - LOG_ERROR(RSX, "Unimplemented pitch modifier for texture format: 0x%x", format); - return 0; - case ~(CELL_GCM_TEXTURE_LN | CELL_GCM_TEXTURE_UN) & CELL_GCM_TEXTURE_COMPRESSED_B8R8_G8R8: - case ~(CELL_GCM_TEXTURE_LN | CELL_GCM_TEXTURE_UN) & CELL_GCM_TEXTURE_COMPRESSED_R8B8_R8G8: - return 4; - case CELL_GCM_TEXTURE_B8: - return 1; - case CELL_GCM_TEXTURE_COMPRESSED_DXT1: - case CELL_GCM_TEXTURE_COMPRESSED_DXT23: - case CELL_GCM_TEXTURE_COMPRESSED_DXT45: - return 0; - case CELL_GCM_TEXTURE_A1R5G5B5: - case CELL_GCM_TEXTURE_A4R4G4B4: - case CELL_GCM_TEXTURE_R5G6B5: - case CELL_GCM_TEXTURE_G8B8: - case CELL_GCM_TEXTURE_R6G5B5: - case CELL_GCM_TEXTURE_DEPTH16: - case CELL_GCM_TEXTURE_DEPTH16_FLOAT: - case CELL_GCM_TEXTURE_X16: - case 
CELL_GCM_TEXTURE_R5G5B5A1: - case CELL_GCM_TEXTURE_D1R5G5B5: - return 2; - case CELL_GCM_TEXTURE_A8R8G8B8: - case CELL_GCM_TEXTURE_X32_FLOAT: - case CELL_GCM_TEXTURE_Y16_X16_FLOAT: - case CELL_GCM_TEXTURE_D8R8G8B8: - case CELL_GCM_TEXTURE_COMPRESSED_B8R8_G8R8: - case CELL_GCM_TEXTURE_COMPRESSED_R8B8_R8G8: - case CELL_GCM_TEXTURE_DEPTH24_D8: - case CELL_GCM_TEXTURE_DEPTH24_D8_FLOAT: - case CELL_GCM_TEXTURE_Y16_X16: - return 4; - case CELL_GCM_TEXTURE_W16_Z16_Y16_X16_FLOAT: - return 8; - case CELL_GCM_TEXTURE_W32_Z32_Y32_X32_FLOAT: - return 16; - } + case rsx::texture_dimension_extended::texture_dimension_1d: + target = GL_TEXTURE_1D; + glBindTexture(GL_TEXTURE_1D, id); + glTexStorage1D(GL_TEXTURE_1D, mipmaps, internal_format, width); + break; + case rsx::texture_dimension_extended::texture_dimension_2d: + target = GL_TEXTURE_2D; + glBindTexture(GL_TEXTURE_2D, id); + glTexStorage2D(GL_TEXTURE_2D, mipmaps, internal_format, width, height); + break; + case rsx::texture_dimension_extended::texture_dimension_3d: + target = GL_TEXTURE_3D; + glBindTexture(GL_TEXTURE_3D, id); + glTexStorage3D(GL_TEXTURE_3D, mipmaps, internal_format, width, height, depth); + break; + case rsx::texture_dimension_extended::texture_dimension_cubemap: + target = GL_TEXTURE_CUBE_MAP; + glBindTexture(GL_TEXTURE_CUBE_MAP, id); + glTexStorage2D(GL_TEXTURE_CUBE_MAP, mipmaps, internal_format, width, height); + break; } - namespace + glTexParameteri(target, GL_TEXTURE_MIN_FILTER, GL_LINEAR); + glTexParameteri(target, GL_TEXTURE_MAG_FILTER, GL_LINEAR); + + return id; + } + + void fill_texture(rsx::texture_dimension_extended dim, u16 mipmap_count, int format, u16 width, u16 height, u16 depth, + const std::vector &input_layouts, bool is_swizzled, std::vector staging_buffer) + { + int mip_level = 0; + if (is_compressed_format(format)) { - void create_and_fill_texture(rsx::texture_dimension_extended dim, - u16 mipmap_count, int format, u16 width, u16 height, u16 depth, const std::vector &input_layouts, bool 
is_swizzled, - std::vector staging_buffer) - { - int mip_level = 0; - if (is_compressed_format(format)) - { - //Compressed formats have a 4-byte alignment - //TODO: Verify that samplers are not affected by the padding - width = align(width, 4); - height = align(height, 4); - } - - if (dim == rsx::texture_dimension_extended::texture_dimension_1d) - { - __glcheck glTexStorage1D(GL_TEXTURE_1D, mipmap_count, ::gl::get_sized_internal_format(format), width); - if (!is_compressed_format(format)) - { - const auto &format_type = ::gl::get_format_type(format); - for (const rsx_subresource_layout &layout : input_layouts) - { - __glcheck upload_texture_subresource(staging_buffer, layout, format, is_swizzled, 4); - __glcheck glTexSubImage1D(GL_TEXTURE_1D, mip_level++, 0, layout.width_in_block, std::get<0>(format_type), std::get<1>(format_type), staging_buffer.data()); - } - } - else - { - for (const rsx_subresource_layout &layout : input_layouts) - { - u32 size = layout.width_in_block * ((format == CELL_GCM_TEXTURE_COMPRESSED_DXT1) ? 
8 : 16); - __glcheck upload_texture_subresource(staging_buffer, layout, format, is_swizzled, 4); - __glcheck glCompressedTexSubImage1D(GL_TEXTURE_1D, mip_level++, 0, layout.width_in_block * 4, ::gl::get_sized_internal_format(format), size, staging_buffer.data()); - } - } - return; - } - - if (dim == rsx::texture_dimension_extended::texture_dimension_2d) - { - __glcheck glTexStorage2D(GL_TEXTURE_2D, mipmap_count, ::gl::get_sized_internal_format(format), width, height); - if (!is_compressed_format(format)) - { - const auto &format_type = ::gl::get_format_type(format); - for (const rsx_subresource_layout &layout : input_layouts) - { - __glcheck upload_texture_subresource(staging_buffer, layout, format, is_swizzled, 4); - __glcheck glTexSubImage2D(GL_TEXTURE_2D, mip_level++, 0, 0, layout.width_in_block, layout.height_in_block, std::get<0>(format_type), std::get<1>(format_type), staging_buffer.data()); - } - } - else - { - for (const rsx_subresource_layout &layout : input_layouts) - { - u32 size = layout.width_in_block * layout.height_in_block * ((format == CELL_GCM_TEXTURE_COMPRESSED_DXT1) ? 8 : 16); - __glcheck upload_texture_subresource(staging_buffer, layout, format, is_swizzled, 4); - __glcheck glCompressedTexSubImage2D(GL_TEXTURE_2D, mip_level++, 0, 0, layout.width_in_block * 4, layout.height_in_block * 4, ::gl::get_sized_internal_format(format), size, staging_buffer.data()); - } - } - return; - } - - if (dim == rsx::texture_dimension_extended::texture_dimension_cubemap) - { - __glcheck glTexStorage2D(GL_TEXTURE_CUBE_MAP, mipmap_count, ::gl::get_sized_internal_format(format), width, height); - // Note : input_layouts size is get_exact_mipmap_count() for non cubemap texture, and 6 * get_exact_mipmap_count() for cubemap - // Thus for non cubemap texture, mip_level / mipmap_per_layer will always be rounded to 0. 
- // mip_level % mipmap_per_layer will always be equal to mip_level - if (!is_compressed_format(format)) - { - const auto &format_type = ::gl::get_format_type(format); - for (const rsx_subresource_layout &layout : input_layouts) - { - upload_texture_subresource(staging_buffer, layout, format, is_swizzled, 4); - __glcheck glTexSubImage2D(GL_TEXTURE_CUBE_MAP_POSITIVE_X + mip_level / mipmap_count, mip_level % mipmap_count, 0, 0, layout.width_in_block, layout.height_in_block, std::get<0>(format_type), std::get<1>(format_type), staging_buffer.data()); - mip_level++; - } - } - else - { - for (const rsx_subresource_layout &layout : input_layouts) - { - u32 size = layout.width_in_block * layout.height_in_block * ((format == CELL_GCM_TEXTURE_COMPRESSED_DXT1) ? 8 : 16); - __glcheck upload_texture_subresource(staging_buffer, layout, format, is_swizzled, 4); - __glcheck glCompressedTexSubImage2D(GL_TEXTURE_CUBE_MAP_POSITIVE_X + mip_level / mipmap_count, mip_level % mipmap_count, 0, 0, layout.width_in_block * 4, layout.height_in_block * 4, ::gl::get_sized_internal_format(format), size, staging_buffer.data()); - mip_level++; - } - } - return; - } - - if (dim == rsx::texture_dimension_extended::texture_dimension_3d) - { - __glcheck glTexStorage3D(GL_TEXTURE_3D, mipmap_count, ::gl::get_sized_internal_format(format), width, height, depth); - if (!is_compressed_format(format)) - { - const auto &format_type = ::gl::get_format_type(format); - for (const rsx_subresource_layout &layout : input_layouts) - { - __glcheck upload_texture_subresource(staging_buffer, layout, format, is_swizzled, 4); - __glcheck glTexSubImage3D(GL_TEXTURE_3D, mip_level++, 0, 0, 0, layout.width_in_block, layout.height_in_block, depth, std::get<0>(format_type), std::get<1>(format_type), staging_buffer.data()); - } - } - else - { - for (const rsx_subresource_layout &layout : input_layouts) - { - u32 size = layout.width_in_block * layout.height_in_block * layout.depth * ((format == CELL_GCM_TEXTURE_COMPRESSED_DXT1) 
? 8 : 16); - __glcheck upload_texture_subresource(staging_buffer, layout, format, is_swizzled, 4); - __glcheck glCompressedTexSubImage3D(GL_TEXTURE_3D, mip_level++, 0, 0, 0, layout.width_in_block * 4, layout.height_in_block * 4, layout.depth, ::gl::get_sized_internal_format(format), size, staging_buffer.data()); - } - } - return; - } - } + //Compressed formats have a 4-byte alignment + //TODO: Verify that samplers are not affected by the padding + width = align(width, 4); + height = align(height, 4); } - bool texture::mandates_expansion(u32 format) + if (dim == rsx::texture_dimension_extended::texture_dimension_1d) { - /** - * If a texture behaves differently when uploaded directly vs when uploaded via texutils methods, it should be added here. - */ - if (format == CELL_GCM_TEXTURE_A1R5G5B5) - return true; - - return false; + glTexStorage1D(GL_TEXTURE_1D, mipmap_count, get_sized_internal_format(format), width); + if (!is_compressed_format(format)) + { + const auto &format_type = get_format_type(format); + for (const rsx_subresource_layout &layout : input_layouts) + { + upload_texture_subresource(staging_buffer, layout, format, is_swizzled, 4); + glTexSubImage1D(GL_TEXTURE_1D, mip_level++, 0, layout.width_in_block, std::get<0>(format_type), std::get<1>(format_type), staging_buffer.data()); + } + } + else + { + for (const rsx_subresource_layout &layout : input_layouts) + { + u32 size = layout.width_in_block * ((format == CELL_GCM_TEXTURE_COMPRESSED_DXT1) ? 
8 : 16); + upload_texture_subresource(staging_buffer, layout, format, is_swizzled, 4); + glCompressedTexSubImage1D(GL_TEXTURE_1D, mip_level++, 0, layout.width_in_block * 4, get_sized_internal_format(format), size, staging_buffer.data()); + } + } + return; } - void texture::init(int index, rsx::fragment_texture& tex) + if (dim == rsx::texture_dimension_extended::texture_dimension_2d) { - switch (tex.dimension()) + if (!is_compressed_format(format)) { - case rsx::texture_dimension::dimension3d: - if (!tex.depth()) + const auto &format_type = get_format_type(format); + for (const rsx_subresource_layout &layout : input_layouts) { - return; + upload_texture_subresource(staging_buffer, layout, format, is_swizzled, 4); + glTexSubImage2D(GL_TEXTURE_2D, mip_level++, 0, 0, layout.width_in_block, layout.height_in_block, std::get<0>(format_type), std::get<1>(format_type), staging_buffer.data()); } - - case rsx::texture_dimension::dimension2d: - if (!tex.height()) - { - return; - } - - case rsx::texture_dimension::dimension1d: - if (!tex.width()) - { - return; - } - - break; } - - const u32 texaddr = rsx::get_address(tex.offset(), tex.location()); - - //We can't re-use texture handles if using immutable storage - if (m_id) + else { - __glcheck remove(); + for (const rsx_subresource_layout &layout : input_layouts) + { + u32 size = layout.width_in_block * layout.height_in_block * ((format == CELL_GCM_TEXTURE_COMPRESSED_DXT1) ? 
8 : 16); + upload_texture_subresource(staging_buffer, layout, format, is_swizzled, 4); + glCompressedTexSubImage2D(GL_TEXTURE_2D, mip_level++, 0, 0, layout.width_in_block * 4, layout.height_in_block * 4, get_sized_internal_format(format), size, staging_buffer.data()); + } } - __glcheck create(); + return; + } - __glcheck glActiveTexture(GL_TEXTURE0 + index); - bind(); + if (dim == rsx::texture_dimension_extended::texture_dimension_cubemap) + { + // Note : input_layouts size is get_exact_mipmap_count() for non cubemap texture, and 6 * get_exact_mipmap_count() for cubemap + // Thus for non cubemap texture, mip_level / mipmap_per_layer will always be rounded to 0. + // mip_level % mipmap_per_layer will always be equal to mip_level + if (!is_compressed_format(format)) + { + const auto &format_type = get_format_type(format); + for (const rsx_subresource_layout &layout : input_layouts) + { + upload_texture_subresource(staging_buffer, layout, format, is_swizzled, 4); + glTexSubImage2D(GL_TEXTURE_CUBE_MAP_POSITIVE_X + mip_level / mipmap_count, mip_level % mipmap_count, 0, 0, layout.width_in_block, layout.height_in_block, std::get<0>(format_type), std::get<1>(format_type), staging_buffer.data()); + mip_level++; + } + } + else + { + for (const rsx_subresource_layout &layout : input_layouts) + { + u32 size = layout.width_in_block * layout.height_in_block * ((format == CELL_GCM_TEXTURE_COMPRESSED_DXT1) ? 
8 : 16); + upload_texture_subresource(staging_buffer, layout, format, is_swizzled, 4); + glCompressedTexSubImage2D(GL_TEXTURE_CUBE_MAP_POSITIVE_X + mip_level / mipmap_count, mip_level % mipmap_count, 0, 0, layout.width_in_block * 4, layout.height_in_block * 4, get_sized_internal_format(format), size, staging_buffer.data()); + mip_level++; + } + } + return; + } - u32 full_format = tex.format(); + if (dim == rsx::texture_dimension_extended::texture_dimension_3d) + { + if (!is_compressed_format(format)) + { + const auto &format_type = get_format_type(format); + for (const rsx_subresource_layout &layout : input_layouts) + { + upload_texture_subresource(staging_buffer, layout, format, is_swizzled, 4); + glTexSubImage3D(GL_TEXTURE_3D, mip_level++, 0, 0, 0, layout.width_in_block, layout.height_in_block, depth, std::get<0>(format_type), std::get<1>(format_type), staging_buffer.data()); + } + } + else + { + for (const rsx_subresource_layout &layout : input_layouts) + { + u32 size = layout.width_in_block * layout.height_in_block * layout.depth * ((format == CELL_GCM_TEXTURE_COMPRESSED_DXT1) ? 
8 : 16); + upload_texture_subresource(staging_buffer, layout, format, is_swizzled, 4); + glCompressedTexSubImage3D(GL_TEXTURE_3D, mip_level++, 0, 0, 0, layout.width_in_block * 4, layout.height_in_block * 4, layout.depth, get_sized_internal_format(format), size, staging_buffer.data()); + } + } + return; + } + } - u32 format = full_format & ~(CELL_GCM_TEXTURE_LN | CELL_GCM_TEXTURE_UN); - bool is_swizzled = !!(~full_format & CELL_GCM_TEXTURE_LN); + void upload_texture(const GLuint id, const u32 texaddr, const u32 gcm_format, u16 width, u16 height, u16 depth, u16 mipmaps, u16 pitch, bool is_swizzled, rsx::texture_dimension_extended type, + std::vector& subresources_layout, std::pair, std::array>& decoded_remap, bool static_state) + { + const bool is_cubemap = type == rsx::texture_dimension_extended::texture_dimension_cubemap; + + size_t texture_data_sz = get_placed_texture_storage_size(width, height, depth, gcm_format, mipmaps, is_cubemap, 256, 512); + std::vector data_upload_buf(texture_data_sz); - __glcheck ::gl::pixel_pack_settings().apply(); - __glcheck ::gl::pixel_unpack_settings().apply(); + const std::array& glRemap = get_swizzle_remap(gcm_format); - u32 aligned_pitch = tex.pitch(); + GLenum target; + GLenum remap_values[4]; - size_t texture_data_sz = get_placed_texture_storage_size(tex, 256); - std::vector data_upload_buf(texture_data_sz); - u32 block_sz = get_pitch_modifier(format); + switch (type) + { + case rsx::texture_dimension_extended::texture_dimension_1d: + target = GL_TEXTURE_1D; + break; + case rsx::texture_dimension_extended::texture_dimension_2d: + target = GL_TEXTURE_2D; + break; + case rsx::texture_dimension_extended::texture_dimension_3d: + target = GL_TEXTURE_3D; + break; + case rsx::texture_dimension_extended::texture_dimension_cubemap: + target = GL_TEXTURE_CUBE_MAP; + break; + } - __glcheck glPixelStorei(GL_UNPACK_ALIGNMENT, 4); + glBindTexture(target, id); + glPixelStorei(GL_UNPACK_ALIGNMENT, 4); + glTexParameteri(target, 
GL_TEXTURE_BASE_LEVEL, 0); + glTexParameteri(target, GL_TEXTURE_MAX_LEVEL, mipmaps - 1); - __glcheck create_and_fill_texture(tex.get_extended_texture_dimension(), tex.get_exact_mipmap_count(), format, tex.width(), tex.height(), tex.depth(), get_subresources_layout(tex), is_swizzled, data_upload_buf); + if (static_state) + { + //Usually for vertex textures - const std::array& glRemap = get_swizzle_remap(format); + glTexParameteri(target, GL_TEXTURE_SWIZZLE_A, glRemap[0]); + glTexParameteri(target, GL_TEXTURE_SWIZZLE_R, glRemap[1]); + glTexParameteri(target, GL_TEXTURE_SWIZZLE_G, glRemap[2]); + glTexParameteri(target, GL_TEXTURE_SWIZZLE_B, glRemap[3]); - glTexParameteri(m_target, GL_TEXTURE_BASE_LEVEL, 0); - glTexParameteri(m_target, GL_TEXTURE_MAX_LEVEL, tex.get_exact_mipmap_count() - 1); - - auto decoded_remap = tex.decoded_remap(); + glTexParameteri(target, GL_TEXTURE_WRAP_S, GL_REPEAT); + glTexParameteri(target, GL_TEXTURE_WRAP_T, GL_REPEAT); + glTexParameteri(target, GL_TEXTURE_WRAP_R, GL_REPEAT); + glTexParameteri(target, GL_TEXTURE_MIN_FILTER, GL_NEAREST); + glTexParameteri(target, GL_TEXTURE_MAG_FILTER, GL_NEAREST); + glTexParameterf(target, GL_TEXTURE_MAX_ANISOTROPY_EXT, 1.f); + } + else + { //Remapping tables; format is A-R-G-B //Remap input table. Contains channel index to read color from const auto remap_inputs = decoded_remap.first; @@ -578,8 +503,6 @@ namespace rsx //Remap control table. 
Controls whether the remap value is used, or force either 0 or 1 const auto remap_lookup = decoded_remap.second; - GLenum remap_values[4]; - for (u8 channel = 0; channel < 4; ++channel) { switch (remap_lookup[channel]) @@ -598,109 +521,14 @@ namespace rsx } } - __glcheck glTexParameteri(m_target, GL_TEXTURE_SWIZZLE_A, remap_values[0]); - __glcheck glTexParameteri(m_target, GL_TEXTURE_SWIZZLE_R, remap_values[1]); - __glcheck glTexParameteri(m_target, GL_TEXTURE_SWIZZLE_G, remap_values[2]); - __glcheck glTexParameteri(m_target, GL_TEXTURE_SWIZZLE_B, remap_values[3]); - - //The rest of sampler state is now handled by sampler state objects + glTexParameteri(target, GL_TEXTURE_SWIZZLE_A, remap_values[0]); + glTexParameteri(target, GL_TEXTURE_SWIZZLE_R, remap_values[1]); + glTexParameteri(target, GL_TEXTURE_SWIZZLE_G, remap_values[2]); + glTexParameteri(target, GL_TEXTURE_SWIZZLE_B, remap_values[3]); } - void texture::init(int index, rsx::vertex_texture& tex) - { - switch (tex.dimension()) - { - case rsx::texture_dimension::dimension3d: - if (!tex.depth()) - { - return; - } + //The rest of sampler state is now handled by sampler state objects - case rsx::texture_dimension::dimension2d: - if (!tex.height()) - { - return; - } - - case rsx::texture_dimension::dimension1d: - if (!tex.width()) - { - return; - } - - break; - } - - const u32 texaddr = rsx::get_address(tex.offset(), tex.location()); - - //We can't re-use texture handles if using immutable storage - if (m_id) - { - __glcheck remove(); - } - __glcheck create(); - - __glcheck glActiveTexture(GL_TEXTURE0 + index); - bind(); - - u32 full_format = tex.format(); - - u32 format = full_format & ~(CELL_GCM_TEXTURE_LN | CELL_GCM_TEXTURE_UN); - bool is_swizzled = !!(~full_format & CELL_GCM_TEXTURE_LN); - - __glcheck::gl::pixel_pack_settings().apply(); - __glcheck::gl::pixel_unpack_settings().apply(); - - u32 aligned_pitch = tex.pitch(); - - size_t texture_data_sz = get_placed_texture_storage_size(tex, 256); - std::vector 
data_upload_buf(texture_data_sz); - u32 block_sz = get_pitch_modifier(format); - - __glcheck glPixelStorei(GL_UNPACK_ALIGNMENT, 4); - - __glcheck create_and_fill_texture(tex.get_extended_texture_dimension(), tex.get_exact_mipmap_count(), format, tex.width(), tex.height(), tex.depth(), get_subresources_layout(tex), is_swizzled, data_upload_buf); - - const std::array& glRemap = get_swizzle_remap(format); - - glTexParameteri(m_target, GL_TEXTURE_MAX_LEVEL, tex.get_exact_mipmap_count() - 1); - - __glcheck glTexParameteri(m_target, GL_TEXTURE_SWIZZLE_A, glRemap[0]); - __glcheck glTexParameteri(m_target, GL_TEXTURE_SWIZZLE_R, glRemap[1]); - __glcheck glTexParameteri(m_target, GL_TEXTURE_SWIZZLE_G, glRemap[2]); - __glcheck glTexParameteri(m_target, GL_TEXTURE_SWIZZLE_B, glRemap[3]); - - __glcheck glTexParameteri(m_target, GL_TEXTURE_WRAP_S, GL_REPEAT); - __glcheck glTexParameteri(m_target, GL_TEXTURE_WRAP_T, GL_REPEAT); - __glcheck glTexParameteri(m_target, GL_TEXTURE_WRAP_R, GL_REPEAT); - - __glcheck glTexParameteri(m_target, GL_TEXTURE_MIN_FILTER, GL_NEAREST); - __glcheck glTexParameteri(m_target, GL_TEXTURE_MAG_FILTER, GL_NEAREST); - __glcheck glTexParameterf(m_target, GL_TEXTURE_MAX_ANISOTROPY_EXT, 1.f); - } - - void texture::bind() - { - glBindTexture(m_target, m_id); - } - - void texture::unbind() - { - glBindTexture(m_target, 0); - } - - void texture::remove() - { - if (m_id) - { - glDeleteTextures(1, &m_id); - m_id = 0; - } - } - - u32 texture::id() const - { - return m_id; - } + fill_texture(type, mipmaps, gcm_format, width, height, depth, subresources_layout, is_swizzled, data_upload_buf); } } diff --git a/rpcs3/Emu/RSX/GL/GLTexture.h b/rpcs3/Emu/RSX/GL/GLTexture.h index 464a9c05a6..1f7ba0f075 100644 --- a/rpcs3/Emu/RSX/GL/GLTexture.h +++ b/rpcs3/Emu/RSX/GL/GLTexture.h @@ -1,5 +1,6 @@ #include "OpenGL.h" #include "../GCM.h" +#include "../Common/TextureUtils.h" namespace rsx { @@ -14,6 +15,11 @@ namespace gl GLenum wrap_mode(rsx::texture_wrap_mode wrap); float 
max_aniso(rsx::texture_max_anisotropy aniso); + GLuint create_texture(u32 gcm_format, u16 width, u16 height, u16 depth, u16 mipmaps, rsx::texture_dimension_extended type); + + void upload_texture(const GLuint id, const u32 texaddr, const u32 gcm_format, u16 width, u16 height, u16 depth, u16 mipmaps, u16 pitch, bool is_swizzled, rsx::texture_dimension_extended type, + std::vector& subresources_layout, std::pair, std::array>& decoded_remap, bool static_state); + class sampler_state { GLuint samplerHandle = 0; @@ -38,42 +44,3 @@ namespace gl void apply(rsx::fragment_texture& tex); }; } - -namespace rsx -{ - namespace gl - { - class texture - { - u32 m_id = 0; - u32 m_target = GL_TEXTURE_2D; - - public: - void create(); - - void init(int index, rsx::fragment_texture& tex); - void init(int index, rsx::vertex_texture& tex); - - /** - * If a format is marked as mandating expansion, any request to have the data uploaded to the GPU shall require that the pixel data - * be decoded/expanded fully, regardless of whether the input is swizzled. This is because some formats behave differently when swizzled pixel data - * is decoded and when data is fed directly, usually byte order is not the same. Forcing decoding/expanding fixes this but slows performance. - */ - static bool mandates_expansion(u32 format); - - /** - * The pitch modifier changes the pitch value supplied by the rsx::texture by supplying a suitable divisor or 0 if no change is needed. - * The modified value, if any, is then used to supply to GL the UNPACK_ROW_LENGTH for the texture data to be supplied. 
- */ - static u16 get_pitch_modifier(u32 format); - - void bind(); - void unbind(); - void remove(); - - void set_target(u32 target) { m_target = target; } - void set_id(u32 id) { m_id = id; } - u32 id() const; - }; - } -} diff --git a/rpcs3/Emu/RSX/GL/GLTextureCache.cpp b/rpcs3/Emu/RSX/GL/GLTextureCache.cpp deleted file mode 100644 index 2ba489f825..0000000000 --- a/rpcs3/Emu/RSX/GL/GLTextureCache.cpp +++ /dev/null @@ -1,63 +0,0 @@ -#include "stdafx.h" -#include "GLGSRender.h" -#include "GLTextureCache.h" - -namespace gl -{ - bool texture_cache::flush_section(u32 address) - { - if (address < no_access_range.first || - address >= no_access_range.second) - return false; - - bool post_task = false; - cached_texture_section* section_to_post = nullptr; - - { - rsx::conditional_lock lock(in_access_violation_handler, m_section_mutex); - - for (cached_texture_section &tex : no_access_memory_sections) - { - if (tex.is_dirty()) continue; - - if (tex.is_locked() && tex.overlaps(address)) - { - if (tex.is_flushed()) - { - LOG_WARNING(RSX, "Section matches range, but marked as already flushed!, 0x%X+0x%X", tex.get_section_base(), tex.get_section_size()); - continue; - } - - //LOG_WARNING(RSX, "Cell needs GPU data synced here, address=0x%X", address); - - if (std::this_thread::get_id() != m_renderer_thread) - { - post_task = true; - section_to_post = &tex; - break; - } - - tex.flush(); - return true; - } - } - } - - if (post_task) - { - //LOG_WARNING(RSX, "Cache access not from worker thread! 
address = 0x%X", address); - work_item &task = m_renderer->post_flush_request(address, section_to_post); - - vm::temporary_unlock(); - { - std::unique_lock lock(task.guard_mutex); - task.cv.wait(lock, [&task] { return task.processed; }); - } - - task.received = true; - return task.result; - } - - return false; - } -} diff --git a/rpcs3/Emu/RSX/GL/GLTextureCache.h b/rpcs3/Emu/RSX/GL/GLTextureCache.h index 490559b6a2..1bac9f2e27 100644 --- a/rpcs3/Emu/RSX/GL/GLTextureCache.h +++ b/rpcs3/Emu/RSX/GL/GLTextureCache.h @@ -15,6 +15,7 @@ #include "Emu/System.h" #include "GLRenderTargets.h" #include "../Common/TextureUtils.h" +#include "../Common/texture_cache.h" #include "../../Memory/vm.h" #include "../rsx_utils.h" @@ -22,336 +23,399 @@ class GLGSRender; namespace gl { - class texture_cache + extern GLenum get_sized_internal_format(u32); + + class cached_texture_section : public rsx::buffered_section { - public: - class cached_texture_section : public rsx::buffered_section + private: + fence m_fence; + u32 pbo_id = 0; + u32 pbo_size = 0; + + u32 vram_texture = 0; + + bool copied = false; + bool flushed = false; + bool is_depth = false; + + u32 current_width = 0; + u32 current_height = 0; + u32 current_pitch = 0; + u32 real_pitch = 0; + + texture::format format = texture::format::rgba; + texture::type type = texture::type::ubyte; + bool pack_unpack_swap_bytes = false; + + u8 get_pixel_size(texture::format fmt_, texture::type type_) { - private: - fence m_fence; - u32 pbo_id = 0; - u32 pbo_size = 0; - - u32 vram_texture = 0; - - bool copied = false; - bool flushed = false; - bool is_depth = false; - - u32 current_width = 0; - u32 current_height = 0; - u32 current_pitch = 0; - u32 real_pitch = 0; - - texture::format format = texture::format::rgba; - texture::type type = texture::type::ubyte; - bool pack_unpack_swap_bytes = false; - - u8 get_pixel_size(texture::format fmt_, texture::type type_) + u8 size = 1; + switch (type_) { - u8 size = 1; - switch (type_) - { - case 
texture::type::ubyte: - case texture::type::sbyte: - break; - case texture::type::ushort: - case texture::type::sshort: - case texture::type::f16: - size = 2; - break; - case texture::type::ushort_5_6_5: - case texture::type::ushort_5_6_5_rev: - case texture::type::ushort_4_4_4_4: - case texture::type::ushort_4_4_4_4_rev: - case texture::type::ushort_5_5_5_1: - case texture::type::ushort_1_5_5_5_rev: - return 2; - case texture::type::uint_8_8_8_8: - case texture::type::uint_8_8_8_8_rev: - case texture::type::uint_10_10_10_2: - case texture::type::uint_2_10_10_10_rev: - case texture::type::uint_24_8: - return 4; - case texture::type::f32: - case texture::type::sint: - case texture::type::uint: - size = 4; - break; - } - - switch (fmt_) - { - case texture::format::red: - case texture::format::r: - break; - case texture::format::rg: - size *= 2; - break; - case texture::format::rgb: - case texture::format::bgr: - size *= 3; - break; - case texture::format::rgba: - case texture::format::bgra: - size *= 4; - break; - - //Depth formats.. 
- case texture::format::depth: - size = 2; - break; - case texture::format::depth_stencil: - size = 4; - break; - default: - LOG_ERROR(RSX, "Unsupported rtt format %d", (GLenum)fmt_); - size = 4; - } - - return size; + case texture::type::ubyte: + case texture::type::sbyte: + break; + case texture::type::ushort: + case texture::type::sshort: + case texture::type::f16: + size = 2; + break; + case texture::type::ushort_5_6_5: + case texture::type::ushort_5_6_5_rev: + case texture::type::ushort_4_4_4_4: + case texture::type::ushort_4_4_4_4_rev: + case texture::type::ushort_5_5_5_1: + case texture::type::ushort_1_5_5_5_rev: + return 2; + case texture::type::uint_8_8_8_8: + case texture::type::uint_8_8_8_8_rev: + case texture::type::uint_10_10_10_2: + case texture::type::uint_2_10_10_10_rev: + case texture::type::uint_24_8: + return 4; + case texture::type::f32: + case texture::type::sint: + case texture::type::uint: + size = 4; + break; } - void init_buffer() + switch (fmt_) { - if (pbo_id) - { - glDeleteBuffers(1, &pbo_id); - pbo_id = 0; - pbo_size = 0; - } + case texture::format::red: + case texture::format::r: + break; + case texture::format::rg: + size *= 2; + break; + case texture::format::rgb: + case texture::format::bgr: + size *= 3; + break; + case texture::format::rgba: + case texture::format::bgra: + size *= 4; + break; - glGenBuffers(1, &pbo_id); - - const u32 buffer_size = align(cpu_address_range, 4096); - glBindBuffer(GL_PIXEL_PACK_BUFFER, pbo_id); - glBufferStorage(GL_PIXEL_PACK_BUFFER, buffer_size, nullptr, GL_MAP_READ_BIT); - - pbo_size = buffer_size; + //Depth formats.. 
+ case texture::format::depth: + size = 2; + break; + case texture::format::depth_stencil: + size = 4; + break; + default: + LOG_ERROR(RSX, "Unsupported rtt format %d", (GLenum)fmt_); + size = 4; } - public: + return size; + } - void reset(const u32 base, const u32 size, const bool flushable) + void init_buffer() + { + if (pbo_id) { - rsx::protection_policy policy = g_cfg.video.strict_rendering_mode ? rsx::protection_policy::protect_policy_full_range : rsx::protection_policy::protect_policy_one_page; - rsx::buffered_section::reset(base, size, policy); - - if (flushable) - init_buffer(); - - flushed = false; - copied = false; - is_depth = false; + glDeleteBuffers(1, &pbo_id); + pbo_id = 0; + pbo_size = 0; + } + glGenBuffers(1, &pbo_id); + + const u32 buffer_size = align(cpu_address_range, 4096); + glBindBuffer(GL_PIXEL_PACK_BUFFER, pbo_id); + glBufferStorage(GL_PIXEL_PACK_BUFFER, buffer_size, nullptr, GL_MAP_READ_BIT); + + pbo_size = buffer_size; + } + + public: + + void reset(const u32 base, const u32 size, const bool flushable=false) + { + rsx::protection_policy policy = g_cfg.video.strict_rendering_mode ? 
rsx::protection_policy::protect_policy_full_range : rsx::protection_policy::protect_policy_one_page; + rsx::buffered_section::reset(base, size, policy); + + if (flushable) + init_buffer(); + + flushed = false; + copied = false; + is_depth = false; + + vram_texture = 0; + } + + void create(const u16 w, const u16 h, const u16 /*depth*/, const u16 /*mipmaps*/, void*, + gl::texture* image, const u32 native_pitch, bool read_only, + gl::texture::format gl_format, gl::texture::type gl_type, bool swap_bytes) + { + if (!read_only && pbo_id == 0) + init_buffer(); + + flushed = false; + copied = false; + is_depth = false; + + current_width = w; + current_height = h; + current_pitch = native_pitch; + + vram_texture = image->id(); + set_format(gl_format, gl_type, swap_bytes); + } + + void create_read_only(const u32 id, const u32 width, const u32 height) + { + //Only to be used for ro memory, we dont care about most members, just dimensions and the vram texture handle + current_width = width; + current_height = height; + vram_texture = id; + + current_pitch = 0; + real_pitch = 0; + } + + bool matches(const u32 rsx_address, const u32 rsx_size) + { + return rsx::buffered_section::matches(rsx_address, rsx_size); + } + + bool matches(const u32 rsx_address, const u32 width, const u32 height, const u32 mipmaps) + { + if (cpu_address_base == rsx_address && !dirty) + { + //Mostly only used to debug; matches textures without checking dimensions + if (width == 0 && height == 0) + return true; + + return (current_width == width && current_height == height); + } + + return false; + } + + void set_dimensions(u32 width, u32 height, u32 pitch) + { + current_width = width; + current_height = height; + current_pitch = pitch; + + real_pitch = width * get_pixel_size(format, type); + } + + void set_format(const texture::format gl_format, const texture::type gl_type, const bool swap_bytes) + { + format = gl_format; + type = gl_type; + pack_unpack_swap_bytes = swap_bytes; + + real_pitch = 
current_width * get_pixel_size(format, type); + } + + void set_depth_flag(bool is_depth_fmt) + { + is_depth = is_depth_fmt; + } + + void set_source(gl::texture &source) + { + vram_texture = source.id(); + } + + void copy_texture(bool=false) + { + if (!glIsTexture(vram_texture)) + { + LOG_ERROR(RSX, "Attempted to download rtt texture, but texture handle was invalid! (0x%X)", vram_texture); + return; + } + + glPixelStorei(GL_PACK_SWAP_BYTES, pack_unpack_swap_bytes); + glBindBuffer(GL_PIXEL_PACK_BUFFER, pbo_id); + + if (get_driver_caps().EXT_dsa_supported) + glGetTextureImageEXT(vram_texture, GL_TEXTURE_2D, 0, (GLenum)format, (GLenum)type, nullptr); + else + glGetTextureImage(vram_texture, 0, (GLenum)format, (GLenum)type, pbo_size, nullptr); + + glBindBuffer(GL_PIXEL_PACK_BUFFER, 0); + + m_fence.reset(); + copied = true; + } + + void fill_texture(gl::texture* tex) + { + if (!copied) + { + //LOG_WARNING(RSX, "Request to fill texture rejected because contents were not read"); + return; + } + + u32 min_width = std::min((u32)tex->width(), current_width); + u32 min_height = std::min((u32)tex->height(), current_height); + + tex->bind(); + glPixelStorei(GL_UNPACK_SWAP_BYTES, pack_unpack_swap_bytes); + glBindBuffer(GL_PIXEL_UNPACK_BUFFER, pbo_id); + glTexSubImage2D((GLenum)tex->get_target(), 0, 0, 0, min_width, min_height, (GLenum)format, (GLenum)type, nullptr); + glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0); + } + + bool flush() + { + if (!copied) + { + LOG_WARNING(RSX, "Cache miss at address 0x%X. 
This is gonna hurt...", cpu_address_base); + copy_texture(); + + if (!copied) + { + LOG_WARNING(RSX, "Nothing to copy; Setting section to readable and moving on..."); + protect(utils::protection::ro); + return false; + } + } + + protect(utils::protection::rw); + m_fence.wait_for_signal(); + flushed = true; + + glBindBuffer(GL_PIXEL_PACK_BUFFER, pbo_id); + void *data = glMapBufferRange(GL_PIXEL_PACK_BUFFER, 0, pbo_size, GL_MAP_READ_BIT); + u8 *dst = vm::ps3::_ptr(cpu_address_base); + + //throw if map failed since we'll segfault anyway + verify(HERE), data != nullptr; + + if (real_pitch >= current_pitch) + { + memcpy(dst, data, cpu_address_range); + } + else + { + //TODO: Use compression hint from the gcm tile information + //TODO: Fall back to bilinear filtering if samples > 2 + + const u8 pixel_size = get_pixel_size(format, type); + const u8 samples = current_pitch / real_pitch; + rsx::scale_image_nearest(dst, const_cast(data), current_width, current_height, current_pitch, real_pitch, pixel_size, samples); + } + + glUnmapBuffer(GL_PIXEL_PACK_BUFFER); + glBindBuffer(GL_PIXEL_PACK_BUFFER, 0); + protect(utils::protection::ro); + + return true; + } + + void destroy() + { + if (!locked && pbo_id == 0 && vram_texture == 0 && m_fence.is_empty()) + //Already destroyed + return; + + if (locked) + unprotect(); + + if (pbo_id == 0) + { + //Read-only texture, destroy texture memory + glDeleteTextures(1, &vram_texture); vram_texture = 0; } - - void create_read_only(const u32 id, const u32 width, const u32 height) + else { - //Only to be used for ro memory, we dont care about most members, just dimensions and the vram texture handle - current_width = width; - current_height = height; - vram_texture = id; - - current_pitch = 0; - real_pitch = 0; + //Destroy pbo cache since vram texture is managed elsewhere + glDeleteBuffers(1, &pbo_id); + pbo_id = 0; + pbo_size = 0; } - bool matches(const u32 rsx_address, const u32 rsx_size) + m_fence.destroy(); + } + + texture::format 
get_format() const + { + return format; + } + + bool exists() const + { + return vram_texture != 0; + } + + bool is_flushable() const + { + return pbo_id != 0; + } + + bool is_flushed() const + { + return flushed; + } + + bool is_synchronized() const + { + return copied; + } + + void set_flushed(const bool state) + { + flushed = state; + } + + bool is_empty() const + { + return vram_texture == 0; + } + + u32 get_raw_view() const + { + return vram_texture; + } + + u32 get_raw_texture() const + { + return vram_texture; + } + + u32 get_width() const + { + return current_width; + } + + u32 get_height() const + { + return current_height; + } + + bool is_depth_texture() const + { + return is_depth; + } + + bool has_compatible_format(gl::texture* tex) const + { + GLenum fmt; + glBindTexture(GL_TEXTURE_2D, vram_texture); + glGetTexLevelParameteriv(GL_TEXTURE_2D, 0, GL_TEXTURE_INTERNAL_FORMAT, (GLint*)&fmt); + + if (auto as_rtt = dynamic_cast(tex)) { - return rsx::buffered_section::matches(rsx_address, rsx_size); + return (GLenum)as_rtt->get_compatible_internal_format() == fmt; } - bool matches(const u32 rsx_address, const u32 width, const u32 height) - { - if (cpu_address_base == rsx_address && !dirty) - { - //Mostly only used to debug; matches textures without checking dimensions - if (width == 0 && height == 0) - return true; - - return (current_width == width && current_height == height); - } - - return false; - } - - void set_dimensions(u32 width, u32 height, u32 pitch) - { - current_width = width; - current_height = height; - current_pitch = pitch; - - real_pitch = width * get_pixel_size(format, type); - } - - void set_format(const texture::format gl_format, const texture::type gl_type, const bool swap_bytes) - { - format = gl_format; - type = gl_type; - pack_unpack_swap_bytes = swap_bytes; - - real_pitch = current_width * get_pixel_size(format, type); - } - - void set_depth_flag(bool is_depth_fmt) - { - is_depth = is_depth_fmt; - } - - void set_source(gl::texture 
&source) - { - vram_texture = source.id(); - } - - void copy_texture() - { - if (!glIsTexture(vram_texture)) - { - LOG_ERROR(RSX, "Attempted to download rtt texture, but texture handle was invalid! (0x%X)", vram_texture); - return; - } - - glPixelStorei(GL_PACK_SWAP_BYTES, pack_unpack_swap_bytes); - glBindBuffer(GL_PIXEL_PACK_BUFFER, pbo_id); - - if (get_driver_caps().EXT_dsa_supported) - glGetTextureImageEXT(vram_texture, GL_TEXTURE_2D, 0, (GLenum)format, (GLenum)type, nullptr); - else - glGetTextureImage(vram_texture, 0, (GLenum)format, (GLenum)type, pbo_size, nullptr); - - glBindBuffer(GL_PIXEL_PACK_BUFFER, 0); - - m_fence.reset(); - copied = true; - } - - void fill_texture(gl::texture &tex) - { - if (!copied) - { - //LOG_WARNING(RSX, "Request to fill texture rejected because contents were not read"); - return; - } - - u32 min_width = std::min((u32)tex.width(), current_width); - u32 min_height = std::min((u32)tex.height(), current_height); - - tex.bind(); - glPixelStorei(GL_UNPACK_SWAP_BYTES, pack_unpack_swap_bytes); - glBindBuffer(GL_PIXEL_UNPACK_BUFFER, pbo_id); - glTexSubImage2D((GLenum)tex.get_target(), 0, 0, 0, min_width, min_height, (GLenum)format, (GLenum)type, nullptr); - glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0); - } - - void flush() - { - if (!copied) - { - LOG_WARNING(RSX, "Cache miss at address 0x%X. 
This is gonna hurt...", cpu_address_base); - copy_texture(); - - if (!copied) - { - LOG_WARNING(RSX, "Nothing to copy; Setting section to readable and moving on..."); - protect(utils::protection::ro); - return; - } - } - - protect(utils::protection::rw); - m_fence.wait_for_signal(); - flushed = true; - - glBindBuffer(GL_PIXEL_PACK_BUFFER, pbo_id); - void *data = glMapBufferRange(GL_PIXEL_PACK_BUFFER, 0, pbo_size, GL_MAP_READ_BIT); - u8 *dst = vm::ps3::_ptr(cpu_address_base); - - //throw if map failed since we'll segfault anyway - verify(HERE), data != nullptr; - - if (real_pitch >= current_pitch) - { - memcpy(dst, data, cpu_address_range); - } - else - { - //TODO: Use compression hint from the gcm tile information - //TODO: Fall back to bilinear filtering if samples > 2 - - const u8 pixel_size = get_pixel_size(format, type); - const u8 samples = current_pitch / real_pitch; - rsx::scale_image_nearest(dst, const_cast(data), current_width, current_height, current_pitch, real_pitch, pixel_size, samples); - } - - glUnmapBuffer(GL_PIXEL_PACK_BUFFER); - glBindBuffer(GL_PIXEL_PACK_BUFFER, 0); - protect(utils::protection::ro); - } - - void destroy() - { - if (!locked && pbo_id == 0 && vram_texture == 0 && m_fence.is_empty()) - //Already destroyed - return; - - if (locked) - unprotect(); - - if (pbo_id == 0) - { - //Read-only texture, destroy texture memory - glDeleteTextures(1, &vram_texture); - vram_texture = 0; - } - else - { - //Destroy pbo cache since vram texture is managed elsewhere - glDeleteBuffers(1, &pbo_id); - pbo_id = 0; - pbo_size = 0; - } - - m_fence.destroy(); - } - - bool is_flushed() const - { - return flushed; - } - - void set_flushed(const bool state) - { - flushed = state; - } - - void set_copied(const bool state) - { - copied = state; - } - - bool is_empty() const - { - return vram_texture == 0; - } - - const u32 id() - { - return vram_texture; - } - - std::tuple get_dimensions() - { - return std::make_tuple(current_width, current_height); - } - - bool 
is_depth_texture() const - { - return is_depth; - } - }; - + return (gl::texture::format)fmt == tex->get_internal_format(); + } + }; + + class texture_cache : public rsx::texture_cache + { + private: + class blitter { fbo blit_src; @@ -371,8 +435,7 @@ namespace gl blit_src.remove(); } - u32 scale_image(u32 src, u32 dst, const areai src_rect, const areai dst_rect, const GLenum dst_format, const position2i dst_offset, const position2i /*clip_offset*/, - const size2i dst_dims, const size2i clip_dims, bool /*is_argb8*/, bool is_depth_copy, bool linear_interpolation) + u32 scale_image(u32 src, u32 dst, const areai src_rect, const areai dst_rect, bool linear_interpolation, bool is_depth_copy) { s32 old_fbo = 0; glGetIntegerv(GL_FRAMEBUFFER_BINDING, &old_fbo); @@ -380,15 +443,6 @@ namespace gl u32 dst_tex = dst; filter interp = linear_interpolation ? filter::linear : filter::nearest; - if (!dst_tex) - { - glGenTextures(1, &dst_tex); - glBindTexture(GL_TEXTURE_2D, dst_tex); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR); - glTexStorage2D(GL_TEXTURE_2D, 1, dst_format, dst_dims.width, dst_dims.height); - } - GLenum attachment = is_depth_copy ? 
GL_DEPTH_ATTACHMENT : GL_COLOR_ATTACHMENT0; blit_src.bind(); @@ -399,30 +453,11 @@ namespace gl glFramebufferTexture2D(GL_FRAMEBUFFER, attachment, GL_TEXTURE_2D, dst_tex, 0); blit_dst.check(); - u32 src_width = src_rect.x2 - src_rect.x1; - u32 src_height = src_rect.y2 - src_rect.y1; - u32 dst_width = dst_rect.x2 - dst_rect.x1; - u32 dst_height = dst_rect.y2 - dst_rect.y1; - - if (clip_dims.width != dst_width || - clip_dims.height != dst_height) - { - //clip reproject - src_width = (src_width * clip_dims.width) / dst_width; - src_height = (src_height * clip_dims.height) / dst_height; - } - GLboolean scissor_test_enabled = glIsEnabled(GL_SCISSOR_TEST); if (scissor_test_enabled) glDisable(GL_SCISSOR_TEST); - areai dst_area = dst_rect; - dst_area.x1 += dst_offset.x; - dst_area.x2 += dst_offset.x; - dst_area.y1 += dst_offset.y; - dst_area.y2 += dst_offset.y; - - blit_src.blit(blit_dst, src_rect, dst_area, is_depth_copy ? buffers::depth : buffers::color, interp); + blit_src.blit(blit_dst, src_rect, dst_rect, is_depth_copy ? 
buffers::depth : buffers::color, interp); if (scissor_test_enabled) glEnable(GL_SCISSOR_TEST); @@ -433,109 +468,44 @@ namespace gl }; private: - std::vector read_only_memory_sections; - std::vector no_access_memory_sections; - std::vector m_temporary_surfaces; - - std::pair read_only_range = std::make_pair(0xFFFFFFFF, 0); - std::pair no_access_range = std::make_pair(0xFFFFFFFF, 0); blitter m_hw_blitter; - - std::atomic_bool in_access_violation_handler = { false }; - shared_mutex m_section_mutex; - - GLGSRender *m_renderer; - std::thread::id m_renderer_thread; + std::vector m_temporary_surfaces; cached_texture_section& create_texture(u32 id, u32 texaddr, u32 texsize, u32 w, u32 h) { - for (cached_texture_section &tex : read_only_memory_sections) - { - if (tex.is_dirty()) - { - tex.destroy(); - tex.reset(texaddr, texsize, false); - tex.create_read_only(id, w, h); - - read_only_range = tex.get_min_max(read_only_range); - return tex; - } - } - - cached_texture_section tex; + cached_texture_section& tex = find_cached_texture(texaddr, texsize, true, w, h); tex.reset(texaddr, texsize, false); tex.create_read_only(id, w, h); read_only_range = tex.get_min_max(read_only_range); - - read_only_memory_sections.push_back(tex); - return read_only_memory_sections.back(); + return tex; } void clear() { - for (cached_texture_section &tex : read_only_memory_sections) + for (auto &address_range : m_cache) { - tex.destroy(); + auto &range_data = address_range.second; + for (auto &tex : range_data.data) + { + tex.destroy(); + } + + range_data.data.resize(0); } - for (cached_texture_section &tex : no_access_memory_sections) - { - tex.destroy(); - } - - read_only_memory_sections.resize(0); - no_access_memory_sections.resize(0); - - clear_temporary_surfaces(); + clear_temporary_subresources(); + m_unreleased_texture_objects = 0; } - - cached_texture_section *create_locked_view_of_section(u32 base, u32 size) + + void clear_temporary_subresources() { - cached_texture_section *region = 
find_cached_rtt_section(base, size); - - if (!region) + for (u32 &id : m_temporary_surfaces) { - for (cached_texture_section &rtt : no_access_memory_sections) - { - if (rtt.is_dirty()) - { - rtt.reset(base, size, true); - rtt.protect(utils::protection::no); - region = &rtt; - break; - } - } - - if (!region) - { - cached_texture_section section; - section.reset(base, size, true); - section.set_dirty(true); - section.protect(utils::protection::no); - - no_access_memory_sections.push_back(section); - region = &no_access_memory_sections.back(); - } - - no_access_range = region->get_min_max(no_access_range); - } - else - { - //This section view already exists - if (region->get_section_size() != size) - { - region->unprotect(); - region->reset(base, size, true); - } - - if (!region->is_locked() || region->is_flushed()) - { - region->protect(utils::protection::no); - } + glDeleteTextures(1, &id); } - return region; + m_temporary_surfaces.resize(0); } u32 create_temporary_subresource(u32 src_id, GLenum sized_internal_fmt, u16 x, u16 y, u16 width, u16 height) @@ -566,6 +536,80 @@ namespace gl return dst_id; } + + protected: + + void free_texture_section(cached_texture_section& tex) override + { + tex.destroy(); + } + + u32 create_temporary_subresource_view(void*&, u32* src, u32 gcm_format, u16 x, u16 y, u16 w, u16 h) override + { + const GLenum ifmt = gl::get_sized_internal_format(gcm_format); + return create_temporary_subresource(*src, ifmt, x, y, w, h); + } + + u32 create_temporary_subresource_view(void*&, gl::texture* src, u32 gcm_format, u16 x, u16 y, u16 w, u16 h) override + { + if (auto as_rtt = dynamic_cast(src)) + { + return create_temporary_subresource(src->id(), (GLenum)as_rtt->get_compatible_internal_format(), x, y, w, h); + } + else + { + const GLenum ifmt = gl::get_sized_internal_format(gcm_format); + return create_temporary_subresource(src->id(), ifmt, x, y, w, h); + } + } + + cached_texture_section* create_new_texture(void*&, u32 rsx_address, u32 rsx_size, 
u16 width, u16 height, u16 depth, u16 mipmaps, const u32 gcm_format, + const rsx::texture_dimension_extended type, const rsx::texture_create_flags flags, std::pair, std::array>& remap_vector) override + { + u32 vram_texture = gl::create_texture(gcm_format, width, height, depth, mipmaps, type); + bool depth_flag = false; + + switch (gcm_format) + { + case CELL_GCM_TEXTURE_DEPTH24_D8: + case CELL_GCM_TEXTURE_DEPTH16: + depth_flag = true; + break; + } + + auto& cached = create_texture(vram_texture, rsx_address, rsx_size, width, height); + cached.protect(utils::protection::ro); + cached.set_dirty(false); + cached.set_depth_flag(depth_flag); + + return &cached; + } + + cached_texture_section* upload_image_from_cpu(void*&, u32 rsx_address, u16 width, u16 height, u16 depth, u16 mipmaps, u16 pitch, const u32 gcm_format, + std::vector& subresource_layout, const rsx::texture_dimension_extended type, const bool swizzled, + std::pair, std::array>& remap_vector) override + { + void* unused = nullptr; + auto section = create_new_texture(unused, rsx_address, pitch * height, width, height, depth, mipmaps, gcm_format, type, + rsx::texture_create_flags::default_component_order, remap_vector); + + gl::upload_texture(section->get_raw_texture(), rsx_address, gcm_format, width, height, depth, mipmaps, pitch, swizzled, type, subresource_layout, remap_vector, false); + return section; + } + + void enforce_surface_creation_type(cached_texture_section& section, const rsx::texture_create_flags flags) override + { + } + + void insert_texture_barrier() override + { + auto &caps = gl::get_driver_caps(); + + if (caps.ARB_texture_barrier_supported) + glTextureBarrier(); + else if (caps.NV_texture_barrier_supported) + glTextureBarrierNV(); + } public: @@ -573,479 +617,18 @@ namespace gl ~texture_cache() {} - void initialize(GLGSRender *renderer) + void initialize() { - m_renderer = renderer; - m_renderer_thread = std::this_thread::get_id(); - m_hw_blitter.init(); } - void close() + void destroy() 
override { clear(); - m_hw_blitter.destroy(); } - - cached_texture_section *find_texture_from_dimensions(u32 texaddr, u32 w, u32 h) - { - reader_lock lock(m_section_mutex); - - for (cached_texture_section &tex : read_only_memory_sections) - { - if (tex.matches(texaddr, w, h) && !tex.is_dirty()) - return &tex; - } - - return nullptr; - } - - /** - * Searches for a texture from read_only memory sections - * Texture origin + size must be a subsection of the existing texture - */ - cached_texture_section *find_texture_from_range(u32 texaddr, u32 range) - { - reader_lock lock(m_section_mutex); - - auto test = std::make_pair(texaddr, range); - for (cached_texture_section &tex : read_only_memory_sections) - { - if (tex.get_section_base() > texaddr) - continue; - - if (tex.overlaps(test, true) && !tex.is_dirty()) - return &tex; - } - - return nullptr; - } - - cached_texture_section* find_cached_rtt_section(u32 base, u32 size) - { - for (cached_texture_section &rtt : no_access_memory_sections) - { - if (rtt.matches(base, size)) - { - return &rtt; - } - } - - return nullptr; - } - - template - void upload_texture(int index, RsxTextureType &tex, rsx::gl::texture &gl_texture, gl_render_targets &m_rtts) - { - const u32 texaddr = rsx::get_address(tex.offset(), tex.location()); - const u32 range = (u32)get_texture_size(tex); - - const u32 format = tex.format() & ~(CELL_GCM_TEXTURE_LN | CELL_GCM_TEXTURE_UN); - const u32 tex_width = tex.width(); - const u32 tex_height = tex.height(); - const u32 native_pitch = (tex_width * get_format_block_size_in_bytes(format)); - const u32 tex_pitch = (tex.pitch() == 0)? 
native_pitch: tex.pitch(); - - if (!texaddr || !range) - { - LOG_ERROR(RSX, "Texture upload requested but texture not found, (address=0x%X, size=0x%X)", texaddr, range); - gl_texture.bind(); - return; - } - - glActiveTexture(GL_TEXTURE0 + index); - - /** - * Check for sampleable rtts from previous render passes - */ - gl::render_target *texptr = nullptr; - if ((texptr = m_rtts.get_texture_from_render_target_if_applicable(texaddr))) - { - for (const auto& tex : m_rtts.m_bound_render_targets) - { - if (std::get<0>(tex) == texaddr) - { - if (g_cfg.video.strict_rendering_mode) - { - LOG_WARNING(RSX, "Attempting to sample a currently bound render target @ 0x%x", texaddr); - create_temporary_subresource(texptr->id(), (GLenum)texptr->get_compatible_internal_format(), 0, 0, texptr->width(), texptr->height()); - return; - } - else - { - //issue a texture barrier to ensure previous writes are visible - auto &caps = gl::get_driver_caps(); - - if (caps.ARB_texture_barrier_supported) - glTextureBarrier(); - else if (caps.NV_texture_barrier_supported) - glTextureBarrierNV(); - - break; - } - } - } - - texptr->bind(); - return; - } - - if ((texptr = m_rtts.get_texture_from_depth_stencil_if_applicable(texaddr))) - { - if (texaddr == std::get<0>(m_rtts.m_bound_depth_stencil)) - { - if (g_cfg.video.strict_rendering_mode) - { - LOG_WARNING(RSX, "Attempting to sample a currently bound depth surface @ 0x%x", texaddr); - create_temporary_subresource(texptr->id(), (GLenum)texptr->get_compatible_internal_format(), 0, 0, texptr->width(), texptr->height()); - return; - } - else - { - //issue a texture barrier to ensure previous writes are visible - auto &caps = gl::get_driver_caps(); - - if (caps.ARB_texture_barrier_supported) - glTextureBarrier(); - else if (caps.NV_texture_barrier_supported) - glTextureBarrierNV(); - } - } - - texptr->bind(); - return; - } - - /** - * Check if we are re-sampling a subresource of an RTV/DSV texture, bound or otherwise - * (Turbo: Super Stunt Squad does 
this; bypassing the need for a sync object) - * The engine does not read back the texture resource through cell, but specifies a texture location that is - * a bound render target. We can bypass the expensive download in this case - */ - - const f32 internal_scale = (f32)tex_pitch / native_pitch; - const u32 internal_width = (const u32)(tex_width * internal_scale); - - const auto rsc = m_rtts.get_surface_subresource_if_applicable(texaddr, internal_width, tex_height, tex_pitch, true); - if (rsc.surface) - { - //Check that this region is not cpu-dirty before doing a copy - //This section is guaranteed to have a locking section *if* this bit has been bypassed before - - //Is this really necessary? - bool upload_from_cpu = false; - - for (cached_texture_section §ion : no_access_memory_sections) - { - if (section.overlaps(std::make_pair(texaddr, range)) && section.is_dirty()) - { - LOG_ERROR(RSX, "Cell wrote to render target section we are uploading from!"); - - upload_from_cpu = true; - break; - } - } - - if (!upload_from_cpu) - { - if (tex.get_extended_texture_dimension() != rsx::texture_dimension_extended::texture_dimension_2d) - { - LOG_ERROR(RSX, "Sampling of RTT region as non-2D texture! 
addr=0x%x, Type=%d, dims=%dx%d", - texaddr, (u8)tex.get_extended_texture_dimension(), tex.width(), tex.height()); - } - else - { - u32 bound_index = ~0U; - - bool dst_is_compressed = (format == CELL_GCM_TEXTURE_COMPRESSED_DXT1 || format == CELL_GCM_TEXTURE_COMPRESSED_DXT23 || format == CELL_GCM_TEXTURE_COMPRESSED_DXT45); - - if (!dst_is_compressed) - { - GLenum src_format = (GLenum)rsc.surface->get_internal_format(); - GLenum dst_format = std::get<0>(get_format_type(format)); - - if (src_format != dst_format) - { - LOG_WARNING(RSX, "Sampling from a section of a render target, but formats might be incompatible (0x%X vs 0x%X)", src_format, dst_format); - } - } - else - { - LOG_WARNING(RSX, "Surface blit from a compressed texture"); - } - - if (!rsc.is_bound || !g_cfg.video.strict_rendering_mode) - { - if (rsc.w == tex_width && rsc.h == tex_height) - { - if (rsc.is_bound) - { - LOG_WARNING(RSX, "Sampling from a currently bound render target @ 0x%x", texaddr); - - auto &caps = gl::get_driver_caps(); - if (caps.ARB_texture_barrier_supported) - glTextureBarrier(); - else if (caps.NV_texture_barrier_supported) - glTextureBarrierNV(); - } - - rsc.surface->bind(); - } - else - bound_index = create_temporary_subresource(rsc.surface->id(), (GLenum)rsc.surface->get_compatible_internal_format(), rsc.x, rsc.y, rsc.w, rsc.h); - } - else - { - LOG_WARNING(RSX, "Attempting to sample a currently bound render target @ 0x%x", texaddr); - bound_index = create_temporary_subresource(rsc.surface->id(), (GLenum)rsc.surface->get_compatible_internal_format(), rsc.x, rsc.y, rsc.w, rsc.h); - } - - if (bound_index) - return; - } - } - } - - /** - * If all the above failed, then its probably a generic texture. 
- * Search in cache and upload/bind - */ - - cached_texture_section *cached_texture = find_texture_from_dimensions(texaddr, tex_width, tex_height); - if (cached_texture) - { - verify(HERE), cached_texture->is_empty() == false; - - gl_texture.set_id(cached_texture->id()); - gl_texture.bind(); - - //external gl::texture objects should always be undefined/uninitialized! - gl_texture.set_id(0); - return; - } - - /** - * Check for subslices from the cache in case we only have a subset a larger texture - */ - cached_texture = find_texture_from_range(texaddr, range); - if (cached_texture) - { - const u32 address_offset = texaddr - cached_texture->get_section_base(); - const u32 format = tex.format() & ~(CELL_GCM_TEXTURE_LN | CELL_GCM_TEXTURE_UN); - const GLenum ifmt = gl::get_sized_internal_format(format); - - u16 offset_x = 0, offset_y = 0; - - if (address_offset) - { - const u32 bpp = get_format_block_size_in_bytes(format); - - offset_y = address_offset / tex_pitch; - offset_x = address_offset % tex_pitch; - - offset_x /= bpp; - offset_y /= bpp; - } - - u32 texture_id = create_temporary_subresource(cached_texture->id(), ifmt, offset_x, offset_y, tex_width, tex_height); - if (texture_id) return; - } - - gl_texture.init(index, tex); - - writer_lock lock(m_section_mutex); - - cached_texture_section &cached = create_texture(gl_texture.id(), texaddr, (const u32)get_texture_size(tex), tex_width, tex_height); - cached.protect(utils::protection::ro); - cached.set_dirty(false); - - //external gl::texture objects should always be undefined/uninitialized! - gl_texture.set_id(0); - } - - void save_rtt(u32 base, u32 size) - { - writer_lock lock(m_section_mutex); - - cached_texture_section *region = find_cached_rtt_section(base, size); - - if (!region) - { - LOG_ERROR(RSX, "Attempted to download render target that does not exist. 
Please report to developers"); - return; - } - - if (!region->is_locked()) - { - verify(HERE), region->is_dirty(); - LOG_WARNING(RSX, "Cell write to bound render target area"); - - region->protect(utils::protection::no); - region->set_dirty(false); - } - - region->copy_texture(); - } - - void lock_rtt_region(const u32 base, const u32 size, const u16 width, const u16 height, const u16 pitch, const texture::format format, const texture::type type, const bool swap_bytes, gl::texture &source) - { - writer_lock lock(m_section_mutex); - - cached_texture_section *region = create_locked_view_of_section(base, size); - - if (!region->matches(base, size)) - { - //This memory region overlaps our own region, but does not match it exactly - if (region->is_locked()) - region->unprotect(); - - region->reset(base, size, true); - region->protect(utils::protection::no); - } - - region->set_dimensions(width, height, pitch); - region->set_format(format, type, swap_bytes); - region->set_dirty(false); - region->set_flushed(false); - region->set_copied(false); - region->set_source(source); - - verify(HERE), region->is_locked() == true; - } - - bool load_rtt(gl::texture &tex, const u32 address, const u32 pitch) - { - const u32 range = tex.height() * pitch; - cached_texture_section *rtt = find_cached_rtt_section(address, range); - - if (rtt && !rtt->is_dirty()) - { - rtt->fill_texture(tex); - return true; - } - - //No valid object found in cache - return false; - } - - bool mark_as_dirty(u32 address) - { - return invalidate_range(address, 4096 - (address & 4095)); - } - - bool invalidate_range(u32 address, u32 size, bool unprotect=true) - { - bool response = false; - std::pair trampled_range = std::make_pair(address, address + size); - - //TODO: Optimize this function! - //Multi-pass checking is slow. 
Pre-calculate dependency tree at section creation - rsx::conditional_lock lock(in_access_violation_handler, m_section_mutex); - - if (trampled_range.second >= read_only_range.first && - trampled_range.first < read_only_range.second) - { - for (int i = 0; i < read_only_memory_sections.size(); ++i) - { - auto &tex = read_only_memory_sections[i]; - if (!tex.is_locked()) continue; - - auto overlapped = tex.overlaps_page(trampled_range, address); - if (std::get<0>(overlapped)) - { - auto &new_range = std::get<1>(overlapped); - - if (new_range.first != trampled_range.first || - new_range.second != trampled_range.second) - { - trampled_range = new_range; - i = 0; - } - - if (unprotect) - { - tex.unprotect(); - tex.set_dirty(true); - } - else - { - //abandon memory - tex.discard(); - } - - response = true; - } - } - } - - if (trampled_range.second >= no_access_range.first && - trampled_range.first < no_access_range.second) - { - rsx::conditional_lock lock(in_access_violation_handler, m_section_mutex); - - for (int i = 0; i < no_access_memory_sections.size(); ++i) - { - auto &tex = no_access_memory_sections[i]; - if (tex.is_dirty() || !tex.is_locked()) continue; - - auto overlapped = tex.overlaps_page(trampled_range, address); - if (std::get<0>(overlapped)) - { - auto &new_range = std::get<1>(overlapped); - - if (new_range.first != trampled_range.first || - new_range.second != trampled_range.second) - { - trampled_range = new_range; - i = 0; - } - - if (unprotect) - { - tex.unprotect(); - tex.set_dirty(true); - } - else - { - LOG_WARNING(RSX, "Framebuffer region 0x%X -> 0x%X is being discarded", tex.get_section_base(), tex.get_section_base() + tex.get_section_size()); - tex.discard(); - } - - response = true; - } - } - } - - return response; - } - - bool flush_section(u32 address); - - void clear_temporary_surfaces() - { - for (u32 &id : m_temporary_surfaces) - { - glDeleteTextures(1, &id); - } - - m_temporary_surfaces.clear(); - } - - void purge_dirty() - { - reader_lock 
lock(m_section_mutex); - - for (cached_texture_section &tex : read_only_memory_sections) - { - if (tex.is_dirty()) - { - tex.destroy(); - } - } - } - - bool is_depth_texture(const u32 rsx_address) + + bool is_depth_texture(const u32 rsx_address) override { auto section = find_texture_from_range(rsx_address, 64u); if (section != nullptr) return section->is_depth_texture(); @@ -1053,283 +636,30 @@ namespace gl return false; } - bool upload_scaled_image(rsx::blit_src_info& src, rsx::blit_dst_info& dst, bool interpolate, gl_render_targets &m_rtts) + void on_frame_end() override { - //Since we will have dst in vram, we can 'safely' ignore the swizzle flag - //TODO: Verify correct behavior - bool is_depth_blit = false; - bool src_is_render_target = false; - bool dst_is_render_target = false; - bool dst_is_argb8 = (dst.format == rsx::blit_engine::transfer_destination_format::a8r8g8b8); - bool src_is_argb8 = (src.format == rsx::blit_engine::transfer_source_format::a8r8g8b8); - - GLenum src_gl_sized_format = src_is_argb8? GL_RGBA8: GL_RGB565; - GLenum src_gl_format = src_is_argb8 ? GL_BGRA : GL_RGB; - GLenum src_gl_type = src_is_argb8? 
GL_UNSIGNED_INT_8_8_8_8: GL_UNSIGNED_SHORT_5_6_5; - - u32 vram_texture = 0; - u32 dest_texture = 0; - - const u32 src_address = (u32)((u64)src.pixels - (u64)vm::base(0)); - const u32 dst_address = (u32)((u64)dst.pixels - (u64)vm::base(0)); - - //Check if src/dst are parts of render targets - auto dst_subres = m_rtts.get_surface_subresource_if_applicable(dst_address, dst.width, dst.clip_height, dst.pitch, true, true, false); - dst_is_render_target = dst_subres.surface != nullptr; - - //TODO: Handle cases where src or dst can be a depth texture while the other is a color texture - requires a render pass to emulate - auto src_subres = m_rtts.get_surface_subresource_if_applicable(src_address, src.width, src.height, src.pitch, true, true, false); - src_is_render_target = src_subres.surface != nullptr; - - //Always use GPU blit if src or dst is in the surface store - if (!g_cfg.video.use_gpu_texture_scaling && !(src_is_render_target || dst_is_render_target)) - return false; - - u16 max_dst_width = dst.width; - u16 max_dst_height = dst.height; - - //Prepare areas and offsets - //Copy from [src.offset_x, src.offset_y] a region of [clip.width, clip.height] - //Stretch onto [dst.offset_x, y] with clipping performed on the source region - //The implementation here adds the inverse scaled clip dimensions onto the source to completely bypass final clipping step - - float scale_x = (f32)dst.width / src.width; - float scale_y = (f32)dst.height / src.height; - - //Clip offset is unused if the clip offsets are reprojected onto the source - position2i clip_offset = { 0, 0 };//{ dst.clip_x, dst.clip_y }; - position2i dst_offset = { dst.offset_x, dst.offset_y }; - - size2i clip_dimensions = { dst.clip_width, dst.clip_height }; - //Dimensions passed are restricted to powers of 2; get real height from clip_height and width from pitch - const size2i dst_dimensions = { dst.pitch / (dst_is_argb8 ? 
4 : 2), dst.clip_height }; - - //Offset in x and y for src is 0 (it is already accounted for when getting pixels_src) - //Reproject final clip onto source... - const u16 src_w = (const u16)((f32)clip_dimensions.width / scale_x); - const u16 src_h = (const u16)((f32)clip_dimensions.height / scale_y); - - areai src_area = { 0, 0, src_w, src_h }; - areai dst_area = { 0, 0, dst.clip_width, dst.clip_height }; - - //If destination is neither a render target nor an existing texture in VRAM - //its possible that this method is being used to perform a memcpy into RSX memory, so we check - //parameters. Whenever a simple memcpy can get the job done, use it instead. - //Dai-3-ji Super Robot Taisen for example uses this to copy program code to GPU RAM - - bool is_memcpy = false; - u32 memcpy_bytes_length = 0; - - if (dst_is_argb8 == src_is_argb8 && !dst.swizzled) + if (m_unreleased_texture_objects >= m_max_zombie_objects) { - if ((src.slice_h == 1 && dst.clip_height == 1) || - (dst.clip_width == src.width && dst.clip_height == src.slice_h && src.pitch == dst.pitch)) - { - const u8 bpp = dst_is_argb8 ? 
4 : 2; - is_memcpy = true; - memcpy_bytes_length = dst.clip_width * bpp * dst.clip_height; - } - } - - cached_texture_section* cached_dest = nullptr; - if (!dst_is_render_target) - { - //First check if this surface exists in VRAM with exact dimensions - //Since scaled GPU resources are not invalidated by the CPU, we need to reuse older surfaces if possible - cached_dest = find_texture_from_dimensions(dst.rsx_address, dst_dimensions.width, dst_dimensions.height); - - //Check for any available region that will fit this one - if (!cached_dest) cached_dest = find_texture_from_range(dst.rsx_address, dst.pitch * dst.clip_height); - - if (cached_dest) - { - //TODO: Verify that the new surface will fit - dest_texture = cached_dest->id(); - - //TODO: Move this code into utils since it is used alot - const u32 address_offset = dst.rsx_address - cached_dest->get_section_base(); - - const u16 bpp = dst_is_argb8 ? 4 : 2; - const u16 offset_y = address_offset / dst.pitch; - const u16 offset_x = address_offset % dst.pitch; - - dst_offset.x += offset_x / bpp; - dst_offset.y += offset_y; - - std::tie(max_dst_width, max_dst_height) = cached_dest->get_dimensions(); - } - else if (is_memcpy) - { - memcpy(dst.pixels, src.pixels, memcpy_bytes_length); - return true; - } - } - else - { - dst_offset.x = dst_subres.x; - dst_offset.y = dst_subres.y; - - dest_texture = dst_subres.surface->id(); - - auto dims = dst_subres.surface->get_dimensions(); - max_dst_width = dims.first; - max_dst_height = dims.second; - - if (is_memcpy) - { - //Some render target descriptions are actually invalid - //Confirm this is a flushable RTT - const auto rsx_pitch = dst_subres.surface->get_rsx_pitch(); - const auto native_pitch = dst_subres.surface->get_native_pitch(); - - if (rsx_pitch <= 64 && native_pitch != rsx_pitch) - { - memcpy(dst.pixels, src.pixels, memcpy_bytes_length); - return true; - } - } - } - - //Create source texture if does not exist - if (!src_is_render_target) - { - auto preloaded_texture = 
find_texture_from_dimensions(src_address, src.width, src.slice_h); - - if (preloaded_texture != nullptr) - { - vram_texture = preloaded_texture->id(); - } - else - { - flush_section(src_address); - - GLboolean swap_bytes = !src_is_argb8; - if (dst.swizzled) - { - //TODO: Check results against 565 textures - if (src_is_argb8) - { - src_gl_format = GL_RGBA; - swap_bytes = true; - } - else - { - LOG_ERROR(RSX, "RGB565 swizzled texture upload found"); - } - } - - glGenTextures(1, &vram_texture); - glBindTexture(GL_TEXTURE_2D, vram_texture); - glTexStorage2D(GL_TEXTURE_2D, 1, src_gl_sized_format, src.width, src.slice_h); - glPixelStorei(GL_UNPACK_ROW_LENGTH, src.pitch / (src_is_argb8 ? 4 : 2)); - glPixelStorei(GL_UNPACK_ALIGNMENT, 1); - glPixelStorei(GL_UNPACK_SWAP_BYTES, swap_bytes); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR); - glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, src.width, src.slice_h, src_gl_format, src_gl_type, src.pixels); - - writer_lock lock(m_section_mutex); - - auto §ion = create_texture(vram_texture, src_address, src.pitch * src.slice_h, src.width, src.slice_h); - section.protect(utils::protection::ro); - section.set_dirty(false); - } - } - else - { - if (src_subres.w != clip_dimensions.width || - src_subres.h != clip_dimensions.height) - { - f32 subres_scaling_x = (f32)src.pitch / src_subres.surface->get_native_pitch(); - - dst_area.x2 = (int)(src_subres.w * scale_x * subres_scaling_x); - dst_area.y2 = (int)(src_subres.h * scale_y); - } - - src_area.x2 = src_subres.w; - src_area.y2 = src_subres.h; - - src_area.x1 += src_subres.x; - src_area.x2 += src_subres.x; - src_area.y1 += src_subres.y; - src_area.y2 += src_subres.y; - - vram_texture = src_subres.surface->id(); - } - - bool format_mismatch = false; - - if (src_subres.is_depth_surface) - { - if (dest_texture) - { - if (dst_is_render_target && !dst_subres.is_depth_surface) - { - LOG_ERROR(RSX, "Depth->RGBA blit 
requested but not supported"); - return true; - } - - GLenum internal_fmt; - glBindTexture(GL_TEXTURE_2D, dest_texture); - glGetTexLevelParameteriv(GL_TEXTURE_2D, 0, GL_TEXTURE_INTERNAL_FORMAT, (GLint*)&internal_fmt); - - if (internal_fmt != (GLenum)src_subres.surface->get_compatible_internal_format()) - format_mismatch = true; - } - - is_depth_blit = true; + purge_dirty(); } - //TODO: Check for other types of format mismatch - if (format_mismatch) - { - invalidate_range(cached_dest->get_section_base(), cached_dest->get_section_size()); - - dest_texture = 0; - cached_dest = nullptr; - } + clear_temporary_subresources(); + } - //Validate clip offsets (Persona 4 Arena at 720p) - //Check if can fit - //NOTE: It is possible that the check is simpler (if (clip_x >= clip_width)) - //Needs verification - if ((dst.offset_x + dst.clip_x + dst.clip_width) > max_dst_width) dst.clip_x = 0; - if ((dst.offset_y + dst.clip_y + dst.clip_height) > max_dst_height) dst.clip_y = 0; + bool blit(rsx::blit_src_info& src, rsx::blit_dst_info& dst, bool linear_interpolate, gl_render_targets& m_rtts) + { + void* unused = nullptr; + return upload_scaled_image(src, dst, linear_interpolate, unused, m_rtts, m_hw_blitter); + } - if (dst.clip_x || dst.clip_y) - { - //Reproject clip offsets onto source - const u16 scaled_clip_offset_x = (const u16)((f32)dst.clip_x / scale_x); - const u16 scaled_clip_offset_y = (const u16)((f32)dst.clip_y / scale_y); + template + void upload_and_bind_texture(int index, GLenum target, RsxTextureType &tex, gl_render_targets &m_rtts) + { + glActiveTexture(GL_TEXTURE0 + index); + void* unused = nullptr; - src_area.x1 += scaled_clip_offset_x; - src_area.x2 += scaled_clip_offset_x; - src_area.y1 += scaled_clip_offset_y; - src_area.y2 += scaled_clip_offset_y; - } - - GLenum dst_format = (is_depth_blit) ? (GLenum)src_subres.surface->get_compatible_internal_format() : (dst_is_argb8) ? 
GL_RGBA8 : GL_RGB565; - u32 texture_id = m_hw_blitter.scale_image(vram_texture, dest_texture, src_area, dst_area, dst_format, dst_offset, clip_offset, - dst_dimensions, clip_dimensions, dst_is_argb8, is_depth_blit, interpolate); - - if (dest_texture) - return true; - - //TODO: Verify if any titles ever scale into CPU memory. It defeats the purpose of uploading data to the GPU, but it could happen - //If so, add this texture to the no_access queue not the read_only queue - writer_lock lock(m_section_mutex); - - const u8 bpp = dst_is_argb8 ? 4 : 2; - const u32 real_width = dst.pitch / bpp; - cached_texture_section &cached = create_texture(texture_id, dst.rsx_address, dst.pitch * dst.clip_height, real_width, dst.clip_height); - //These textures are completely GPU resident so we dont watch for CPU access - //There's no data to be fetched from the CPU - //Its is possible for a title to attempt to read from the region, but the CPU path should be used in such cases - cached.protect(utils::protection::ro); - cached.set_dirty(false); - cached.set_depth_flag(is_depth_blit); - - return true; + auto id = upload_texture(unused, tex, m_rtts); + glBindTexture(target, id); } }; } diff --git a/rpcs3/Emu/RSX/RSXTexture.cpp b/rpcs3/Emu/RSX/RSXTexture.cpp index 4400fd7b71..d4f45de577 100644 --- a/rpcs3/Emu/RSX/RSXTexture.cpp +++ b/rpcs3/Emu/RSX/RSXTexture.cpp @@ -364,6 +364,15 @@ namespace rsx return ((registers[NV4097_SET_VERTEX_TEXTURE_ADDRESS + (m_index * 8)] >> 12) & 0xf); } + std::pair, std::array> vertex_texture::decoded_remap() const + { + return + { + { CELL_GCM_TEXTURE_REMAP_FROM_A, CELL_GCM_TEXTURE_REMAP_FROM_R, CELL_GCM_TEXTURE_REMAP_FROM_G, CELL_GCM_TEXTURE_REMAP_FROM_B }, + { CELL_GCM_TEXTURE_REMAP_REMAP, CELL_GCM_TEXTURE_REMAP_REMAP, CELL_GCM_TEXTURE_REMAP_REMAP, CELL_GCM_TEXTURE_REMAP_REMAP } + }; + } + u8 vertex_texture::zfunc() const { return ((registers[NV4097_SET_VERTEX_TEXTURE_ADDRESS + (m_index * 8)] >> 28) & 0xf); diff --git a/rpcs3/Emu/RSX/RSXTexture.h 
b/rpcs3/Emu/RSX/RSXTexture.h index ac9f12b6ed..6501d8d16a 100644 --- a/rpcs3/Emu/RSX/RSXTexture.h +++ b/rpcs3/Emu/RSX/RSXTexture.h @@ -129,6 +129,8 @@ namespace rsx u8 aniso_bias() const; u8 signed_remap() const; + std::pair, std::array> decoded_remap() const; + // Control0 bool enabled() const; u16 min_lod() const; diff --git a/rpcs3/Emu/RSX/VK/VKGSRender.cpp b/rpcs3/Emu/RSX/VK/VKGSRender.cpp index 85cb19fb45..bf11cfd08b 100644 --- a/rpcs3/Emu/RSX/VK/VKGSRender.cpp +++ b/rpcs3/Emu/RSX/VK/VKGSRender.cpp @@ -638,6 +638,9 @@ VKGSRender::VKGSRender() : GSRender() m_current_frame = &frame_context_storage[0]; + m_texture_cache.initialize((*m_device), m_memory_type_mapping, m_optimal_tiling_supported_formats, m_swap_chain->get_present_queue(), + m_texture_upload_buffer_ring_info, m_texture_upload_buffer_ring_info.heap.get()); + supports_multidraw = true; } @@ -742,16 +745,18 @@ bool VKGSRender::on_access_violation(u32 address, bool is_writing) { if (g_cfg.video.write_color_buffers || g_cfg.video.write_depth_buffer) { - bool flushable, synchronized; - u64 sync_timestamp; - std::tie(flushable, synchronized, sync_timestamp) = m_texture_cache.address_is_flushable(address); + bool flushable; + vk::cached_texture_section* section; + + std::tie(flushable, section) = m_texture_cache.address_is_flushable(address); if (!flushable) return false; - + + const u64 sync_timestamp = section->get_sync_timestamp(); const bool is_rsxthr = std::this_thread::get_id() == rsx_thread; - if (synchronized) + if (section->is_synchronized()) { //Wait for any cb submitted after the sync timestamp to finish while (true) @@ -841,7 +846,7 @@ bool VKGSRender::on_access_violation(u32 address, bool is_writing) void VKGSRender::on_notify_memory_unmapped(u32 address_base, u32 size) { if (m_texture_cache.invalidate_range(address_base, size, false)) - m_texture_cache.flush(true); + m_texture_cache.purge_dirty(); } void VKGSRender::begin() @@ -1038,8 +1043,7 @@ void VKGSRender::end() continue; } - 
vk::image_view *texture0 = m_texture_cache.upload_texture(*m_current_command_buffer, rsx::method_registers.fragment_textures[i], m_rtts, m_memory_type_mapping, - m_texture_upload_buffer_ring_info, m_texture_upload_buffer_ring_info.heap.get()); + vk::image_view *texture0 = m_texture_cache.upload_texture(*m_current_command_buffer, rsx::method_registers.fragment_textures[i], m_rtts); if (!texture0) { @@ -1093,8 +1097,7 @@ void VKGSRender::end() continue; } - vk::image_view *texture0 = m_texture_cache.upload_texture(*m_current_command_buffer, rsx::method_registers.vertex_textures[i], m_rtts, m_memory_type_mapping, - m_texture_upload_buffer_ring_info, m_texture_upload_buffer_ring_info.heap.get()); + vk::image_view *texture0 = m_texture_cache.upload_texture(*m_current_command_buffer, rsx::method_registers.vertex_textures[i], m_rtts); if (!texture0) { @@ -1429,7 +1432,7 @@ void VKGSRender::copy_render_targets_to_dma_location() if (!m_surface_info[index].pitch) continue; - m_texture_cache.flush_memory_to_cache(m_surface_info[index].address, m_surface_info[index].pitch * m_surface_info[index].height, + m_texture_cache.flush_memory_to_cache(m_surface_info[index].address, m_surface_info[index].pitch * m_surface_info[index].height, true, *m_current_command_buffer, m_memory_type_mapping, m_swap_chain->get_present_queue()); } } @@ -1440,7 +1443,7 @@ void VKGSRender::copy_render_targets_to_dma_location() if (m_depth_surface_info.pitch) { - m_texture_cache.flush_memory_to_cache(m_depth_surface_info.address, m_depth_surface_info.pitch * m_depth_surface_info.height, + m_texture_cache.flush_memory_to_cache(m_depth_surface_info.address, m_depth_surface_info.pitch * m_depth_surface_info.height, true, *m_current_command_buffer, m_memory_type_mapping, m_swap_chain->get_present_queue()); } } @@ -1529,7 +1532,7 @@ void VKGSRender::advance_queued_frames() m_rtts.free_invalidated(); //texture cache is also double buffered to prevent use-after-free - m_texture_cache.flush(); + 
m_texture_cache.on_frame_end(); //Remove stale framebuffers. Ref counted to prevent use-after-free m_framebuffers_to_clean.remove_if([](std::unique_ptr& fbo) @@ -2179,7 +2182,7 @@ void VKGSRender::prepare_rtts() const u32 range = m_surface_info[index].pitch * m_surface_info[index].height; m_texture_cache.lock_memory_region(std::get<1>(m_rtts.m_bound_render_targets[index]), m_surface_info[index].address, range, - m_surface_info[index].width, m_surface_info[index].height); + m_surface_info[index].width, m_surface_info[index].height, m_surface_info[index].pitch); } } @@ -2192,7 +2195,7 @@ void VKGSRender::prepare_rtts() const u32 range = pitch * m_depth_surface_info.height; m_texture_cache.lock_memory_region(std::get<1>(m_rtts.m_bound_depth_stencil), m_depth_surface_info.address, range, - m_depth_surface_info.width, m_depth_surface_info.height); + m_depth_surface_info.width, m_depth_surface_info.height, m_depth_surface_info.pitch); } } @@ -2584,6 +2587,5 @@ bool VKGSRender::scaled_image_from_memory(rsx::blit_src_info& src, rsx::blit_dst { close_render_pass(); - return m_texture_cache.upload_scaled_image(src, dst, interpolate, (*m_device), *m_current_command_buffer, m_memory_type_mapping, - m_swap_chain->get_present_queue(), m_rtts, m_texture_upload_buffer_ring_info, m_texture_upload_buffer_ring_info.heap.get()); -} \ No newline at end of file + return m_texture_cache.blit(src, dst, interpolate, m_rtts, *m_current_command_buffer); +} diff --git a/rpcs3/Emu/RSX/VK/VKHelpers.h b/rpcs3/Emu/RSX/VK/VKHelpers.h index d100412fb3..5c9ee4d27b 100644 --- a/rpcs3/Emu/RSX/VK/VKHelpers.h +++ b/rpcs3/Emu/RSX/VK/VKHelpers.h @@ -1481,5 +1481,5 @@ namespace vk */ void copy_mipmaped_image_using_buffer(VkCommandBuffer cmd, VkImage dst_image, const std::vector& subresource_layout, int format, bool is_swizzled, u16 mipmap_count, - vk::vk_data_heap &upload_heap, vk::buffer* upload_buffer); + VkImageAspectFlags flags, vk::vk_data_heap &upload_heap, vk::buffer* upload_buffer); } diff --git 
a/rpcs3/Emu/RSX/VK/VKRenderTargets.h b/rpcs3/Emu/RSX/VK/VKRenderTargets.h index 7762fe55e9..f6acbd36a0 100644 --- a/rpcs3/Emu/RSX/VK/VKRenderTargets.h +++ b/rpcs3/Emu/RSX/VK/VKRenderTargets.h @@ -16,7 +16,7 @@ struct ref_counted namespace vk { - struct render_target : public image, public ref_counted + struct render_target : public image, public ref_counted, public rsx::render_target_descriptor { bool dirty = false; u16 native_pitch = 0; @@ -51,6 +51,31 @@ namespace vk return view.get(); } + + vk::image* get_surface() const override + { + return (vk::image*)this; + } + + u16 get_surface_width() const override + { + return width(); + } + + u16 get_surface_height() const override + { + return height(); + } + + u16 get_rsx_pitch() const override + { + return rsx_pitch; + } + + u16 get_native_pitch() const override + { + return native_pitch; + } }; struct framebuffer_holder: public vk::framebuffer, public ref_counted diff --git a/rpcs3/Emu/RSX/VK/VKTexture.cpp b/rpcs3/Emu/RSX/VK/VKTexture.cpp index 1ba4960bc3..68937af42d 100644 --- a/rpcs3/Emu/RSX/VK/VKTexture.cpp +++ b/rpcs3/Emu/RSX/VK/VKTexture.cpp @@ -157,7 +157,7 @@ namespace vk void copy_mipmaped_image_using_buffer(VkCommandBuffer cmd, VkImage dst_image, const std::vector& subresource_layout, int format, bool is_swizzled, u16 mipmap_count, - vk::vk_data_heap &upload_heap, vk::buffer* upload_buffer) + VkImageAspectFlags flags, vk::vk_data_heap &upload_heap, vk::buffer* upload_buffer) { u32 mipmap_level = 0; u32 block_in_pixel = get_format_block_size_in_texel(format); @@ -178,7 +178,7 @@ namespace vk copy_info.imageExtent.height = layout.height_in_block * block_in_pixel; copy_info.imageExtent.width = layout.width_in_block * block_in_pixel; copy_info.imageExtent.depth = layout.depth; - copy_info.imageSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; + copy_info.imageSubresource.aspectMask = flags; copy_info.imageSubresource.layerCount = 1; copy_info.imageSubresource.baseArrayLayer = mipmap_level / mipmap_count; 
copy_info.imageSubresource.mipLevel = mipmap_level % mipmap_count; diff --git a/rpcs3/Emu/RSX/VK/VKTextureCache.h b/rpcs3/Emu/RSX/VK/VKTextureCache.h index 79a0e6cc20..1b0193c329 100644 --- a/rpcs3/Emu/RSX/VK/VKTextureCache.h +++ b/rpcs3/Emu/RSX/VK/VKTextureCache.h @@ -6,6 +6,7 @@ #include "../Common/TextureUtils.h" #include "../rsx_utils.h" #include "Utilities/mutex.h" +#include "../Common/texture_cache.h" extern u64 get_system_time(); @@ -134,6 +135,16 @@ namespace vk return managed_texture; } + vk::image_view* get_raw_view() + { + return uploaded_image_view.get(); + } + + vk::image* get_raw_texture() + { + return managed_texture.get(); + } + VkFormat get_format() { return vram_texture->info.format; @@ -151,7 +162,7 @@ namespace vk return (protection == utils::protection::rw && uploaded_image_view.get() == nullptr && managed_texture.get() == nullptr); } - void copy_texture(vk::command_buffer& cmd, u32 heap_index, VkQueue submit_queue, bool manage_cb_lifetime = false) + void copy_texture(bool manage_cb_lifetime, vk::command_buffer& cmd, vk::memory_type_mapping& memory_types, VkQueue submit_queue) { if (m_device == nullptr) { @@ -167,7 +178,7 @@ namespace vk if (dma_buffer.get() == nullptr) { - dma_buffer.reset(new vk::buffer(*m_device, align(cpu_address_range, 256), heap_index, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT, VK_BUFFER_USAGE_TRANSFER_DST_BIT, 0)); + dma_buffer.reset(new vk::buffer(*m_device, align(cpu_address_range, 256), memory_types.host_visible_coherent, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT, VK_BUFFER_USAGE_TRANSFER_DST_BIT, 0)); } if (manage_cb_lifetime) @@ -243,7 +254,7 @@ namespace vk } } - bool flush(vk::render_device& dev, vk::command_buffer& cmd, u32 heap_index, VkQueue submit_queue) + bool flush(vk::render_device& dev, vk::command_buffer& cmd, vk::memory_type_mapping& memory_types, VkQueue submit_queue) { if (m_device == nullptr) m_device = &dev; @@ -254,7 +265,7 @@ namespace vk if (!synchronized) { LOG_WARNING(RSX, "Cache miss at address 0x%X. 
This is gonna hurt...", cpu_address_base); - copy_texture(cmd, heap_index, submit_queue, true); + copy_texture(true, cmd, memory_types, submit_queue); result = false; } @@ -311,43 +322,29 @@ namespace vk return (sync_timestamp > last_use_timestamp); } + bool has_compatible_format(vk::image* tex) const + { + return vram_texture->info.format == tex->info.format; + } + u64 get_sync_timestamp() const { return sync_timestamp; } }; + - class texture_cache + class texture_cache : public rsx::texture_cache { - struct ranged_storage - { - std::vector data; //Stored data - std::atomic_int valid_count = { 0 }; //Number of usable (non-dirty) blocks - u32 max_range = 0; //Largest stored block - - void notify(u32 data_size) - { - max_range = std::max(data_size, max_range); - valid_count++; - } - - void add(cached_texture_section& section, u32 data_size) - { - max_range = std::max(data_size, max_range); - valid_count++; - - data.push_back(std::move(section)); - } - }; - private: - std::atomic_bool in_access_violation_handler = { false }; - shared_mutex m_cache_mutex; - std::unordered_map m_cache; + //Vulkan internals + vk::render_device* m_device; + vk::memory_type_mapping m_memory_types; + vk::gpu_formats_support m_formats_support; + VkQueue m_submit_queue; + vk_data_heap* m_texture_upload_heap; + vk::buffer* m_texture_upload_buffer; - std::pair read_only_range = std::make_pair(0xFFFFFFFF, 0); - std::pair no_access_range = std::make_pair(0xFFFFFFFF, 0); - //Stuff that has been dereferenced goes into these std::vector > m_temporary_image_view; std::vector> m_dirty_textures; @@ -355,132 +352,7 @@ namespace vk //Stuff that has been dereferenced twice goes here. 
Contents are evicted before new ones are added std::vector> m_image_views_to_purge; std::vector> m_images_to_purge; - - // Keep track of cache misses to pre-emptively flush some addresses - struct framebuffer_memory_characteristics - { - u32 misses; - u32 block_size; - VkFormat format; - }; - - std::unordered_map m_cache_miss_statistics_table; - - //Memory usage - const s32 m_max_zombie_objects = 32; //Limit on how many texture objects to keep around for reuse after they are invalidated - s32 m_unreleased_texture_objects = 0; //Number of invalidated objects not yet freed from memory - - cached_texture_section *find_texture_from_range(u32 rsx_address, u32 range) - { - auto test = std::make_pair(rsx_address, range); - for (auto &address_range : m_cache) - { - auto &range_data = address_range.second; - for (auto &tex : range_data.data) - { - if (tex.get_section_base() > rsx_address) - continue; - - if (!tex.is_dirty() && tex.overlaps(test, true)) - return &tex; - } - } - - return nullptr; - } - - cached_texture_section *find_texture_from_dimensions(u32 rsx_address, u32 /*rsx_size*/, u16 width = 0, u16 height = 0, u16 mipmaps = 0) - { - auto found = m_cache.find(rsx_address); - if (found != m_cache.end()) - { - auto &range_data = found->second; - for (auto &tex : range_data.data) - { - if (tex.matches(rsx_address, width, height, mipmaps) && !tex.is_dirty()) - { - return &tex; - } - } - } - - return nullptr; - } - - cached_texture_section& find_cached_texture(u32 rsx_address, u32 rsx_size, bool confirm_dimensions = false, u16 width = 0, u16 height = 0, u16 mipmaps = 0) - { - { - reader_lock lock(m_cache_mutex); - - auto found = m_cache.find(rsx_address); - if (found != m_cache.end()) - { - auto &range_data = found->second; - - for (auto &tex : range_data.data) - { - if (tex.matches(rsx_address, rsx_size) && !tex.is_dirty()) - { - if (!confirm_dimensions) return tex; - - if (tex.matches(rsx_address, width, height, mipmaps)) - return tex; - else - { - LOG_ERROR(RSX, 
"Cached object for address 0x%X was found, but it does not match stored parameters.", rsx_address); - LOG_ERROR(RSX, "%d x %d vs %d x %d", width, height, tex.get_width(), tex.get_height()); - } - } - } - - for (auto &tex : range_data.data) - { - if (tex.is_dirty()) - { - if (tex.exists()) - { - m_unreleased_texture_objects--; - - m_dirty_textures.push_back(std::move(tex.get_texture())); - m_temporary_image_view.push_back(std::move(tex.get_view())); - } - - tex.release_dma_resources(); - range_data.notify(rsx_size); - return tex; - } - } - } - } - - writer_lock lock(m_cache_mutex); - - cached_texture_section tmp; - m_cache[rsx_address].add(tmp, rsx_size); - return m_cache[rsx_address].data.back(); - } - - cached_texture_section* find_flushable_section(const u32 address, const u32 range) - { - reader_lock lock(m_cache_mutex); - - auto found = m_cache.find(address); - if (found != m_cache.end()) - { - auto &range_data = found->second; - for (auto &tex : range_data.data) - { - if (tex.is_dirty()) continue; - if (!tex.is_flushable() && !tex.is_flushed()) continue; - - if (tex.matches(address, range)) - return &tex; - } - } - - return nullptr; - } - + void purge_cache() { for (auto &address_range : m_cache) @@ -511,45 +383,17 @@ namespace vk m_unreleased_texture_objects = 0; } + + protected: - //Helpers - VkComponentMapping get_component_map(rsx::fragment_texture &tex, u32 gcm_format) + void free_texture_section(cached_texture_section& tex) override { - //Decoded remap returns 2 arrays; a redirection table and a lookup reference - auto decoded_remap = tex.decoded_remap(); - - //NOTE: Returns mapping in A-R-G-B - auto native_mapping = vk::get_component_mapping(gcm_format); - VkComponentSwizzle final_mapping[4] = {}; - - for (u8 channel = 0; channel < 4; ++channel) - { - switch (decoded_remap.second[channel]) - { - case CELL_GCM_TEXTURE_REMAP_ONE: - final_mapping[channel] = VK_COMPONENT_SWIZZLE_ONE; - break; - case CELL_GCM_TEXTURE_REMAP_ZERO: - final_mapping[channel] = 
VK_COMPONENT_SWIZZLE_ZERO; - break; - default: - LOG_ERROR(RSX, "Unknown remap lookup value %d", decoded_remap.second[channel]); - case CELL_GCM_TEXTURE_REMAP_REMAP: - final_mapping[channel] = native_mapping[decoded_remap.first[channel]]; - break; - } - } - - return { final_mapping[1], final_mapping[2], final_mapping[3], final_mapping[0] }; + m_dirty_textures.push_back(std::move(tex.get_texture())); + m_temporary_image_view.push_back(std::move(tex.get_view())); + tex.release_dma_resources(); } - VkComponentMapping get_component_map(rsx::vertex_texture&, u32 gcm_format) - { - auto mapping = vk::get_component_mapping(gcm_format); - return { mapping[1], mapping[2], mapping[3], mapping[0] }; - } - - vk::image_view* create_temporary_subresource(vk::command_buffer& cmd, vk::image* source, u32 x, u32 y, u32 w, u32 h, const vk::memory_type_mapping &memory_type_mapping) + vk::image_view* create_temporary_subresource_view(vk::command_buffer& cmd, vk::image* source, u32 gcm_format, u16 x, u16 y, u16 w, u16 h) override { VkImageAspectFlags aspect = VK_IMAGE_ASPECT_COLOR_BIT; @@ -569,7 +413,7 @@ namespace vk std::unique_ptr image; std::unique_ptr view; - image.reset(new vk::image(*vk::get_current_renderer(), memory_type_mapping.device_local, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, + image.reset(new vk::image(*vk::get_current_renderer(), m_memory_types.device_local, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, source->info.imageType, source->info.format, source->width(), source->height(), source->depth(), 1, 1, VK_SAMPLE_COUNT_1_BIT, VK_IMAGE_LAYOUT_UNDEFINED, @@ -580,7 +424,7 @@ namespace vk VkImageLayout old_src_layout = source->current_layout; - vk::change_image_layout(cmd, image.get(), VK_IMAGE_LAYOUT_GENERAL, subresource_range); + vk::change_image_layout(cmd, image.get(), VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, subresource_range); vk::change_image_layout(cmd, source, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, subresource_range); VkImageCopy copy_rgn; @@ -600,17 +444,203 @@ namespace vk 
return m_temporary_image_view.back().get(); } + vk::image_view* create_temporary_subresource_view(vk::command_buffer& cmd, vk::image** source, u32 gcm_format, u16 x, u16 y, u16 w, u16 h) override + { + return create_temporary_subresource_view(cmd, *source, gcm_format, x, y, w, h); + } + + cached_texture_section* create_new_texture(vk::command_buffer& cmd, u32 rsx_address, u32 rsx_size, u16 width, u16 height, u16 depth, u16 mipmaps, const u32 gcm_format, + const rsx::texture_dimension_extended type, const rsx::texture_create_flags flags, std::pair, std::array>& remap_vector) override + { + const bool is_cubemap = type == rsx::texture_dimension_extended::texture_dimension_cubemap; + VkFormat vk_format; + VkComponentMapping mapping; + VkImageAspectFlags aspect_flags; + VkImageType image_type; + VkImageViewType image_view_type; + u8 layer = 0; + + switch (type) + { + case rsx::texture_dimension_extended::texture_dimension_1d: + image_type = VK_IMAGE_TYPE_1D; + image_view_type = VK_IMAGE_VIEW_TYPE_1D; + height = 1; + depth = 1; + layer = 1; + break; + case rsx::texture_dimension_extended::texture_dimension_2d: + image_type = VK_IMAGE_TYPE_2D; + image_view_type = VK_IMAGE_VIEW_TYPE_2D; + depth = 1; + layer = 1; + break; + case rsx::texture_dimension_extended::texture_dimension_cubemap: + image_type = VK_IMAGE_TYPE_2D; + image_view_type = VK_IMAGE_VIEW_TYPE_CUBE; + depth = 1; + layer = 6; + break; + case rsx::texture_dimension_extended::texture_dimension_3d: + image_type = VK_IMAGE_TYPE_3D; + image_view_type = VK_IMAGE_VIEW_TYPE_3D; + layer = 1; + break; + } + + switch (gcm_format) + { + case CELL_GCM_TEXTURE_DEPTH24_D8: + aspect_flags = VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT; + vk_format = m_formats_support.d24_unorm_s8? 
VK_FORMAT_D24_UNORM_S8_UINT : VK_FORMAT_D32_SFLOAT_S8_UINT; + break; + case CELL_GCM_TEXTURE_DEPTH16: + aspect_flags = VK_IMAGE_ASPECT_DEPTH_BIT; + vk_format = VK_FORMAT_D16_UNORM; + break; + default: + aspect_flags = VK_IMAGE_ASPECT_COLOR_BIT; + vk_format = get_compatible_sampler_format(gcm_format); + break; + } + + vk::image *image = new vk::image(*m_device, m_memory_types.device_local, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, + image_type, + vk_format, + width, height, depth, mipmaps, layer, VK_SAMPLE_COUNT_1_BIT, VK_IMAGE_LAYOUT_UNDEFINED, + VK_IMAGE_TILING_OPTIMAL, VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_SAMPLED_BIT, + is_cubemap ? VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT : 0); + + switch (flags) + { + case rsx::texture_create_flags::default_component_order: + { + auto native_mapping = vk::get_component_mapping(gcm_format); + VkComponentSwizzle final_mapping[4] = {}; + + for (u8 channel = 0; channel < 4; ++channel) + { + switch (remap_vector.second[channel]) + { + case CELL_GCM_TEXTURE_REMAP_ONE: + final_mapping[channel] = VK_COMPONENT_SWIZZLE_ONE; + break; + case CELL_GCM_TEXTURE_REMAP_ZERO: + final_mapping[channel] = VK_COMPONENT_SWIZZLE_ZERO; + break; + case CELL_GCM_TEXTURE_REMAP_REMAP: + final_mapping[channel] = native_mapping[remap_vector.first[channel]]; + break; + default: + LOG_ERROR(RSX, "Unknown remap lookup value %d", remap_vector.second[channel]); + } + } + + mapping = { final_mapping[1], final_mapping[2], final_mapping[3], final_mapping[0] }; + break; + } + case rsx::texture_create_flags::native_component_order: + mapping = image->native_component_map; + break; + case rsx::texture_create_flags::swapped_native_component_order: + mapping = {VK_COMPONENT_SWIZZLE_B, VK_COMPONENT_SWIZZLE_G, VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_A}; + break; + default: + fmt::throw_exception("Unknown create flags 0x%X", (u32)flags); + } + + vk::image_view *view = new vk::image_view(*m_device, image->value, image_view_type, 
vk_format, + mapping, { (aspect_flags & ~VK_IMAGE_ASPECT_STENCIL_BIT), 0, mipmaps, 0, layer}); + + change_image_layout(cmd, image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, { aspect_flags, 0, mipmaps, 0, layer }); + + cached_texture_section& region = find_cached_texture(rsx_address, rsx_size, true, width, height, 0); + region.reset(rsx_address, rsx_size); + region.create(width, height, depth, mipmaps, view, image); + region.protect(utils::protection::ro); + region.set_dirty(false); + + read_only_range = region.get_min_max(read_only_range); + return ®ion; + } + + cached_texture_section* upload_image_from_cpu(vk::command_buffer& cmd, u32 rsx_address, u16 width, u16 height, u16 depth, u16 mipmaps, u16 pitch, const u32 gcm_format, + std::vector& subresource_layout, const rsx::texture_dimension_extended type, const bool swizzled, + std::pair, std::array>& remap_vector) override + { + auto section = create_new_texture(cmd, rsx_address, pitch * height, width, height, depth, mipmaps, gcm_format, type, + rsx::texture_create_flags::default_component_order, remap_vector); + + auto image = section->get_raw_texture(); + auto subres_range = section->get_raw_view()->info.subresourceRange; + + change_image_layout(cmd, image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, subres_range); + + vk::enter_uninterruptible(); + + vk::copy_mipmaped_image_using_buffer(cmd, image->value, subresource_layout, gcm_format, swizzled, mipmaps, subres_range.aspectMask, + *m_texture_upload_heap, m_texture_upload_buffer); + + vk::leave_uninterruptible(); + + change_image_layout(cmd, image, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, subres_range); + + return section; + } + + void enforce_surface_creation_type(cached_texture_section& section, const rsx::texture_create_flags expected_flags) override + { + VkComponentMapping mapping; + vk::image* image = section.get_raw_texture(); + auto& view = section.get_view(); + + switch (expected_flags) + { + case rsx::texture_create_flags::native_component_order: + mapping = 
image->native_component_map; + break; + case rsx::texture_create_flags::swapped_native_component_order: + mapping = { VK_COMPONENT_SWIZZLE_B, VK_COMPONENT_SWIZZLE_G, VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_A }; + break; + default: + return; + } + + if (mapping.a != view->info.components.a || + mapping.b != view->info.components.b || + mapping.g != view->info.components.g || + mapping.r != view->info.components.r) + { + //Replace view map + vk::image_view *new_view = new vk::image_view(*m_device, image->value, view->info.viewType, view->info.format, + mapping, view->info.subresourceRange); + + view.reset(new_view); + } + } + + void insert_texture_barrier() override + {} + public: - texture_cache() {} - ~texture_cache() {} + void initialize(vk::render_device& device, vk::memory_type_mapping& memory_types, vk::gpu_formats_support& formats_support, + VkQueue submit_queue, vk::vk_data_heap& upload_heap, vk::buffer* upload_buffer) + { + m_memory_types = memory_types; + m_formats_support = formats_support; + m_device = &device; + m_submit_queue = submit_queue; + m_texture_upload_heap = &upload_heap; + m_texture_upload_buffer = upload_buffer; + } - void destroy() + void destroy() override { purge_cache(); } - bool is_depth_texture(const u32 texaddr) + bool is_depth_texture(const u32 texaddr) override { reader_lock lock(m_cache_mutex); @@ -641,429 +671,11 @@ namespace vk return false; } - template - vk::image_view* upload_texture(command_buffer &cmd, RsxTextureType &tex, rsx::vk_render_targets &m_rtts, const vk::memory_type_mapping &memory_type_mapping, vk_data_heap& upload_heap, vk::buffer* upload_buffer) + void on_frame_end() override { - const u32 texaddr = rsx::get_address(tex.offset(), tex.location()); - const u32 range = (u32)get_texture_size(tex); - - if (!texaddr || !range) + if (m_unreleased_texture_objects >= m_max_zombie_objects) { - LOG_ERROR(RSX, "Texture upload requested but texture not found, (address=0x%X, size=0x%X)", texaddr, range); - return nullptr; 
- } - - //First check if it exists as an rtt... - vk::render_target *rtt_texture = nullptr; - if ((rtt_texture = m_rtts.get_texture_from_render_target_if_applicable(texaddr))) - { - if (g_cfg.video.strict_rendering_mode) - { - for (const auto& tex : m_rtts.m_bound_render_targets) - { - if (std::get<0>(tex) == texaddr) - { - LOG_WARNING(RSX, "Attempting to sample a currently bound render target @ 0x%x", texaddr); - return create_temporary_subresource(cmd, rtt_texture, 0, 0, rtt_texture->width(), rtt_texture->height(), memory_type_mapping); - } - } - } - - return rtt_texture->get_view(); - } - - if ((rtt_texture = m_rtts.get_texture_from_depth_stencil_if_applicable(texaddr))) - { - if (g_cfg.video.strict_rendering_mode) - { - if (std::get<0>(m_rtts.m_bound_depth_stencil) == texaddr) - { - LOG_WARNING(RSX, "Attempting to sample a currently bound depth surface @ 0x%x", texaddr); - return create_temporary_subresource(cmd, rtt_texture, 0, 0, rtt_texture->width(), rtt_texture->height(), memory_type_mapping); - } - } - - return rtt_texture->get_view(); - } - - VkImageType image_type; - VkImageViewType image_view_type; - u16 height = 0; - u16 depth = 0; - u8 layer = 0; - - switch (tex.get_extended_texture_dimension()) - { - case rsx::texture_dimension_extended::texture_dimension_1d: - image_type = VK_IMAGE_TYPE_1D; - image_view_type = VK_IMAGE_VIEW_TYPE_1D; - height = 1; - depth = 1; - layer = 1; - break; - case rsx::texture_dimension_extended::texture_dimension_2d: - image_type = VK_IMAGE_TYPE_2D; - image_view_type = VK_IMAGE_VIEW_TYPE_2D; - height = tex.height(); - depth = 1; - layer = 1; - break; - case rsx::texture_dimension_extended::texture_dimension_cubemap: - image_type = VK_IMAGE_TYPE_2D; - image_view_type = VK_IMAGE_VIEW_TYPE_CUBE; - height = tex.height(); - depth = 1; - layer = 6; - break; - case rsx::texture_dimension_extended::texture_dimension_3d: - image_type = VK_IMAGE_TYPE_3D; - image_view_type = VK_IMAGE_VIEW_TYPE_3D; - height = tex.height(); - depth = 
tex.depth(); - layer = 1; - break; - } - - //Ignoring the mipmaps count is intentional - its common for games to pass in incorrect values as mipmap count - cached_texture_section& region = find_cached_texture(texaddr, range, true, tex.width(), height, 0); - if (region.exists() && !region.is_dirty()) - { - return region.get_view().get(); - } - - bool is_cubemap = tex.get_extended_texture_dimension() == rsx::texture_dimension_extended::texture_dimension_cubemap; - VkImageSubresourceRange subresource_range = vk::get_image_subresource_range(0, 0, is_cubemap ? 6 : 1, tex.get_exact_mipmap_count(), VK_IMAGE_ASPECT_COLOR_BIT); - - //If for some reason invalid dimensions are requested, fail - if (!height || !depth || !layer || !tex.width()) - { - LOG_ERROR(RSX, "Texture upload requested but invalid texture dimensions passed"); - return nullptr; - } - - u32 raw_format = tex.format(); - u32 format = raw_format & ~(CELL_GCM_TEXTURE_LN | CELL_GCM_TEXTURE_UN); - - VkComponentMapping mapping = get_component_map(tex, format); - VkFormat vk_format = get_compatible_sampler_format(format); - - vk::image *image = new vk::image(*vk::get_current_renderer(), memory_type_mapping.device_local, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, - image_type, - vk_format, - tex.width(), height, depth, tex.get_exact_mipmap_count(), layer, VK_SAMPLE_COUNT_1_BIT, VK_IMAGE_LAYOUT_UNDEFINED, - VK_IMAGE_TILING_OPTIMAL, VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT, is_cubemap ? 
VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT : 0); - change_image_layout(cmd, image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, subresource_range); - - vk::image_view *view = new vk::image_view(*vk::get_current_renderer(), image->value, image_view_type, vk_format, - mapping, - subresource_range); - - //We cannot split mipmap uploads across multiple command buffers (must explicitly open and close operations on the same cb) - vk::enter_uninterruptible(); - - copy_mipmaped_image_using_buffer(cmd, image->value, get_subresources_layout(tex), format, !(tex.format() & CELL_GCM_TEXTURE_LN), tex.get_exact_mipmap_count(), - upload_heap, upload_buffer); - - change_image_layout(cmd, image, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, subresource_range); - - vk::leave_uninterruptible(); - writer_lock lock(m_cache_mutex); - - region.reset(texaddr, range); - region.create(tex.width(), height, depth, tex.get_exact_mipmap_count(), view, image); - region.protect(utils::protection::ro); - region.set_dirty(false); - - read_only_range = region.get_min_max(read_only_range); - return view; - } - - void lock_memory_region(vk::render_target* image, const u32 memory_address, const u32 memory_size, const u32 width, const u32 height) - { - cached_texture_section& region = find_cached_texture(memory_address, memory_size, true, width, height, 1); - - writer_lock lock(m_cache_mutex); - - if (!region.is_locked()) - { - region.reset(memory_address, memory_size); - region.set_dirty(false); - no_access_range = region.get_min_max(no_access_range); - } - - region.protect(utils::protection::no); - region.create(width, height, 1, 1, nullptr, image, image->native_pitch, false); - } - - bool flush_memory_to_cache(const u32 memory_address, const u32 memory_size, vk::command_buffer&cmd, vk::memory_type_mapping& memory_types, VkQueue submit_queue, bool skip_synchronized = false) - { - cached_texture_section* region = find_flushable_section(memory_address, memory_size); - - //TODO: Make this an assertion - if (region == 
nullptr) - { - LOG_ERROR(RSX, "Failed to find section for render target 0x%X + 0x%X", memory_address, memory_size); - return false; - } - - if (skip_synchronized && region->is_synchronized()) - return false; - - region->copy_texture(cmd, memory_types.host_visible_coherent, submit_queue); - return true; - } - - std::tuple address_is_flushable(u32 address) - { - if (address < no_access_range.first || - address > no_access_range.second) - return std::make_tuple(false, false, 0ull); - - reader_lock lock(m_cache_mutex); - - auto found = m_cache.find(address); - if (found != m_cache.end()) - { - auto &range_data = found->second; - for (auto &tex : range_data.data) - { - if (tex.is_dirty()) continue; - if (!tex.is_flushable()) continue; - - if (tex.overlaps(address)) - return std::make_tuple(true, tex.is_synchronized(), tex.get_sync_timestamp()); - } - } - - for (auto &address_range : m_cache) - { - if (address_range.first == address) - continue; - - auto &range_data = address_range.second; - - //Quickly discard range - const u32 lock_base = address_range.first & ~0xfff; - const u32 lock_limit = align(range_data.max_range + address_range.first, 4096); - - if (address < lock_base || address >= lock_limit) - continue; - - for (auto &tex : range_data.data) - { - if (tex.is_dirty()) continue; - if (!tex.is_flushable()) continue; - - if (tex.overlaps(address)) - return std::make_tuple(true, tex.is_synchronized(), tex.get_sync_timestamp()); - } - } - - return std::make_tuple(false, false, 0ull); - } - - bool flush_address(u32 address, vk::render_device& dev, vk::command_buffer& cmd, vk::memory_type_mapping& memory_types, VkQueue submit_queue) - { - if (address < no_access_range.first || - address > no_access_range.second) - return false; - - bool response = false; - std::pair trampled_range = std::make_pair(0xffffffff, 0x0); - std::unordered_map processed_ranges; - - rsx::conditional_lock lock(in_access_violation_handler, m_cache_mutex); - - for (auto It = m_cache.begin(); It 
!= m_cache.end(); It++) - { - auto &range_data = It->second; - const u32 base = It->first; - bool range_reset = false; - - if (processed_ranges[base] || range_data.valid_count == 0) - continue; - - //Quickly discard range - const u32 lock_base = base & ~0xfff; - const u32 lock_limit = align(range_data.max_range + base, 4096); - - if ((trampled_range.first >= lock_limit || lock_base >= trampled_range.second) && - (lock_base > address || lock_limit <= address)) - { - processed_ranges[base] = true; - continue; - } - - for (int i = 0; i < range_data.data.size(); i++) - { - auto &tex = range_data.data[i]; - - if (tex.is_dirty()) continue; - if (!tex.is_flushable()) continue; - - auto overlapped = tex.overlaps_page(trampled_range, address); - if (std::get<0>(overlapped)) - { - auto &new_range = std::get<1>(overlapped); - - if (new_range.first != trampled_range.first || - new_range.second != trampled_range.second) - { - i = 0; - trampled_range = new_range; - range_reset = true; - } - - //TODO: Map basic host_visible memory without coherent constraint - if (!tex.flush(dev, cmd, memory_types.host_visible_coherent, submit_queue)) - { - //Missed address, note this - //TODO: Lower severity when successful to keep the cache from overworking - record_cache_miss(tex); - } - - response = true; - } - } - - if (range_reset) - { - processed_ranges.clear(); - It = m_cache.begin(); - } - - processed_ranges[base] = true; - } - - return response; - } - - bool invalidate_address(u32 address) - { - return invalidate_range(address, 4096 - (address & 4095)); - } - - bool invalidate_range(u32 address, u32 range, bool unprotect=true) - { - std::pair trampled_range = std::make_pair(address, address + range); - - if (trampled_range.second < read_only_range.first || - trampled_range.first > read_only_range.second) - { - //Doesnt fall in the read_only textures range; check render targets - if (trampled_range.second < no_access_range.first || - trampled_range.first > no_access_range.second) - 
return false; - } - - bool response = false; - std::unordered_map processed_ranges; - - rsx::conditional_lock lock(in_access_violation_handler, m_cache_mutex); - - for (auto It = m_cache.begin(); It != m_cache.end(); It++) - { - auto &range_data = It->second; - const u32 base = It->first; - bool range_reset = false; - - if (processed_ranges[base] || range_data.valid_count == 0) - continue; - - //Quickly discard range - const u32 lock_base = base & ~0xfff; - const u32 lock_limit = align(range_data.max_range + base, 4096); - - if (trampled_range.first >= lock_limit || lock_base >= trampled_range.second) - { - processed_ranges[base] = true; - continue; - } - - for (int i = 0; i < range_data.data.size(); i++) - { - auto &tex = range_data.data[i]; - - if (tex.is_dirty()) continue; - if (!tex.is_locked()) continue; //flushable sections can be 'clean' but unlocked. TODO: Handle this better - - auto overlapped = tex.overlaps_page(trampled_range, address); - if (std::get<0>(overlapped)) - { - auto &new_range = std::get<1>(overlapped); - - if (new_range.first != trampled_range.first || - new_range.second != trampled_range.second) - { - i = 0; - trampled_range = new_range; - range_reset = true; - } - - if (unprotect) - { - m_unreleased_texture_objects++; - - tex.set_dirty(true); - tex.unprotect(); - } - else - { - tex.discard(); - } - - range_data.valid_count--; - response = true; - } - } - - if (range_reset) - { - processed_ranges.clear(); - It = m_cache.begin(); - } - - processed_ranges[base] = true; - } - - return response; - } - - void flush(bool purge_dirty=false) - { - if (purge_dirty || m_unreleased_texture_objects >= m_max_zombie_objects) - { - //Reclaims all graphics memory consumed by dirty textures - std::vector empty_addresses; - empty_addresses.resize(32); - - for (auto &address_range : m_cache) - { - auto &range_data = address_range.second; - - if (range_data.valid_count == 0) - empty_addresses.push_back(address_range.first); - - for (auto &tex : 
range_data.data) - { - if (!tex.is_dirty()) - continue; - - if (tex.exists()) - { - m_dirty_textures.push_back(std::move(tex.get_texture())); - m_temporary_image_view.push_back(std::move(tex.get_view())); - } - - tex.release_dma_resources(); - } - } - - //Free descriptor objects as well - for (const auto &address : empty_addresses) - { - m_cache.erase(address); - } + purge_dirty(); } m_image_views_to_purge.clear(); @@ -1073,437 +685,26 @@ namespace vk m_images_to_purge = std::move(m_dirty_textures); } - void record_cache_miss(cached_texture_section &tex) + bool blit(rsx::blit_src_info& src, rsx::blit_dst_info& dst, bool interpolate, rsx::vk_render_targets& m_rtts, vk::command_buffer& cmd) { - const u32 memory_address = tex.get_section_base(); - const u32 memory_size = tex.get_section_size(); - const VkFormat fmt = tex.get_format(); - - auto It = m_cache_miss_statistics_table.find(memory_address); - if (It == m_cache_miss_statistics_table.end()) + struct blit_helper { - m_cache_miss_statistics_table[memory_address] = { 1, memory_size, fmt }; - return; - } - - auto &value = It->second; - if (value.format != fmt || value.block_size != memory_size) - { - m_cache_miss_statistics_table[memory_address] = { 1, memory_size, fmt }; - return; - } - - value.misses++; - } - - void flush_if_cache_miss_likely(const VkFormat fmt, const u32 memory_address, const u32 memory_size, vk::command_buffer& cmd, vk::memory_type_mapping& memory_types, VkQueue submit_queue) - { - auto It = m_cache_miss_statistics_table.find(memory_address); - if (It == m_cache_miss_statistics_table.end()) - { - m_cache_miss_statistics_table[memory_address] = { 0, memory_size, fmt }; - return; - } - - auto &value = It->second; - - if (value.format != fmt || value.block_size != memory_size) - { - //Reset since the data has changed - //TODO: Keep track of all this information together - m_cache_miss_statistics_table[memory_address] = { 0, memory_size, fmt }; - return; - } - - //Properly synchronized - no miss - 
if (!value.misses) return; - - //Auto flush if this address keeps missing (not properly synchronized) - if (value.misses > 16) - { - //TODO: Determine better way of setting threshold - if (!flush_memory_to_cache(memory_address, memory_size, cmd, memory_types, submit_queue, true)) - value.misses --; - } - } - - bool upload_scaled_image(rsx::blit_src_info& src, rsx::blit_dst_info& dst, bool /*interpolate*/, - vk::render_device& dev, vk::command_buffer& cmd, vk::memory_type_mapping& memory_types, VkQueue submit_queue, - rsx::vk_render_targets &m_rtts, vk_data_heap &upload_heap, vk::buffer* upload_buffer) - { - //Since we will have dst in vram, we can 'safely' ignore the swizzle flag - //TODO: Verify correct behavior - - bool src_is_render_target = false; - bool dst_is_render_target = false; - bool dst_is_argb8 = (dst.format == rsx::blit_engine::transfer_destination_format::a8r8g8b8); - bool src_is_argb8 = (src.format == rsx::blit_engine::transfer_source_format::a8r8g8b8); - - const VkComponentMapping rgba_map = { VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_G, VK_COMPONENT_SWIZZLE_B, VK_COMPONENT_SWIZZLE_A }; - const VkComponentMapping bgra_map = { VK_COMPONENT_SWIZZLE_B, VK_COMPONENT_SWIZZLE_G, VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_A }; - - auto dest_mapping = (!dst_is_argb8 || dst.swizzled) ? 
bgra_map : rgba_map; - - vk::image* vram_texture = nullptr; - vk::image* dest_texture = nullptr; - cached_texture_section* cached_dest = nullptr; - - const u32 src_address = (u32)((u64)src.pixels - (u64)vm::base(0)); - const u32 dst_address = (u32)((u64)dst.pixels - (u64)vm::base(0)); - - //Check if src/dst are parts of render targets - auto dst_subres = m_rtts.get_surface_subresource_if_applicable(dst_address, dst.width, dst.clip_height, dst.pitch, true, true, false); - dst_is_render_target = dst_subres.surface != nullptr; - - //TODO: Handle cases where src or dst can be a depth texture while the other is a color texture - requires a render pass to emulate - auto src_subres = m_rtts.get_surface_subresource_if_applicable(src_address, src.width, src.height, src.pitch, true, true, false); - src_is_render_target = src_subres.surface != nullptr; - - //Always use GPU blit if src or dst is in the surface store - if (!g_cfg.video.use_gpu_texture_scaling && !(src_is_render_target || dst_is_render_target)) - return false; - - u16 max_dst_width = dst.width; - u16 max_dst_height = dst.height; - - //Prepare areas and offsets - //Copy from [src.offset_x, src.offset_y] a region of [clip.width, clip.height] - //Stretch onto [dst.offset_x, y] with clipping performed on the source region - //The implementation here adds the inverse scaled clip dimensions onto the source to completely bypass final clipping step - - float scale_x = (f32)dst.width / src.width; - float scale_y = (f32)dst.height / src.height; - - //Clip offset is unused if the clip offsets are reprojected onto the source - position2i clip_offset = { 0, 0 };//{ dst.clip_x, dst.clip_y }; - position2i dst_offset = { dst.offset_x, dst.offset_y }; - - size2i clip_dimensions = { dst.clip_width, dst.clip_height }; - //Dimensions passed are restricted to powers of 2; get real height from clip_height and width from pitch - const size2i dst_dimensions = { dst.pitch / (dst_is_argb8 ? 
4 : 2), dst.clip_height }; - - //Offset in x and y for src is 0 (it is already accounted for when getting pixels_src) - //Reproject final clip onto source... - const u16 src_w = (const u16)((f32)clip_dimensions.width / scale_x); - const u16 src_h = (const u16)((f32)clip_dimensions.height / scale_y); - - areai src_area = { 0, 0, src_w, src_h }; - areai dst_area = { 0, 0, dst.clip_width, dst.clip_height }; - - //If destination is neither a render target nor an existing texture in VRAM - //its possible that this method is being used to perform a memcpy into RSX memory, so we check - //parameters. Whenever a simple memcpy can get the job done, use it instead. - //Dai-3-ji Super Robot Taisen for example uses this to copy program code to GPU RAM - - bool is_memcpy = false; - u32 memcpy_bytes_length = 0; - if (dst_is_argb8 == src_is_argb8 && !dst.swizzled) - { - if ((src.slice_h == 1 && dst.clip_height == 1) || - (dst.clip_width == src.width && dst.clip_height == src.slice_h && src.pitch == dst.pitch)) + vk::command_buffer* commands; + blit_helper(vk::command_buffer *c) : commands(c) {} + void scale_image(vk::image* src, vk::image* dst, areai src_area, areai dst_area, bool interpolate, bool is_depth) { - const u8 bpp = dst_is_argb8 ? 4 : 2; - is_memcpy = true; - memcpy_bytes_length = dst.clip_width * bpp * dst.clip_height; + VkImageAspectFlagBits aspect = VK_IMAGE_ASPECT_COLOR_BIT; + if (is_depth) aspect = (VkImageAspectFlagBits)(src->info.format == VK_FORMAT_D16_UNORM ? 
VK_IMAGE_ASPECT_DEPTH_BIT : VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT); + + copy_scaled_image(*commands, src->value, dst->value, src->current_layout, dst->current_layout, src_area.x1, src_area.y1, src_area.x2 - src_area.x1, src_area.y2 - src_area.y1, + dst_area.x1, dst_area.y1, dst_area.x2 - dst_area.x1, dst_area.y2 - dst_area.y1, 1, aspect); + + change_image_layout(*commands, dst, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, {(VkImageAspectFlags)aspect, 0, dst->info.mipLevels, 0, dst->info.arrayLayers}); } } + helper(&cmd); - if (!dst_is_render_target) - { - //First check if this surface exists in VRAM with exact dimensions - //Since scaled GPU resources are not invalidated by the CPU, we need to reuse older surfaces if possible - cached_dest = find_texture_from_dimensions(dst.rsx_address, dst.pitch * dst.clip_height, dst_dimensions.width, dst_dimensions.height); - - //Check for any available region that will fit this one - if (!cached_dest) cached_dest = find_texture_from_range(dst.rsx_address, dst.pitch * dst.clip_height); - - if (cached_dest) - { - //TODO: Verify that the new surface will fit - dest_texture = cached_dest->get_texture().get(); - - //TODO: Move this code into utils since it is used alot - const u32 address_offset = dst.rsx_address - cached_dest->get_section_base(); - - const u16 bpp = dst_is_argb8 ? 
4 : 2; - const u16 offset_y = address_offset / dst.pitch; - const u16 offset_x = address_offset % dst.pitch; - - dst_offset.x += offset_x / bpp; - dst_offset.y += offset_y; - - max_dst_width = cached_dest->get_width(); - max_dst_height = cached_dest->get_height(); - - //If dest has a component swizzle (usually caused by ARGB->BGRA compatibility when uploading from cpu) remove it - auto& image_view = cached_dest->get_view(); - - if (image_view->info.components.a != dest_mapping.a || - image_view->info.components.r != dest_mapping.r || - image_view->info.components.g != dest_mapping.g || - image_view->info.components.b != dest_mapping.b) - { - auto create_info = image_view->info; - create_info.components = dest_mapping; - - m_temporary_image_view.push_back(std::move(image_view)); - image_view.reset(new vk::image_view(dev, create_info)); - } - } - else if (is_memcpy) - { - memcpy(dst.pixels, src.pixels, memcpy_bytes_length); - return true; - } - } - else - { - dst_offset.x = dst_subres.x; - dst_offset.y = dst_subres.y; - - dest_texture = dst_subres.surface; - - max_dst_width = dst_subres.surface->width(); - max_dst_height = dst_subres.surface->height(); - - if (is_memcpy) - { - //Some render target descriptions are actually invalid - //Confirm this is a flushable RTT - const auto rsx_pitch = dst_subres.surface->rsx_pitch; - const auto native_pitch = dst_subres.surface->native_pitch; - - if (rsx_pitch <= 64 && native_pitch != rsx_pitch) - { - memcpy(dst.pixels, src.pixels, memcpy_bytes_length); - return true; - } - } - } - - //Create source texture if does not exist - if (!src_is_render_target) - { - auto preloaded_texture = find_texture_from_dimensions(src_address, src.pitch * src.slice_h, src.width, src.slice_h); - - if (preloaded_texture != nullptr) - { - vram_texture = preloaded_texture->get_texture().get(); - } - else - { - flush_address(src_address, dev, cmd, memory_types, submit_queue); - - const VkFormat src_vk_format = src_is_argb8 ? 
VK_FORMAT_R8G8B8A8_UNORM : VK_FORMAT_R5G6B5_UNORM_PACK16; - const VkComponentMapping component_mapping = (!src_is_argb8 || dst.swizzled) ? bgra_map : rgba_map; - - //Upload texture from CPU - vk::image *image = new vk::image(*vk::get_current_renderer(), memory_types.device_local, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, - VK_IMAGE_TYPE_2D, - src_vk_format, - src.width, src.slice_h, 1, 1, 1, VK_SAMPLE_COUNT_1_BIT, VK_IMAGE_LAYOUT_UNDEFINED, - VK_IMAGE_TILING_OPTIMAL, VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT, 0); - - vk::image_view *view = new vk::image_view(*vk::get_current_renderer(), image->value, VK_IMAGE_VIEW_TYPE_2D, src_vk_format, - component_mapping, { VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1 }); - - change_image_layout(cmd, image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, { VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1 }); - - cached_texture_section& region = find_cached_texture(src_address, src.pitch * src.slice_h, true, src.width, src.slice_h, 1); - - writer_lock lock(m_cache_mutex); - region.reset(src_address, src.pitch * src.slice_h); - region.create(src.width, src.slice_h, 1, 1, view, image); - region.protect(utils::protection::ro); - region.set_dirty(false); - - read_only_range = region.get_min_max(read_only_range); - - vk::enter_uninterruptible(); - - std::vector layout(1); - auto &subres = layout.back(); - subres.width_in_block = src.width; - subres.height_in_block = src.slice_h; - subres.pitch_in_bytes = src.width; //Seems to be a typo - should be pitch_in_block - subres.depth = 1; - subres.data = {(const gsl::byte*)src.pixels, align(src.pitch, 256) * src.slice_h}; - - copy_mipmaped_image_using_buffer(cmd, image->value, layout, src_is_argb8? 
CELL_GCM_TEXTURE_A8R8G8B8: CELL_GCM_TEXTURE_R5G6B5, - false, 1, upload_heap, upload_buffer); - - vk::leave_uninterruptible(); - - vram_texture = image; - } - } - else - { - if (src_subres.w != clip_dimensions.width || - src_subres.h != clip_dimensions.height) - { - f32 subres_scaling_x = (f32)src.pitch / src_subres.surface->native_pitch; - - dst_area.x2 = (int)(src_subres.w * scale_x * subres_scaling_x); - dst_area.y2 = (int)(src_subres.h * scale_y); - } - - src_area.x2 = src_subres.w; - src_area.y2 = src_subres.h; - - src_area.x1 += src_subres.x; - src_area.x2 += src_subres.x; - src_area.y1 += src_subres.y; - src_area.y2 += src_subres.y; - - vram_texture = src_subres.surface; - } - - VkImageAspectFlags aspect_to_copy = VK_IMAGE_ASPECT_COLOR_BIT; - bool dest_exists = dest_texture != nullptr; - VkFormat dst_vk_format = dst_is_argb8 ? VK_FORMAT_B8G8R8A8_UNORM : VK_FORMAT_R5G6B5_UNORM_PACK16; - const u8 bpp = dst_is_argb8 ? 4 : 2; - const u32 real_width = dst.pitch / bpp; - - //If src is depth, dest has to be depth as well - bool format_mismatch = false; - - if (src_subres.is_depth_surface) - { - if (dest_exists) - { - if (dst_is_render_target && !dst_subres.is_depth_surface) - { - LOG_ERROR(RSX, "Depth->RGBA blit requested but not supported"); - return true; - } - - if (!dst_is_render_target) - { - if (dest_texture->info.format != src_subres.surface->info.format) - { - format_mismatch = true; - } - } - else - { - if (dst_subres.surface->info.format != src_subres.surface->info.format) - { - LOG_ERROR(RSX, "Depth blit requested, but formats do not match (0x%X vs 0x%X)", - (u32)dst_subres.surface->info.format, (u32)src_subres.surface->info.format); - return true; - } - } - } - - dst_vk_format = src_subres.surface->info.format; - dest_mapping = { VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_R }; - - if (dst_vk_format == VK_FORMAT_D16_UNORM) - aspect_to_copy = VK_IMAGE_ASPECT_DEPTH_BIT; - else - aspect_to_copy = 
VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT; - } - else - { - if (dest_exists && dest_texture->info.format != dst_vk_format) - { - LOG_ERROR(RSX, "Format mismatch - expected VkFormat 0x%X but found 0x%X instead", (u32)dst_vk_format, (u32)dest_texture->info.format); - format_mismatch = true; - - if (dst_is_render_target) - { - if (dst_subres.is_bound) - { - LOG_ERROR(RSX, "Blit destination is an active render target but format does not match. Blit operation ignored."); - return true; - } - - m_rtts.invalidate_single_surface(dst_subres.surface, dst_subres.is_depth_surface); - } - } - } - - if (format_mismatch) - { - invalidate_range(cached_dest->get_section_base(), cached_dest->get_section_size()); - - dest_exists = false; - cached_dest = nullptr; - } - - //Validate clip offsets (Persona 4 Arena at 720p) - //Check if can fit - //NOTE: It is possible that the check is simpler (if (clip_x >= clip_width)) - //Needs verification - if ((dst.offset_x + dst.clip_x + dst.clip_width) > max_dst_width) dst.clip_x = 0; - if ((dst.offset_y + dst.clip_y + dst.clip_height) > max_dst_height) dst.clip_y = 0; - - if (dst.clip_x || dst.clip_y) - { - //Reproject clip offsets onto source - const u16 scaled_clip_offset_x = (const u16)((f32)dst.clip_x / scale_x); - const u16 scaled_clip_offset_y = (const u16)((f32)dst.clip_y / scale_y); - - src_area.x1 += scaled_clip_offset_x; - src_area.x2 += scaled_clip_offset_x; - src_area.y1 += scaled_clip_offset_y; - src_area.y2 += scaled_clip_offset_y; - } - - if (!dest_exists) - { - dest_texture = new vk::image(*vk::get_current_renderer(), memory_types.device_local, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, - VK_IMAGE_TYPE_2D, - dst_vk_format, - real_width, dst.clip_height, 1, 1, 1, VK_SAMPLE_COUNT_1_BIT, VK_IMAGE_LAYOUT_UNDEFINED, - VK_IMAGE_TILING_OPTIMAL, VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT, 0); - - change_image_layout(cmd, dest_texture, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, { aspect_to_copy, 0, 1, 0, 1}); - } - - 
//Copy data - u32 src_width = src_area.x2 - src_area.x1; - u32 src_height = src_area.y2 - src_area.y1; - u32 dst_width = dst_area.x2 - dst_area.x1; - u32 dst_height = dst_area.y2 - dst_area.y1; - - if (dst.clip_width != dst_width || - dst.clip_height != dst_height) - { - //clip reproject - src_width = (src_width * dst.clip_width) / dst_width; - src_height = (src_height * dst.clip_height) / dst_height; - } - - copy_scaled_image(cmd, vram_texture->value, dest_texture->value, vram_texture->current_layout, dest_texture->current_layout, - src_area.x1, src_area.y1, src_width, src_height, dst_offset.x, dst_offset.y, dst.clip_width, dst.clip_height, 1, (VkImageAspectFlagBits)aspect_to_copy); - - if (dest_exists) - return true; - - change_image_layout(cmd, dest_texture, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, { aspect_to_copy, 0, 1, 0, 1 }); - - //TODO: Verify if any titles ever scale into CPU memory. It defeats the purpose of uploading data to the GPU, but it could happen - //If so, add this texture to the no_access queue not the read_only queue - cached_texture_section& region = find_cached_texture(dst.rsx_address, dst.pitch * dst.clip_height, true, real_width, dst.clip_height, 1); - writer_lock lock(m_cache_mutex); - - //These textures are completely GPU resident so we dont watch for CPU access - //There's no data to be fetched from the CPU - //Its is possible for a title to attempt to read from the region, but the CPU path should be used in such cases - - vk::image_view *view = new vk::image_view(*vk::get_current_renderer(), dest_texture->value, VK_IMAGE_VIEW_TYPE_2D, dst_vk_format, - dest_mapping, { aspect_to_copy & ~(VK_IMAGE_ASPECT_STENCIL_BIT), 0, 1, 0, 1 }); - - region.reset(dst.rsx_address, dst.pitch * dst.clip_height); - region.create(real_width, dst.clip_height, 1, 1, view, dest_texture); - region.protect(utils::protection::ro); - region.set_dirty(false); - - read_only_range = region.get_min_max(read_only_range); - - return true; + return 
upload_scaled_image(src, dst, interpolate, cmd, m_rtts, helper, *m_device, cmd, m_memory_types, m_submit_queue); } }; } diff --git a/rpcs3/Emu/RSX/rsx_cache.h b/rpcs3/Emu/RSX/rsx_cache.h index 3d07ad4b99..930701aec2 100644 --- a/rpcs3/Emu/RSX/rsx_cache.h +++ b/rpcs3/Emu/RSX/rsx_cache.h @@ -21,6 +21,8 @@ namespace rsx u16 pitch; void *pixels; + bool compressed_x; + bool compressed_y; u32 rsx_address; }; @@ -36,10 +38,14 @@ namespace rsx u16 clip_y; u16 clip_width; u16 clip_height; + f32 scale_x; + f32 scale_y; bool swizzled; void *pixels; + bool compressed_x; + bool compressed_y; u32 rsx_address; }; diff --git a/rpcs3/Emu/RSX/rsx_methods.cpp b/rpcs3/Emu/RSX/rsx_methods.cpp index 55ceae6840..ba2a2888fa 100644 --- a/rpcs3/Emu/RSX/rsx_methods.cpp +++ b/rpcs3/Emu/RSX/rsx_methods.cpp @@ -593,6 +593,7 @@ namespace rsx const s32 out_offset = out_x * out_bpp + out_pitch * out_y; const tiled_region src_region = rsx->get_tiled_address(src_offset + in_offset, src_dma & 0xf); + const tiled_region dst_region = rsx->get_tiled_address(dst_offset + out_offset, dst_dma & 0xf); u8* pixels_src = src_region.tile ? 
src_region.ptr + src_region.base : src_region.ptr; u8* pixels_dst = vm::ps3::_ptr(get_address(dst_offset + out_offset, dst_dma)); @@ -626,12 +627,19 @@ namespace rsx u32 convert_h = (u32)(scale_y * in_h); u32 slice_h = clip_h; + blit_src_info src_info = {}; + blit_dst_info dst_info = {}; if (src_region.tile) { - if (src_region.tile->comp == CELL_GCM_COMPMODE_C32_2X2) + switch(src_region.tile->comp) { + case CELL_GCM_COMPMODE_C32_2X2: slice_h *= 2; + src_info.compressed_y = true; + case CELL_GCM_COMPMODE_C32_2X1: + src_info.compressed_x = true; + break; } u32 size = slice_h * in_pitch; @@ -643,20 +651,29 @@ namespace rsx } } + if (dst_region.tile) + { + switch (dst_region.tile->comp) + { + case CELL_GCM_COMPMODE_C32_2X2: + dst_info.compressed_y = true; + case CELL_GCM_COMPMODE_C32_2X1: + dst_info.compressed_x = true; + break; + } + } + if (dst_dma == CELL_GCM_CONTEXT_DMA_MEMORY_FRAME_BUFFER) { //For now, only use this for actual scaled images, there are use cases that should not go through 3d engine, e.g program ucode transfer //TODO: Figure out more instances where we can use this without problems - - blit_src_info src_info; - blit_dst_info dst_info; - + //NOTE: In cases where slice_h is modified due to compression (read from tiled memory), the new value (clip_h * 2) does not matter if memory is on the GPU src_info.format = src_color_format; src_info.origin = in_origin; src_info.width = in_w; src_info.height = in_h; src_info.pitch = in_pitch; - src_info.slice_h = slice_h; + src_info.slice_h = clip_h; src_info.offset_x = (u16)in_x; src_info.offset_y = (u16)in_y; src_info.pixels = pixels_src; @@ -672,6 +689,8 @@ namespace rsx dst_info.offset_x = out_x; dst_info.offset_y = out_y; dst_info.pitch = out_pitch; + dst_info.scale_x = scale_x; + dst_info.scale_y = scale_y; dst_info.pixels = pixels_dst; dst_info.rsx_address = get_address(dst_offset, dst_dma); dst_info.swizzled = (method_registers.blit_engine_context_surface() == blit_engine::context_surface::swizzle2d); diff 
--git a/rpcs3/GLGSRender.vcxproj b/rpcs3/GLGSRender.vcxproj index 7c2880cc5e..ebe9e3d385 100644 --- a/rpcs3/GLGSRender.vcxproj +++ b/rpcs3/GLGSRender.vcxproj @@ -61,6 +61,9 @@ + + + {c4a10229-4712-4bd2-b63e-50d93c67a038} @@ -84,7 +87,6 @@ - @@ -95,4 +97,4 @@ - + \ No newline at end of file diff --git a/rpcs3/GLGSRender.vcxproj.filters b/rpcs3/GLGSRender.vcxproj.filters index 35bae19d35..932ef00516 100644 --- a/rpcs3/GLGSRender.vcxproj.filters +++ b/rpcs3/GLGSRender.vcxproj.filters @@ -10,7 +10,6 @@ - diff --git a/rpcs3/emucore.vcxproj b/rpcs3/emucore.vcxproj index 242e2d8e32..cb274188b5 100644 --- a/rpcs3/emucore.vcxproj +++ b/rpcs3/emucore.vcxproj @@ -90,9 +90,9 @@ NotUsing - - - NotUsing + + + NotUsing @@ -647,6 +647,7 @@ + diff --git a/rpcs3/emucore.vcxproj.filters b/rpcs3/emucore.vcxproj.filters index 4e97901bb9..b94d771642 100644 --- a/rpcs3/emucore.vcxproj.filters +++ b/rpcs3/emucore.vcxproj.filters @@ -929,6 +929,9 @@ Emu\Cell\lv2 + + Source Files + @@ -1795,5 +1798,8 @@ Emu\Cell\lv2 + + Emu\GPU\RSX\Common + \ No newline at end of file