rsx: Texture search rewrite

- Perform a full search across all resource types as needed without taking too many shortcuts/hacks
kd-11 2019-02-25 18:03:14 +03:00 committed by kd-11
parent 6ef9dcd62e
commit 3a071a9c07
12 changed files with 818 additions and 534 deletions

View File

@ -7,7 +7,7 @@
namespace rsx
{
enum texture_upload_context
enum texture_upload_context : u32
{
shader_read = 1,
blit_engine_src = 2,
@ -15,7 +15,7 @@ namespace rsx
framebuffer_storage = 8
};
enum texture_colorspace
enum texture_colorspace : u32
{
rgb_linear = 0,
srgb_nonlinear = 1
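
Pinning the underlying type to u32 matters here because these enumerators are used as bit flags and OR-ed into lookup masks elsewhere in this commit; a minimal self-contained sketch of that pattern (blit_engine_dst = 4 is an assumed value, elided from this hunk):

#include <cstdint>
using u32 = std::uint32_t;

// Sketch: the enumerators are powers of two so they can be OR-ed into
// lookup masks; the explicit ": u32" fixes the storage width they share.
enum texture_upload_context : u32
{
    shader_read = 1,
    blit_engine_src = 2,
    blit_engine_dst = 4, // assumed value, not visible in this hunk
    framebuffer_storage = 8
};

// e.g. the VKGSRender::flip() change below builds exactly this kind of mask
constexpr u32 lookup_mask = blit_engine_dst | framebuffer_storage;
static_assert(sizeof(texture_upload_context) == sizeof(u32), "fixed width");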

View File

@ -108,6 +108,7 @@ namespace rsx
virtual u16 get_surface_height() const = 0;
virtual u16 get_rsx_pitch() const = 0;
virtual u16 get_native_pitch() const = 0;
virtual bool is_depth_surface() const = 0;
void save_aa_mode()
{
@ -216,6 +217,9 @@ namespace rsx
std::unordered_map<u32, surface_storage_type> m_render_targets_storage = {};
std::unordered_map<u32, surface_storage_type> m_depth_stencil_storage = {};
rsx::address_range m_render_targets_memory_range;
rsx::address_range m_depth_stencil_memory_range;
public:
std::array<std::tuple<u32, surface_type>, 4> m_bound_render_targets = {};
std::tuple<u32, surface_type> m_bound_depth_stencil = {};
@ -323,7 +327,8 @@ namespace rsx
surface_type bind_address_as_render_targets(
command_list_type command_list,
u32 address,
surface_color_format color_format, size_t width, size_t height,
surface_color_format color_format,
size_t width, size_t height, size_t pitch,
Args&&... extra_params)
{
// TODO: Fix corner cases
@ -360,6 +365,10 @@ namespace rsx
m_render_targets_storage.erase(address);
}
// Range test
rsx::address_range range = rsx::address_range::start_length(address, u32(pitch * height));
m_render_targets_memory_range = range.get_min_max(m_render_targets_memory_range);
// Select source of original data if any
auto contents_to_copy = old_surface == nullptr ? convert_surface : old_surface;
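
The accumulated m_render_targets_memory_range acts as a coarse bounding box so later searches can skip the whole storage map; a minimal sketch of the assumed start_length/get_min_max semantics (the real rsx::address_range lives in the utils headers and differs in detail):

#include <algorithm>
#include <cstdint>
using u32 = std::uint32_t;

// Sketch of the bounding-range bookkeeping: start_length() spans
// [addr, addr + len - 1] and get_min_max() merges two ranges into the
// smallest range covering both, tolerating an initially-invalid range.
struct address_range
{
    u32 start = UINT32_MAX;
    u32 end = 0; // inclusive

    static address_range start_length(u32 addr, u32 len)
    {
        return { addr, addr + len - 1 };
    }

    bool valid() const { return start <= end; }

    address_range get_min_max(const address_range& other) const
    {
        if (!other.valid()) return *this;
        if (!valid()) return other;
        return { std::min(start, other.start), std::max(end, other.end) };
    }
};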
@ -410,7 +419,8 @@ namespace rsx
surface_type bind_address_as_depth_stencil(
command_list_type command_list,
u32 address,
surface_depth_format depth_format, size_t width, size_t height,
surface_depth_format depth_format,
size_t width, size_t height, size_t pitch,
Args&&... extra_params)
{
surface_storage_type old_surface_storage;
@ -445,6 +455,10 @@ namespace rsx
m_depth_stencil_storage.erase(address);
}
// Range test
rsx::address_range range = rsx::address_range::start_length(address, u32(pitch * height));
m_depth_stencil_memory_range = range.get_min_max(m_depth_stencil_memory_range);
// Select source of original data if any
auto contents_to_copy = old_surface == nullptr ? convert_surface : old_surface;
@ -525,8 +539,9 @@ namespace rsx
if (surface_addresses[surface_index] == 0)
continue;
const auto pitch = clip_width * 4; // TODO
m_bound_render_targets[surface_index] = std::make_tuple(surface_addresses[surface_index],
bind_address_as_render_targets(command_list, surface_addresses[surface_index], color_format, clip_width, clip_height, std::forward<Args>(extra_params)...));
bind_address_as_render_targets(command_list, surface_addresses[surface_index], color_format, clip_width, clip_height, pitch, std::forward<Args>(extra_params)...));
}
// Same for depth buffer
@ -538,8 +553,10 @@ namespace rsx
if (!address_z)
return;
// TODO
const auto pitch = (depth_format == rsx::surface_depth_format::z16) ? clip_width * 2 : clip_width * 4;
m_bound_depth_stencil = std::make_tuple(address_z,
bind_address_as_depth_stencil(command_list, address_z, depth_format, clip_width, clip_height, std::forward<Args>(extra_params)...));
bind_address_as_depth_stencil(command_list, address_z, depth_format, clip_width, clip_height, pitch, std::forward<Args>(extra_params)...));
}
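
Both new pitch arguments are still derived from the clip width rather than the real RSX pitch (both call sites remain marked TODO); the assumed math in isolation:

#include <cstdint>
using u32 = std::uint32_t;

enum class depth_format { z16, z24s8 }; // stand-in for rsx::surface_depth_format

// Interim pitch estimates: 4 bytes/px for color, 2 or 4 for depth.
u32 estimate_color_pitch(u32 clip_width) { return clip_width * 4; }
u32 estimate_depth_pitch(depth_format fmt, u32 clip_width)
{
    return clip_width * ((fmt == depth_format::z16) ? 2u : 4u);
}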
/**
@ -566,6 +583,19 @@ namespace rsx
return surface_type();
}
surface_type get_surface_at(u32 address)
{
auto It = m_render_targets_storage.find(address);
if (It != m_render_targets_storage.end())
return Traits::get(It->second);
auto _It = m_depth_stencil_storage.find(address);
if (_It != m_depth_stencil_storage.end())
return Traits::get(_It->second);
fmt::throw_exception("Unreachable" HERE);
}
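
get_surface_at() probes the color storage first and falls back to the depth storage, treating a double miss as a logic error; the same lookup pattern with stand-in types:

#include <cstdint>
#include <stdexcept>
#include <unordered_map>
using u32 = std::uint32_t;

struct surface {}; // stand-in for the backend surface type

surface* get_surface_at(std::unordered_map<u32, surface>& color_map,
                        std::unordered_map<u32, surface>& depth_map,
                        u32 address)
{
    // Probe color targets first, then depth-stencil targets.
    if (auto it = color_map.find(address); it != color_map.end())
        return &it->second;
    if (auto it = depth_map.find(address); it != depth_map.end())
        return &it->second;
    // Callers are expected to pass addresses known to the cache.
    throw std::logic_error("unreachable: address not in either storage map");
}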
/**
* Get bound color surface raw data.
*/
@ -749,7 +779,7 @@ namespace rsx
*/
void invalidate_surface_address(u32 addr, bool depth)
{
if (address_is_bound(addr, depth))
if (address_is_bound(addr))
{
LOG_ERROR(RSX, "Cannot invalidate a currently bound render target!");
return;
@ -862,14 +892,8 @@ namespace rsx
return (offset < range);
}
bool address_is_bound(u32 address, bool is_depth) const
bool address_is_bound(u32 address) const
{
if (is_depth)
{
const u32 bound_depth_address = std::get<0>(m_bound_depth_stencil);
return (bound_depth_address == address);
}
for (auto &surface : m_bound_render_targets)
{
const u32 bound_address = std::get<0>(surface);
@ -877,6 +901,9 @@ namespace rsx
return true;
}
if (std::get<0>(m_bound_depth_stencil) == address)
return true;
return false;
}
@ -966,7 +993,7 @@ namespace rsx
}
if (test_surface(surface, this_address, x_offset, y_offset, w, h, clipped))
return{ this_address, surface, x_offset, y_offset, w, h, address_is_bound(this_address, false), false, clipped };
return{ this_address, surface, x_offset, y_offset, w, h, address_is_bound(this_address), false, clipped };
}
}
@ -992,16 +1019,18 @@ namespace rsx
}
if (test_surface(surface, this_address, x_offset, y_offset, w, h, clipped))
return{ this_address, surface, x_offset, y_offset, w, h, address_is_bound(this_address, true), true, clipped };
return{ this_address, surface, x_offset, y_offset, w, h, address_is_bound(this_address), true, clipped };
}
}
return{};
}
std::vector<surface_overlap_info> get_merged_texture_memory_region(u32 texaddr, u32 required_width, u32 required_height, u32 required_pitch, u32 bpp)
template <typename commandbuffer_type>
std::vector<surface_overlap_info> get_merged_texture_memory_region(commandbuffer_type& cmd, u32 texaddr, u32 required_width, u32 required_height, u32 required_pitch, u32 bpp)
{
std::vector<surface_overlap_info> result;
std::vector<std::pair<u32, bool>> dirty;
const u32 limit = texaddr + (required_pitch * required_height);
auto process_list_function = [&](std::unordered_map<u32, surface_storage_type>& data, bool is_depth)
@ -1021,6 +1050,12 @@ namespace rsx
if ((this_address + texture_size) <= texaddr)
continue;
if (surface->read_barrier(cmd); !surface->test())
{
dirty.emplace_back(this_address, is_depth);
continue;
}
surface_overlap_info info;
info.surface = surface;
info.is_depth = is_depth;
@ -1050,8 +1085,27 @@ namespace rsx
}
};
process_list_function(m_render_targets_storage, false);
process_list_function(m_depth_stencil_storage, true);
// Range test helper to quickly discard blocks
// Fortunately, render targets tend to be clustered anyway
rsx::address_range test = rsx::address_range::start_end(texaddr, limit-1);
if (test.overlaps(m_render_targets_memory_range))
{
process_list_function(m_render_targets_storage, false);
}
if (test.overlaps(m_depth_stencil_memory_range))
{
process_list_function(m_depth_stencil_storage, true);
}
if (!dirty.empty())
{
for (const auto& p : dirty)
{
invalidate_surface_address(p.first, p.second);
}
}
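
The scan now resolves pending GPU work before trusting a candidate: read_barrier() synchronizes the surface and test() reports whether it still holds live data, while stale entries are queued and invalidated only after both maps are walked so iteration never mutates the map it reads. The filtering step with stand-in types:

#include <cstdint>
#include <utility>
#include <vector>
using u32 = std::uint32_t;

struct surface
{
    bool live = true;
    void read_barrier() {}             // stand-in: resolve pending writes
    bool test() const { return live; } // stand-in: does the data still exist?
};

void scan_candidates(std::vector<std::pair<u32, surface*>>& candidates,
                     std::vector<std::pair<u32, bool>>& dirty, bool is_depth)
{
    for (auto& [address, s] : candidates)
    {
        if (s->read_barrier(); !s->test())
        {
            dirty.emplace_back(address, is_depth); // invalidate after the loop
            continue;
        }
        // ... overlap-test the live surface and record surface_overlap_info ...
    }
}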
if (result.size() > 1)
{

File diff suppressed because it is too large

View File

@ -99,6 +99,17 @@ namespace rsx
fmt::throw_exception("Unreachable " HERE);
}
constexpr invalidation_cause defer() const
{
AUDIT(!deferred_flush());
if (cause == read)
return deferred_read;
else if (cause == write)
return deferred_write;
else
fmt::throw_exception("Unreachable " HERE);
}
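
defer() maps a flushable cause onto its deferred twin, asserting it is not already deferred; a stand-in showing the intended use from a fault handler (the call site is hypothetical, though the GL handler below makes the same read/write distinction):

#include <cassert>

enum cause_t { read_c, write_c, deferred_read_c, deferred_write_c }; // stand-ins

cause_t defer(cause_t c)
{
    assert(c == read_c || c == write_c); // mirrors AUDIT(!deferred_flush())
    return (c == read_c) ? deferred_read_c : deferred_write_c;
}

// Hypothetical fault-handler use: demote to the deferred twin when the
// faulting thread cannot flush caches itself.
cause_t classify(bool is_writing, bool can_flush)
{
    const cause_t c = is_writing ? write_c : read_c;
    return can_flush ? c : defer(c);
}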
constexpr invalidation_cause() : cause(invalid) {}
constexpr invalidation_cause(enum_type _cause) : cause(_cause) {}
operator enum_type&() { return cause; }

View File

@ -330,12 +330,12 @@ void GLGSRender::end()
_SelectTexture(GL_FRAGMENT_TEXTURES_START + i);
gl::texture_view* view = nullptr;
if (rsx::method_registers.fragment_textures[i].enabled())
{
auto sampler_state = static_cast<gl::texture_cache::sampled_image_descriptor*>(fs_sampler_state[i].get());
view = sampler_state->image_handle;
auto sampler_state = static_cast<gl::texture_cache::sampled_image_descriptor*>(fs_sampler_state[i].get());
if (!view && sampler_state->external_subresource_desc.external_handle)
if (rsx::method_registers.fragment_textures[i].enabled() &&
sampler_state->validate())
{
if (view = sampler_state->image_handle; UNLIKELY(!view))
{
view = m_gl_texture_cache.create_temporary_subresource(cmd, sampler_state->external_subresource_desc);
}
@ -375,13 +375,17 @@ void GLGSRender::end()
auto sampler_state = static_cast<gl::texture_cache::sampled_image_descriptor*>(vs_sampler_state[i].get());
_SelectTexture(GL_VERTEX_TEXTURES_START + i);
if (sampler_state->image_handle)
if (rsx::method_registers.vertex_textures[i].enabled() &&
sampler_state->validate())
{
sampler_state->image_handle->bind();
}
else if (sampler_state->external_subresource_desc.external_handle)
{
m_gl_texture_cache.create_temporary_subresource(cmd, sampler_state->external_subresource_desc)->bind();
if (LIKELY(sampler_state->image_handle))
{
sampler_state->image_handle->bind();
}
else
{
m_gl_texture_cache.create_temporary_subresource(cmd, sampler_state->external_subresource_desc)->bind();
}
}
else
{
@ -1640,7 +1644,8 @@ void GLGSRender::flip(int buffer)
}
else
{
const auto overlap_info = m_rtts.get_merged_texture_memory_region(absolute_address, buffer_width, buffer_height, buffer_pitch, 4);
gl::command_context cmd = { gl_state };
const auto overlap_info = m_rtts.get_merged_texture_memory_region(cmd, absolute_address, buffer_width, buffer_height, buffer_pitch, 4);
verify(HERE), !overlap_info.empty();
if (overlap_info.back().surface == render_target_texture)
@ -1825,8 +1830,8 @@ bool GLGSRender::on_access_violation(u32 address, bool is_writing)
is_writing ? (can_flush ? rsx::invalidation_cause::write : rsx::invalidation_cause::deferred_write)
: (can_flush ? rsx::invalidation_cause::read : rsx::invalidation_cause::deferred_read);
gl::command_context null_cmd;
auto result = m_gl_texture_cache.invalidate_address(null_cmd, address, cause);
auto cmd = can_flush ? gl::command_context{ gl_state } : gl::command_context{};
auto result = m_gl_texture_cache.invalidate_address(cmd, address, cause);
if (!result.violation_handled)
return false;

View File

@ -375,30 +375,37 @@ void GLGSRender::init_buffers(rsx::framebuffer_creation_context context, bool sk
m_gl_texture_cache.clear_ro_tex_invalidate_intr();
//Mark buffer regions as NO_ACCESS on Cell visible side
if (g_cfg.video.write_color_buffers)
const auto color_format = rsx::internals::surface_color_format_to_gl(layout.color_format);
for (u8 i = 0; i < rsx::limits::color_buffers_count; ++i)
{
auto color_format = rsx::internals::surface_color_format_to_gl(layout.color_format);
if (!m_surface_info[i].address || !m_surface_info[i].pitch) continue;
for (u8 i = 0; i < rsx::limits::color_buffers_count; ++i)
const auto surface_range = m_surface_info[i].get_memory_range(layout.aa_factors[1]);
if (g_cfg.video.write_color_buffers)
{
if (!m_surface_info[i].address || !m_surface_info[i].pitch) continue;
const utils::address_range surface_range = m_surface_info[i].get_memory_range(layout.aa_factors[1]);
// Mark buffer regions as NO_ACCESS on Cell-visible side
m_gl_texture_cache.lock_memory_region(cmd, std::get<1>(m_rtts.m_bound_render_targets[i]), surface_range, m_surface_info[i].width, m_surface_info[i].height, m_surface_info[i].pitch,
std::tuple<>{}, color_format.format, color_format.type, color_format.swap_bytes);
}
else
{
m_gl_texture_cache.commit_framebuffer_memory_region(cmd, surface_range);
}
}
if (g_cfg.video.write_depth_buffer)
if (m_depth_surface_info.address && m_depth_surface_info.pitch)
{
if (m_depth_surface_info.address && m_depth_surface_info.pitch)
const auto surface_range = m_depth_surface_info.get_memory_range(layout.aa_factors[1]);
if (g_cfg.video.write_depth_buffer)
{
const auto depth_format_gl = rsx::internals::surface_depth_format_to_gl(layout.depth_format);
const utils::address_range surface_range = m_depth_surface_info.get_memory_range(layout.aa_factors[1]);
m_gl_texture_cache.lock_memory_region(cmd, std::get<1>(m_rtts.m_bound_depth_stencil), surface_range, m_depth_surface_info.width, m_depth_surface_info.height, m_depth_surface_info.pitch,
std::tuple<>{}, depth_format_gl.format, depth_format_gl.type, true);
}
else
{
m_gl_texture_cache.commit_framebuffer_memory_region(cmd, surface_range);
}
}
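
Every live surface range is now registered with the texture cache one way or the other: locked when write-back is enabled so CPU writes fault and flush, or committed as framebuffer memory so overlap searches can still find it. The decision in isolation, with stand-in cache methods:

#include <cstdint>
using u32 = std::uint32_t;

struct texture_cache_stub
{
    // stand-ins for lock_memory_region / commit_framebuffer_memory_region
    void lock_memory_region(u32 /*start*/, u32 /*end*/) {}                // guarded, flushable
    void commit_framebuffer_memory_region(u32 /*start*/, u32 /*end*/) {}  // searchable only
};

void register_surface_range(texture_cache_stub& cache, u32 start, u32 end,
                            bool write_back_enabled)
{
    if (write_back_enabled)
        cache.lock_memory_region(start, end);               // write_color/depth_buffers on
    else
        cache.commit_framebuffer_memory_region(start, end); // still visible to searches
}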
if (m_gl_texture_cache.get_ro_tex_invalidate_intr())
@ -585,6 +592,8 @@ void gl::render_target::memory_barrier(gl::command_context& cmd, bool force_init
{
gl::g_hw_blitter->fast_clear_image(cmd, this, {});
}
on_write();
}
return;

View File

@ -107,6 +107,19 @@ namespace gl
return surface_height;
}
bool is_depth_surface() const override
{
switch (get_internal_format())
{
case gl::texture::internal_format::depth16:
case gl::texture::internal_format::depth24_stencil8:
case gl::texture::internal_format::depth32f_stencil8:
return true;
default:
return false;
}
}
texture* get_surface() override
{
return (gl::texture*)this;

View File

@ -387,6 +387,7 @@ namespace gl
if (synchronized)
return;
verify(HERE), cmd.drv;
copy_texture(cmd, blocking);
if (blocking)
@ -700,7 +701,7 @@ namespace gl
return result;
}
std::array<GLenum, 4> get_component_mapping(u32 gcm_format, rsx::texture_create_flags flags)
std::array<GLenum, 4> get_component_mapping(u32 gcm_format, rsx::texture_create_flags flags) const
{
switch (gcm_format)
{
@ -733,7 +734,7 @@ namespace gl
}
}
void copy_transfer_regions_impl(gl::command_context& cmd, gl::texture* dst_image, const std::vector<copy_region_descriptor>& sources)
void copy_transfer_regions_impl(gl::command_context& cmd, gl::texture* dst_image, const std::vector<copy_region_descriptor>& sources) const
{
for (const auto &slice : sources)
{
@ -759,6 +760,38 @@ namespace gl
}
}
gl::texture* get_template_from_collection_impl(const std::vector<copy_region_descriptor>& sections_to_transfer) const
{
gl::texture* result = nullptr;
for (const auto &section : sections_to_transfer)
{
if (!section.src)
continue;
if (!result)
{
result = section.src;
}
else
{
const auto set1 = result->get_native_component_layout();
const auto set2 = section.src->get_native_component_layout();
if (set1[0] != set2[0] ||
set1[1] != set2[1] ||
set1[2] != set2[2] ||
set1[3] != set2[3])
{
// TODO
// This requires a far more complex setup as it's not always possible to mix and match without compute assistance
return nullptr;
}
}
}
return result;
}
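
The template image is only usable when every source section agrees on component layout; mixed layouts bail out to nullptr because merging them would need compute assistance. The selection rule with stand-in types (std::array comparison replaces the four explicit compares):

#include <array>
#include <vector>

struct image { std::array<int, 4> component_layout; }; // stand-in

image* pick_template(const std::vector<image*>& sources)
{
    image* result = nullptr;
    for (image* src : sources)
    {
        if (!src)
            continue;
        if (!result)
        {
            result = src;
        }
        else if (src->component_layout != result->component_layout)
        {
            return nullptr; // mixed layouts: no single template works
        }
    }
    return result;
}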
protected:
gl::texture_view* create_temporary_subresource_view(gl::command_context&, gl::texture** src, u32 gcm_format, u16 x, u16 y, u16 w, u16 h,
@ -909,6 +942,7 @@ namespace gl
gl::upload_texture(section->get_raw_texture()->id(), gcm_format, width, height, depth, mipmaps,
input_swizzled, type, subresource_layout);
section->last_write_tag = rsx::get_shared_tag();
return section;
}
@ -966,6 +1000,7 @@ namespace gl
return (ifmt == gl::texture::internal_format::depth24_stencil8 ||
ifmt == gl::texture::internal_format::depth32f_stencil8 ||
ifmt == gl::texture::internal_format::depth_stencil);
case CELL_GCM_TEXTURE_X16:
case CELL_GCM_TEXTURE_DEPTH16:
case CELL_GCM_TEXTURE_DEPTH16_FLOAT:
return (ifmt == gl::texture::internal_format::depth16 ||

View File

@ -1626,12 +1626,12 @@ void VKGSRender::end()
if (current_fp_metadata.referenced_textures_mask & (1 << i))
{
vk::image_view* view = nullptr;
if (rsx::method_registers.fragment_textures[i].enabled())
{
auto sampler_state = static_cast<vk::texture_cache::sampled_image_descriptor*>(fs_sampler_state[i].get());
view = sampler_state->image_handle;
auto sampler_state = static_cast<vk::texture_cache::sampled_image_descriptor*>(fs_sampler_state[i].get());
if (!view && sampler_state->external_subresource_desc.external_handle)
if (rsx::method_registers.fragment_textures[i].enabled() &&
sampler_state->validate())
{
if (view = sampler_state->image_handle; !view)
{
//Requires update, copy subresource
view = m_texture_cache.create_temporary_subresource(*m_current_command_buffer, sampler_state->external_subresource_desc);
@ -1705,7 +1705,7 @@ void VKGSRender::end()
auto sampler_state = static_cast<vk::texture_cache::sampled_image_descriptor*>(vs_sampler_state[i].get());
auto image_ptr = sampler_state->image_handle;
if (!image_ptr && sampler_state->external_subresource_desc.external_handle)
if (!image_ptr && sampler_state->validate())
{
image_ptr = m_texture_cache.create_temporary_subresource(*m_current_command_buffer, sampler_state->external_subresource_desc);
m_vertex_textures_dirty[i] = true;
@ -2970,28 +2970,36 @@ void VKGSRender::prepare_rtts(rsx::framebuffer_creation_context context)
m_texture_cache.notify_surface_changed(layout.zeta_address);
}
if (g_cfg.video.write_color_buffers)
const auto color_fmt_info = vk::get_compatible_gcm_format(layout.color_format);
for (u8 index : m_draw_buffers)
{
const auto color_fmt_info = vk::get_compatible_gcm_format(layout.color_format);
for (u8 index : m_draw_buffers)
{
if (!m_surface_info[index].address || !m_surface_info[index].pitch) continue;
if (!m_surface_info[index].address || !m_surface_info[index].pitch) continue;
const utils::address_range surface_range = m_surface_info[index].get_memory_range(layout.aa_factors[1]);
const utils::address_range surface_range = m_surface_info[index].get_memory_range(layout.aa_factors[1]);
if (g_cfg.video.write_color_buffers)
{
m_texture_cache.lock_memory_region(*m_current_command_buffer, std::get<1>(m_rtts.m_bound_render_targets[index]), surface_range,
m_surface_info[index].width, m_surface_info[index].height, layout.actual_color_pitch[index], std::tuple<VkQueue>{ m_swapchain->get_graphics_queue() }, color_fmt_info.first, color_fmt_info.second);
}
else
{
m_texture_cache.commit_framebuffer_memory_region(*m_current_command_buffer, surface_range, m_swapchain->get_graphics_queue());
}
}
if (g_cfg.video.write_depth_buffer)
if (m_depth_surface_info.address && m_depth_surface_info.pitch)
{
if (m_depth_surface_info.address && m_depth_surface_info.pitch)
const utils::address_range surface_range = m_depth_surface_info.get_memory_range(layout.aa_factors[1]);
if (g_cfg.video.write_depth_buffer)
{
const u32 gcm_format = (m_depth_surface_info.depth_format != rsx::surface_depth_format::z16)? CELL_GCM_TEXTURE_DEPTH16 : CELL_GCM_TEXTURE_DEPTH24_D8;
const utils::address_range surface_range = m_depth_surface_info.get_memory_range(layout.aa_factors[1]);
const u32 gcm_format = (m_depth_surface_info.depth_format != rsx::surface_depth_format::z16) ? CELL_GCM_TEXTURE_DEPTH16 : CELL_GCM_TEXTURE_DEPTH24_D8;
m_texture_cache.lock_memory_region(*m_current_command_buffer, std::get<1>(m_rtts.m_bound_depth_stencil), surface_range,
m_depth_surface_info.width, m_depth_surface_info.height, layout.actual_zeta_pitch, std::tuple<VkQueue>{ m_swapchain->get_graphics_queue() }, gcm_format, false);
}
else
{
m_texture_cache.commit_framebuffer_memory_region(*m_current_command_buffer, surface_range, m_swapchain->get_graphics_queue());
}
}
auto vk_depth_format = (layout.zeta_address == 0) ? VK_FORMAT_UNDEFINED : vk::get_compatible_depth_surface_format(m_device->get_formats_support(), layout.depth_format);
@ -3301,7 +3309,7 @@ void VKGSRender::flip(int buffer)
}
else
{
const auto overlap_info = m_rtts.get_merged_texture_memory_region(absolute_address, buffer_width, buffer_height, buffer_pitch, 4);
const auto overlap_info = m_rtts.get_merged_texture_memory_region(*m_current_command_buffer, absolute_address, buffer_width, buffer_height, buffer_pitch, 4);
verify(HERE), !overlap_info.empty();
if (overlap_info.back().surface == render_target_texture)
@ -3341,16 +3349,14 @@ void VKGSRender::flip(int buffer)
{
// Read from cell
const auto range = utils::address_range::start_length(absolute_address, buffer_pitch * buffer_height);
const auto overlap = m_texture_cache.find_texture_from_range(range);
const u32 lookup_mask = rsx::texture_upload_context::blit_engine_dst | rsx::texture_upload_context::framebuffer_storage;
const auto overlap = m_texture_cache.find_texture_from_range(range, 0, lookup_mask);
bool flush_queue = false;
for (const auto & section : overlap)
{
if (section->get_protection() == utils::protection::no)
{
section->copy_texture(*m_current_command_buffer, false, m_swapchain->get_graphics_queue());
flush_queue = true;
}
section->copy_texture(*m_current_command_buffer, false, m_swapchain->get_graphics_queue());
flush_queue = true;
}
if (flush_queue)

View File

@ -51,6 +51,11 @@ namespace vk
return native_pitch;
}
bool is_depth_surface() const override
{
return !!(attachment_aspect_flag & VK_IMAGE_ASPECT_DEPTH_BIT);
}
bool matches_dimensions(u16 _width, u16 _height) const
{
//Use forward scaling to account for rounding and clamping errors

View File

@ -123,7 +123,7 @@ namespace vk
if (src->current_layout != preferred_src_format)
change_image_layout(cmd, src->value, src_layout, preferred_src_format, vk::get_image_subresource_range(0, 0, 1, 1, src_aspect));
if (dst->current_layout != preferred_dst_format)
if (dst->current_layout != preferred_dst_format && src != dst)
change_image_layout(cmd, dst->value, dst_layout, preferred_dst_format, vk::get_image_subresource_range(0, 0, 1, 1, dst_aspect));
auto scratch_buf = vk::get_scratch_buffer();
@ -196,7 +196,7 @@ namespace vk
if (src_layout != preferred_src_format)
change_image_layout(cmd, src->value, preferred_src_format, src_layout, vk::get_image_subresource_range(0, 0, 1, 1, src_aspect));
if (dst_layout != preferred_dst_format)
if (dst_layout != preferred_dst_format && src != dst)
change_image_layout(cmd, dst->value, preferred_dst_format, dst_layout, vk::get_image_subresource_range(0, 0, 1, 1, dst_aspect));
}
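
The src != dst guards added throughout these helpers cover self-copies: when source and destination are the same VkImage, the source transition has already put the image in the right layout, and recording a second transition against a now-stale "current layout" would be wrong. The guard condition in isolation (handles are stand-ins for VkImage):

#include <cstdint>
using image_handle = std::uintptr_t; // stand-in for VkImage

bool needs_dst_transition(image_handle src, image_handle dst,
                          int current_layout, int preferred_layout)
{
    // Skip the destination transition entirely for self-copies.
    return current_layout != preferred_layout && src != dst;
}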
@ -231,7 +231,7 @@ namespace vk
if (srcLayout != preferred_src_format)
change_image_layout(cmd, src, srcLayout, preferred_src_format, vk::get_image_subresource_range(0, 0, 1, 1, src_aspect));
if (dstLayout != preferred_dst_format)
if (dstLayout != preferred_dst_format && src != dst)
change_image_layout(cmd, dst, dstLayout, preferred_dst_format, vk::get_image_subresource_range(0, 0, 1, 1, dst_aspect));
for (u32 mip_level = 0; mip_level < mipmaps; ++mip_level)
@ -245,7 +245,7 @@ namespace vk
if (srcLayout != preferred_src_format)
change_image_layout(cmd, src, preferred_src_format, srcLayout, vk::get_image_subresource_range(0, 0, 1, 1, src_aspect));
if (dstLayout != preferred_dst_format)
if (dstLayout != preferred_dst_format && src != dst)
change_image_layout(cmd, dst, preferred_dst_format, dstLayout, vk::get_image_subresource_range(0, 0, 1, 1, dst_aspect));
}
@ -272,7 +272,7 @@ namespace vk
if (srcLayout != preferred_src_format)
change_image_layout(cmd, src, srcLayout, preferred_src_format, vk::get_image_subresource_range(0, 0, 1, 1, aspect));
if (dstLayout != preferred_dst_format)
if (dstLayout != preferred_dst_format && src != dst)
change_image_layout(cmd, dst, dstLayout, preferred_dst_format, vk::get_image_subresource_range(0, 0, 1, 1, aspect));
if (compatible_formats && src_width == dst_width && src_height == dst_height)
@ -296,7 +296,7 @@ namespace vk
}
else
{
auto stretch_image_typeless_unsafe = [&cmd, preferred_src_format, preferred_dst_format](VkImage src, VkImage dst, VkImage typeless,
auto stretch_image_typeless_unsafe = [&cmd, preferred_src_format, preferred_dst_format, filter](VkImage src, VkImage dst, VkImage typeless,
const areai& src_rect, const areai& dst_rect, VkImageAspectFlags aspect, VkImageAspectFlags transfer_flags = 0xFF)
{
const u32 src_w = u32(src_rect.x2 - src_rect.x1);
@ -314,14 +314,14 @@ namespace vk
//2. Blit typeless surface to self
copy_scaled_image(cmd, typeless, typeless, VK_IMAGE_LAYOUT_GENERAL, VK_IMAGE_LAYOUT_GENERAL,
0, 0, src_w, src_h, 0, src_h, dst_w, dst_h, 1, VK_IMAGE_ASPECT_COLOR_BIT, VK_IMAGE_ASPECT_COLOR_BIT, VK_FILTER_NEAREST);
0, 0, src_w, src_h, 0, src_h, dst_w, dst_h, 1, VK_IMAGE_ASPECT_COLOR_BIT, VK_IMAGE_ASPECT_COLOR_BIT, filter);
//3. Copy back the aspect bits
copy_image(cmd, typeless, dst, VK_IMAGE_LAYOUT_GENERAL, preferred_dst_format,
{0, (s32)src_h, (s32)dst_w, s32(src_h + dst_h) }, dst_rect, 1, VK_IMAGE_ASPECT_COLOR_BIT, aspect, 0xFF, transfer_flags);
};
auto stretch_image_typeless_safe = [&cmd, preferred_src_format, preferred_dst_format](VkImage src, VkImage dst, VkImage typeless,
auto stretch_image_typeless_safe = [&cmd, preferred_src_format, preferred_dst_format, filter](VkImage src, VkImage dst, VkImage typeless,
const areai& src_rect, const areai& dst_rect, VkImageAspectFlags aspect, VkImageAspectFlags transfer_flags = 0xFF)
{
const u32 src_w = u32(src_rect.x2 - src_rect.x1);
@ -345,7 +345,7 @@ namespace vk
//2. Blit typeless surface to self
copy_scaled_image(cmd, typeless, typeless, VK_IMAGE_LAYOUT_GENERAL, VK_IMAGE_LAYOUT_GENERAL,
0, 0, src_w, src_h, 0, src_h, dst_w, dst_h, 1, VK_IMAGE_ASPECT_COLOR_BIT, VK_IMAGE_ASPECT_COLOR_BIT, VK_FILTER_NEAREST);
0, 0, src_w, src_h, 0, src_h, dst_w, dst_h, 1, VK_IMAGE_ASPECT_COLOR_BIT, VK_IMAGE_ASPECT_COLOR_BIT, filter);
//3. Copy back the aspect bits
info.imageExtent = { dst_w, dst_h, 1 };
@ -423,7 +423,7 @@ namespace vk
if (srcLayout != preferred_src_format)
change_image_layout(cmd, src, preferred_src_format, srcLayout, vk::get_image_subresource_range(0, 0, 1, 1, aspect));
if (dstLayout != preferred_dst_format)
if (dstLayout != preferred_dst_format && src != dst)
change_image_layout(cmd, dst, preferred_dst_format, dstLayout, vk::get_image_subresource_range(0, 0, 1, 1, aspect));
}

View File

@ -457,7 +457,7 @@ namespace vk
m_discarded_memory_size = 0;
}
VkComponentMapping apply_component_mapping_flags(u32 gcm_format, rsx::texture_create_flags flags, const texture_channel_remap_t& remap_vector)
VkComponentMapping apply_component_mapping_flags(u32 gcm_format, rsx::texture_create_flags flags, const texture_channel_remap_t& remap_vector) const
{
switch (gcm_format)
{
@ -496,7 +496,7 @@ namespace vk
return mapping;
}
void copy_transfer_regions_impl(vk::command_buffer& cmd, vk::image* dst, const std::vector<copy_region_descriptor>& sections_to_transfer)
void copy_transfer_regions_impl(vk::command_buffer& cmd, vk::image* dst, const std::vector<copy_region_descriptor>& sections_to_transfer) const
{
for (const auto &section : sections_to_transfer)
{
@ -515,10 +515,19 @@ namespace vk
copy_rgn.srcOffset = { section.src_x, section.src_y, 0 };
copy_rgn.dstOffset = { section.dst_x, section.dst_y, 0 };
copy_rgn.dstSubresource = { dst_aspect & ~(VK_IMAGE_ASPECT_STENCIL_BIT), 0, section.dst_z, 1 };
copy_rgn.dstSubresource = { dst_aspect & ~(VK_IMAGE_ASPECT_STENCIL_BIT), 0, 0, 1 };
copy_rgn.srcSubresource = { src_aspect & ~(VK_IMAGE_ASPECT_STENCIL_BIT), 0, 0, 1 };
copy_rgn.extent = { section.src_w, section.src_h, 1 };
if (dst->info.imageType == VK_IMAGE_TYPE_3D)
{
copy_rgn.dstOffset.z = section.dst_z;
}
else
{
copy_rgn.dstSubresource.baseArrayLayer = section.dst_z;
}
vk::change_image_layout(cmd, section.src, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, src_range);
vkCmdCopyImage(cmd, section.src->value, section.src->current_layout, dst->value, dst->current_layout, 1, &copy_rgn);
vk::change_image_layout(cmd, section.src, old_src_layout, src_range);
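
dst_z has to land in a different field depending on the destination's dimensionality: a 3D image selects its slice through dstOffset.z, while 2D arrays and cubes select a layer through dstSubresource.baseArrayLayer. The routing in isolation:

#include <cstdint>

struct dst_coords
{
    std::int32_t offset_z = 0;    // maps to VkImageCopy::dstOffset.z
    std::uint32_t base_layer = 0; // maps to dstSubresource.baseArrayLayer
};

dst_coords route_dst_z(bool dst_is_3d, std::uint32_t dst_z)
{
    dst_coords out;
    if (dst_is_3d)
        out.offset_z = static_cast<std::int32_t>(dst_z); // VK_IMAGE_TYPE_3D slice
    else
        out.base_layer = dst_z;                          // array/cube layer
    return out;
}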
@ -579,6 +588,35 @@ namespace vk
}
}
vk::image* get_template_from_collection_impl(const std::vector<copy_region_descriptor>& sections_to_transfer) const
{
vk::image* result = nullptr;
for (const auto &section : sections_to_transfer)
{
if (!section.src)
continue;
if (!result)
{
result = section.src;
}
else
{
if (section.src->native_component_map.a != result->native_component_map.a ||
section.src->native_component_map.r != result->native_component_map.r ||
section.src->native_component_map.g != result->native_component_map.g ||
section.src->native_component_map.b != result->native_component_map.b)
{
// TODO
// This requires a far more complex setup as it's not always possible to mix and match without compute assistance
return nullptr;
}
}
}
return result;
}
protected:
vk::image_view* create_temporary_subresource_view_impl(vk::command_buffer& cmd, vk::image* source, VkImageType image_type, VkImageViewType view_type,
u32 gcm_format, u16 x, u16 y, u16 w, u16 h, const texture_channel_remap_t& remap_vector, bool copy)
@ -587,7 +625,7 @@ namespace vk
std::unique_ptr<vk::image_view> view;
VkImageAspectFlags aspect;
VkImageCreateFlags image_flags;
VkImageCreateFlags image_flags = (view_type == VK_IMAGE_VIEW_TYPE_CUBE) ? VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT : 0;
VkFormat dst_format = vk::get_compatible_sampler_format(m_formats_support, gcm_format);
if (source)
@ -599,13 +637,10 @@ namespace vk
//HACK! Should use typeless transfer
dst_format = source->info.format;
}
image_flags = source->info.flags;
}
else
{
aspect = vk::get_aspect_flags(dst_format);
image_flags = (view_type == VK_IMAGE_VIEW_TYPE_CUBE)? VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT : 0;
}
image.reset(new vk::viewable_image(*vk::get_current_renderer(), m_memory_types.device_local, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
@ -772,13 +807,25 @@ namespace vk
auto result = create_temporary_subresource_view_impl(cmd, nullptr, VK_IMAGE_TYPE_2D,
VK_IMAGE_VIEW_TYPE_2D, gcm_format, 0, 0, width, height, remap_vector, false);
const auto image = result->image();
VkImageAspectFlags dst_aspect = vk::get_aspect_flags(result->info.format);
VkImageSubresourceRange dst_range = { dst_aspect, 0, 1, 0, 1 };
vk::change_image_layout(cmd, result->image(), VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, dst_range);
vk::change_image_layout(cmd, image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, dst_range);
copy_transfer_regions_impl(cmd, result->image(), sections_to_copy);
if (!(dst_aspect & VK_IMAGE_ASPECT_DEPTH_BIT))
{
VkClearColorValue clear = {};
vkCmdClearColorImage(cmd, image->value, image->current_layout, &clear, 1, &dst_range);
}
else
{
VkClearDepthStencilValue clear = { 1.f, 0 };
vkCmdClearDepthStencilImage(cmd, image->value, image->current_layout, &clear, 1, &dst_range);
}
vk::change_image_layout(cmd, result->image(), VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, dst_range);
copy_transfer_regions_impl(cmd, image, sections_to_copy);
vk::change_image_layout(cmd, image, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, dst_range);
return result;
}
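
Because sections_to_copy may not cover every texel of the temporary image, it is now cleared to defined values first: transparent black for color, depth 1.0 / stencil 0 for depth formats. The aspect-keyed clear, assuming the standard Vulkan entry points:

#include <vulkan/vulkan.h>

void clear_before_assembly(VkCommandBuffer cmd, VkImage image, VkImageLayout layout,
                           VkImageAspectFlags aspect,
                           const VkImageSubresourceRange& range)
{
    if (!(aspect & VK_IMAGE_ASPECT_DEPTH_BIT))
    {
        VkClearColorValue clear = {}; // transparent black
        vkCmdClearColorImage(cmd, image, layout, &clear, 1, &range);
    }
    else
    {
        VkClearDepthStencilValue clear = { 1.f, 0 }; // far depth, zero stencil
        vkCmdClearDepthStencilImage(cmd, image, layout, &clear, 1, &range);
    }
}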
@ -940,6 +987,7 @@ namespace vk
change_image_layout(cmd, image, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, subres_range);
section->last_write_tag = rsx::get_shared_tag();
return section;
}
@ -988,6 +1036,7 @@ namespace vk
case CELL_GCM_TEXTURE_DEPTH24_D8:
case CELL_GCM_TEXTURE_DEPTH24_D8_FLOAT:
return (vk_format == VK_FORMAT_D24_UNORM_S8_UINT || vk_format == VK_FORMAT_D32_SFLOAT_S8_UINT);
case CELL_GCM_TEXTURE_X16:
case CELL_GCM_TEXTURE_DEPTH16:
case CELL_GCM_TEXTURE_DEPTH16_FLOAT:
return (vk_format == VK_FORMAT_D16_UNORM);