rsx: Fix texture readback

- gl: Fix up the calculation for internal image pitch
- vk: Implement GPU-side resizing for read back textures (fixes WCB zoom)
This commit is contained in:
kd-11 2018-06-21 18:28:53 +03:00 committed by kd-11
parent 278cb52f19
commit f45dcfe18a
3 changed files with 183 additions and 69 deletions

View File

@ -1469,6 +1469,8 @@ namespace gl
GLuint m_height = 0;
GLuint m_depth = 0;
GLuint m_mipmaps = 0;
GLuint m_pitch = 0;
GLuint m_compressed = GL_FALSE;
target m_target = target::texture2D;
internal_format m_internal_format = internal_format::rgba8;
@ -1553,6 +1555,26 @@ namespace gl
m_height = height;
m_depth = depth;
m_mipmaps = mipmaps;
GLenum query_target = (target == GL_TEXTURE_CUBE_MAP) ? GL_TEXTURE_CUBE_MAP_POSITIVE_X : target;
glGetTexLevelParameteriv(query_target, 0, GL_TEXTURE_COMPRESSED, (GLint*)&m_compressed);
if (m_compressed)
{
GLint compressed_size;
glGetTexLevelParameteriv(query_target, 0, GL_TEXTURE_COMPRESSED_IMAGE_SIZE, &compressed_size);
m_pitch = compressed_size / height;
}
else
{
GLint r, g, b, a;
glGetTexLevelParameteriv(query_target, 0, GL_TEXTURE_RED_SIZE, &r);
glGetTexLevelParameteriv(query_target, 0, GL_TEXTURE_GREEN_SIZE, &g);
glGetTexLevelParameteriv(query_target, 0, GL_TEXTURE_BLUE_SIZE, &b);
glGetTexLevelParameteriv(query_target, 0, GL_TEXTURE_ALPHA_SIZE, &a);
m_pitch = width * (r + g + b + a) / 8;
}
}
m_target = static_cast<texture::target>(target);
@ -1622,6 +1644,16 @@ namespace gl
return m_mipmaps;
}
// Bytes per row of level 0, as cached when the image storage was created.
GLuint pitch() const { return m_pitch; }
// GL_TRUE when the internal format is block-compressed (queried at creation).
GLboolean compressed() const { return m_compressed; }
sizei size2D() const
{
return{ (int)m_width, (int)m_height };

View File

@ -296,6 +296,7 @@ namespace gl
this->width = w;
this->height = h;
this->rsx_pitch = rsx_pitch;
this->real_pitch = 0;
this->depth = depth;
this->mipmaps = mipmaps;
@ -330,7 +331,6 @@ namespace gl
this->width = width;
this->height = height;
rsx_pitch = pitch;
real_pitch = width * get_pixel_size(format, type);
}
void set_format(texture::format gl_format, texture::type gl_type, bool swap_bytes)
@ -351,8 +351,6 @@ namespace gl
break;
}
}
real_pitch = width * get_pixel_size(format, type);
}
void set_depth_flag(bool is_depth_fmt)
@ -374,7 +372,7 @@ namespace gl
gl::texture* target_texture = vram_texture;
if ((rsx::get_resolution_scale_percent() != 100 && context == rsx::texture_upload_context::framebuffer_storage) ||
(real_pitch != rsx_pitch))
(vram_texture->pitch() != rsx_pitch))
{
u32 real_width = width;
u32 real_height = height;
@ -435,6 +433,7 @@ namespace gl
pack_settings.swap_bytes(pack_unpack_swap_bytes);
target_texture->copy_to(nullptr, format, type, pack_settings);
real_pitch = target_texture->pitch();
if (auto error = glGetError())
{
@ -495,6 +494,8 @@ namespace gl
result = false;
}
verify(HERE), real_pitch > 0;
m_fence.wait_for_signal();
flushed = true;
@ -514,7 +515,7 @@ namespace gl
require_manual_shuffle = true;
}
if (real_pitch >= rsx_pitch || scaled_texture != 0 || valid_range.second <= rsx_pitch)
if (real_pitch >= rsx_pitch || valid_range.second <= rsx_pitch)
{
memcpy(dst, src, valid_range.second);
}

View File

@ -66,8 +66,6 @@ namespace vk
else
this->rsx_pitch = cpu_address_range / height;
real_pitch = vk::get_format_texel_width(image->info.format) * width;
//Even if we are managing the same vram section, we cannot guarantee contents are static
//The create method is only invoked when a new managed session is required
synchronized = false;
@ -166,25 +164,61 @@ namespace vk
cmd.begin();
}
const u16 internal_width = (context != rsx::texture_upload_context::framebuffer_storage? width : std::min(width, rsx::apply_resolution_scale(width, true)));
const u16 internal_height = (context != rsx::texture_upload_context::framebuffer_storage? height : std::min(height, rsx::apply_resolution_scale(height, true)));
vk::image *target = vram_texture;
real_pitch = vk::get_format_texel_width(vram_texture->info.format) * vram_texture->width();
VkImageAspectFlags aspect_flag = vk::get_aspect_flags(vram_texture->info.format);
//TODO: Read back stencil values (is this really necessary?)
VkBufferImageCopy copyRegion = {};
copyRegion.bufferOffset = 0;
copyRegion.bufferRowLength = internal_width;
copyRegion.bufferImageHeight = internal_height;
copyRegion.imageSubresource = {aspect_flag & ~(VK_IMAGE_ASPECT_STENCIL_BIT), 0, 0, 1};
copyRegion.imageOffset = {};
copyRegion.imageExtent = {internal_width, internal_height, 1};
VkImageSubresourceRange subresource_range = { aspect_flag, 0, 1, 0, 1 };
u32 transfer_width = width;
u32 transfer_height = height;
VkImageLayout layout = vram_texture->current_layout;
VkImageLayout old_layout = vram_texture->current_layout;
change_image_layout(cmd, vram_texture, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, subresource_range);
vkCmdCopyImageToBuffer(cmd, vram_texture->value, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, dma_buffer->value, 1, &copyRegion);
change_image_layout(cmd, vram_texture, layout, subresource_range);
if ((rsx::get_resolution_scale_percent() != 100 && context == rsx::texture_upload_context::framebuffer_storage) ||
(real_pitch != rsx_pitch))
{
if (context == rsx::texture_upload_context::framebuffer_storage)
{
switch (static_cast<vk::render_target*>(vram_texture)->read_aa_mode)
{
case rsx::surface_antialiasing::center_1_sample:
break;
case rsx::surface_antialiasing::diagonal_centered_2_samples:
transfer_width *= 2;
break;
default:
transfer_width *= 2;
transfer_height *= 2;
break;
}
}
if (transfer_width != vram_texture->width() || transfer_height != vram_texture->height())
{
// TODO: Synchronize access to typeless textures
target = vk::get_typeless_helper(vram_texture->info.format);
vk::copy_scaled_image(cmd, vram_texture->value, target->value, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, target->current_layout,
0, 0, vram_texture->width(), vram_texture->height(), 0, 0, transfer_width, transfer_height, 1, aspect_flag, true, VK_FILTER_NEAREST,
vram_texture->info.format, target->info.format);
}
}
if (target->current_layout != VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL)
{
// Using a scaled intermediary
verify(HERE), target != vram_texture;
change_image_layout(cmd, target, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, subresource_range);
}
// TODO: Read back stencil values (is this really necessary?)
VkBufferImageCopy region = {};
region.imageSubresource = {aspect_flag & ~(VK_IMAGE_ASPECT_STENCIL_BIT), 0, 0, 1};
region.imageExtent = {transfer_width, transfer_height, 1};
vkCmdCopyImageToBuffer(cmd, target->value, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, dma_buffer->value, 1, &region);
change_image_layout(cmd, vram_texture, old_layout, subresource_range);
real_pitch = vk::get_format_texel_width(vram_texture->info.format) * transfer_width;
if (manage_cb_lifetime)
{
@ -205,7 +239,7 @@ namespace vk
}
template<typename T, bool swapped>
void do_memory_transfer(void *pixels_dst, const void *pixels_src, u32 max_length)
void do_memory_transfer_packed(void *pixels_dst, const void *pixels_src, u32 max_length)
{
if (sizeof(T) == 1 || !swapped)
{
@ -222,6 +256,38 @@ namespace vk
}
}
template<typename T, bool swapped>
void do_memory_transfer_padded(void *pixels_dst, const void *pixels_src, u32 src_pitch, u32 dst_pitch, u32 num_rows)
{
auto src = (char*)pixels_src;
auto dst = (char*)pixels_dst;
if (sizeof(T) == 1 || !swapped)
{
for (u32 y = 0; y < num_rows; ++y)
{
memcpy(dst, src, src_pitch);
src += src_pitch;
dst += dst_pitch;
}
}
else
{
const u32 block_size = src_pitch / sizeof(T);
for (u32 y = 0; y < num_rows; ++y)
{
auto typed_dst = (be_t<T> *)dst;
auto typed_src = (T *)src;
for (u32 px = 0; px < block_size; ++px)
typed_dst[px] = typed_src[px];
src += src_pitch;
dst += dst_pitch;
}
}
}
bool flush(vk::command_buffer& cmd, VkQueue submit_queue)
{
if (flushed) return true;
@ -241,6 +307,7 @@ namespace vk
result = false;
}
verify(HERE), real_pitch > 0;
flushed = true;
const auto valid_range = get_confirmed_range();
@ -250,67 +317,81 @@ namespace vk
const auto texel_layout = vk::get_format_element_size(vram_texture->info.format);
const auto elem_size = texel_layout.first;
//We have to do our own byte swapping since the driver doesn't do it for us
if (real_pitch == rsx_pitch)
{
bool is_depth_format = true;
switch (vram_texture->info.format)
{
case VK_FORMAT_D32_SFLOAT_S8_UINT:
rsx::convert_le_f32_to_be_d24(pixels_dst, pixels_src, valid_range.second >> 2, 1);
break;
case VK_FORMAT_D24_UNORM_S8_UINT:
rsx::convert_le_d24x8_to_be_d24x8(pixels_dst, pixels_src, valid_range.second >> 2, 1);
break;
default:
is_depth_format = false;
break;
}
if (!is_depth_format)
auto memory_transfer_packed = [=]()
{
switch (elem_size)
{
default:
LOG_ERROR(RSX, "Invalid element width %d", elem_size);
case 1:
do_memory_transfer<u8, false>(pixels_dst, pixels_src, valid_range.second);
do_memory_transfer_packed<u8, false>(pixels_dst, pixels_src, valid_range.second);
break;
case 2:
if (pack_unpack_swap_bytes)
do_memory_transfer<u16, true>(pixels_dst, pixels_src, valid_range.second);
do_memory_transfer_packed<u16, true>(pixels_dst, pixels_src, valid_range.second);
else
do_memory_transfer<u16, false>(pixels_dst, pixels_src, valid_range.second);
do_memory_transfer_packed<u16, false>(pixels_dst, pixels_src, valid_range.second);
break;
case 4:
if (pack_unpack_swap_bytes)
do_memory_transfer<u32, true>(pixels_dst, pixels_src, valid_range.second);
do_memory_transfer_packed<u32, true>(pixels_dst, pixels_src, valid_range.second);
else
do_memory_transfer<u32, false>(pixels_dst, pixels_src, valid_range.second);
break;
}
}
}
else
{
//Scale image to fit
//usually we can just get away with nearest filtering
u8 samples_u = 1, samples_v = 1;
switch (static_cast<vk::render_target*>(vram_texture)->read_aa_mode)
{
case rsx::surface_antialiasing::diagonal_centered_2_samples:
samples_u = 2;
break;
case rsx::surface_antialiasing::square_centered_4_samples:
case rsx::surface_antialiasing::square_rotated_4_samples:
samples_u = 2;
samples_v = 2;
do_memory_transfer_packed<u32, false>(pixels_dst, pixels_src, valid_range.second);
break;
}
};
const u16 row_length = u16(width * texel_layout.second);
const u16 usable_height = (valid_range.second / rsx_pitch) / samples_v;
rsx::scale_image_nearest(pixels_dst, pixels_src, row_length, usable_height, rsx_pitch, real_pitch, elem_size, samples_u, samples_v, pack_unpack_swap_bytes);
auto memory_transfer_padded = [=]()
{
const u32 num_rows = valid_range.second / rsx_pitch;
switch (elem_size)
{
default:
LOG_ERROR(RSX, "Invalid element width %d", elem_size);
case 1:
do_memory_transfer_padded<u8, false>(pixels_dst, pixels_src, real_pitch, rsx_pitch, num_rows);
break;
case 2:
if (pack_unpack_swap_bytes)
do_memory_transfer_padded<u16, true>(pixels_dst, pixels_src, real_pitch, rsx_pitch, num_rows);
else
do_memory_transfer_padded<u16, false>(pixels_dst, pixels_src, real_pitch, rsx_pitch, num_rows);
break;
case 4:
if (pack_unpack_swap_bytes)
do_memory_transfer_padded<u32, true>(pixels_dst, pixels_src, real_pitch, rsx_pitch, num_rows);
else
do_memory_transfer_padded<u32, false>(pixels_dst, pixels_src, real_pitch, rsx_pitch, num_rows);
break;
}
};
// NOTE: We have to do our own byte swapping since the driver doesn't do it for us
// TODO: Replace the cpu-side transformations with trivial compute pipelines
if (real_pitch >= rsx_pitch || valid_range.second <= rsx_pitch)
{
switch (vram_texture->info.format)
{
case VK_FORMAT_D32_SFLOAT_S8_UINT:
{
rsx::convert_le_f32_to_be_d24(pixels_dst, pixels_src, valid_range.second >> 2, 1);
break;
}
case VK_FORMAT_D24_UNORM_S8_UINT:
{
rsx::convert_le_d24x8_to_be_d24x8(pixels_dst, pixels_src, valid_range.second >> 2, 1);
break;
}
default:
{
memory_transfer_packed();
break;
}
}
}
else
{
memory_transfer_padded();
switch (vram_texture->info.format)
{