From fd8d2ecbf4033f7b066c576b6a8b5c06db47b064 Mon Sep 17 00:00:00 2001
From: pauls-gh
Date: Wed, 21 Mar 2018 21:52:01 -0700
Subject: [PATCH] Remove Volume Texture Compression (VTC) tiling for Vulkan, DX12 and ATI (OpenGL).

---
 rpcs3/Emu/RSX/Common/TextureUtils.cpp | 69 +++++++++++++++++++++++++++++++++++++++++--
 rpcs3/Emu/RSX/Common/TextureUtils.h   |  2 +-
 rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp  |  2 +-
 rpcs3/Emu/RSX/GL/GLTexture.cpp        | 18 +++++-----
 rpcs3/Emu/RSX/VK/VKTexture.cpp        |  2 +-
 5 files changed, 79 insertions(+), 14 deletions(-)

diff --git a/rpcs3/Emu/RSX/Common/TextureUtils.cpp b/rpcs3/Emu/RSX/Common/TextureUtils.cpp
index a170584c3e..859baca0b0 100644
--- a/rpcs3/Emu/RSX/Common/TextureUtils.cpp
+++ b/rpcs3/Emu/RSX/Common/TextureUtils.cpp
@@ -53,6 +53,52 @@ struct copy_unmodified_block_swizzled
 	}
 };
 
+/**
+ * Copies a block-compressed 3D texture from Nvidia VTC layout into linear
+ * order, where each 2D slice is stored back to back.
+ *
+ * VTC interleaves depth planes in groups of four: within a group, the blocks
+ * sharing an (x, y) position are stored consecutively, one per plane. A
+ * trailing group of fewer than four planes is read with a matching smaller
+ * stride, per NV_texture_compression_vtc (NOTE(review): confirm against real
+ * 3D textures whose depth is not a multiple of four).
+ *
+ * The pitch arguments are not used by this helper; source and destination
+ * are both addressed as tightly packed blocks.
+ */
+struct copy_unmodified_block_vtc
+{
+	template<typename T, typename U>
+	static void copy_mipmap_level(gsl::span<T> dst, gsl::span<const U> src, u16 width_in_block, u16 row_count, u16 depth, u32 dst_pitch_in_block, u32 src_pitch_in_block)
+	{
+		static_assert(sizeof(T) == sizeof(U), "Type size doesn't match.");
+
+		const u32 vtc_tile_depth = 4;
+		const u32 slice_block_count = width_in_block * row_count;
+		u32 dst_offset = 0;
+
+		// Undo Nvidia VTC tiling - place each 2D texture slice back to back in linear memory
+		for (u32 d = 0; d < depth; d++)
+		{
+			// First plane of the group this slice belongs to, and how many planes that group holds
+			const u32 group_start = d - (d % vtc_tile_depth);
+			const u32 planes_left = depth - group_start;
+			const u32 group_depth = planes_left < vtc_tile_depth ? planes_left : vtc_tile_depth;
+
+			// A group of N planes occupies N * slice_block_count blocks; this slice starts at its index within the group
+			const u32 src_offset = group_start * slice_block_count + (d - group_start);
+
+			// Copy one slice of the 3d texture, one block at a time
+			for (u32 i = 0; i < slice_block_count; i++)
+			{
+				copy(dst.subspan(dst_offset + i, 1), src.subspan(src_offset + i * group_depth, 1));
+			}
+
+			dst_offset += slice_block_count;
+		}
+	}
+};
+
 namespace
 {
 	/**
@@ -199,7 +245,7 @@ std::vector get_subresources_layout(const rsx::vertex_te
 	return get_subresources_layout_impl(texture);
 }
 
-void upload_texture_subresource(gsl::span dst_buffer, const rsx_subresource_layout &src_layout, int format, bool is_swizzled, size_t dst_row_pitch_multiple_of)
+void upload_texture_subresource(gsl::span dst_buffer, const rsx_subresource_layout &src_layout, int format, bool is_swizzled, bool vtc_support, size_t dst_row_pitch_multiple_of)
 {
 	u16 w = src_layout.width_in_block;
 	u16 h = src_layout.height_in_block;
@@ -270,12 +316,29 @@ void upload_texture_subresource(gsl::span dst_buffer, const rsx_subre
 		break;
 
 	case CELL_GCM_TEXTURE_COMPRESSED_DXT1:
-		copy_unmodified_block::copy_mipmap_level(as_span_workaround(dst_buffer), gsl::as_span(src_layout.data), w, h, depth, get_row_pitch_in_block(w, dst_row_pitch_multiple_of), src_layout.pitch_in_bytes);
+		if (depth > 1 && !vtc_support) {
+			// PS3 uses VTC memory layout for compressed 3d texture
+			// Remove the VTC tiling to support ATI and Vulkan
+			copy_unmodified_block_vtc::copy_mipmap_level(as_span_workaround(dst_buffer), gsl::as_span(src_layout.data), w, h, depth, get_row_pitch_in_block(w, dst_row_pitch_multiple_of), src_layout.pitch_in_bytes);
+		}
+		else {
+			copy_unmodified_block::copy_mipmap_level(as_span_workaround(dst_buffer), gsl::as_span(src_layout.data), w, h, depth, get_row_pitch_in_block(w, dst_row_pitch_multiple_of), src_layout.pitch_in_bytes);
+		}
 		break;
 
 	case CELL_GCM_TEXTURE_COMPRESSED_DXT23:
 	case CELL_GCM_TEXTURE_COMPRESSED_DXT45:
-		copy_unmodified_block::copy_mipmap_level(as_span_workaround(dst_buffer), gsl::as_span(src_layout.data), w, h, depth, get_row_pitch_in_block(w, dst_row_pitch_multiple_of), src_layout.pitch_in_bytes);
+		if (depth > 1 && !vtc_support) {
+			// PS3 uses VTC memory layout for compressed 3d texture
+			// This is only supported using Nvidia OpenGL.
+			// Remove the VTC tiling to support ATI and Vulkan
+			copy_unmodified_block_vtc::copy_mipmap_level(as_span_workaround(dst_buffer), gsl::as_span(src_layout.data), w, h, depth, get_row_pitch_in_block(w, dst_row_pitch_multiple_of), src_layout.pitch_in_bytes);
+
+		}
+		else {
+			copy_unmodified_block::copy_mipmap_level(as_span_workaround(dst_buffer), gsl::as_span(src_layout.data), w, h, depth, get_row_pitch_in_block(w, dst_row_pitch_multiple_of), src_layout.pitch_in_bytes);
+
+		}
 		break;
 
 	default:
diff --git a/rpcs3/Emu/RSX/Common/TextureUtils.h b/rpcs3/Emu/RSX/Common/TextureUtils.h
index d60c69b75e..00a150499e 100644
--- a/rpcs3/Emu/RSX/Common/TextureUtils.h
+++ b/rpcs3/Emu/RSX/Common/TextureUtils.h
@@ -50,7 +50,7 @@ size_t get_placed_texture_storage_size(const rsx::vertex_texture &texture, size_
 std::vector get_subresources_layout(const rsx::fragment_texture &texture);
 std::vector get_subresources_layout(const rsx::vertex_texture &texture);
 
-void upload_texture_subresource(gsl::span dst_buffer, const rsx_subresource_layout &src_layout, int format, bool is_swizzled, size_t dst_row_pitch_multiple_of);
+void upload_texture_subresource(gsl::span dst_buffer, const rsx_subresource_layout &src_layout, int format, bool is_swizzled, bool vtc_support, size_t dst_row_pitch_multiple_of);
 
 u8 get_format_block_size_in_bytes(int format);
 u8 get_format_block_size_in_texel(int format);
diff --git a/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp b/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp
index 9cf2e25836..4248f60ccd 100644
--- a/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp
+++ b/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp
@@ -115,7 +115,7 @@ namespace {
 		size_t offset_in_buffer = 0;
 		for (const rsx_subresource_layout &layout : input_layouts)
 		{
-			upload_texture_subresource(mapped_buffer.subspan(offset_in_buffer), layout, format, is_swizzled, 256);
+			upload_texture_subresource(mapped_buffer.subspan(offset_in_buffer), layout, format, is_swizzled, false, 256);
 			UINT row_pitch = align(layout.width_in_block * block_size_in_bytes, 256);
 			command_list->CopyTextureRegion(&CD3DX12_TEXTURE_COPY_LOCATION(existing_texture, (UINT)mip_level), 0, 0, 0,
 				&CD3DX12_TEXTURE_COPY_LOCATION(texture_buffer_heap.get_heap(),
diff --git a/rpcs3/Emu/RSX/GL/GLTexture.cpp b/rpcs3/Emu/RSX/GL/GLTexture.cpp
index 6120196741..4e6fa01d72 100644
--- a/rpcs3/Emu/RSX/GL/GLTexture.cpp
+++ b/rpcs3/Emu/RSX/GL/GLTexture.cpp
@@ -367,6 +367,8 @@ namespace gl
 		const std::vector &input_layouts, bool is_swizzled, GLenum gl_format, GLenum gl_type, std::vector& staging_buffer)
 	{
 		int mip_level = 0;
+		bool vtc_support = gl::get_driver_caps().vendor_NVIDIA;
+
 		if (is_compressed_format(format))
 		{
 			//Compressed formats have a 4-byte alignment
@@ -381,7 +383,7 @@ namespace gl
 			{
 				for (const rsx_subresource_layout &layout : input_layouts)
 				{
-					upload_texture_subresource(staging_buffer, layout, format, is_swizzled, 4);
+					upload_texture_subresource(staging_buffer, layout, format, is_swizzled, vtc_support, 4);
 					glTexSubImage1D(GL_TEXTURE_1D, mip_level++, 0, layout.width_in_block, gl_format, gl_type, staging_buffer.data());
 				}
 			}
@@ -390,7 +392,7 @@ namespace gl
 				for (const rsx_subresource_layout &layout : input_layouts)
 				{
 					u32 size = layout.width_in_block * ((format == CELL_GCM_TEXTURE_COMPRESSED_DXT1) ? 8 : 16);
-					upload_texture_subresource(staging_buffer, layout, format, is_swizzled, 4);
+					upload_texture_subresource(staging_buffer, layout, format, is_swizzled, vtc_support, 4);
 					glCompressedTexSubImage1D(GL_TEXTURE_1D, mip_level++, 0, layout.width_in_block * 4, gl_format, size, staging_buffer.data());
 				}
 			}
@@ -403,7 +405,7 @@ namespace gl
 			{
 				for (const rsx_subresource_layout &layout : input_layouts)
 				{
-					upload_texture_subresource(staging_buffer, layout, format, is_swizzled, 4);
+					upload_texture_subresource(staging_buffer, layout, format, is_swizzled, vtc_support, 4);
 					glTexSubImage2D(GL_TEXTURE_2D, mip_level++, 0, 0, layout.width_in_block, layout.height_in_block, gl_format, gl_type, staging_buffer.data());
 				}
 			}
@@ -412,7 +414,7 @@ namespace gl
 				for (const rsx_subresource_layout &layout : input_layouts)
 				{
 					u32 size = layout.width_in_block * layout.height_in_block * ((format == CELL_GCM_TEXTURE_COMPRESSED_DXT1) ? 8 : 16);
-					upload_texture_subresource(staging_buffer, layout, format, is_swizzled, 4);
+					upload_texture_subresource(staging_buffer, layout, format, is_swizzled, vtc_support, 4);
 					glCompressedTexSubImage2D(GL_TEXTURE_2D, mip_level++, 0, 0, layout.width_in_block * 4, layout.height_in_block * 4, gl_format, size, staging_buffer.data());
 				}
 			}
@@ -428,7 +430,7 @@ namespace gl
 			{
 				for (const rsx_subresource_layout &layout : input_layouts)
 				{
-					upload_texture_subresource(staging_buffer, layout, format, is_swizzled, 4);
+					upload_texture_subresource(staging_buffer, layout, format, is_swizzled, vtc_support, 4);
 					glTexSubImage2D(GL_TEXTURE_CUBE_MAP_POSITIVE_X + mip_level / mipmap_count, mip_level % mipmap_count, 0, 0, layout.width_in_block, layout.height_in_block, gl_format, gl_type, staging_buffer.data());
 					mip_level++;
 				}
@@ -438,7 +440,7 @@ namespace gl
 				for (const rsx_subresource_layout &layout : input_layouts)
 				{
 					u32 size = layout.width_in_block * layout.height_in_block * ((format == CELL_GCM_TEXTURE_COMPRESSED_DXT1) ? 8 : 16);
-					upload_texture_subresource(staging_buffer, layout, format, is_swizzled, 4);
+					upload_texture_subresource(staging_buffer, layout, format, is_swizzled, vtc_support, 4);
 					glCompressedTexSubImage2D(GL_TEXTURE_CUBE_MAP_POSITIVE_X + mip_level / mipmap_count, mip_level % mipmap_count, 0, 0, layout.width_in_block * 4, layout.height_in_block * 4, gl_format, size, staging_buffer.data());
 					mip_level++;
 				}
@@ -452,7 +454,7 @@ namespace gl
 			{
 				for (const rsx_subresource_layout &layout : input_layouts)
 				{
-					upload_texture_subresource(staging_buffer, layout, format, is_swizzled, 4);
+					upload_texture_subresource(staging_buffer, layout, format, is_swizzled, vtc_support, 4);
 					glTexSubImage3D(GL_TEXTURE_3D, mip_level++, 0, 0, 0, layout.width_in_block, layout.height_in_block, depth, gl_format, gl_type, staging_buffer.data());
 				}
 			}
@@ -461,7 +463,7 @@ namespace gl
 				for (const rsx_subresource_layout &layout : input_layouts)
 				{
 					u32 size = layout.width_in_block * layout.height_in_block * layout.depth * ((format == CELL_GCM_TEXTURE_COMPRESSED_DXT1) ? 8 : 16);
-					upload_texture_subresource(staging_buffer, layout, format, is_swizzled, 4);
+					upload_texture_subresource(staging_buffer, layout, format, is_swizzled, vtc_support, 4);
 					glCompressedTexSubImage3D(GL_TEXTURE_3D, mip_level++, 0, 0, 0, layout.width_in_block * 4, layout.height_in_block * 4, layout.depth, gl_format, size, staging_buffer.data());
 				}
 			}
diff --git a/rpcs3/Emu/RSX/VK/VKTexture.cpp b/rpcs3/Emu/RSX/VK/VKTexture.cpp
index 93d90fb2cb..8000f84370 100644
--- a/rpcs3/Emu/RSX/VK/VKTexture.cpp
+++ b/rpcs3/Emu/RSX/VK/VKTexture.cpp
@@ -162,7 +162,7 @@ namespace vk
 			void *mapped_buffer = upload_heap.map(offset_in_buffer, image_linear_size);
 			gsl::span mapped{ (gsl::byte*)mapped_buffer, ::narrow(image_linear_size) };
 
-			upload_texture_subresource(mapped, layout, format, is_swizzled, 256);
+			upload_texture_subresource(mapped, layout, format, is_swizzled, false, 256);
 			upload_heap.unmap();
 
 			VkBufferImageCopy copy_info = {};