From 003c62ba738e8d4f31da550cf6394c487a03a629 Mon Sep 17 00:00:00 2001 From: Triang3l Date: Sun, 22 May 2022 18:33:59 +0300 Subject: [PATCH] [GPU] Correct rounding of texture load row size The original multiplication was likely added early during the development of generic resolution scaling. Before generic resolution scaling, invocations were done for unscaled guest blocks; now they're done for scaled blocks, so with 3x1 scaling, an invocation for 8 blocks writes 8 host blocks, not 24. --- src/xenia/gpu/d3d12/d3d12_texture_cache.cc | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/src/xenia/gpu/d3d12/d3d12_texture_cache.cc b/src/xenia/gpu/d3d12/d3d12_texture_cache.cc index 46da10934..b5c8dfb47 100644 --- a/src/xenia/gpu/d3d12/d3d12_texture_cache.cc +++ b/src/xenia/gpu/d3d12/d3d12_texture_cache.cc @@ -1538,12 +1538,11 @@ bool D3D12TextureCache::LoadTextureDataFromResidentMemoryImpl(Texture& texture, UINT64 host_slice_sizes_mips[xenos::kTexture2DCubeMaxWidthHeightLog2 + 1]; { // Using custom calculations instead of GetCopyableFootprints because - // shaders may copy multiple blocks per thread for simplicity. For 3x - // resolution scaling, the number becomes a multiple of 3 rather than a - // power of 2 - so the 256-byte alignment required anyway by Direct3D 12 is - // not enough. GetCopyableFootprints would be needed to be called with an - // overaligned width - but it may exceed 16384 (the maximum Direct3D 12 - // texture size) for 3x resolution scaling, and the function will fail. + // shaders may unconditionally copy multiple blocks along X per thread for + // simplicity, to make sure all rows (also including the last one - + // GetCopyableFootprints aligns row offsets, but not the total size) are + // properly padded to the number of blocks copied in an invocation without + // implicit assumptions about D3D12_TEXTURE_DATA_PITCH_ALIGNMENT. 
DXGI_FORMAT host_copy_format; uint32_t host_block_width; uint32_t host_block_height; @@ -1576,8 +1575,7 @@ bool D3D12TextureCache::LoadTextureDataFromResidentMemoryImpl(Texture& texture, host_slice_layout_base.Footprint.RowPitch = xe::align(xe::round_up(host_slice_layout_base.Footprint.Width / host_block_width, - load_mode_info.host_x_blocks_per_thread * - texture_resolution_scale_x) * + load_mode_info.host_x_blocks_per_thread) * host_bytes_per_block, uint32_t(D3D12_TEXTURE_DATA_PITCH_ALIGNMENT)); host_slice_size_base = xe::align( @@ -1622,8 +1620,7 @@ bool D3D12TextureCache::LoadTextureDataFromResidentMemoryImpl(Texture& texture, host_slice_layout_mip.Footprint.RowPitch = xe::align(xe::round_up(host_slice_layout_mip.Footprint.Width / host_block_width, - load_mode_info.host_x_blocks_per_thread * - texture_resolution_scale_x) * + load_mode_info.host_x_blocks_per_thread) * host_bytes_per_block, uint32_t(D3D12_TEXTURE_DATA_PITCH_ALIGNMENT)); UINT64 host_slice_sizes_mip = xe::align(