[GPU] Correct rounding of texture load row size

The original multiplication was likely added early during the development of generic resolution scaling. Before generic resolution scaling, invocations were done for unscaled guest blocks; now they're done for scaled blocks, so with 3x1 scaling, an invocation for 8 blocks writes 8 host blocks, not 24.
2022-05-22 18:33:59 +03:00 · 2022-05-22 18:33:59 +03:00 · 003c62ba73
parent 6aa30ed074
commit 003c62ba73
1 changed files with 7 additions and 10 deletions
--- a/src/xenia/gpu/d3d12/d3d12_texture_cache.cc
+++ b/src/xenia/gpu/d3d12/d3d12_texture_cache.cc
@@ -1538,12 +1538,11 @@ bool D3D12TextureCache::LoadTextureDataFromResidentMemoryImpl(Texture& texture,
  UINT64 host_slice_sizes_mips[xenos::kTexture2DCubeMaxWidthHeightLog2 + 1];
  {
    // Using custom calculations instead of GetCopyableFootprints because
-    // shaders may copy multiple blocks per thread for simplicity. For 3x
+    // shaders may unconditionally copy multiple blocks along X per thread for
-    // resolution scaling, the number becomes a multiple of 3 rather than a
+    // simplicity, to make sure all rows (also including the last one -
-    // power of 2 - so the 256-byte alignment required anyway by Direct3D 12 is
+    // GetCopyableFootprints aligns row offsets, but not the total size) are
-    // not enough. GetCopyableFootprints would be needed to be called with an
+    // properly padded to the number of blocks copied in an invocation without
-    // overaligned width - but it may exceed 16384 (the maximum Direct3D 12
+    // implicit assumptions about D3D12_TEXTURE_DATA_PITCH_ALIGNMENT.
-    // texture size) for 3x resolution scaling, and the function will fail.
    DXGI_FORMAT host_copy_format;
    uint32_t host_block_width;
    uint32_t host_block_height;
@@ -1576,8 +1575,7 @@ bool D3D12TextureCache::LoadTextureDataFromResidentMemoryImpl(Texture& texture,
      host_slice_layout_base.Footprint.RowPitch =
          xe::align(xe::round_up(host_slice_layout_base.Footprint.Width /
                                     host_block_width,
-                                 load_mode_info.host_x_blocks_per_thread *
+                                 load_mode_info.host_x_blocks_per_thread) *
-                                     texture_resolution_scale_x) *
                        host_bytes_per_block,
                    uint32_t(D3D12_TEXTURE_DATA_PITCH_ALIGNMENT));
      host_slice_size_base = xe::align(
@@ -1622,8 +1620,7 @@ bool D3D12TextureCache::LoadTextureDataFromResidentMemoryImpl(Texture& texture,
        host_slice_layout_mip.Footprint.RowPitch =
            xe::align(xe::round_up(host_slice_layout_mip.Footprint.Width /
                                       host_block_width,
-                                   load_mode_info.host_x_blocks_per_thread *
+                                   load_mode_info.host_x_blocks_per_thread) *
-                                       texture_resolution_scale_x) *
                          host_bytes_per_block,
                      uint32_t(D3D12_TEXTURE_DATA_PITCH_ALIGNMENT));
        UINT64 host_slice_sizes_mip = xe::align(