[D3D12] Texture load code cleanup and resolution scaling fixes
The resolution scale is now taken into account when copying from the mip tail.
This commit is contained in:
@ -562,7 +562,7 @@ class D3D12CommandProcessor : public CommandProcessor {
// Unsubmitted barrier batch.
std::vector<D3D12_RESOURCE_BARRIER> barriers_;
// <Resource, submission where requested>, sorted by the submission number.
// <Submission where requested, resource>, sorted by the submission number.
std::deque<std::pair<uint64_t, ID3D12Resource*>> resources_for_deletion_;
static constexpr uint32_t kScratchBufferSizeIncrement = 16 * 1024 * 1024;
@ -1612,6 +1612,25 @@ bool D3D12TextureCache::LoadTextureDataFromResidentMemoryImpl(Texture& texture,
uint32_t texture_resolution_scale_y =
texture_resolution_scaled ? draw_resolution_scale_y() : 1;
// The loop counter can mean two things depending on whether the packed mip
// tail is stored as mip 0. In that case, the level number alone would be
// ambiguous, since both the base and the mips would be on "level 0", but
// stored in separate places.
uint32_t loop_level_first, loop_level_last;
if (level_packed == 0) {
// Packed mip tail is the level 0 - may need to load mip tails for the base,
// the mips, or both.
// Loop iteration 0 - base packed mip tail.
// Loop iteration 1 - mips packed mip tail.
loop_level_first = uint32_t(level_first != 0);
loop_level_last = uint32_t(level_last != 0);
} else {
// Packed mip tail is not the level 0.
// Loop iteration is the actual level being loaded.
loop_level_first = level_stored_first;
loop_level_last = level_stored_last;
// Get the host layout and the buffer.
bool host_block_compressed =
host_formats_[uint32_t(guest_format)].is_block_compressed &&
@ -1631,99 +1650,61 @@ bool D3D12TextureCache::LoadTextureDataFromResidentMemoryImpl(Texture& texture,
// 1...min(level_last, level_packed) if level_packed is not 0, or only 0 if
// level_packed == 0.
host_slice_layouts_mips[xenos::kTexture2DCubeMaxWidthHeightLog2 + 1];
UINT64 host_slice_sizes_mips[xenos::kTexture2DCubeMaxWidthHeightLog2 + 1];
// Using custom calculations instead of GetCopyableFootprints because
// shaders may unconditionally copy multiple blocks along X per thread for
// simplicity, to make sure all rows (also including the last one -
// GetCopyableFootprints aligns row offsets, but not the total size) are
// properly padded to the number of blocks copied in an invocation without
// implicit assumptions about D3D12_TEXTURE_DATA_PITCH_ALIGNMENT.
DXGI_FORMAT host_copy_format =
GetDXGIResourceFormat(guest_format, width, height);
if (!level_first) {
host_slice_layout_base.Offset = copy_buffer_size;
host_slice_layout_base.Footprint.Format = host_copy_format;
if (!level_packed) {
// Loading the packed tail for the base - load the whole tail to copy
// regions out of it.
host_slice_layout_base.Footprint.Width =
guest_layout.base.x_extent_blocks * block_width;
host_slice_layout_base.Footprint.Height =
guest_layout.base.y_extent_blocks * block_height;
host_slice_layout_base.Footprint.Depth = guest_layout.base.z_extent;
} else {
host_slice_layout_base.Footprint.Width = width;
host_slice_layout_base.Footprint.Height = height;
host_slice_layout_base.Footprint.Depth = depth;
host_slice_layout_base.Footprint.Width = xe::round_up(
host_slice_layout_base.Footprint.Width * texture_resolution_scale_x,
host_slice_layout_base.Footprint.Height = xe::round_up(
host_slice_layout_base.Footprint.Height * texture_resolution_scale_y,
host_slice_layout_base.Footprint.RowPitch =
xe::align(xe::round_up(host_slice_layout_base.Footprint.Width /
host_x_blocks_per_thread) *
host_slice_size_base = xe::align(
UINT64(host_slice_layout_base.Footprint.RowPitch) *
(host_slice_layout_base.Footprint.Height / host_block_height) *
copy_buffer_size += host_slice_size_base * array_size;
if (level_last) {
for (uint32_t level = level_stored_first; level <= level_stored_last;
++level) {
D3D12_PLACED_SUBRESOURCE_FOOTPRINT& host_slice_layout_mip =
host_slice_layout_mip.Offset = copy_buffer_size;
host_slice_layout_mip.Footprint.Format = host_copy_format;
if (level == level_packed) {
// Loading the packed tail for the mips - load the whole tail to copy
// regions out of it.
const texture_util::TextureGuestLayout::Level&
guest_layout_packed_mips = guest_layout.mips[level];
host_slice_layout_mip.Footprint.Width =
guest_layout_packed_mips.x_extent_blocks * block_width;
host_slice_layout_mip.Footprint.Height =
guest_layout_packed_mips.y_extent_blocks * block_height;
host_slice_layout_mip.Footprint.Depth =
} else {
host_slice_layout_mip.Footprint.Width =
std::max(width >> level, uint32_t(1));
host_slice_layout_mip.Footprint.Height =
std::max(height >> level, uint32_t(1));
host_slice_layout_mip.Footprint.Depth =
std::max(depth >> level, uint32_t(1));
host_slice_layout_mip.Footprint.Width = xe::round_up(
host_slice_layout_mip.Footprint.Width * texture_resolution_scale_x,
host_slice_layout_mip.Footprint.Height = xe::round_up(
host_slice_layout_mip.Footprint.Height * texture_resolution_scale_y,
host_slice_layout_mip.Footprint.RowPitch =
xe::align(xe::round_up(host_slice_layout_mip.Footprint.Width /
host_x_blocks_per_thread) *
UINT64 host_slice_sizes_mip = xe::align(
UINT64(host_slice_layout_mip.Footprint.RowPitch) *
(host_slice_layout_mip.Footprint.Height / host_block_height) *
host_slice_sizes_mips[level] = host_slice_sizes_mip;
copy_buffer_size += host_slice_sizes_mip * array_size;
UINT64 host_slice_sizes_mips[xenos::kTextureMaxMips];
// Using custom calculations instead of GetCopyableFootprints because
// shaders may unconditionally copy multiple blocks along X per thread for
// simplicity, to make sure all rows (also including the last one -
// GetCopyableFootprints aligns row offsets, but not the total size) are
// properly padded to the number of blocks copied in an invocation without
// implicit assumptions about D3D12_TEXTURE_DATA_PITCH_ALIGNMENT.
DXGI_FORMAT host_copy_format =
GetDXGIResourceFormat(guest_format, width, height);
for (uint32_t loop_level = loop_level_first; loop_level <= loop_level_last;
++loop_level) {
bool is_base = loop_level == 0;
uint32_t level = (level_packed == 0) ? 0 : loop_level;
D3D12_PLACED_SUBRESOURCE_FOOTPRINT& level_host_slice_layout =
is_base ? host_slice_layout_base : host_slice_layouts_mips[level];
level_host_slice_layout.Offset = copy_buffer_size;
level_host_slice_layout.Footprint.Format = host_copy_format;
if (level == level_packed) {
// Loading the packed tail for the base or the mips - load the whole tail
// to copy regions out of it.
const texture_util::TextureGuestLayout::Level& guest_layout_packed =
is_base ? guest_layout.base : guest_layout.mips[level];
level_host_slice_layout.Footprint.Width =
guest_layout_packed.x_extent_blocks * block_width;
level_host_slice_layout.Footprint.Height =
guest_layout_packed.y_extent_blocks * block_height;
level_host_slice_layout.Footprint.Depth = guest_layout_packed.z_extent;
} else {
level_host_slice_layout.Footprint.Width =
std::max(width >> level, uint32_t(1));
level_host_slice_layout.Footprint.Height =
std::max(height >> level, uint32_t(1));
level_host_slice_layout.Footprint.Depth =
std::max(depth >> level, uint32_t(1));
level_host_slice_layout.Footprint.Width = xe::round_up(
level_host_slice_layout.Footprint.Width * texture_resolution_scale_x,
level_host_slice_layout.Footprint.Height = xe::round_up(
level_host_slice_layout.Footprint.Height * texture_resolution_scale_y,
level_host_slice_layout.Footprint.RowPitch = xe::align(
xe::round_up(level_host_slice_layout.Footprint.Width / host_block_width,
host_x_blocks_per_thread) *
UINT64 level_host_slice_size = xe::align(
UINT64(level_host_slice_layout.Footprint.RowPitch) *
(level_host_slice_layout.Footprint.Height / host_block_height) *
(is_base ? host_slice_size_base : host_slice_sizes_mips[level]) =
copy_buffer_size += level_host_slice_size * array_size;
D3D12_RESOURCE_STATES copy_buffer_state =
@ -1771,7 +1752,7 @@ bool D3D12TextureCache::LoadTextureDataFromResidentMemoryImpl(Texture& texture,
// after loading the base is done).
if (!texture_resolution_scaled) {
D3D12SharedMemory& d3d12_shared_memory =
ui::d3d12::util::DescriptorCpuGpuHandlePair descriptor_unscaled_source;
if (bindless_resources_used_) {
@ -1798,24 +1779,6 @@ bool D3D12TextureCache::LoadTextureDataFromResidentMemoryImpl(Texture& texture,
(uint32_t(texture_key.endianness) << 2) |
(texture_resolution_scale_x << 4) | (texture_resolution_scale_y << 6);
// The loop counter can mean two things depending on whether the packed mip
// tail is stored as mip 0, because in this case, it would be ambiguous since
// both the base and the mips would be on "level 0", but stored in separate
// places.
uint32_t loop_level_first, loop_level_last;
if (level_packed == 0) {
// Packed mip tail is the level 0 - may need to load mip tails for the base,
// the mips, or both.
// Loop iteration 0 - base packed mip tail.
// Loop iteration 1 - mips packed mip tail.
loop_level_first = uint32_t(level_first != 0);
loop_level_last = uint32_t(level_last != 0);
} else {
// Packed mip tail is not the level 0.
// Loop iteration is the actual level being loaded.
loop_level_first = level_stored_first;
loop_level_last = level_stored_last;
// The loop is slices within levels because the base and the levels may need
// different portions of the scaled resolve virtual address space to be
// available through buffers, and to create a descriptor, the buffer start
@ -1902,8 +1865,6 @@ bool D3D12TextureCache::LoadTextureDataFromResidentMemoryImpl(Texture& texture,
load_constants.size_blocks[2] = level_depth;
load_constants.height_texels = level_height;
// Each thread group processes 32x32x1 source blocks (resolution-scaled, but
// still compressed if the host needs decompression).
uint32_t group_count_x =
(load_constants.size_blocks[0] +
((UINT32_C(1) << guest_x_blocks_per_group_log2) - 1)) >>
@ -1913,13 +1874,16 @@ bool D3D12TextureCache::LoadTextureDataFromResidentMemoryImpl(Texture& texture,
((UINT32_C(1) << kLoadGuestYBlocksPerGroupLog2) - 1)) >>
const D3D12_PLACED_SUBRESOURCE_FOOTPRINT& host_slice_layout =
const D3D12_PLACED_SUBRESOURCE_FOOTPRINT& level_host_slice_layout =
is_base ? host_slice_layout_base : host_slice_layouts_mips[level];
uint32_t host_slice_size =
uint32_t(is_base ? host_slice_size_base : host_slice_sizes_mips[level]);
load_constants.host_offset = uint32_t(host_slice_layout.Offset);
load_constants.host_pitch = host_slice_layout.Footprint.RowPitch;
load_constants.host_offset = uint32_t(level_host_slice_layout.Offset);
load_constants.host_pitch = level_host_slice_layout.Footprint.RowPitch;
uint32_t level_array_slice_stride_bytes_scaled =
level_guest_layout.array_slice_stride_bytes *
(texture_resolution_scale_x * texture_resolution_scale_y);
for (uint32_t slice = 0; slice < array_size; ++slice) {
D3D12_GPU_VIRTUAL_ADDRESS cbuffer_gpu_address;
uint8_t* cbuffer_mapping = cbuffer_pool.Request(
@ -1937,9 +1901,7 @@ bool D3D12TextureCache::LoadTextureDataFromResidentMemoryImpl(Texture& texture,
command_list.D3DDispatch(group_count_x, group_count_y,
load_constants.guest_offset +=
level_guest_layout.array_slice_stride_bytes *
(texture_resolution_scale_x * texture_resolution_scale_y);
load_constants.guest_offset += level_array_slice_stride_bytes_scaled;
load_constants.host_offset += host_slice_size;
@ -1977,15 +1939,21 @@ bool D3D12TextureCache::LoadTextureDataFromResidentMemoryImpl(Texture& texture,
texture_util::GetPackedMipOffset(width, height, depth, guest_format,
level, level_offset_blocks_x,
level_offset_blocks_y, level_offset_z);
source_box.left = level_offset_blocks_x * block_width;
source_box.top = level_offset_blocks_y * block_height;
source_box.left =
level_offset_blocks_x * block_width * texture_resolution_scale_x;
source_box.top =
level_offset_blocks_y * block_height * texture_resolution_scale_y;
source_box.front = level_offset_z;
source_box.right =
source_box.left +
xe::align(std::max(width >> level, uint32_t(1)), host_block_width);
xe::align(std::max((width * texture_resolution_scale_x) >> level,
source_box.bottom =
source_box.top +
xe::align(std::max(height >> level, uint32_t(1)), host_block_height);
xe::align(std::max((height * texture_resolution_scale_y) >> level,
source_box.back =
source_box.front + std::max(depth >> level, uint32_t(1));
source_box_ptr = &source_box;
@ -173,8 +173,8 @@ struct TextureGuestLayout {
// If mip_max_level specified at calculation time is at least 1, the stored
// mips are min(1, packed_mip_level) through min(mip_max_level,
// packed_mip_level).
Level mips[xenos::kTexture2DCubeMaxWidthHeightLog2 + 1];
uint32_t mip_offsets_bytes[xenos::kTexture2DCubeMaxWidthHeightLog2 + 1];
Level mips[xenos::kTextureMaxMips];
uint32_t mip_offsets_bytes[xenos::kTextureMaxMips];
uint32_t mips_total_extent_bytes;
uint32_t max_level;
// UINT32_MAX if there's no packed mip tail.
@ -1045,6 +1045,10 @@ constexpr uint32_t kTexture3DMaxWidthHeight = 1 << kTexture3DMaxWidthHeightLog2;
constexpr uint32_t kTexture3DMaxDepthLog2 = 10;
constexpr uint32_t kTexture3DMaxDepth = 1 << kTexture3DMaxDepthLog2;
constexpr uint32_t kTextureMaxMips =
std::max(kTexture2DCubeMaxWidthHeightLog2, kTexture3DMaxWidthHeightLog2) +
// Tiled texture sizes are in 32x32 increments for 2D, 32x32x4 for 3D.
// 2DTiledOffset(X * 32 + x, Y * 32 + y) ==
// 2DTiledOffset(X * 32, Y * 32) + 2DTiledOffset(x, y)
Reference in New Issue