[GPU] Fix tiled mip tail extent calculation
Previously, for mips, the dimensions of the texture weren't rounded to powers of two before calculating the mip tail extent, resulting in the mip tail for a 260 blocks tall texture, that contains mips ending at Y of up to 36, having the Y extent calculated as 32. With rounding to powers of two, it would have been 64. However, with the GetTiledAddressUpperBound functions, none of this is necessary at all (and neither is rounding the extents in TextureGuestLayout::Level to 32x32x4 blocks) - using the same code for calculating the XYZ extents of tiled textures as for linear textures now, which, for the mip tail, calculates the actual maximum coordinates of the mips stored in it - and rounding to tiles is done internally by GetTiledAddressUpperBound.
This commit is contained in:
parent
74f1f6bb6d
commit
a37b57ca8d
|
@ -355,90 +355,76 @@ TextureGuestLayout GetGuestTextureLayout(
|
|||
// be smaller (especially in the 1280x720 linear k_8_8_8_8 case in 4E4D083E,
|
||||
// for which memory exactly for 1280x720 is allocated, and aligning the
|
||||
// height to 32 would cause access of an unallocated page) or bigger than
|
||||
// the stride. For tiled textures, this is the dimensions aligned to 32x32x4
|
||||
// blocks (or x1 for the missing dimensions).
|
||||
uint32_t level_width_blocks =
|
||||
xe::align(std::max(width_texels >> level, uint32_t(1)),
|
||||
format_info->block_width) /
|
||||
format_info->block_width;
|
||||
uint32_t level_height_blocks =
|
||||
xe::align(std::max(height_texels >> level, uint32_t(1)),
|
||||
format_info->block_height) /
|
||||
format_info->block_height;
|
||||
uint32_t level_depth = std::max(depth >> level, uint32_t(1));
|
||||
if (is_tiled) {
|
||||
// the stride.
|
||||
if (level == layout.packed_level) {
|
||||
// Calculate the portion of the mip tail actually used by the needed mips.
|
||||
// The actually used region may be significantly smaller than the full
|
||||
// 32x32-texel-aligned (and, for mips, calculated from the base dimensions
|
||||
// rounded to powers of two - 58410A7A has an 80x260 tiled texture with
|
||||
// packed mips at level 3 containing a mip ending at Y = 36, while
|
||||
// 260 >> 3 == 32, but 512 >> 3 == 64) tail. A 2x2 texture (for example,
|
||||
// in 494707D4, there's a 2x2 k_8_8_8_8 linear texture with packed mips),
|
||||
// for instance, would have its 2x2 base at (16, 0) and its 1x1 mip at
|
||||
// (8, 0) - and we need 2 or 1 rows in these cases, not 32 - the 32 rows
|
||||
// in a linear texture (with 256-byte pitch alignment) would span two 4 KB
|
||||
// pages rather than one.
|
||||
level_layout.x_extent_blocks = 0;
|
||||
level_layout.y_extent_blocks = 0;
|
||||
level_layout.z_extent = 0;
|
||||
uint32_t packed_sublevel_last = is_base ? 0 : max_level;
|
||||
for (uint32_t packed_sublevel = layout.packed_level;
|
||||
packed_sublevel <= packed_sublevel_last; ++packed_sublevel) {
|
||||
uint32_t packed_sublevel_x_blocks;
|
||||
uint32_t packed_sublevel_y_blocks;
|
||||
uint32_t packed_sublevel_z;
|
||||
GetPackedMipOffset(width_texels, height_texels, depth, format,
|
||||
packed_sublevel, packed_sublevel_x_blocks,
|
||||
packed_sublevel_y_blocks, packed_sublevel_z);
|
||||
level_layout.x_extent_blocks = std::max(
|
||||
level_layout.x_extent_blocks,
|
||||
packed_sublevel_x_blocks +
|
||||
xe::align(
|
||||
std::max(width_texels >> packed_sublevel, uint32_t(1)),
|
||||
format_info->block_width) /
|
||||
format_info->block_width);
|
||||
level_layout.y_extent_blocks = std::max(
|
||||
level_layout.y_extent_blocks,
|
||||
packed_sublevel_y_blocks +
|
||||
xe::align(
|
||||
std::max(height_texels >> packed_sublevel, uint32_t(1)),
|
||||
format_info->block_height) /
|
||||
format_info->block_height);
|
||||
level_layout.z_extent =
|
||||
std::max(level_layout.z_extent,
|
||||
packed_sublevel_z +
|
||||
std::max(depth >> packed_sublevel, uint32_t(1)));
|
||||
}
|
||||
} else {
|
||||
level_layout.x_extent_blocks =
|
||||
xe::align(level_width_blocks, xenos::kTextureTileWidthHeight);
|
||||
xe::align(std::max(width_texels >> level, uint32_t(1)),
|
||||
format_info->block_width) /
|
||||
format_info->block_width;
|
||||
level_layout.y_extent_blocks =
|
||||
xe::align(level_height_blocks, xenos::kTextureTileWidthHeight);
|
||||
xe::align(std::max(height_texels >> level, uint32_t(1)),
|
||||
format_info->block_height) /
|
||||
format_info->block_height;
|
||||
level_layout.z_extent = std::max(depth >> level, uint32_t(1));
|
||||
}
|
||||
if (is_tiled) {
|
||||
uint32_t bytes_per_block_log2 = xe::log2_floor(bytes_per_block);
|
||||
if (dimension == xenos::DataDimension::k3D) {
|
||||
level_layout.z_extent =
|
||||
xe::align(level_depth, xenos::kTextureTileDepth);
|
||||
// 32-block-row x 4 slice portions laid out sequentially (4-slice-major,
|
||||
// 32-block-row-minor), address extent within a 32x32x4 tile depends on
|
||||
// the pitch. Origins of 32x32x4 tiles grow monotonically, first along
|
||||
// Z, then along Y, then along X.
|
||||
level_layout.array_slice_data_extent_bytes =
|
||||
GetTiledAddressUpperBound3D(
|
||||
level_layout.x_extent_blocks, level_layout.y_extent_blocks,
|
||||
level_layout.z_extent, row_pitch_blocks_tile_aligned,
|
||||
level_layout.y_extent_blocks, bytes_per_block_log2);
|
||||
} else {
|
||||
level_layout.z_extent = 1;
|
||||
// Origins of 32x32 tiles grow monotonically, first along Y, then along
|
||||
// X.
|
||||
level_layout.array_slice_data_extent_bytes =
|
||||
GetTiledAddressUpperBound2D(
|
||||
level_layout.x_extent_blocks, level_layout.y_extent_blocks,
|
||||
row_pitch_blocks_tile_aligned, bytes_per_block_log2);
|
||||
}
|
||||
} else {
|
||||
if (level == layout.packed_level) {
|
||||
// Calculate the portion of the mip tail actually used by the needed
|
||||
// mips. The actually used region may be significantly smaller than the
|
||||
// full 32x32-texel-aligned tail. A 2x2 texture (for example, in
|
||||
// 494707D4, there's a 2x2 k_8_8_8_8 linear texture with packed mips),
|
||||
// for instance, would have its 2x2 base at (16, 0) and its 1x1 mip at
|
||||
// (8, 0) - and we need 2 or 1 rows in these cases, not 32 - the 32 rows
|
||||
// would span two 4 KB pages rather than one, taking the 256-byte pitch
|
||||
// alignment in linear textures into account.
|
||||
level_layout.x_extent_blocks = 0;
|
||||
level_layout.y_extent_blocks = 0;
|
||||
level_layout.z_extent = 0;
|
||||
uint32_t packed_sublevel_last = is_base ? 0 : max_level;
|
||||
for (uint32_t packed_sublevel = layout.packed_level;
|
||||
packed_sublevel <= packed_sublevel_last; ++packed_sublevel) {
|
||||
uint32_t packed_sublevel_x_blocks;
|
||||
uint32_t packed_sublevel_y_blocks;
|
||||
uint32_t packed_sublevel_z;
|
||||
GetPackedMipOffset(width_texels, height_texels, depth, format,
|
||||
packed_sublevel, packed_sublevel_x_blocks,
|
||||
packed_sublevel_y_blocks, packed_sublevel_z);
|
||||
level_layout.x_extent_blocks = std::max(
|
||||
level_layout.x_extent_blocks,
|
||||
packed_sublevel_x_blocks +
|
||||
xe::align(
|
||||
std::max(width_texels >> packed_sublevel, uint32_t(1)),
|
||||
format_info->block_width) /
|
||||
format_info->block_width);
|
||||
level_layout.y_extent_blocks = std::max(
|
||||
level_layout.y_extent_blocks,
|
||||
packed_sublevel_y_blocks +
|
||||
xe::align(
|
||||
std::max(height_texels >> packed_sublevel, uint32_t(1)),
|
||||
format_info->block_height) /
|
||||
format_info->block_height);
|
||||
level_layout.z_extent =
|
||||
std::max(level_layout.z_extent,
|
||||
packed_sublevel_z +
|
||||
std::max(depth >> packed_sublevel, uint32_t(1)));
|
||||
}
|
||||
} else {
|
||||
level_layout.x_extent_blocks = level_width_blocks;
|
||||
level_layout.y_extent_blocks = level_height_blocks;
|
||||
level_layout.z_extent = level_depth;
|
||||
}
|
||||
level_layout.array_slice_data_extent_bytes =
|
||||
z_stride_bytes * (level_layout.z_extent - 1) +
|
||||
level_layout.row_pitch_bytes * (level_layout.y_extent_blocks - 1) +
|
||||
|
|
|
@ -144,24 +144,27 @@ struct TextureGuestLayout {
|
|||
// multiplied by the array slice count.
|
||||
uint32_t array_slice_stride_bytes;
|
||||
|
||||
// Estimated amount of memory this level occupies, and variables involved in
|
||||
// its calculation. Not aligned to kTextureSubresourceAlignmentBytes. For
|
||||
// tiled textures, this will be rounded to 32x32x4 blocks (or 32x32x1
|
||||
// depending on the dimension), but for the linear subresources, this may be
|
||||
// significantly (including less 4 KB pages) smaller than the aligned size
|
||||
// (like for 4E4D083E where aligning the height of a 1280x720 linear texture
|
||||
// results in access violations). For the linear mip tail, this includes all
|
||||
// the mip levels stored in it. If the width is bigger than the pitch, this
|
||||
// will also be taken into account for the last row so all memory actually
|
||||
// used by the texture will be loaded, and may be bigger than the distance
|
||||
// between array slices or levels. The purpose of this parameter is to make
|
||||
// the memory amount that needs to be resident as close to the real amount
|
||||
// as possible, to make sure all the needed data will be read, but also, if
|
||||
// possible, unneeded memory pages won't be accessed (since that may trigger
|
||||
// an access violation on the CPU).
|
||||
// The exclusive upper bound of blocks needed at this level (this level for
|
||||
// non-packed levels, or all the packed levels for the packed mip tail).
|
||||
uint32_t x_extent_blocks;
|
||||
uint32_t y_extent_blocks;
|
||||
uint32_t z_extent;
|
||||
// Estimated amount of memory this level occupies. Not aligned to
|
||||
// kTextureSubresourceAlignmentBytes. For tiled textures, this will be
|
||||
// calculated for the extent rounded to 32x32x4 blocks (or 32x32x1 depending
|
||||
// on the dimensionality), but for linear textures, as well as for mips of
|
||||
// non-power-of-two tiled textures, this may be significantly (including
|
||||
// less 4 KB pages) smaller than the aligned size (like for 4E4D083E where
|
||||
// aligning the height of a 1280x720 linear texture results in access
|
||||
// violations). For the linear mip tail, this includes all the mip levels
|
||||
// stored in it. If the width is bigger than the pitch, this will also be
|
||||
// taken into account for the last row so all memory actually used by the
|
||||
// texture will be loaded, and may be bigger than the distance between array
|
||||
// slices or levels. The purpose of this parameter is to make the memory
|
||||
// amount that needs to be resident as close to the real amount as possible,
|
||||
// to make sure all the needed data will be read, but also, if possible,
|
||||
// unneeded memory pages won't be accessed (since that may trigger an access
|
||||
// violation on the CPU).
|
||||
uint32_t array_slice_data_extent_bytes;
|
||||
// Including all array slices.
|
||||
uint32_t level_data_extent_bytes;
|
||||
|
|
Loading…
Reference in New Issue