[GPU] Linear mip tail exact extent estimation

This commit is contained in:
Triang3l 2021-05-15 18:19:06 +03:00
parent a69a058991
commit dd5ea87213
4 changed files with 309 additions and 280 deletions

View File

@ -2093,14 +2093,35 @@ void TextureCache::BindingInfoFromFetchConstant(
// No texture data at all. // No texture data at all.
return; return;
} }
// TODO(Triang3l): Support long 1D textures. if (fetch.dimension == xenos::DataDimension::k1D) {
if (fetch.dimension == xenos::DataDimension::k1D && bool is_invalid_1d = false;
width > xenos::kTexture2DCubeMaxWidthHeight) { // TODO(Triang3l): Support long 1D textures.
XELOGE( if (width > xenos::kTexture2DCubeMaxWidthHeight) {
"1D texture is too wide ({}) - ignoring! " XELOGE(
"Report the game to Xenia developers", "1D texture is too wide ({}) - ignoring! Report the game to Xenia "
width); "developers",
return; width);
is_invalid_1d = true;
}
assert_false(fetch.tiled);
if (fetch.tiled) {
XELOGE(
"1D texture has tiling enabled in the fetch constant, but this "
"appears to be completely wrong - ignoring! Report the game to Xenia "
"developers");
is_invalid_1d = true;
}
assert_false(fetch.packed_mips);
if (fetch.packed_mips) {
XELOGE(
"1D texture has packed mips enabled in the fetch constant, but this "
"appears to be completely wrong - ignoring! Report the game to Xenia "
"developers");
is_invalid_1d = true;
}
if (is_invalid_1d) {
return;
}
} }
xenos::TextureFormat format = GetBaseFormat(fetch.format); xenos::TextureFormat format = GetBaseFormat(fetch.format);
@ -2411,7 +2432,7 @@ bool TextureCache::LoadTextureData(Texture* texture) {
if (!level_packed) { if (!level_packed) {
// Loading the packed tail for the base - load the whole tail to copy // Loading the packed tail for the base - load the whole tail to copy
// regions out of it. // regions out of it.
const texture_util::TextureGuestLevelLayout& guest_layout_base = const texture_util::TextureGuestLayout::Level& guest_layout_base =
texture->guest_layout.base; texture->guest_layout.base;
host_slice_layout_base.Footprint.Width = host_slice_layout_base.Footprint.Width =
guest_layout_base.x_extent_blocks * block_width; guest_layout_base.x_extent_blocks * block_width;
@ -2452,7 +2473,7 @@ bool TextureCache::LoadTextureData(Texture* texture) {
if (level == level_packed) { if (level == level_packed) {
// Loading the packed tail for the mips - load the whole tail to copy // Loading the packed tail for the mips - load the whole tail to copy
// regions out of it. // regions out of it.
const texture_util::TextureGuestLevelLayout& const texture_util::TextureGuestLayout::Level&
guest_layout_packed_mips = texture->guest_layout.mips[level]; guest_layout_packed_mips = texture->guest_layout.mips[level];
host_slice_layout_mip.Footprint.Width = host_slice_layout_mip.Footprint.Width =
guest_layout_packed_mips.x_extent_blocks * block_width; guest_layout_packed_mips.x_extent_blocks * block_width;
@ -2634,7 +2655,7 @@ bool TextureCache::LoadTextureData(Texture* texture) {
load_constants.guest_offset += load_constants.guest_offset +=
texture->guest_layout.mip_offsets_bytes[level]; texture->guest_layout.mip_offsets_bytes[level];
} }
const texture_util::TextureGuestLevelLayout& level_guest_layout = const texture_util::TextureGuestLayout::Level& level_guest_layout =
is_base ? texture->guest_layout.base is_base ? texture->guest_layout.base
: texture->guest_layout.mips[level]; : texture->guest_layout.mips[level];
uint32_t level_guest_pitch = level_guest_layout.row_pitch_bytes; uint32_t level_guest_pitch = level_guest_layout.row_pitch_bytes;

View File

@ -960,10 +960,10 @@ bool GetResolveInfo(const RegisterFile& regs, const Memory& memory,
// Need a subregion size, not the full subresource size - thus not aligning // Need a subregion size, not the full subresource size - thus not aligning
// to xenos::kTextureSubresourceAlignmentBytes. // to xenos::kTextureSubresourceAlignmentBytes.
copy_dest_length = copy_dest_length =
texture_util::GetGuestLevelLayout( texture_util::GetGuestTextureLayout(
dest_dimension, copy_dest_pitch_aligned_div_32, uint32_t(x1 - x0), dest_dimension, copy_dest_pitch_aligned_div_32, uint32_t(x1 - x0),
dest_height, dest_depth, true, dest_format, false, 0, false) dest_height, dest_depth, true, dest_format, false, true, 0)
.level_data_extent_bytes; .base.level_data_extent_bytes;
} else { } else {
XELOGE("Tried to resolve to format {}, which is not a ColorFormat", XELOGE("Tried to resolve to format {}, which is not a ColorFormat",
dest_format_info.name); dest_format_info.name);

View File

@ -204,35 +204,22 @@ bool GetPackedMipOffset(uint32_t width, uint32_t height, uint32_t depth,
return true; return true;
} }
TextureGuestLevelLayout GetGuestLevelLayout( TextureGuestLayout GetGuestTextureLayout(
xenos::DataDimension dimension, uint32_t base_pitch_texels_div_32, xenos::DataDimension dimension, uint32_t base_pitch_texels_div_32,
uint32_t width_texels, uint32_t height_texels, uint32_t depth_or_array_size, uint32_t width_texels, uint32_t height_texels, uint32_t depth_or_array_size,
bool is_tiled, xenos::TextureFormat format, bool is_mip, uint32_t level, bool is_tiled, xenos::TextureFormat format, bool has_packed_levels,
bool is_packed_level) { bool has_base, uint32_t max_level) {
// If with packed mips the mips 1... happen to be packed in what's stored as TextureGuestLayout layout;
// mip 0, this mip tail appears to be stored like mips (with power of two size
// rounding) rather than like the base level (with the pitch from the fetch
// constant), so we distinguish between them for mip == 0.
// Base is by definition the level 0.
assert_false(!is_mip && level);
// Level 0 for mips is the special case for a packed mip tail of very small
// textures, where the tail is stored like it's at the level 0.
assert_false(is_mip && !level && !is_packed_level);
TextureGuestLevelLayout layout; if (dimension == xenos::DataDimension::k1D) {
assert_false(is_tiled);
// For safety, for instance, with empty resolve regions (extents calculation // GetPackedMipOffset may result in packing along Y for `width > height`
// may overflow otherwise due to the assumption of at least one row, for // textures.
// example, but an empty texture is empty anyway). assert_false(has_packed_levels);
if (!width_texels || height_texels = 1;
(dimension != xenos::DataDimension::k1D && !height_texels) ||
((dimension == xenos::DataDimension::k2DOrStacked ||
dimension == xenos::DataDimension::k3D) &&
!depth_or_array_size)) {
std::memset(&layout, 0, sizeof(layout));
return layout;
} }
uint32_t depth =
dimension == xenos::DataDimension::k3D ? depth_or_array_size : 1;
switch (dimension) { switch (dimension) {
case xenos::DataDimension::k2DOrStacked: case xenos::DataDimension::k2DOrStacked:
layout.array_size = depth_or_array_size; layout.array_size = depth_or_array_size;
@ -244,158 +231,17 @@ TextureGuestLevelLayout GetGuestLevelLayout(
layout.array_size = 1; layout.array_size = 1;
} }
const FormatInfo* format_info = FormatInfo::Get(format); // For safety, for instance, with empty resolve regions (extents calculation
uint32_t bytes_per_block = format_info->bytes_per_block(); // may overflow otherwise due to the assumption of at least one row, for
// example, but an empty texture is empty anyway).
// Calculate the strides. if (!width_texels || !height_texels || !depth || !layout.array_size) {
// Mips have row / depth slice strides calculated from a mip of a texture std::memset(&layout, 0, sizeof(layout));
// whose base size is a power of two. return layout;
// The base mip has tightly packed depth slices, and takes the row pitch from
// the fetch constant.
// For stride calculation purposes, mip dimensions are always aligned to
// 32x32x4 blocks (or x1 for the missing dimensions), including for linear
// textures.
// Linear texture rows are 256-byte-aligned.
uint32_t row_pitch_texels_unaligned;
uint32_t z_slice_stride_texel_rows_unaligned;
if (is_mip) {
row_pitch_texels_unaligned =
std::max(xe::next_pow2(width_texels) >> level, uint32_t(1));
z_slice_stride_texel_rows_unaligned =
std::max(xe::next_pow2(height_texels) >> level, uint32_t(1));
} else {
row_pitch_texels_unaligned = base_pitch_texels_div_32 << 5;
z_slice_stride_texel_rows_unaligned = height_texels;
}
uint32_t row_pitch_blocks_tile_aligned = xe::align(
xe::align(row_pitch_texels_unaligned, format_info->block_width) /
format_info->block_width,
xenos::kTextureTileWidthHeight);
layout.row_pitch_bytes = row_pitch_blocks_tile_aligned * bytes_per_block;
// Assuming the provided pitch is already 256-byte-aligned for linear, but
// considering the guest-provided pitch more important (no information about
// how the GPU actually handles unaligned rows).
if (!is_tiled && is_mip) {
layout.row_pitch_bytes = xe::align(layout.row_pitch_bytes,
xenos::kTextureLinearRowAlignmentBytes);
}
layout.z_slice_stride_block_rows =
dimension != xenos::DataDimension::k1D
? xe::align(xe::align(z_slice_stride_texel_rows_unaligned,
format_info->block_height) /
format_info->block_height,
xenos::kTextureTileWidthHeight)
: 1;
layout.array_slice_stride_bytes =
layout.row_pitch_bytes * layout.z_slice_stride_block_rows;
uint32_t z_stride_bytes = layout.array_slice_stride_bytes;
if (dimension == xenos::DataDimension::k3D) {
layout.array_slice_stride_bytes *=
xe::align(depth_or_array_size, xenos::kTextureTiledDepthGranularity);
}
uint32_t array_slice_stride_bytes_non_4kb_aligned =
layout.array_slice_stride_bytes;
layout.array_slice_stride_bytes =
xe::align(array_slice_stride_bytes_non_4kb_aligned,
xenos::kTextureSubresourceAlignmentBytes);
// Estimate the memory amount actually referenced by the texture, which may be
// smaller (especially in the 2x2 linear k_8_8_8_8 case in Test Drive
// Unlimited, for which 4 KB are allocated, while the stride is 8 KB) or
// bigger than the stride. For tiled textures, this is the dimensions aligned
// to 32x32x4 blocks (or x1 for the missing dimensions).
// For linear, doing almost the same for the mip tail (which can be used for
// both the mips and, if the texture is very small, the base) because it
// stores multiple mips outside the first mip in it in the tile padding
// (though there's no need to align the size to the next power of two for this
// purpose for mips - packed mips are only used when min(width, height) <= 16,
// and packing is first done along the shorter axis - even if the longer axis
// is larger than 32, nothing will be packed beyond the extent of the longer
// axis). "Almost" because for linear textures, we're rounding the size to
// 32x32x4 texels, not blocks - first packed mips start from 16-texel, not
// 16-block, shortest dimension, and are placed in 32x- or x32-texel tiles,
// while 32 blocks for compressed textures are bigger in memory than 32
// texels.
layout.x_extent_blocks = xe::align(width_texels, format_info->block_width) /
format_info->block_width;
layout.y_extent_blocks =
dimension != xenos::DataDimension::k1D
? xe::align(height_texels, format_info->block_height) /
format_info->block_height
: 1;
layout.z_extent =
dimension == xenos::DataDimension::k3D ? depth_or_array_size : 1;
if (is_tiled) {
layout.x_extent_blocks =
xe::align(layout.x_extent_blocks, xenos::kTextureTileWidthHeight);
assert_true(dimension != xenos::DataDimension::k1D);
layout.y_extent_blocks =
xe::align(layout.y_extent_blocks, xenos::kTextureTileWidthHeight);
if (dimension == xenos::DataDimension::k3D) {
layout.z_extent =
xe::align(layout.z_extent, xenos::kTextureTiledDepthGranularity);
// 3D texture addressing is pretty complex, so it's hard to determine the
// memory extent of a subregion - just use pitch_tiles * height_tiles *
// depth_tiles * bytes_per_tile at least for now, until we find a case
// where it causes issues. width > pitch is a very weird edge case anyway,
// and is extremely unlikely.
assert_true(layout.x_extent_blocks <= row_pitch_blocks_tile_aligned);
layout.array_slice_data_extent_bytes =
array_slice_stride_bytes_non_4kb_aligned;
} else {
// 2D 32x32-block tiles are laid out linearly in the texture.
// Calculate the extent as ((all rows except for the last * pitch in
// tiles + last row length in tiles) * bytes per tile).
layout.array_slice_data_extent_bytes =
(layout.y_extent_blocks - xenos::kTextureTileWidthHeight) *
layout.row_pitch_bytes +
bytes_per_block * layout.x_extent_blocks *
xenos::kTextureTileWidthHeight;
}
} else {
if (is_packed_level) {
layout.x_extent_blocks =
xe::align(layout.x_extent_blocks,
xenos::kTextureTileWidthHeight / format_info->block_width);
if (dimension != xenos::DataDimension::k1D) {
layout.y_extent_blocks =
xe::align(layout.y_extent_blocks, xenos::kTextureTileWidthHeight /
format_info->block_height);
if (dimension == xenos::DataDimension::k3D) {
layout.z_extent =
xe::align(layout.z_extent, xenos::kTextureTiledDepthGranularity);
}
}
}
layout.array_slice_data_extent_bytes =
z_stride_bytes * (layout.z_extent - 1) +
layout.row_pitch_bytes * (layout.y_extent_blocks - 1) +
bytes_per_block * layout.x_extent_blocks;
}
layout.level_data_extent_bytes =
layout.array_slice_stride_bytes * (layout.array_size - 1) +
layout.array_slice_data_extent_bytes;
return layout;
}
TextureGuestLayout GetGuestTextureLayout(
xenos::DataDimension dimension, uint32_t base_pitch_texels_div_32,
uint32_t width_texels, uint32_t height_texels, uint32_t depth_or_array_size,
bool is_tiled, xenos::TextureFormat format, bool has_packed_levels,
bool has_base, uint32_t max_level) {
TextureGuestLayout layout;
if (dimension == xenos::DataDimension::k1D) {
height_texels = 1;
} }
// For safety, clamp the maximum level. // For safety, clamp the maximum level.
uint32_t longest_axis = std::max(width_texels, height_texels); uint32_t max_level_for_dimensions =
if (dimension == xenos::DataDimension::k3D) { xe::log2_floor(std::max(std::max(width_texels, height_texels), depth));
longest_axis = std::max(longest_axis, depth_or_array_size);
}
uint32_t max_level_for_dimensions = xe::log2_floor(longest_axis);
assert_true(max_level <= max_level_for_dimensions); assert_true(max_level <= max_level_for_dimensions);
max_level = std::min(max_level, max_level_for_dimensions); max_level = std::min(max_level, max_level_for_dimensions);
layout.max_level = max_level; layout.max_level = max_level;
@ -404,33 +250,210 @@ TextureGuestLayout GetGuestTextureLayout(
? GetPackedMipLevel(width_texels, height_texels) ? GetPackedMipLevel(width_texels, height_texels)
: UINT32_MAX; : UINT32_MAX;
if (has_base) { // Clear unused level layouts to zero strides/sizes.
layout.base = if (!has_base) {
GetGuestLevelLayout(dimension, base_pitch_texels_div_32, width_texels,
height_texels, depth_or_array_size, is_tiled,
format, false, 0, layout.packed_level == 0);
} else {
std::memset(&layout.base, 0, sizeof(layout.base)); std::memset(&layout.base, 0, sizeof(layout.base));
} }
if (layout.packed_level != 0) {
std::memset(layout.mips, 0, sizeof(layout.mips)); std::memset(&layout.mips[0], 0, sizeof(layout.mips[0]));
std::memset(layout.mip_offsets_bytes, 0, sizeof(layout.mip_offsets_bytes)); }
uint32_t max_stored_level = std::min(max_level, layout.packed_level);
{
uint32_t mips_end = max_stored_level + 1;
assert_true(mips_end <= xe::countof(layout.mips));
uint32_t mips_unused_count = uint32_t(xe::countof(layout.mips)) - mips_end;
if (mips_unused_count) {
std::memset(&layout.mips[mips_end], 0,
sizeof(layout.mips[0]) * mips_unused_count);
std::memset(&layout.mip_offsets_bytes[mips_end], 0,
sizeof(layout.mip_offsets_bytes[0]) * mips_unused_count);
}
}
layout.mips_total_extent_bytes = 0; layout.mips_total_extent_bytes = 0;
if (max_level) {
uint32_t mip_offset_bytes = 0; const FormatInfo* format_info = FormatInfo::Get(format);
uint32_t max_stored_mip = std::min(max_level, layout.packed_level); uint32_t bytes_per_block = format_info->bytes_per_block();
for (uint32_t mip = std::min(uint32_t(1), layout.packed_level);
mip <= max_stored_mip; ++mip) { // The loop counter can mean two things depending on whether the packed mip
layout.mip_offsets_bytes[mip] = mip_offset_bytes; // tail is stored as mip 0, because in this case, it would be ambiguous since
TextureGuestLevelLayout& mip_layout = layout.mips[mip]; // both the base and the mips would be on "level 0", but stored separately and
mip_layout = // possibly with a different layout.
GetGuestLevelLayout(dimension, base_pitch_texels_div_32, width_texels, uint32_t loop_level_last;
height_texels, depth_or_array_size, is_tiled, if (layout.packed_level == 0) {
format, true, mip, mip == layout.packed_level); // Packed mip tail is the level 0 - may need to load mip tails for the base,
// the mips, or both.
// Loop iteration 0 - base packed mip tail.
// Loop iteration 1 - mips packed mip tail.
loop_level_last = uint32_t(max_level != 0);
} else {
// Packed mip tail is not the level 0.
// Loop iteration is the actual level being loaded.
loop_level_last = max_stored_level;
}
uint32_t mip_offset_bytes = 0;
for (uint32_t loop_level = has_base ? 0 : 1; loop_level <= loop_level_last;
++loop_level) {
bool is_base = loop_level == 0;
uint32_t level = (layout.packed_level == 0) ? 0 : loop_level;
TextureGuestLayout::Level& level_layout =
is_base ? layout.base : layout.mips[level];
// Calculate the strides.
// Mips have row / depth slice strides calculated from a mip of a texture
// whose base size is a power of two.
// The base mip has tightly packed depth slices, and takes the row pitch
// from the fetch constant.
// For stride calculation purposes, mip dimensions are always aligned to
// 32x32x4 blocks (or x1 for the missing dimensions), including for linear
// textures.
// Linear texture rows are 256-byte-aligned.
uint32_t row_pitch_texels_unaligned;
uint32_t z_slice_stride_texel_rows_unaligned;
if (is_base) {
row_pitch_texels_unaligned = base_pitch_texels_div_32 << 5;
z_slice_stride_texel_rows_unaligned = height_texels;
} else {
row_pitch_texels_unaligned =
std::max(xe::next_pow2(width_texels) >> level, uint32_t(1));
z_slice_stride_texel_rows_unaligned =
std::max(xe::next_pow2(height_texels) >> level, uint32_t(1));
}
uint32_t row_pitch_blocks_tile_aligned = xe::align(
xe::align(row_pitch_texels_unaligned, format_info->block_width) /
format_info->block_width,
xenos::kTextureTileWidthHeight);
level_layout.row_pitch_bytes =
row_pitch_blocks_tile_aligned * bytes_per_block;
// Assuming the provided pitch is already 256-byte-aligned for linear, but
// considering the guest-provided pitch more important (no information about
// how the GPU actually handles unaligned rows).
if (!is_tiled && !is_base) {
level_layout.row_pitch_bytes = xe::align(
level_layout.row_pitch_bytes, xenos::kTextureLinearRowAlignmentBytes);
}
level_layout.z_slice_stride_block_rows =
dimension != xenos::DataDimension::k1D
? xe::align(xe::align(z_slice_stride_texel_rows_unaligned,
format_info->block_height) /
format_info->block_height,
xenos::kTextureTileWidthHeight)
: 1;
level_layout.array_slice_stride_bytes =
level_layout.row_pitch_bytes * level_layout.z_slice_stride_block_rows;
uint32_t z_stride_bytes = level_layout.array_slice_stride_bytes;
if (dimension == xenos::DataDimension::k3D) {
level_layout.array_slice_stride_bytes *=
xe::align(depth_or_array_size, xenos::kTextureTiledDepthGranularity);
}
uint32_t array_slice_stride_bytes_non_4kb_aligned =
level_layout.array_slice_stride_bytes;
level_layout.array_slice_stride_bytes =
xe::align(array_slice_stride_bytes_non_4kb_aligned,
xenos::kTextureSubresourceAlignmentBytes);
// Estimate the memory amount actually referenced by the texture, which may
// be smaller (especially in the 1280x720 linear k_8_8_8_8 case in Ridge
// Racer Unbounded, for which memory exactly for 1280x720 is allocated, and
// aligning the height to 32 would cause access of an unallocated page) or
// bigger than the stride. For tiled textures, this is the dimensions
// aligned to 32x32x4 blocks (or x1 for the missing dimensions).
uint32_t level_width_blocks =
xe::align(std::max(width_texels >> level, uint32_t(1)),
format_info->block_width) /
format_info->block_width;
uint32_t level_height_blocks =
xe::align(std::max(height_texels >> level, uint32_t(1)),
format_info->block_height) /
format_info->block_height;
uint32_t level_depth = std::max(depth >> level, uint32_t(1));
if (is_tiled) {
level_layout.x_extent_blocks =
xe::align(level_width_blocks, xenos::kTextureTileWidthHeight);
level_layout.y_extent_blocks =
xe::align(level_height_blocks, xenos::kTextureTileWidthHeight);
if (dimension == xenos::DataDimension::k3D) {
level_layout.z_extent =
xe::align(level_depth, xenos::kTextureTiledDepthGranularity);
// 3D texture addressing is pretty complex, so it's hard to determine
// the memory extent of a subregion - just use `pitch_tiles *
// height_tiles * depth_tiles * bytes_per_tile` at least for now, until
// we find a case where it causes issues. `width > pitch` is a very
// weird edge case anyway, and is extremely unlikely.
assert_true(level_layout.x_extent_blocks <=
row_pitch_blocks_tile_aligned);
level_layout.array_slice_data_extent_bytes =
array_slice_stride_bytes_non_4kb_aligned;
} else {
level_layout.z_extent = 1;
// 2D 32x32-block tiles are laid out linearly in the texture.
// Calculate the extent as ((all rows except for the last * pitch in
// tiles + last row length in tiles) * bytes per tile).
level_layout.array_slice_data_extent_bytes =
(level_layout.y_extent_blocks - xenos::kTextureTileWidthHeight) *
level_layout.row_pitch_bytes +
bytes_per_block * level_layout.x_extent_blocks *
xenos::kTextureTileWidthHeight;
}
} else {
if (level == layout.packed_level) {
// Calculate the portion of the mip tail actually used by the needed
// mips. The actually used region may be significantly smaller than the
// full 32x32-texel-aligned tail. A 2x2 texture (for example, in Test
// Drive Unlimited, there's a 2x2 k_8_8_8_8 linear texture with packed
// mips), for instance, would have its 2x2 base at (16, 0) and its 1x1
// mip at (8, 0) - and we need 2 or 1 rows in these cases, not 32.
level_layout.x_extent_blocks = 0;
level_layout.y_extent_blocks = 0;
level_layout.z_extent = 0;
uint32_t packed_sublevel_last = is_base ? 0 : max_level;
for (uint32_t packed_sublevel = layout.packed_level;
packed_sublevel <= packed_sublevel_last; ++packed_sublevel) {
uint32_t packed_sublevel_x_blocks;
uint32_t packed_sublevel_y_blocks;
uint32_t packed_sublevel_z;
GetPackedMipOffset(width_texels, height_texels, depth, format,
packed_sublevel, packed_sublevel_x_blocks,
packed_sublevel_y_blocks, packed_sublevel_z);
level_layout.x_extent_blocks = std::max(
level_layout.x_extent_blocks,
packed_sublevel_x_blocks +
xe::align(
std::max(width_texels >> packed_sublevel, uint32_t(1)),
format_info->block_width) /
format_info->block_width);
level_layout.y_extent_blocks = std::max(
level_layout.y_extent_blocks,
packed_sublevel_y_blocks +
xe::align(
std::max(height_texels >> packed_sublevel, uint32_t(1)),
format_info->block_height) /
format_info->block_height);
level_layout.z_extent =
std::max(level_layout.z_extent,
packed_sublevel_z +
std::max(depth >> packed_sublevel, uint32_t(1)));
}
} else {
level_layout.x_extent_blocks = level_width_blocks;
level_layout.y_extent_blocks = level_height_blocks;
level_layout.z_extent = level_depth;
}
level_layout.array_slice_data_extent_bytes =
z_stride_bytes * (level_layout.z_extent - 1) +
level_layout.row_pitch_bytes * (level_layout.y_extent_blocks - 1) +
bytes_per_block * level_layout.x_extent_blocks;
}
level_layout.level_data_extent_bytes =
level_layout.array_slice_stride_bytes * (layout.array_size - 1) +
level_layout.array_slice_data_extent_bytes;
if (!is_base) {
layout.mip_offsets_bytes[level] = mip_offset_bytes;
layout.mips_total_extent_bytes = layout.mips_total_extent_bytes =
std::max(layout.mips_total_extent_bytes, std::max(layout.mips_total_extent_bytes,
mip_offset_bytes + mip_layout.level_data_extent_bytes); mip_offset_bytes + level_layout.level_data_extent_bytes);
mip_offset_bytes += mip_layout.next_level_distance_bytes(); mip_offset_bytes +=
level_layout.array_slice_stride_bytes * layout.array_size;
} }
} }

View File

@ -97,18 +97,20 @@ bool GetPackedMipOffset(uint32_t width, uint32_t height, uint32_t depth,
// disassembly, which only checks the flag whether the data is packed passed to // disassembly, which only checks the flag whether the data is packed passed to
// it, not the level, to see if it needs to calculate the offset in the mip // it, not the level, to see if it needs to calculate the offset in the mip
// tail, and the offset calculation function doesn't have level == 0 checks in // tail, and the offset calculation function doesn't have level == 0 checks in
// it, only early-out if level < packed tail level (which can be 0). // it, only early-out if level < packed tail level (which can be 0). There are
// examples of textures with packed base, for example, in the intro level of
// Prey (8x8 linear DXT1 - pairs of orange lights in the bottom of gambling
// machines).
// //
// Linear texture rows are aligned to 256 bytes, for both the base and the mips // Linear texture rows are aligned to 256 bytes, for both the base and the mips
// (for the base, Direct3D 9 writes an already 256-byte-aligned pitch to the // (for the base, Direct3D 9 writes an already 256-byte-aligned pitch to the
// fetch constant). // fetch constant).
// //
// However, all the 32x32x4 padding, being just padding, is not necessarily // However, all the 32x32x4 padding, being just padding, is not necessarily
// being actually accessed, especially for linear textures. Test Drive Unlimited // being actually accessed, especially for linear textures. Ridge Racer
// has a 2x2 k_8_8_8_8 linear texture, and allocates 4 KB for it (with accessing // Unbounded has a 1280x720 k_8_8_8_8 linear texture, and allocates memory for
// the page beyond it triggering an access violation), while a 32x32 k_8_8_8_8 // exactly 1280x720, so aligning the height to 32 to 1280x736 results in access
// linear texture, with rows aligned to 256 bytes (so stored like 64x32) would // violations. So, while for stride calculations all the padding must be
// take 8 KB. So, while for stride calculations all the padding must be
// respected, for actual memory loads it's better to avoid trying to access it // respected, for actual memory loads it's better to avoid trying to access it
// when possible: // when possible:
// - If the pitch is bigger than the width, it's better to calculate the last // - If the pitch is bigger than the width, it's better to calculate the last
@ -116,86 +118,69 @@ bool GetPackedMipOffset(uint32_t width, uint32_t height, uint32_t depth,
// in the other direction though - pitch < width is a weird situation, but // in the other direction though - pitch < width is a weird situation, but
// probably legal, and may lead to reading data from beyond the calculated // probably legal, and may lead to reading data from beyond the calculated
// subresource stride). // subresource stride).
// - For linear textures (like that 2x2 example from Test Drive Unlimited), it's // - For linear textures (like that 1280x720 example from Ridge Racer
// easy to calculate the exact memory extent that may be accessed knowing the // Unbounded), it's easy to calculate the exact memory extent that may be
// dimensions (unlike for tiled textures with complex addressing within // accessed knowing the dimensions (unlike for tiled textures with complex
// 32x32x4-block tiles), so there's no need to align them to 32x32x4 for // addressing within 32x32x4-block tiles), so there's no need to align them to
// memory extent calculation - that's what appears to cause that crash in Test // 32x32x4 for memory extent calculation.
// Drive Unlimited. // - For the linear packed mip tail, the extent can be calculated as max of
// - The exception here is the packed mip tail for linear textures, as smaller // (block offsets + block extents) of all levels stored in it.
// mips are stored in the 32x32x4-texel padding. However, the packed mip
// tail needs to be aligned only to 32x32 texels, not to 32x32 blocks - so
// for compressed textures, the padding may be smaller, only to 8x8 blocks.
// //
// 1D textures are always linear. // 1D textures are always linear and likely can't have packed mips (for `width >
// height` textures, mip offset calculation may result in packing along Y).
// //
// Array slices are stored within levels (this is different than how Direct3D // Array slices are stored within levels (this is different than how Direct3D
// 10+ builds subresource indices, for instance). Each array slice or level is // 10+ builds subresource indices, for instance). Each array slice or level is
// aligned to 4 KB (but this doesn't apply to 3D texture slices within one // aligned to 4 KB (but this doesn't apply to 3D texture slices within one
// level). // level).
struct TextureGuestLevelLayout {
// Number of array slices within the mip.
uint32_t array_size;
// Distance between each row of blocks in bytes, including all the needed
// power of two (for mips) and 256-byte (for linear textures) alignment.
uint32_t row_pitch_bytes;
// Distance between Z slices in block rows, aligned to power of two for mips,
// and to tile height.
uint32_t z_slice_stride_block_rows;
// Distance between each array slice within the level in bytes, aligned to
// kTextureSubresourceAlignmentBytes.
uint32_t array_slice_stride_bytes;
// Distance from the beginning of the level to the next stored one.
uint32_t next_level_distance_bytes() const {
return array_slice_stride_bytes * array_size;
}
// Estimated amount of memory this level occupies, and variables involved in
// its calculation. Not aligned to kTextureSubresourceAlignmentBytes. For
// tiled textures, this will be rounded to 32x32x4 blocks (or 32x32x1
// depending on the dimension), and for the linear packed mip tail, this will
// be rounded to the same amount of texels, but for the linear subresources
// that are not the packed mip tail, this may be significantly (including less
// 4 KB pages) smaller than the aligned size (like for Test Drive Unlimited
// allocating 4 KB for a 2x2 linear k_8_8_8_8 texture that would be stored
// like 64x32 and take 8 KB). If the width is bigger than the pitch, this will
// also be taken into account for the last row so all memory actually used by
// the texture will be loaded, and may be bigger than the distance between
// array slices or levels. The purpose of this parameter is to make the memory
// amount that needs to be resident as close to the real amount as possible,
// to make sure all the needed data will be read, but also, if possible,
// unneeded memory pages won't be accessed (since that may trigger an access
// violation on the CPU).
uint32_t x_extent_blocks;
uint32_t y_extent_blocks;
uint32_t z_extent;
uint32_t array_slice_data_extent_bytes;
uint32_t level_data_extent_bytes;
};
// is_base == true - level must be 0 (for the base_address part).
// is_base == false - level may be 0 if is_packed_level is true (for the packed
// tail of mip_address part if the texture is very small so the tail is stored
// like mip 0).
TextureGuestLevelLayout GetGuestLevelLayout(
xenos::DataDimension dimension, uint32_t base_pitch_texels_div_32,
uint32_t width_texels, uint32_t height_texels, uint32_t depth_or_array_size,
bool is_tiled, xenos::TextureFormat format, bool is_mip, uint32_t level,
bool is_packed_level);
struct TextureGuestLayout { struct TextureGuestLayout {
TextureGuestLevelLayout base; struct Level {
// Distance between each row of blocks in bytes, including all the needed
// power of two (for mips) and 256-byte (for linear textures) alignment.
uint32_t row_pitch_bytes;
// Distance between Z slices in block rows, aligned to power of two for
// mips, and to tile height.
uint32_t z_slice_stride_block_rows;
// Distance between each array slice within the level in bytes, aligned to
// kTextureSubresourceAlignmentBytes. The distance to the next level is this
// multiplied by the array slice count.
uint32_t array_slice_stride_bytes;
// Estimated amount of memory this level occupies, and variables involved in
// its calculation. Not aligned to kTextureSubresourceAlignmentBytes. For
// tiled textures, this will be rounded to 32x32x4 blocks (or 32x32x1
// depending on the dimension), but for the linear subresources, this may be
// significantly (including less 4 KB pages) smaller than the aligned size
// (like for Ridge Racer Unbounded where aligning the height of a 1280x720
// linear texture results in access violations). For the linear mip tail,
// this includes all the mip levels stored in it. If the width is bigger
// than the pitch, this will also be taken into account for the last row so
// all memory actually used by the texture will be loaded, and may be bigger
// than the distance between array slices or levels. The purpose of this
// parameter is to make the memory amount that needs to be resident as close
// to the real amount as possible, to make sure all the needed data will be
// read, but also, if possible, unneeded memory pages won't be accessed
// (since that may trigger an access violation on the CPU).
uint32_t x_extent_blocks;
uint32_t y_extent_blocks;
uint32_t z_extent;
uint32_t array_slice_data_extent_bytes;
// Including all array slices.
uint32_t level_data_extent_bytes;
};
Level base;
// If mip_max_level specified at calculation time is at least 1, the stored // If mip_max_level specified at calculation time is at least 1, the stored
// mips are min(1, packed_mip_level) through min(mip_max_level, // mips are min(1, packed_mip_level) through min(mip_max_level,
// packed_mip_level). // packed_mip_level).
TextureGuestLevelLayout mips[xenos::kTexture2DCubeMaxWidthHeightLog2 + 1]; Level mips[xenos::kTexture2DCubeMaxWidthHeightLog2 + 1];
uint32_t mip_offsets_bytes[xenos::kTexture2DCubeMaxWidthHeightLog2 + 1]; uint32_t mip_offsets_bytes[xenos::kTexture2DCubeMaxWidthHeightLog2 + 1];
uint32_t mips_total_extent_bytes; uint32_t mips_total_extent_bytes;
uint32_t max_level; uint32_t max_level;
// UINT32_MAX if there's no packed mip tail. // UINT32_MAX if there's no packed mip tail.
uint32_t packed_level; uint32_t packed_level;
uint32_t array_size;
}; };
TextureGuestLayout GetGuestTextureLayout( TextureGuestLayout GetGuestTextureLayout(