[GPU] Linear mip tail exact extent estimation
This commit is contained in:
parent
a69a058991
commit
dd5ea87213
|
@ -2093,15 +2093,36 @@ void TextureCache::BindingInfoFromFetchConstant(
|
|||
// No texture data at all.
|
||||
return;
|
||||
}
|
||||
if (fetch.dimension == xenos::DataDimension::k1D) {
|
||||
bool is_invalid_1d = false;
|
||||
// TODO(Triang3l): Support long 1D textures.
|
||||
if (fetch.dimension == xenos::DataDimension::k1D &&
|
||||
width > xenos::kTexture2DCubeMaxWidthHeight) {
|
||||
if (width > xenos::kTexture2DCubeMaxWidthHeight) {
|
||||
XELOGE(
|
||||
"1D texture is too wide ({}) - ignoring! "
|
||||
"Report the game to Xenia developers",
|
||||
"1D texture is too wide ({}) - ignoring! Report the game to Xenia "
|
||||
"developers",
|
||||
width);
|
||||
is_invalid_1d = true;
|
||||
}
|
||||
assert_false(fetch.tiled);
|
||||
if (fetch.tiled) {
|
||||
XELOGE(
|
||||
"1D texture has tiling enabled in the fetch constant, but this "
|
||||
"appears to be completely wrong - ignoring! Report the game to Xenia "
|
||||
"developers");
|
||||
is_invalid_1d = true;
|
||||
}
|
||||
assert_false(fetch.packed_mips);
|
||||
if (fetch.packed_mips) {
|
||||
XELOGE(
|
||||
"1D texture has packed mips enabled in the fetch constant, but this "
|
||||
"appears to be completely wrong - ignoring! Report the game to Xenia "
|
||||
"developers");
|
||||
is_invalid_1d = true;
|
||||
}
|
||||
if (is_invalid_1d) {
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
xenos::TextureFormat format = GetBaseFormat(fetch.format);
|
||||
|
||||
|
@ -2411,7 +2432,7 @@ bool TextureCache::LoadTextureData(Texture* texture) {
|
|||
if (!level_packed) {
|
||||
// Loading the packed tail for the base - load the whole tail to copy
|
||||
// regions out of it.
|
||||
const texture_util::TextureGuestLevelLayout& guest_layout_base =
|
||||
const texture_util::TextureGuestLayout::Level& guest_layout_base =
|
||||
texture->guest_layout.base;
|
||||
host_slice_layout_base.Footprint.Width =
|
||||
guest_layout_base.x_extent_blocks * block_width;
|
||||
|
@ -2452,7 +2473,7 @@ bool TextureCache::LoadTextureData(Texture* texture) {
|
|||
if (level == level_packed) {
|
||||
// Loading the packed tail for the mips - load the whole tail to copy
|
||||
// regions out of it.
|
||||
const texture_util::TextureGuestLevelLayout&
|
||||
const texture_util::TextureGuestLayout::Level&
|
||||
guest_layout_packed_mips = texture->guest_layout.mips[level];
|
||||
host_slice_layout_mip.Footprint.Width =
|
||||
guest_layout_packed_mips.x_extent_blocks * block_width;
|
||||
|
@ -2634,7 +2655,7 @@ bool TextureCache::LoadTextureData(Texture* texture) {
|
|||
load_constants.guest_offset +=
|
||||
texture->guest_layout.mip_offsets_bytes[level];
|
||||
}
|
||||
const texture_util::TextureGuestLevelLayout& level_guest_layout =
|
||||
const texture_util::TextureGuestLayout::Level& level_guest_layout =
|
||||
is_base ? texture->guest_layout.base
|
||||
: texture->guest_layout.mips[level];
|
||||
uint32_t level_guest_pitch = level_guest_layout.row_pitch_bytes;
|
||||
|
|
|
@ -960,10 +960,10 @@ bool GetResolveInfo(const RegisterFile& regs, const Memory& memory,
|
|||
// Need a subregion size, not the full subresource size - thus not aligning
|
||||
// to xenos::kTextureSubresourceAlignmentBytes.
|
||||
copy_dest_length =
|
||||
texture_util::GetGuestLevelLayout(
|
||||
texture_util::GetGuestTextureLayout(
|
||||
dest_dimension, copy_dest_pitch_aligned_div_32, uint32_t(x1 - x0),
|
||||
dest_height, dest_depth, true, dest_format, false, 0, false)
|
||||
.level_data_extent_bytes;
|
||||
dest_height, dest_depth, true, dest_format, false, true, 0)
|
||||
.base.level_data_extent_bytes;
|
||||
} else {
|
||||
XELOGE("Tried to resolve to format {}, which is not a ColorFormat",
|
||||
dest_format_info.name);
|
||||
|
|
|
@ -204,35 +204,22 @@ bool GetPackedMipOffset(uint32_t width, uint32_t height, uint32_t depth,
|
|||
return true;
|
||||
}
|
||||
|
||||
TextureGuestLevelLayout GetGuestLevelLayout(
|
||||
TextureGuestLayout GetGuestTextureLayout(
|
||||
xenos::DataDimension dimension, uint32_t base_pitch_texels_div_32,
|
||||
uint32_t width_texels, uint32_t height_texels, uint32_t depth_or_array_size,
|
||||
bool is_tiled, xenos::TextureFormat format, bool is_mip, uint32_t level,
|
||||
bool is_packed_level) {
|
||||
// If with packed mips the mips 1... happen to be packed in what's stored as
|
||||
// mip 0, this mip tail appears to be stored like mips (with power of two size
|
||||
// rounding) rather than like the base level (with the pitch from the fetch
|
||||
// constant), so we distinguish between them for mip == 0.
|
||||
// Base is by definition the level 0.
|
||||
assert_false(!is_mip && level);
|
||||
// Level 0 for mips is the special case for a packed mip tail of very small
|
||||
// textures, where the tail is stored like it's at the level 0.
|
||||
assert_false(is_mip && !level && !is_packed_level);
|
||||
bool is_tiled, xenos::TextureFormat format, bool has_packed_levels,
|
||||
bool has_base, uint32_t max_level) {
|
||||
TextureGuestLayout layout;
|
||||
|
||||
TextureGuestLevelLayout layout;
|
||||
|
||||
// For safety, for instance, with empty resolve regions (extents calculation
|
||||
// may overflow otherwise due to the assumption of at least one row, for
|
||||
// example, but an empty texture is empty anyway).
|
||||
if (!width_texels ||
|
||||
(dimension != xenos::DataDimension::k1D && !height_texels) ||
|
||||
((dimension == xenos::DataDimension::k2DOrStacked ||
|
||||
dimension == xenos::DataDimension::k3D) &&
|
||||
!depth_or_array_size)) {
|
||||
std::memset(&layout, 0, sizeof(layout));
|
||||
return layout;
|
||||
if (dimension == xenos::DataDimension::k1D) {
|
||||
assert_false(is_tiled);
|
||||
// GetPackedMipOffset may result in packing along Y for `width > height`
|
||||
// textures.
|
||||
assert_false(has_packed_levels);
|
||||
height_texels = 1;
|
||||
}
|
||||
|
||||
uint32_t depth =
|
||||
dimension == xenos::DataDimension::k3D ? depth_or_array_size : 1;
|
||||
switch (dimension) {
|
||||
case xenos::DataDimension::k2DOrStacked:
|
||||
layout.array_size = depth_or_array_size;
|
||||
|
@ -244,158 +231,17 @@ TextureGuestLevelLayout GetGuestLevelLayout(
|
|||
layout.array_size = 1;
|
||||
}
|
||||
|
||||
const FormatInfo* format_info = FormatInfo::Get(format);
|
||||
uint32_t bytes_per_block = format_info->bytes_per_block();
|
||||
|
||||
// Calculate the strides.
|
||||
// Mips have row / depth slice strides calculated from a mip of a texture
|
||||
// whose base size is a power of two.
|
||||
// The base mip has tightly packed depth slices, and takes the row pitch from
|
||||
// the fetch constant.
|
||||
// For stride calculation purposes, mip dimensions are always aligned to
|
||||
// 32x32x4 blocks (or x1 for the missing dimensions), including for linear
|
||||
// textures.
|
||||
// Linear texture rows are 256-byte-aligned.
|
||||
uint32_t row_pitch_texels_unaligned;
|
||||
uint32_t z_slice_stride_texel_rows_unaligned;
|
||||
if (is_mip) {
|
||||
row_pitch_texels_unaligned =
|
||||
std::max(xe::next_pow2(width_texels) >> level, uint32_t(1));
|
||||
z_slice_stride_texel_rows_unaligned =
|
||||
std::max(xe::next_pow2(height_texels) >> level, uint32_t(1));
|
||||
} else {
|
||||
row_pitch_texels_unaligned = base_pitch_texels_div_32 << 5;
|
||||
z_slice_stride_texel_rows_unaligned = height_texels;
|
||||
}
|
||||
uint32_t row_pitch_blocks_tile_aligned = xe::align(
|
||||
xe::align(row_pitch_texels_unaligned, format_info->block_width) /
|
||||
format_info->block_width,
|
||||
xenos::kTextureTileWidthHeight);
|
||||
layout.row_pitch_bytes = row_pitch_blocks_tile_aligned * bytes_per_block;
|
||||
// Assuming the provided pitch is already 256-byte-aligned for linear, but
|
||||
// considering the guest-provided pitch more important (no information about
|
||||
// how the GPU actually handles unaligned rows).
|
||||
if (!is_tiled && is_mip) {
|
||||
layout.row_pitch_bytes = xe::align(layout.row_pitch_bytes,
|
||||
xenos::kTextureLinearRowAlignmentBytes);
|
||||
}
|
||||
layout.z_slice_stride_block_rows =
|
||||
dimension != xenos::DataDimension::k1D
|
||||
? xe::align(xe::align(z_slice_stride_texel_rows_unaligned,
|
||||
format_info->block_height) /
|
||||
format_info->block_height,
|
||||
xenos::kTextureTileWidthHeight)
|
||||
: 1;
|
||||
layout.array_slice_stride_bytes =
|
||||
layout.row_pitch_bytes * layout.z_slice_stride_block_rows;
|
||||
uint32_t z_stride_bytes = layout.array_slice_stride_bytes;
|
||||
if (dimension == xenos::DataDimension::k3D) {
|
||||
layout.array_slice_stride_bytes *=
|
||||
xe::align(depth_or_array_size, xenos::kTextureTiledDepthGranularity);
|
||||
}
|
||||
uint32_t array_slice_stride_bytes_non_4kb_aligned =
|
||||
layout.array_slice_stride_bytes;
|
||||
layout.array_slice_stride_bytes =
|
||||
xe::align(array_slice_stride_bytes_non_4kb_aligned,
|
||||
xenos::kTextureSubresourceAlignmentBytes);
|
||||
|
||||
// Estimate the memory amount actually referenced by the texture, which may be
|
||||
// smaller (especially in the 2x2 linear k_8_8_8_8 case in Test Drive
|
||||
// Unlimited, for which 4 KB are allocated, while the stride is 8 KB) or
|
||||
// bigger than the stride. For tiled textures, this is the dimensions aligned
|
||||
// to 32x32x4 blocks (or x1 for the missing dimensions).
|
||||
// For linear, doing almost the same for the mip tail (which can be used for
|
||||
// both the mips and, if the texture is very small, the base) because it
|
||||
// stores multiple mips outside the first mip in it in the tile padding
|
||||
// (though there's no need to align the size to the next power of two for this
|
||||
// purpose for mips - packed mips are only used when min(width, height) <= 16,
|
||||
// and packing is first done along the shorter axis - even if the longer axis
|
||||
// is larger than 32, nothing will be packed beyond the extent of the longer
|
||||
// axis). "Almost" because for linear textures, we're rounding the size to
|
||||
// 32x32x4 texels, not blocks - first packed mips start from 16-texel, not
|
||||
// 16-block, shortest dimension, and are placed in 32x- or x32-texel tiles,
|
||||
// while 32 blocks for compressed textures are bigger in memory than 32
|
||||
// texels.
|
||||
layout.x_extent_blocks = xe::align(width_texels, format_info->block_width) /
|
||||
format_info->block_width;
|
||||
layout.y_extent_blocks =
|
||||
dimension != xenos::DataDimension::k1D
|
||||
? xe::align(height_texels, format_info->block_height) /
|
||||
format_info->block_height
|
||||
: 1;
|
||||
layout.z_extent =
|
||||
dimension == xenos::DataDimension::k3D ? depth_or_array_size : 1;
|
||||
if (is_tiled) {
|
||||
layout.x_extent_blocks =
|
||||
xe::align(layout.x_extent_blocks, xenos::kTextureTileWidthHeight);
|
||||
assert_true(dimension != xenos::DataDimension::k1D);
|
||||
layout.y_extent_blocks =
|
||||
xe::align(layout.y_extent_blocks, xenos::kTextureTileWidthHeight);
|
||||
if (dimension == xenos::DataDimension::k3D) {
|
||||
layout.z_extent =
|
||||
xe::align(layout.z_extent, xenos::kTextureTiledDepthGranularity);
|
||||
// 3D texture addressing is pretty complex, so it's hard to determine the
|
||||
// memory extent of a subregion - just use pitch_tiles * height_tiles *
|
||||
// depth_tiles * bytes_per_tile at least for now, until we find a case
|
||||
// where it causes issues. width > pitch is a very weird edge case anyway,
|
||||
// and is extremely unlikely.
|
||||
assert_true(layout.x_extent_blocks <= row_pitch_blocks_tile_aligned);
|
||||
layout.array_slice_data_extent_bytes =
|
||||
array_slice_stride_bytes_non_4kb_aligned;
|
||||
} else {
|
||||
// 2D 32x32-block tiles are laid out linearly in the texture.
|
||||
// Calculate the extent as ((all rows except for the last * pitch in
|
||||
// tiles + last row length in tiles) * bytes per tile).
|
||||
layout.array_slice_data_extent_bytes =
|
||||
(layout.y_extent_blocks - xenos::kTextureTileWidthHeight) *
|
||||
layout.row_pitch_bytes +
|
||||
bytes_per_block * layout.x_extent_blocks *
|
||||
xenos::kTextureTileWidthHeight;
|
||||
}
|
||||
} else {
|
||||
if (is_packed_level) {
|
||||
layout.x_extent_blocks =
|
||||
xe::align(layout.x_extent_blocks,
|
||||
xenos::kTextureTileWidthHeight / format_info->block_width);
|
||||
if (dimension != xenos::DataDimension::k1D) {
|
||||
layout.y_extent_blocks =
|
||||
xe::align(layout.y_extent_blocks, xenos::kTextureTileWidthHeight /
|
||||
format_info->block_height);
|
||||
if (dimension == xenos::DataDimension::k3D) {
|
||||
layout.z_extent =
|
||||
xe::align(layout.z_extent, xenos::kTextureTiledDepthGranularity);
|
||||
}
|
||||
}
|
||||
}
|
||||
layout.array_slice_data_extent_bytes =
|
||||
z_stride_bytes * (layout.z_extent - 1) +
|
||||
layout.row_pitch_bytes * (layout.y_extent_blocks - 1) +
|
||||
bytes_per_block * layout.x_extent_blocks;
|
||||
}
|
||||
layout.level_data_extent_bytes =
|
||||
layout.array_slice_stride_bytes * (layout.array_size - 1) +
|
||||
layout.array_slice_data_extent_bytes;
|
||||
|
||||
// For safety, for instance, with empty resolve regions (extents calculation
|
||||
// may overflow otherwise due to the assumption of at least one row, for
|
||||
// example, but an empty texture is empty anyway).
|
||||
if (!width_texels || !height_texels || !depth || !layout.array_size) {
|
||||
std::memset(&layout, 0, sizeof(layout));
|
||||
return layout;
|
||||
}
|
||||
|
||||
TextureGuestLayout GetGuestTextureLayout(
|
||||
xenos::DataDimension dimension, uint32_t base_pitch_texels_div_32,
|
||||
uint32_t width_texels, uint32_t height_texels, uint32_t depth_or_array_size,
|
||||
bool is_tiled, xenos::TextureFormat format, bool has_packed_levels,
|
||||
bool has_base, uint32_t max_level) {
|
||||
TextureGuestLayout layout;
|
||||
|
||||
if (dimension == xenos::DataDimension::k1D) {
|
||||
height_texels = 1;
|
||||
}
|
||||
|
||||
// For safety, clamp the maximum level.
|
||||
uint32_t longest_axis = std::max(width_texels, height_texels);
|
||||
if (dimension == xenos::DataDimension::k3D) {
|
||||
longest_axis = std::max(longest_axis, depth_or_array_size);
|
||||
}
|
||||
uint32_t max_level_for_dimensions = xe::log2_floor(longest_axis);
|
||||
uint32_t max_level_for_dimensions =
|
||||
xe::log2_floor(std::max(std::max(width_texels, height_texels), depth));
|
||||
assert_true(max_level <= max_level_for_dimensions);
|
||||
max_level = std::min(max_level, max_level_for_dimensions);
|
||||
layout.max_level = max_level;
|
||||
|
@ -404,33 +250,210 @@ TextureGuestLayout GetGuestTextureLayout(
|
|||
? GetPackedMipLevel(width_texels, height_texels)
|
||||
: UINT32_MAX;
|
||||
|
||||
if (has_base) {
|
||||
layout.base =
|
||||
GetGuestLevelLayout(dimension, base_pitch_texels_div_32, width_texels,
|
||||
height_texels, depth_or_array_size, is_tiled,
|
||||
format, false, 0, layout.packed_level == 0);
|
||||
} else {
|
||||
// Clear unused level layouts to zero strides/sizes.
|
||||
if (!has_base) {
|
||||
std::memset(&layout.base, 0, sizeof(layout.base));
|
||||
}
|
||||
|
||||
std::memset(layout.mips, 0, sizeof(layout.mips));
|
||||
std::memset(layout.mip_offsets_bytes, 0, sizeof(layout.mip_offsets_bytes));
|
||||
if (layout.packed_level != 0) {
|
||||
std::memset(&layout.mips[0], 0, sizeof(layout.mips[0]));
|
||||
}
|
||||
uint32_t max_stored_level = std::min(max_level, layout.packed_level);
|
||||
{
|
||||
uint32_t mips_end = max_stored_level + 1;
|
||||
assert_true(mips_end <= xe::countof(layout.mips));
|
||||
uint32_t mips_unused_count = uint32_t(xe::countof(layout.mips)) - mips_end;
|
||||
if (mips_unused_count) {
|
||||
std::memset(&layout.mips[mips_end], 0,
|
||||
sizeof(layout.mips[0]) * mips_unused_count);
|
||||
std::memset(&layout.mip_offsets_bytes[mips_end], 0,
|
||||
sizeof(layout.mip_offsets_bytes[0]) * mips_unused_count);
|
||||
}
|
||||
}
|
||||
layout.mips_total_extent_bytes = 0;
|
||||
if (max_level) {
|
||||
|
||||
const FormatInfo* format_info = FormatInfo::Get(format);
|
||||
uint32_t bytes_per_block = format_info->bytes_per_block();
|
||||
|
||||
// The loop counter can mean two things depending on whether the packed mip
|
||||
// tail is stored as mip 0, because in this case, it would be ambiguous since
|
||||
// both the base and the mips would be on "level 0", but stored separately and
|
||||
// possibly with a different layout.
|
||||
uint32_t loop_level_last;
|
||||
if (layout.packed_level == 0) {
|
||||
// Packed mip tail is the level 0 - may need to load mip tails for the base,
|
||||
// the mips, or both.
|
||||
// Loop iteration 0 - base packed mip tail.
|
||||
// Loop iteration 1 - mips packed mip tail.
|
||||
loop_level_last = uint32_t(max_level != 0);
|
||||
} else {
|
||||
// Packed mip tail is not the level 0.
|
||||
// Loop iteration is the actual level being loaded.
|
||||
loop_level_last = max_stored_level;
|
||||
}
|
||||
uint32_t mip_offset_bytes = 0;
|
||||
uint32_t max_stored_mip = std::min(max_level, layout.packed_level);
|
||||
for (uint32_t mip = std::min(uint32_t(1), layout.packed_level);
|
||||
mip <= max_stored_mip; ++mip) {
|
||||
layout.mip_offsets_bytes[mip] = mip_offset_bytes;
|
||||
TextureGuestLevelLayout& mip_layout = layout.mips[mip];
|
||||
mip_layout =
|
||||
GetGuestLevelLayout(dimension, base_pitch_texels_div_32, width_texels,
|
||||
height_texels, depth_or_array_size, is_tiled,
|
||||
format, true, mip, mip == layout.packed_level);
|
||||
for (uint32_t loop_level = has_base ? 0 : 1; loop_level <= loop_level_last;
|
||||
++loop_level) {
|
||||
bool is_base = loop_level == 0;
|
||||
uint32_t level = (layout.packed_level == 0) ? 0 : loop_level;
|
||||
TextureGuestLayout::Level& level_layout =
|
||||
is_base ? layout.base : layout.mips[level];
|
||||
|
||||
// Calculate the strides.
|
||||
// Mips have row / depth slice strides calculated from a mip of a texture
|
||||
// whose base size is a power of two.
|
||||
// The base mip has tightly packed depth slices, and takes the row pitch
|
||||
// from the fetch constant.
|
||||
// For stride calculation purposes, mip dimensions are always aligned to
|
||||
// 32x32x4 blocks (or x1 for the missing dimensions), including for linear
|
||||
// textures.
|
||||
// Linear texture rows are 256-byte-aligned.
|
||||
uint32_t row_pitch_texels_unaligned;
|
||||
uint32_t z_slice_stride_texel_rows_unaligned;
|
||||
if (is_base) {
|
||||
row_pitch_texels_unaligned = base_pitch_texels_div_32 << 5;
|
||||
z_slice_stride_texel_rows_unaligned = height_texels;
|
||||
} else {
|
||||
row_pitch_texels_unaligned =
|
||||
std::max(xe::next_pow2(width_texels) >> level, uint32_t(1));
|
||||
z_slice_stride_texel_rows_unaligned =
|
||||
std::max(xe::next_pow2(height_texels) >> level, uint32_t(1));
|
||||
}
|
||||
uint32_t row_pitch_blocks_tile_aligned = xe::align(
|
||||
xe::align(row_pitch_texels_unaligned, format_info->block_width) /
|
||||
format_info->block_width,
|
||||
xenos::kTextureTileWidthHeight);
|
||||
level_layout.row_pitch_bytes =
|
||||
row_pitch_blocks_tile_aligned * bytes_per_block;
|
||||
// Assuming the provided pitch is already 256-byte-aligned for linear, but
|
||||
// considering the guest-provided pitch more important (no information about
|
||||
// how the GPU actually handles unaligned rows).
|
||||
if (!is_tiled && !is_base) {
|
||||
level_layout.row_pitch_bytes = xe::align(
|
||||
level_layout.row_pitch_bytes, xenos::kTextureLinearRowAlignmentBytes);
|
||||
}
|
||||
level_layout.z_slice_stride_block_rows =
|
||||
dimension != xenos::DataDimension::k1D
|
||||
? xe::align(xe::align(z_slice_stride_texel_rows_unaligned,
|
||||
format_info->block_height) /
|
||||
format_info->block_height,
|
||||
xenos::kTextureTileWidthHeight)
|
||||
: 1;
|
||||
level_layout.array_slice_stride_bytes =
|
||||
level_layout.row_pitch_bytes * level_layout.z_slice_stride_block_rows;
|
||||
uint32_t z_stride_bytes = level_layout.array_slice_stride_bytes;
|
||||
if (dimension == xenos::DataDimension::k3D) {
|
||||
level_layout.array_slice_stride_bytes *=
|
||||
xe::align(depth_or_array_size, xenos::kTextureTiledDepthGranularity);
|
||||
}
|
||||
uint32_t array_slice_stride_bytes_non_4kb_aligned =
|
||||
level_layout.array_slice_stride_bytes;
|
||||
level_layout.array_slice_stride_bytes =
|
||||
xe::align(array_slice_stride_bytes_non_4kb_aligned,
|
||||
xenos::kTextureSubresourceAlignmentBytes);
|
||||
|
||||
// Estimate the memory amount actually referenced by the texture, which may
|
||||
// be smaller (especially in the 1280x720 linear k_8_8_8_8 case in Ridge
|
||||
// Racer Unbounded, for which memory exactly for 1280x720 is allocated, and
|
||||
// aligning the height to 32 would cause access of an unallocated page) or
|
||||
// bigger than the stride. For tiled textures, this is the dimensions
|
||||
// aligned to 32x32x4 blocks (or x1 for the missing dimensions).
|
||||
uint32_t level_width_blocks =
|
||||
xe::align(std::max(width_texels >> level, uint32_t(1)),
|
||||
format_info->block_width) /
|
||||
format_info->block_width;
|
||||
uint32_t level_height_blocks =
|
||||
xe::align(std::max(height_texels >> level, uint32_t(1)),
|
||||
format_info->block_height) /
|
||||
format_info->block_height;
|
||||
uint32_t level_depth = std::max(depth >> level, uint32_t(1));
|
||||
if (is_tiled) {
|
||||
level_layout.x_extent_blocks =
|
||||
xe::align(level_width_blocks, xenos::kTextureTileWidthHeight);
|
||||
level_layout.y_extent_blocks =
|
||||
xe::align(level_height_blocks, xenos::kTextureTileWidthHeight);
|
||||
if (dimension == xenos::DataDimension::k3D) {
|
||||
level_layout.z_extent =
|
||||
xe::align(level_depth, xenos::kTextureTiledDepthGranularity);
|
||||
// 3D texture addressing is pretty complex, so it's hard to determine
|
||||
// the memory extent of a subregion - just use `pitch_tiles *
|
||||
// height_tiles * depth_tiles * bytes_per_tile` at least for now, until
|
||||
// we find a case where it causes issues. `width > pitch` is a very
|
||||
// weird edge case anyway, and is extremely unlikely.
|
||||
assert_true(level_layout.x_extent_blocks <=
|
||||
row_pitch_blocks_tile_aligned);
|
||||
level_layout.array_slice_data_extent_bytes =
|
||||
array_slice_stride_bytes_non_4kb_aligned;
|
||||
} else {
|
||||
level_layout.z_extent = 1;
|
||||
// 2D 32x32-block tiles are laid out linearly in the texture.
|
||||
// Calculate the extent as ((all rows except for the last * pitch in
|
||||
// tiles + last row length in tiles) * bytes per tile).
|
||||
level_layout.array_slice_data_extent_bytes =
|
||||
(level_layout.y_extent_blocks - xenos::kTextureTileWidthHeight) *
|
||||
level_layout.row_pitch_bytes +
|
||||
bytes_per_block * level_layout.x_extent_blocks *
|
||||
xenos::kTextureTileWidthHeight;
|
||||
}
|
||||
} else {
|
||||
if (level == layout.packed_level) {
|
||||
// Calculate the portion of the mip tail actually used by the needed
|
||||
// mips. The actually used region may be significantly smaller than the
|
||||
// full 32x32-texel-aligned tail. A 2x2 texture (for example, in Test
|
||||
// Drive Unlimited, there's a 2x2 k_8_8_8_8 linear texture with packed
|
||||
// mips), for instance, would have its 2x2 base at (16, 0) and its 1x1
|
||||
// mip at (8, 0) - and we need 2 or 1 rows in these cases, not 32.
|
||||
level_layout.x_extent_blocks = 0;
|
||||
level_layout.y_extent_blocks = 0;
|
||||
level_layout.z_extent = 0;
|
||||
uint32_t packed_sublevel_last = is_base ? 0 : max_level;
|
||||
for (uint32_t packed_sublevel = layout.packed_level;
|
||||
packed_sublevel <= packed_sublevel_last; ++packed_sublevel) {
|
||||
uint32_t packed_sublevel_x_blocks;
|
||||
uint32_t packed_sublevel_y_blocks;
|
||||
uint32_t packed_sublevel_z;
|
||||
GetPackedMipOffset(width_texels, height_texels, depth, format,
|
||||
packed_sublevel, packed_sublevel_x_blocks,
|
||||
packed_sublevel_y_blocks, packed_sublevel_z);
|
||||
level_layout.x_extent_blocks = std::max(
|
||||
level_layout.x_extent_blocks,
|
||||
packed_sublevel_x_blocks +
|
||||
xe::align(
|
||||
std::max(width_texels >> packed_sublevel, uint32_t(1)),
|
||||
format_info->block_width) /
|
||||
format_info->block_width);
|
||||
level_layout.y_extent_blocks = std::max(
|
||||
level_layout.y_extent_blocks,
|
||||
packed_sublevel_y_blocks +
|
||||
xe::align(
|
||||
std::max(height_texels >> packed_sublevel, uint32_t(1)),
|
||||
format_info->block_height) /
|
||||
format_info->block_height);
|
||||
level_layout.z_extent =
|
||||
std::max(level_layout.z_extent,
|
||||
packed_sublevel_z +
|
||||
std::max(depth >> packed_sublevel, uint32_t(1)));
|
||||
}
|
||||
} else {
|
||||
level_layout.x_extent_blocks = level_width_blocks;
|
||||
level_layout.y_extent_blocks = level_height_blocks;
|
||||
level_layout.z_extent = level_depth;
|
||||
}
|
||||
level_layout.array_slice_data_extent_bytes =
|
||||
z_stride_bytes * (level_layout.z_extent - 1) +
|
||||
level_layout.row_pitch_bytes * (level_layout.y_extent_blocks - 1) +
|
||||
bytes_per_block * level_layout.x_extent_blocks;
|
||||
}
|
||||
level_layout.level_data_extent_bytes =
|
||||
level_layout.array_slice_stride_bytes * (layout.array_size - 1) +
|
||||
level_layout.array_slice_data_extent_bytes;
|
||||
|
||||
if (!is_base) {
|
||||
layout.mip_offsets_bytes[level] = mip_offset_bytes;
|
||||
layout.mips_total_extent_bytes =
|
||||
std::max(layout.mips_total_extent_bytes,
|
||||
mip_offset_bytes + mip_layout.level_data_extent_bytes);
|
||||
mip_offset_bytes += mip_layout.next_level_distance_bytes();
|
||||
mip_offset_bytes + level_layout.level_data_extent_bytes);
|
||||
mip_offset_bytes +=
|
||||
level_layout.array_slice_stride_bytes * layout.array_size;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -97,18 +97,20 @@ bool GetPackedMipOffset(uint32_t width, uint32_t height, uint32_t depth,
|
|||
// disassembly, which only checks the flag whether the data is packed passed to
|
||||
// it, not the level, to see if it needs to calculate the offset in the mip
|
||||
// tail, and the offset calculation function doesn't have level == 0 checks in
|
||||
// it, only early-out if level < packed tail level (which can be 0).
|
||||
// it, only early-out if level < packed tail level (which can be 0). There are
|
||||
// examples of textures with packed base, for example, in the intro level of
|
||||
// Prey (8x8 linear DXT1 - pairs of orange lights in the bottom of gambling
|
||||
// machines).
|
||||
//
|
||||
// Linear texture rows are aligned to 256 bytes, for both the base and the mips
|
||||
// (for the base, Direct3D 9 writes an already 256-byte-aligned pitch to the
|
||||
// fetch constant).
|
||||
//
|
||||
// However, all the 32x32x4 padding, being just padding, is not necessarily
|
||||
// being actually accessed, especially for linear textures. Test Drive Unlimited
|
||||
// has a 2x2 k_8_8_8_8 linear texture, and allocates 4 KB for it (with accessing
|
||||
// the page beyond it triggering an access violation), while a 32x32 k_8_8_8_8
|
||||
// linear texture, with rows aligned to 256 bytes (so stored like 64x32) would
|
||||
// take 8 KB. So, while for stride calculations all the padding must be
|
||||
// being actually accessed, especially for linear textures. Ridge Racer
|
||||
// Unbounded has a 1280x720 k_8_8_8_8 linear texture, and allocates memory for
|
||||
// exactly 1280x720, so aligning the height to 32 (to 1280x736) results in access
|
||||
// violations. So, while for stride calculations all the padding must be
|
||||
// respected, for actual memory loads it's better to avoid trying to access it
|
||||
// when possible:
|
||||
// - If the pitch is bigger than the width, it's better to calculate the last
|
||||
|
@ -116,86 +118,69 @@ bool GetPackedMipOffset(uint32_t width, uint32_t height, uint32_t depth,
|
|||
// in the other direction though - pitch < width is a weird situation, but
|
||||
// probably legal, and may lead to reading data from beyond the calculated
|
||||
// subresource stride).
|
||||
// - For linear textures (like that 2x2 example from Test Drive Unlimited), it's
|
||||
// easy to calculate the exact memory extent that may be accessed knowing the
|
||||
// dimensions (unlike for tiled textures with complex addressing within
|
||||
// 32x32x4-block tiles), so there's no need to align them to 32x32x4 for
|
||||
// memory extent calculation - that's what appears to cause that crash in Test
|
||||
// Drive Unlimited.
|
||||
// - The exception here is the packed mip tail for linear textures, as smaller
|
||||
// mips are stored in the 32x32x4-texel padding. However, the packed mip
|
||||
// tail needs to be aligned only to 32x32 texels, not to 32x32 blocks - so
|
||||
// for compressed textures, the padding may be smaller, only to 8x8 blocks.
|
||||
// - For linear textures (like that 1280x720 example from Ridge Racer
|
||||
// Unbounded), it's easy to calculate the exact memory extent that may be
|
||||
// accessed knowing the dimensions (unlike for tiled textures with complex
|
||||
// addressing within 32x32x4-block tiles), so there's no need to align them to
|
||||
// 32x32x4 for memory extent calculation.
|
||||
// - For the linear packed mip tail, the extent can be calculated as max of
|
||||
// (block offsets + block extents) of all levels stored in it.
|
||||
//
|
||||
// 1D textures are always linear.
|
||||
// 1D textures are always linear and likely can't have packed mips (for `width >
|
||||
// height` textures, mip offset calculation may result in packing along Y).
|
||||
//
|
||||
// Array slices are stored within levels (this is different than how Direct3D
|
||||
// 10+ builds subresource indices, for instance). Each array slice or level is
|
||||
// aligned to 4 KB (but this doesn't apply to 3D texture slices within one
|
||||
// level).
|
||||
|
||||
struct TextureGuestLevelLayout {
|
||||
// Number of array slices within the mip.
|
||||
uint32_t array_size;
|
||||
|
||||
struct TextureGuestLayout {
|
||||
struct Level {
|
||||
// Distance between each row of blocks in bytes, including all the needed
|
||||
// power of two (for mips) and 256-byte (for linear textures) alignment.
|
||||
uint32_t row_pitch_bytes;
|
||||
// Distance between Z slices in block rows, aligned to power of two for mips,
|
||||
// and to tile height.
|
||||
// Distance between Z slices in block rows, aligned to power of two for
|
||||
// mips, and to tile height.
|
||||
uint32_t z_slice_stride_block_rows;
|
||||
// Distance between each array slice within the level in bytes, aligned to
|
||||
// kTextureSubresourceAlignmentBytes.
|
||||
// kTextureSubresourceAlignmentBytes. The distance to the next level is this
|
||||
// multiplied by the array slice count.
|
||||
uint32_t array_slice_stride_bytes;
|
||||
// Distance from the beginning of the level to the next stored one.
|
||||
uint32_t next_level_distance_bytes() const {
|
||||
return array_slice_stride_bytes * array_size;
|
||||
}
|
||||
|
||||
// Estimated amount of memory this level occupies, and variables involved in
|
||||
// its calculation. Not aligned to kTextureSubresourceAlignmentBytes. For
|
||||
// tiled textures, this will be rounded to 32x32x4 blocks (or 32x32x1
|
||||
// depending on the dimension), and for the linear packed mip tail, this will
|
||||
// be rounded to the same amount of texels, but for the linear subresources
|
||||
// that are not the packed mip tail, this may be significantly (including less
|
||||
// 4 KB pages) smaller than the aligned size (like for Test Drive Unlimited
|
||||
// allocating 4 KB for a 2x2 linear k_8_8_8_8 texture that would be stored
|
||||
// like 64x32 and take 8 KB). If the width is bigger than the pitch, this will
|
||||
// also be taken into account for the last row so all memory actually used by
|
||||
// the texture will be loaded, and may be bigger than the distance between
|
||||
// array slices or levels. The purpose of this parameter is to make the memory
|
||||
// amount that needs to be resident as close to the real amount as possible,
|
||||
// to make sure all the needed data will be read, but also, if possible,
|
||||
// unneeded memory pages won't be accessed (since that may trigger an access
|
||||
// violation on the CPU).
|
||||
// depending on the dimension), but for the linear subresources, this may be
|
||||
// significantly (including fewer 4 KB pages) smaller than the aligned size
|
||||
// (like for Ridge Racer Unbounded where aligning the height of a 1280x720
|
||||
// linear texture results in access violations). For the linear mip tail,
|
||||
// this includes all the mip levels stored in it. If the width is bigger
|
||||
// than the pitch, this will also be taken into account for the last row so
|
||||
// all memory actually used by the texture will be loaded, and may be bigger
|
||||
// than the distance between array slices or levels. The purpose of this
|
||||
// parameter is to make the memory amount that needs to be resident as close
|
||||
// to the real amount as possible, to make sure all the needed data will be
|
||||
// read, but also, if possible, unneeded memory pages won't be accessed
|
||||
// (since that may trigger an access violation on the CPU).
|
||||
uint32_t x_extent_blocks;
|
||||
uint32_t y_extent_blocks;
|
||||
uint32_t z_extent;
|
||||
uint32_t array_slice_data_extent_bytes;
|
||||
// Including all array slices.
|
||||
uint32_t level_data_extent_bytes;
|
||||
};
|
||||
|
||||
// is_base == true - level must be 0 (for the base_address part).
|
||||
// is_base == false - level may be 0 if is_packed_level is true (for the packed
|
||||
// tail of mip_address part if the texture is very small so the tail is stored
|
||||
// like mip 0).
|
||||
TextureGuestLevelLayout GetGuestLevelLayout(
|
||||
xenos::DataDimension dimension, uint32_t base_pitch_texels_div_32,
|
||||
uint32_t width_texels, uint32_t height_texels, uint32_t depth_or_array_size,
|
||||
bool is_tiled, xenos::TextureFormat format, bool is_mip, uint32_t level,
|
||||
bool is_packed_level);
|
||||
|
||||
struct TextureGuestLayout {
|
||||
TextureGuestLevelLayout base;
|
||||
Level base;
|
||||
// If max_level specified at calculation time is at least 1, the stored
|
||||
// mips are min(1, packed_level) through min(max_level,
|
||||
// packed_level).
|
||||
TextureGuestLevelLayout mips[xenos::kTexture2DCubeMaxWidthHeightLog2 + 1];
|
||||
Level mips[xenos::kTexture2DCubeMaxWidthHeightLog2 + 1];
|
||||
uint32_t mip_offsets_bytes[xenos::kTexture2DCubeMaxWidthHeightLog2 + 1];
|
||||
uint32_t mips_total_extent_bytes;
|
||||
uint32_t max_level;
|
||||
// UINT32_MAX if there's no packed mip tail.
|
||||
uint32_t packed_level;
|
||||
uint32_t array_size;
|
||||
};
|
||||
|
||||
TextureGuestLayout GetGuestTextureLayout(
|
||||
|
|
Loading…
Reference in New Issue