[GPU] Linear mip tail exact extent estimation
This commit is contained in:
parent
a69a058991
commit
dd5ea87213
|
@ -2093,15 +2093,36 @@ void TextureCache::BindingInfoFromFetchConstant(
|
||||||
// No texture data at all.
|
// No texture data at all.
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
if (fetch.dimension == xenos::DataDimension::k1D) {
|
||||||
|
bool is_invalid_1d = false;
|
||||||
// TODO(Triang3l): Support long 1D textures.
|
// TODO(Triang3l): Support long 1D textures.
|
||||||
if (fetch.dimension == xenos::DataDimension::k1D &&
|
if (width > xenos::kTexture2DCubeMaxWidthHeight) {
|
||||||
width > xenos::kTexture2DCubeMaxWidthHeight) {
|
|
||||||
XELOGE(
|
XELOGE(
|
||||||
"1D texture is too wide ({}) - ignoring! "
|
"1D texture is too wide ({}) - ignoring! Report the game to Xenia "
|
||||||
"Report the game to Xenia developers",
|
"developers",
|
||||||
width);
|
width);
|
||||||
|
is_invalid_1d = true;
|
||||||
|
}
|
||||||
|
assert_false(fetch.tiled);
|
||||||
|
if (fetch.tiled) {
|
||||||
|
XELOGE(
|
||||||
|
"1D texture has tiling enabled in the fetch constant, but this "
|
||||||
|
"appears to be completely wrong - ignoring! Report the game to Xenia "
|
||||||
|
"developers");
|
||||||
|
is_invalid_1d = true;
|
||||||
|
}
|
||||||
|
assert_false(fetch.packed_mips);
|
||||||
|
if (fetch.packed_mips) {
|
||||||
|
XELOGE(
|
||||||
|
"1D texture has packed mips enabled in the fetch constant, but this "
|
||||||
|
"appears to be completely wrong - ignoring! Report the game to Xenia "
|
||||||
|
"developers");
|
||||||
|
is_invalid_1d = true;
|
||||||
|
}
|
||||||
|
if (is_invalid_1d) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
xenos::TextureFormat format = GetBaseFormat(fetch.format);
|
xenos::TextureFormat format = GetBaseFormat(fetch.format);
|
||||||
|
|
||||||
|
@ -2411,7 +2432,7 @@ bool TextureCache::LoadTextureData(Texture* texture) {
|
||||||
if (!level_packed) {
|
if (!level_packed) {
|
||||||
// Loading the packed tail for the base - load the whole tail to copy
|
// Loading the packed tail for the base - load the whole tail to copy
|
||||||
// regions out of it.
|
// regions out of it.
|
||||||
const texture_util::TextureGuestLevelLayout& guest_layout_base =
|
const texture_util::TextureGuestLayout::Level& guest_layout_base =
|
||||||
texture->guest_layout.base;
|
texture->guest_layout.base;
|
||||||
host_slice_layout_base.Footprint.Width =
|
host_slice_layout_base.Footprint.Width =
|
||||||
guest_layout_base.x_extent_blocks * block_width;
|
guest_layout_base.x_extent_blocks * block_width;
|
||||||
|
@ -2452,7 +2473,7 @@ bool TextureCache::LoadTextureData(Texture* texture) {
|
||||||
if (level == level_packed) {
|
if (level == level_packed) {
|
||||||
// Loading the packed tail for the mips - load the whole tail to copy
|
// Loading the packed tail for the mips - load the whole tail to copy
|
||||||
// regions out of it.
|
// regions out of it.
|
||||||
const texture_util::TextureGuestLevelLayout&
|
const texture_util::TextureGuestLayout::Level&
|
||||||
guest_layout_packed_mips = texture->guest_layout.mips[level];
|
guest_layout_packed_mips = texture->guest_layout.mips[level];
|
||||||
host_slice_layout_mip.Footprint.Width =
|
host_slice_layout_mip.Footprint.Width =
|
||||||
guest_layout_packed_mips.x_extent_blocks * block_width;
|
guest_layout_packed_mips.x_extent_blocks * block_width;
|
||||||
|
@ -2634,7 +2655,7 @@ bool TextureCache::LoadTextureData(Texture* texture) {
|
||||||
load_constants.guest_offset +=
|
load_constants.guest_offset +=
|
||||||
texture->guest_layout.mip_offsets_bytes[level];
|
texture->guest_layout.mip_offsets_bytes[level];
|
||||||
}
|
}
|
||||||
const texture_util::TextureGuestLevelLayout& level_guest_layout =
|
const texture_util::TextureGuestLayout::Level& level_guest_layout =
|
||||||
is_base ? texture->guest_layout.base
|
is_base ? texture->guest_layout.base
|
||||||
: texture->guest_layout.mips[level];
|
: texture->guest_layout.mips[level];
|
||||||
uint32_t level_guest_pitch = level_guest_layout.row_pitch_bytes;
|
uint32_t level_guest_pitch = level_guest_layout.row_pitch_bytes;
|
||||||
|
|
|
@ -960,10 +960,10 @@ bool GetResolveInfo(const RegisterFile& regs, const Memory& memory,
|
||||||
// Need a subregion size, not the full subresource size - thus not aligning
|
// Need a subregion size, not the full subresource size - thus not aligning
|
||||||
// to xenos::kTextureSubresourceAlignmentBytes.
|
// to xenos::kTextureSubresourceAlignmentBytes.
|
||||||
copy_dest_length =
|
copy_dest_length =
|
||||||
texture_util::GetGuestLevelLayout(
|
texture_util::GetGuestTextureLayout(
|
||||||
dest_dimension, copy_dest_pitch_aligned_div_32, uint32_t(x1 - x0),
|
dest_dimension, copy_dest_pitch_aligned_div_32, uint32_t(x1 - x0),
|
||||||
dest_height, dest_depth, true, dest_format, false, 0, false)
|
dest_height, dest_depth, true, dest_format, false, true, 0)
|
||||||
.level_data_extent_bytes;
|
.base.level_data_extent_bytes;
|
||||||
} else {
|
} else {
|
||||||
XELOGE("Tried to resolve to format {}, which is not a ColorFormat",
|
XELOGE("Tried to resolve to format {}, which is not a ColorFormat",
|
||||||
dest_format_info.name);
|
dest_format_info.name);
|
||||||
|
|
|
@ -204,35 +204,22 @@ bool GetPackedMipOffset(uint32_t width, uint32_t height, uint32_t depth,
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
TextureGuestLevelLayout GetGuestLevelLayout(
|
TextureGuestLayout GetGuestTextureLayout(
|
||||||
xenos::DataDimension dimension, uint32_t base_pitch_texels_div_32,
|
xenos::DataDimension dimension, uint32_t base_pitch_texels_div_32,
|
||||||
uint32_t width_texels, uint32_t height_texels, uint32_t depth_or_array_size,
|
uint32_t width_texels, uint32_t height_texels, uint32_t depth_or_array_size,
|
||||||
bool is_tiled, xenos::TextureFormat format, bool is_mip, uint32_t level,
|
bool is_tiled, xenos::TextureFormat format, bool has_packed_levels,
|
||||||
bool is_packed_level) {
|
bool has_base, uint32_t max_level) {
|
||||||
// If with packed mips the mips 1... happen to be packed in what's stored as
|
TextureGuestLayout layout;
|
||||||
// mip 0, this mip tail appears to be stored like mips (with power of two size
|
|
||||||
// rounding) rather than like the base level (with the pitch from the fetch
|
|
||||||
// constant), so we distinguish between them for mip == 0.
|
|
||||||
// Base is by definition the level 0.
|
|
||||||
assert_false(!is_mip && level);
|
|
||||||
// Level 0 for mips is the special case for a packed mip tail of very small
|
|
||||||
// textures, where the tail is stored like it's at the level 0.
|
|
||||||
assert_false(is_mip && !level && !is_packed_level);
|
|
||||||
|
|
||||||
TextureGuestLevelLayout layout;
|
if (dimension == xenos::DataDimension::k1D) {
|
||||||
|
assert_false(is_tiled);
|
||||||
// For safety, for instance, with empty resolve regions (extents calculation
|
// GetPackedMipOffset may result in packing along Y for `width > height`
|
||||||
// may overflow otherwise due to the assumption of at least one row, for
|
// textures.
|
||||||
// example, but an empty texture is empty anyway).
|
assert_false(has_packed_levels);
|
||||||
if (!width_texels ||
|
height_texels = 1;
|
||||||
(dimension != xenos::DataDimension::k1D && !height_texels) ||
|
|
||||||
((dimension == xenos::DataDimension::k2DOrStacked ||
|
|
||||||
dimension == xenos::DataDimension::k3D) &&
|
|
||||||
!depth_or_array_size)) {
|
|
||||||
std::memset(&layout, 0, sizeof(layout));
|
|
||||||
return layout;
|
|
||||||
}
|
}
|
||||||
|
uint32_t depth =
|
||||||
|
dimension == xenos::DataDimension::k3D ? depth_or_array_size : 1;
|
||||||
switch (dimension) {
|
switch (dimension) {
|
||||||
case xenos::DataDimension::k2DOrStacked:
|
case xenos::DataDimension::k2DOrStacked:
|
||||||
layout.array_size = depth_or_array_size;
|
layout.array_size = depth_or_array_size;
|
||||||
|
@ -244,158 +231,17 @@ TextureGuestLevelLayout GetGuestLevelLayout(
|
||||||
layout.array_size = 1;
|
layout.array_size = 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
const FormatInfo* format_info = FormatInfo::Get(format);
|
// For safety, for instance, with empty resolve regions (extents calculation
|
||||||
uint32_t bytes_per_block = format_info->bytes_per_block();
|
// may overflow otherwise due to the assumption of at least one row, for
|
||||||
|
// example, but an empty texture is empty anyway).
|
||||||
// Calculate the strides.
|
if (!width_texels || !height_texels || !depth || !layout.array_size) {
|
||||||
// Mips have row / depth slice strides calculated from a mip of a texture
|
std::memset(&layout, 0, sizeof(layout));
|
||||||
// whose base size is a power of two.
|
|
||||||
// The base mip has tightly packed depth slices, and takes the row pitch from
|
|
||||||
// the fetch constant.
|
|
||||||
// For stride calculation purposes, mip dimensions are always aligned to
|
|
||||||
// 32x32x4 blocks (or x1 for the missing dimensions), including for linear
|
|
||||||
// textures.
|
|
||||||
// Linear texture rows are 256-byte-aligned.
|
|
||||||
uint32_t row_pitch_texels_unaligned;
|
|
||||||
uint32_t z_slice_stride_texel_rows_unaligned;
|
|
||||||
if (is_mip) {
|
|
||||||
row_pitch_texels_unaligned =
|
|
||||||
std::max(xe::next_pow2(width_texels) >> level, uint32_t(1));
|
|
||||||
z_slice_stride_texel_rows_unaligned =
|
|
||||||
std::max(xe::next_pow2(height_texels) >> level, uint32_t(1));
|
|
||||||
} else {
|
|
||||||
row_pitch_texels_unaligned = base_pitch_texels_div_32 << 5;
|
|
||||||
z_slice_stride_texel_rows_unaligned = height_texels;
|
|
||||||
}
|
|
||||||
uint32_t row_pitch_blocks_tile_aligned = xe::align(
|
|
||||||
xe::align(row_pitch_texels_unaligned, format_info->block_width) /
|
|
||||||
format_info->block_width,
|
|
||||||
xenos::kTextureTileWidthHeight);
|
|
||||||
layout.row_pitch_bytes = row_pitch_blocks_tile_aligned * bytes_per_block;
|
|
||||||
// Assuming the provided pitch is already 256-byte-aligned for linear, but
|
|
||||||
// considering the guest-provided pitch more important (no information about
|
|
||||||
// how the GPU actually handles unaligned rows).
|
|
||||||
if (!is_tiled && is_mip) {
|
|
||||||
layout.row_pitch_bytes = xe::align(layout.row_pitch_bytes,
|
|
||||||
xenos::kTextureLinearRowAlignmentBytes);
|
|
||||||
}
|
|
||||||
layout.z_slice_stride_block_rows =
|
|
||||||
dimension != xenos::DataDimension::k1D
|
|
||||||
? xe::align(xe::align(z_slice_stride_texel_rows_unaligned,
|
|
||||||
format_info->block_height) /
|
|
||||||
format_info->block_height,
|
|
||||||
xenos::kTextureTileWidthHeight)
|
|
||||||
: 1;
|
|
||||||
layout.array_slice_stride_bytes =
|
|
||||||
layout.row_pitch_bytes * layout.z_slice_stride_block_rows;
|
|
||||||
uint32_t z_stride_bytes = layout.array_slice_stride_bytes;
|
|
||||||
if (dimension == xenos::DataDimension::k3D) {
|
|
||||||
layout.array_slice_stride_bytes *=
|
|
||||||
xe::align(depth_or_array_size, xenos::kTextureTiledDepthGranularity);
|
|
||||||
}
|
|
||||||
uint32_t array_slice_stride_bytes_non_4kb_aligned =
|
|
||||||
layout.array_slice_stride_bytes;
|
|
||||||
layout.array_slice_stride_bytes =
|
|
||||||
xe::align(array_slice_stride_bytes_non_4kb_aligned,
|
|
||||||
xenos::kTextureSubresourceAlignmentBytes);
|
|
||||||
|
|
||||||
// Estimate the memory amount actually referenced by the texture, which may be
|
|
||||||
// smaller (especially in the 2x2 linear k_8_8_8_8 case in Test Drive
|
|
||||||
// Unlimited, for which 4 KB are allocated, while the stride is 8 KB) or
|
|
||||||
// bigger than the stride. For tiled textures, this is the dimensions aligned
|
|
||||||
// to 32x32x4 blocks (or x1 for the missing dimensions).
|
|
||||||
// For linear, doing almost the same for the mip tail (which can be used for
|
|
||||||
// both the mips and, if the texture is very small, the base) because it
|
|
||||||
// stores multiple mips outside the first mip in it in the tile padding
|
|
||||||
// (though there's no need to align the size to the next power of two for this
|
|
||||||
// purpose for mips - packed mips are only used when min(width, height) <= 16,
|
|
||||||
// and packing is first done along the shorter axis - even if the longer axis
|
|
||||||
// is larger than 32, nothing will be packed beyond the extent of the longer
|
|
||||||
// axis). "Almost" because for linear textures, we're rounding the size to
|
|
||||||
// 32x32x4 texels, not blocks - first packed mips start from 16-texel, not
|
|
||||||
// 16-block, shortest dimension, and are placed in 32x- or x32-texel tiles,
|
|
||||||
// while 32 blocks for compressed textures are bigger in memory than 32
|
|
||||||
// texels.
|
|
||||||
layout.x_extent_blocks = xe::align(width_texels, format_info->block_width) /
|
|
||||||
format_info->block_width;
|
|
||||||
layout.y_extent_blocks =
|
|
||||||
dimension != xenos::DataDimension::k1D
|
|
||||||
? xe::align(height_texels, format_info->block_height) /
|
|
||||||
format_info->block_height
|
|
||||||
: 1;
|
|
||||||
layout.z_extent =
|
|
||||||
dimension == xenos::DataDimension::k3D ? depth_or_array_size : 1;
|
|
||||||
if (is_tiled) {
|
|
||||||
layout.x_extent_blocks =
|
|
||||||
xe::align(layout.x_extent_blocks, xenos::kTextureTileWidthHeight);
|
|
||||||
assert_true(dimension != xenos::DataDimension::k1D);
|
|
||||||
layout.y_extent_blocks =
|
|
||||||
xe::align(layout.y_extent_blocks, xenos::kTextureTileWidthHeight);
|
|
||||||
if (dimension == xenos::DataDimension::k3D) {
|
|
||||||
layout.z_extent =
|
|
||||||
xe::align(layout.z_extent, xenos::kTextureTiledDepthGranularity);
|
|
||||||
// 3D texture addressing is pretty complex, so it's hard to determine the
|
|
||||||
// memory extent of a subregion - just use pitch_tiles * height_tiles *
|
|
||||||
// depth_tiles * bytes_per_tile at least for now, until we find a case
|
|
||||||
// where it causes issues. width > pitch is a very weird edge case anyway,
|
|
||||||
// and is extremely unlikely.
|
|
||||||
assert_true(layout.x_extent_blocks <= row_pitch_blocks_tile_aligned);
|
|
||||||
layout.array_slice_data_extent_bytes =
|
|
||||||
array_slice_stride_bytes_non_4kb_aligned;
|
|
||||||
} else {
|
|
||||||
// 2D 32x32-block tiles are laid out linearly in the texture.
|
|
||||||
// Calculate the extent as ((all rows except for the last * pitch in
|
|
||||||
// tiles + last row length in tiles) * bytes per tile).
|
|
||||||
layout.array_slice_data_extent_bytes =
|
|
||||||
(layout.y_extent_blocks - xenos::kTextureTileWidthHeight) *
|
|
||||||
layout.row_pitch_bytes +
|
|
||||||
bytes_per_block * layout.x_extent_blocks *
|
|
||||||
xenos::kTextureTileWidthHeight;
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
if (is_packed_level) {
|
|
||||||
layout.x_extent_blocks =
|
|
||||||
xe::align(layout.x_extent_blocks,
|
|
||||||
xenos::kTextureTileWidthHeight / format_info->block_width);
|
|
||||||
if (dimension != xenos::DataDimension::k1D) {
|
|
||||||
layout.y_extent_blocks =
|
|
||||||
xe::align(layout.y_extent_blocks, xenos::kTextureTileWidthHeight /
|
|
||||||
format_info->block_height);
|
|
||||||
if (dimension == xenos::DataDimension::k3D) {
|
|
||||||
layout.z_extent =
|
|
||||||
xe::align(layout.z_extent, xenos::kTextureTiledDepthGranularity);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
layout.array_slice_data_extent_bytes =
|
|
||||||
z_stride_bytes * (layout.z_extent - 1) +
|
|
||||||
layout.row_pitch_bytes * (layout.y_extent_blocks - 1) +
|
|
||||||
bytes_per_block * layout.x_extent_blocks;
|
|
||||||
}
|
|
||||||
layout.level_data_extent_bytes =
|
|
||||||
layout.array_slice_stride_bytes * (layout.array_size - 1) +
|
|
||||||
layout.array_slice_data_extent_bytes;
|
|
||||||
|
|
||||||
return layout;
|
return layout;
|
||||||
}
|
}
|
||||||
|
|
||||||
TextureGuestLayout GetGuestTextureLayout(
|
|
||||||
xenos::DataDimension dimension, uint32_t base_pitch_texels_div_32,
|
|
||||||
uint32_t width_texels, uint32_t height_texels, uint32_t depth_or_array_size,
|
|
||||||
bool is_tiled, xenos::TextureFormat format, bool has_packed_levels,
|
|
||||||
bool has_base, uint32_t max_level) {
|
|
||||||
TextureGuestLayout layout;
|
|
||||||
|
|
||||||
if (dimension == xenos::DataDimension::k1D) {
|
|
||||||
height_texels = 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
// For safety, clamp the maximum level.
|
// For safety, clamp the maximum level.
|
||||||
uint32_t longest_axis = std::max(width_texels, height_texels);
|
uint32_t max_level_for_dimensions =
|
||||||
if (dimension == xenos::DataDimension::k3D) {
|
xe::log2_floor(std::max(std::max(width_texels, height_texels), depth));
|
||||||
longest_axis = std::max(longest_axis, depth_or_array_size);
|
|
||||||
}
|
|
||||||
uint32_t max_level_for_dimensions = xe::log2_floor(longest_axis);
|
|
||||||
assert_true(max_level <= max_level_for_dimensions);
|
assert_true(max_level <= max_level_for_dimensions);
|
||||||
max_level = std::min(max_level, max_level_for_dimensions);
|
max_level = std::min(max_level, max_level_for_dimensions);
|
||||||
layout.max_level = max_level;
|
layout.max_level = max_level;
|
||||||
|
@ -404,33 +250,210 @@ TextureGuestLayout GetGuestTextureLayout(
|
||||||
? GetPackedMipLevel(width_texels, height_texels)
|
? GetPackedMipLevel(width_texels, height_texels)
|
||||||
: UINT32_MAX;
|
: UINT32_MAX;
|
||||||
|
|
||||||
if (has_base) {
|
// Clear unused level layouts to zero strides/sizes.
|
||||||
layout.base =
|
if (!has_base) {
|
||||||
GetGuestLevelLayout(dimension, base_pitch_texels_div_32, width_texels,
|
|
||||||
height_texels, depth_or_array_size, is_tiled,
|
|
||||||
format, false, 0, layout.packed_level == 0);
|
|
||||||
} else {
|
|
||||||
std::memset(&layout.base, 0, sizeof(layout.base));
|
std::memset(&layout.base, 0, sizeof(layout.base));
|
||||||
}
|
}
|
||||||
|
if (layout.packed_level != 0) {
|
||||||
std::memset(layout.mips, 0, sizeof(layout.mips));
|
std::memset(&layout.mips[0], 0, sizeof(layout.mips[0]));
|
||||||
std::memset(layout.mip_offsets_bytes, 0, sizeof(layout.mip_offsets_bytes));
|
}
|
||||||
|
uint32_t max_stored_level = std::min(max_level, layout.packed_level);
|
||||||
|
{
|
||||||
|
uint32_t mips_end = max_stored_level + 1;
|
||||||
|
assert_true(mips_end <= xe::countof(layout.mips));
|
||||||
|
uint32_t mips_unused_count = uint32_t(xe::countof(layout.mips)) - mips_end;
|
||||||
|
if (mips_unused_count) {
|
||||||
|
std::memset(&layout.mips[mips_end], 0,
|
||||||
|
sizeof(layout.mips[0]) * mips_unused_count);
|
||||||
|
std::memset(&layout.mip_offsets_bytes[mips_end], 0,
|
||||||
|
sizeof(layout.mip_offsets_bytes[0]) * mips_unused_count);
|
||||||
|
}
|
||||||
|
}
|
||||||
layout.mips_total_extent_bytes = 0;
|
layout.mips_total_extent_bytes = 0;
|
||||||
if (max_level) {
|
|
||||||
|
const FormatInfo* format_info = FormatInfo::Get(format);
|
||||||
|
uint32_t bytes_per_block = format_info->bytes_per_block();
|
||||||
|
|
||||||
|
// The loop counter can mean two things depending on whether the packed mip
|
||||||
|
// tail is stored as mip 0, because in this case, it would be ambiguous since
|
||||||
|
// both the base and the mips would be on "level 0", but stored separately and
|
||||||
|
// possibly with a different layout.
|
||||||
|
uint32_t loop_level_last;
|
||||||
|
if (layout.packed_level == 0) {
|
||||||
|
// Packed mip tail is the level 0 - may need to load mip tails for the base,
|
||||||
|
// the mips, or both.
|
||||||
|
// Loop iteration 0 - base packed mip tail.
|
||||||
|
// Loop iteration 1 - mips packed mip tail.
|
||||||
|
loop_level_last = uint32_t(max_level != 0);
|
||||||
|
} else {
|
||||||
|
// Packed mip tail is not the level 0.
|
||||||
|
// Loop iteration is the actual level being loaded.
|
||||||
|
loop_level_last = max_stored_level;
|
||||||
|
}
|
||||||
uint32_t mip_offset_bytes = 0;
|
uint32_t mip_offset_bytes = 0;
|
||||||
uint32_t max_stored_mip = std::min(max_level, layout.packed_level);
|
for (uint32_t loop_level = has_base ? 0 : 1; loop_level <= loop_level_last;
|
||||||
for (uint32_t mip = std::min(uint32_t(1), layout.packed_level);
|
++loop_level) {
|
||||||
mip <= max_stored_mip; ++mip) {
|
bool is_base = loop_level == 0;
|
||||||
layout.mip_offsets_bytes[mip] = mip_offset_bytes;
|
uint32_t level = (layout.packed_level == 0) ? 0 : loop_level;
|
||||||
TextureGuestLevelLayout& mip_layout = layout.mips[mip];
|
TextureGuestLayout::Level& level_layout =
|
||||||
mip_layout =
|
is_base ? layout.base : layout.mips[level];
|
||||||
GetGuestLevelLayout(dimension, base_pitch_texels_div_32, width_texels,
|
|
||||||
height_texels, depth_or_array_size, is_tiled,
|
// Calculate the strides.
|
||||||
format, true, mip, mip == layout.packed_level);
|
// Mips have row / depth slice strides calculated from a mip of a texture
|
||||||
|
// whose base size is a power of two.
|
||||||
|
// The base mip has tightly packed depth slices, and takes the row pitch
|
||||||
|
// from the fetch constant.
|
||||||
|
// For stride calculation purposes, mip dimensions are always aligned to
|
||||||
|
// 32x32x4 blocks (or x1 for the missing dimensions), including for linear
|
||||||
|
// textures.
|
||||||
|
// Linear texture rows are 256-byte-aligned.
|
||||||
|
uint32_t row_pitch_texels_unaligned;
|
||||||
|
uint32_t z_slice_stride_texel_rows_unaligned;
|
||||||
|
if (is_base) {
|
||||||
|
row_pitch_texels_unaligned = base_pitch_texels_div_32 << 5;
|
||||||
|
z_slice_stride_texel_rows_unaligned = height_texels;
|
||||||
|
} else {
|
||||||
|
row_pitch_texels_unaligned =
|
||||||
|
std::max(xe::next_pow2(width_texels) >> level, uint32_t(1));
|
||||||
|
z_slice_stride_texel_rows_unaligned =
|
||||||
|
std::max(xe::next_pow2(height_texels) >> level, uint32_t(1));
|
||||||
|
}
|
||||||
|
uint32_t row_pitch_blocks_tile_aligned = xe::align(
|
||||||
|
xe::align(row_pitch_texels_unaligned, format_info->block_width) /
|
||||||
|
format_info->block_width,
|
||||||
|
xenos::kTextureTileWidthHeight);
|
||||||
|
level_layout.row_pitch_bytes =
|
||||||
|
row_pitch_blocks_tile_aligned * bytes_per_block;
|
||||||
|
// Assuming the provided pitch is already 256-byte-aligned for linear, but
|
||||||
|
// considering the guest-provided pitch more important (no information about
|
||||||
|
// how the GPU actually handles unaligned rows).
|
||||||
|
if (!is_tiled && !is_base) {
|
||||||
|
level_layout.row_pitch_bytes = xe::align(
|
||||||
|
level_layout.row_pitch_bytes, xenos::kTextureLinearRowAlignmentBytes);
|
||||||
|
}
|
||||||
|
level_layout.z_slice_stride_block_rows =
|
||||||
|
dimension != xenos::DataDimension::k1D
|
||||||
|
? xe::align(xe::align(z_slice_stride_texel_rows_unaligned,
|
||||||
|
format_info->block_height) /
|
||||||
|
format_info->block_height,
|
||||||
|
xenos::kTextureTileWidthHeight)
|
||||||
|
: 1;
|
||||||
|
level_layout.array_slice_stride_bytes =
|
||||||
|
level_layout.row_pitch_bytes * level_layout.z_slice_stride_block_rows;
|
||||||
|
uint32_t z_stride_bytes = level_layout.array_slice_stride_bytes;
|
||||||
|
if (dimension == xenos::DataDimension::k3D) {
|
||||||
|
level_layout.array_slice_stride_bytes *=
|
||||||
|
xe::align(depth_or_array_size, xenos::kTextureTiledDepthGranularity);
|
||||||
|
}
|
||||||
|
uint32_t array_slice_stride_bytes_non_4kb_aligned =
|
||||||
|
level_layout.array_slice_stride_bytes;
|
||||||
|
level_layout.array_slice_stride_bytes =
|
||||||
|
xe::align(array_slice_stride_bytes_non_4kb_aligned,
|
||||||
|
xenos::kTextureSubresourceAlignmentBytes);
|
||||||
|
|
||||||
|
// Estimate the memory amount actually referenced by the texture, which may
|
||||||
|
// be smaller (especially in the 1280x720 linear k_8_8_8_8 case in Ridge
|
||||||
|
// Racer Unbounded, for which memory exactly for 1280x720 is allocated, and
|
||||||
|
// aligning the height to 32 would cause access of an unallocated page) or
|
||||||
|
// bigger than the stride. For tiled textures, this is the dimensions
|
||||||
|
// aligned to 32x32x4 blocks (or x1 for the missing dimensions).
|
||||||
|
uint32_t level_width_blocks =
|
||||||
|
xe::align(std::max(width_texels >> level, uint32_t(1)),
|
||||||
|
format_info->block_width) /
|
||||||
|
format_info->block_width;
|
||||||
|
uint32_t level_height_blocks =
|
||||||
|
xe::align(std::max(height_texels >> level, uint32_t(1)),
|
||||||
|
format_info->block_height) /
|
||||||
|
format_info->block_height;
|
||||||
|
uint32_t level_depth = std::max(depth >> level, uint32_t(1));
|
||||||
|
if (is_tiled) {
|
||||||
|
level_layout.x_extent_blocks =
|
||||||
|
xe::align(level_width_blocks, xenos::kTextureTileWidthHeight);
|
||||||
|
level_layout.y_extent_blocks =
|
||||||
|
xe::align(level_height_blocks, xenos::kTextureTileWidthHeight);
|
||||||
|
if (dimension == xenos::DataDimension::k3D) {
|
||||||
|
level_layout.z_extent =
|
||||||
|
xe::align(level_depth, xenos::kTextureTiledDepthGranularity);
|
||||||
|
// 3D texture addressing is pretty complex, so it's hard to determine
|
||||||
|
// the memory extent of a subregion - just use `pitch_tiles *
|
||||||
|
// height_tiles * depth_tiles * bytes_per_tile` at least for now, until
|
||||||
|
// we find a case where it causes issues. `width > pitch` is a very
|
||||||
|
// weird edge case anyway, and is extremely unlikely.
|
||||||
|
assert_true(level_layout.x_extent_blocks <=
|
||||||
|
row_pitch_blocks_tile_aligned);
|
||||||
|
level_layout.array_slice_data_extent_bytes =
|
||||||
|
array_slice_stride_bytes_non_4kb_aligned;
|
||||||
|
} else {
|
||||||
|
level_layout.z_extent = 1;
|
||||||
|
// 2D 32x32-block tiles are laid out linearly in the texture.
|
||||||
|
// Calculate the extent as ((all rows except for the last * pitch in
|
||||||
|
// tiles + last row length in tiles) * bytes per tile).
|
||||||
|
level_layout.array_slice_data_extent_bytes =
|
||||||
|
(level_layout.y_extent_blocks - xenos::kTextureTileWidthHeight) *
|
||||||
|
level_layout.row_pitch_bytes +
|
||||||
|
bytes_per_block * level_layout.x_extent_blocks *
|
||||||
|
xenos::kTextureTileWidthHeight;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
if (level == layout.packed_level) {
|
||||||
|
// Calculate the portion of the mip tail actually used by the needed
|
||||||
|
// mips. The actually used region may be significantly smaller than the
|
||||||
|
// full 32x32-texel-aligned tail. A 2x2 texture (for example, in Test
|
||||||
|
// Drive Unlimited, there's a 2x2 k_8_8_8_8 linear texture with packed
|
||||||
|
// mips), for instance, would have its 2x2 base at (16, 0) and its 1x1
|
||||||
|
// mip at (8, 0) - and we need 2 or 1 rows in these cases, not 32.
|
||||||
|
level_layout.x_extent_blocks = 0;
|
||||||
|
level_layout.y_extent_blocks = 0;
|
||||||
|
level_layout.z_extent = 0;
|
||||||
|
uint32_t packed_sublevel_last = is_base ? 0 : max_level;
|
||||||
|
for (uint32_t packed_sublevel = layout.packed_level;
|
||||||
|
packed_sublevel <= packed_sublevel_last; ++packed_sublevel) {
|
||||||
|
uint32_t packed_sublevel_x_blocks;
|
||||||
|
uint32_t packed_sublevel_y_blocks;
|
||||||
|
uint32_t packed_sublevel_z;
|
||||||
|
GetPackedMipOffset(width_texels, height_texels, depth, format,
|
||||||
|
packed_sublevel, packed_sublevel_x_blocks,
|
||||||
|
packed_sublevel_y_blocks, packed_sublevel_z);
|
||||||
|
level_layout.x_extent_blocks = std::max(
|
||||||
|
level_layout.x_extent_blocks,
|
||||||
|
packed_sublevel_x_blocks +
|
||||||
|
xe::align(
|
||||||
|
std::max(width_texels >> packed_sublevel, uint32_t(1)),
|
||||||
|
format_info->block_width) /
|
||||||
|
format_info->block_width);
|
||||||
|
level_layout.y_extent_blocks = std::max(
|
||||||
|
level_layout.y_extent_blocks,
|
||||||
|
packed_sublevel_y_blocks +
|
||||||
|
xe::align(
|
||||||
|
std::max(height_texels >> packed_sublevel, uint32_t(1)),
|
||||||
|
format_info->block_height) /
|
||||||
|
format_info->block_height);
|
||||||
|
level_layout.z_extent =
|
||||||
|
std::max(level_layout.z_extent,
|
||||||
|
packed_sublevel_z +
|
||||||
|
std::max(depth >> packed_sublevel, uint32_t(1)));
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
level_layout.x_extent_blocks = level_width_blocks;
|
||||||
|
level_layout.y_extent_blocks = level_height_blocks;
|
||||||
|
level_layout.z_extent = level_depth;
|
||||||
|
}
|
||||||
|
level_layout.array_slice_data_extent_bytes =
|
||||||
|
z_stride_bytes * (level_layout.z_extent - 1) +
|
||||||
|
level_layout.row_pitch_bytes * (level_layout.y_extent_blocks - 1) +
|
||||||
|
bytes_per_block * level_layout.x_extent_blocks;
|
||||||
|
}
|
||||||
|
level_layout.level_data_extent_bytes =
|
||||||
|
level_layout.array_slice_stride_bytes * (layout.array_size - 1) +
|
||||||
|
level_layout.array_slice_data_extent_bytes;
|
||||||
|
|
||||||
|
if (!is_base) {
|
||||||
|
layout.mip_offsets_bytes[level] = mip_offset_bytes;
|
||||||
layout.mips_total_extent_bytes =
|
layout.mips_total_extent_bytes =
|
||||||
std::max(layout.mips_total_extent_bytes,
|
std::max(layout.mips_total_extent_bytes,
|
||||||
mip_offset_bytes + mip_layout.level_data_extent_bytes);
|
mip_offset_bytes + level_layout.level_data_extent_bytes);
|
||||||
mip_offset_bytes += mip_layout.next_level_distance_bytes();
|
mip_offset_bytes +=
|
||||||
|
level_layout.array_slice_stride_bytes * layout.array_size;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -97,18 +97,20 @@ bool GetPackedMipOffset(uint32_t width, uint32_t height, uint32_t depth,
|
||||||
// disassembly, which only checks the flag whether the data is packed passed to
|
// disassembly, which only checks the flag whether the data is packed passed to
|
||||||
// it, not the level, to see if it needs to calculate the offset in the mip
|
// it, not the level, to see if it needs to calculate the offset in the mip
|
||||||
// tail, and the offset calculation function doesn't have level == 0 checks in
|
// tail, and the offset calculation function doesn't have level == 0 checks in
|
||||||
// it, only early-out if level < packed tail level (which can be 0).
|
// it, only early-out if level < packed tail level (which can be 0). There are
|
||||||
|
// examples of textures with a packed base, for instance, in the intro level of
|
||||||
|
// Prey (8x8 linear DXT1 - pairs of orange lights at the bottom of gambling
|
||||||
|
// machines).
|
||||||
//
|
//
|
||||||
// Linear texture rows are aligned to 256 bytes, for both the base and the mips
|
// Linear texture rows are aligned to 256 bytes, for both the base and the mips
|
||||||
// (for the base, Direct3D 9 writes an already 256-byte-aligned pitch to the
|
// (for the base, Direct3D 9 writes an already 256-byte-aligned pitch to the
|
||||||
// fetch constant).
|
// fetch constant).
|
||||||
//
|
//
|
||||||
// However, all the 32x32x4 padding, being just padding, is not necessarily
|
// However, all the 32x32x4 padding, being just padding, is not necessarily
|
||||||
// being actually accessed, especially for linear textures. Test Drive Unlimited
|
// being actually accessed, especially for linear textures. Ridge Racer
|
||||||
// has a 2x2 k_8_8_8_8 linear texture, and allocates 4 KB for it (with accessing
|
// Unbounded has a 1280x720 k_8_8_8_8 linear texture, and allocates memory for
|
||||||
// the page beyond it triggering an access violation), while a 32x32 k_8_8_8_8
|
// exactly 1280x720, so aligning the height to 32 (to 1280x736) results in access
|
||||||
// linear texture, with rows aligned to 256 bytes (so stored like 64x32) would
|
// violations. So, while for stride calculations all the padding must be
|
||||||
// take 8 KB. So, while for stride calculations all the padding must be
|
|
||||||
// respected, for actual memory loads it's better to avoid trying to access it
|
// respected, for actual memory loads it's better to avoid trying to access it
|
||||||
// when possible:
|
// when possible:
|
||||||
// - If the pitch is bigger than the width, it's better to calculate the last
|
// - If the pitch is bigger than the width, it's better to calculate the last
|
||||||
|
@ -116,86 +118,69 @@ bool GetPackedMipOffset(uint32_t width, uint32_t height, uint32_t depth,
|
||||||
// in the other direction though - pitch < width is a weird situation, but
|
// in the other direction though - pitch < width is a weird situation, but
|
||||||
// probably legal, and may lead to reading data from beyond the calculated
|
// probably legal, and may lead to reading data from beyond the calculated
|
||||||
// subresource stride).
|
// subresource stride).
|
||||||
// - For linear textures (like that 2x2 example from Test Drive Unlimited), it's
|
// - For linear textures (like that 1280x720 example from Ridge Racer
|
||||||
// easy to calculate the exact memory extent that may be accessed knowing the
|
// Unbounded), it's easy to calculate the exact memory extent that may be
|
||||||
// dimensions (unlike for tiled textures with complex addressing within
|
// accessed knowing the dimensions (unlike for tiled textures with complex
|
||||||
// 32x32x4-block tiles), so there's no need to align them to 32x32x4 for
|
// addressing within 32x32x4-block tiles), so there's no need to align them to
|
||||||
// memory extent calculation - that's what appears to cause that crash in Test
|
// 32x32x4 for memory extent calculation.
|
||||||
// Drive Unlimited.
|
// - For the linear packed mip tail, the extent can be calculated as max of
|
||||||
// - The exception here is the packed mip tail for linear textures, as smaller
|
// (block offsets + block extents) of all levels stored in it.
|
||||||
// mips are stored in the 32x32x4-texel padding. However, the packed mip
|
|
||||||
// tail needs to be aligned only to 32x32 texels, not to 32x32 blocks - so
|
|
||||||
// for compressed textures, the padding may be smaller, only to 8x8 blocks.
|
|
||||||
//
|
//
|
||||||
// 1D textures are always linear.
|
// 1D textures are always linear and likely can't have packed mips (for `width >
|
||||||
|
// height` textures, mip offset calculation may result in packing along Y).
|
||||||
//
|
//
|
||||||
// Array slices are stored within levels (this is different than how Direct3D
|
// Array slices are stored within levels (this is different than how Direct3D
|
||||||
// 10+ builds subresource indices, for instance). Each array slice or level is
|
// 10+ builds subresource indices, for instance). Each array slice or level is
|
||||||
// aligned to 4 KB (but this doesn't apply to 3D texture slices within one
|
// aligned to 4 KB (but this doesn't apply to 3D texture slices within one
|
||||||
// level).
|
// level).
|
||||||
|
|
||||||
struct TextureGuestLevelLayout {
|
struct TextureGuestLayout {
|
||||||
// Number of array slices within the mip.
|
struct Level {
|
||||||
uint32_t array_size;
|
|
||||||
|
|
||||||
// Distance between each row of blocks in bytes, including all the needed
|
// Distance between each row of blocks in bytes, including all the needed
|
||||||
// power of two (for mips) and 256-byte (for linear textures) alignment.
|
// power of two (for mips) and 256-byte (for linear textures) alignment.
|
||||||
uint32_t row_pitch_bytes;
|
uint32_t row_pitch_bytes;
|
||||||
// Distance between Z slices in block rows, aligned to power of two for mips,
|
// Distance between Z slices in block rows, aligned to power of two for
|
||||||
// and to tile height.
|
// mips, and to tile height.
|
||||||
uint32_t z_slice_stride_block_rows;
|
uint32_t z_slice_stride_block_rows;
|
||||||
// Distance between each array slice within the level in bytes, aligned to
|
// Distance between each array slice within the level in bytes, aligned to
|
||||||
// kTextureSubresourceAlignmentBytes.
|
// kTextureSubresourceAlignmentBytes. The distance to the next level is this
|
||||||
|
// multiplied by the array slice count.
|
||||||
uint32_t array_slice_stride_bytes;
|
uint32_t array_slice_stride_bytes;
|
||||||
// Distance from the beginning of the level to the next stored one.
|
|
||||||
uint32_t next_level_distance_bytes() const {
|
|
||||||
return array_slice_stride_bytes * array_size;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Estimated amount of memory this level occupies, and variables involved in
|
// Estimated amount of memory this level occupies, and variables involved in
|
||||||
// its calculation. Not aligned to kTextureSubresourceAlignmentBytes. For
|
// its calculation. Not aligned to kTextureSubresourceAlignmentBytes. For
|
||||||
// tiled textures, this will be rounded to 32x32x4 blocks (or 32x32x1
|
// tiled textures, this will be rounded to 32x32x4 blocks (or 32x32x1
|
||||||
// depending on the dimension), and for the linear packed mip tail, this will
|
// depending on the dimension), but for the linear subresources, this may be
|
||||||
// be rounded to the same amount of texels, but for the linear subresources
|
// significantly (including fewer 4 KB pages) smaller than the aligned size
|
||||||
// that are not the packed mip tail, this may be significantly (including less
|
// (like for Ridge Racer Unbounded where aligning the height of a 1280x720
|
||||||
// 4 KB pages) smaller than the aligned size (like for Test Drive Unlimited
|
// linear texture results in access violations). For the linear mip tail,
|
||||||
// allocating 4 KB for a 2x2 linear k_8_8_8_8 texture that would be stored
|
// this includes all the mip levels stored in it. If the width is bigger
|
||||||
// like 64x32 and take 8 KB). If the width is bigger than the pitch, this will
|
// than the pitch, this will also be taken into account for the last row so
|
||||||
// also be taken into account for the last row so all memory actually used by
|
// all memory actually used by the texture will be loaded, and may be bigger
|
||||||
// the texture will be loaded, and may be bigger than the distance between
|
// than the distance between array slices or levels. The purpose of this
|
||||||
// array slices or levels. The purpose of this parameter is to make the memory
|
// parameter is to make the memory amount that needs to be resident as close
|
||||||
// amount that needs to be resident as close to the real amount as possible,
|
// to the real amount as possible, to make sure all the needed data will be
|
||||||
// to make sure all the needed data will be read, but also, if possible,
|
// read, but also, if possible, unneeded memory pages won't be accessed
|
||||||
// unneeded memory pages won't be accessed (since that may trigger an access
|
// (since that may trigger an access violation on the CPU).
|
||||||
// violation on the CPU).
|
|
||||||
uint32_t x_extent_blocks;
|
uint32_t x_extent_blocks;
|
||||||
uint32_t y_extent_blocks;
|
uint32_t y_extent_blocks;
|
||||||
uint32_t z_extent;
|
uint32_t z_extent;
|
||||||
uint32_t array_slice_data_extent_bytes;
|
uint32_t array_slice_data_extent_bytes;
|
||||||
|
// Including all array slices.
|
||||||
uint32_t level_data_extent_bytes;
|
uint32_t level_data_extent_bytes;
|
||||||
};
|
};
|
||||||
|
|
||||||
// is_base == true - level must be 0 (for the base_address part).
|
Level base;
|
||||||
// is_base == false - level may be 0 if is_packed_level is true (for the packed
|
|
||||||
// tail of mip_address part if the texture is very small so the tail is stored
|
|
||||||
// like mip 0).
|
|
||||||
TextureGuestLevelLayout GetGuestLevelLayout(
|
|
||||||
xenos::DataDimension dimension, uint32_t base_pitch_texels_div_32,
|
|
||||||
uint32_t width_texels, uint32_t height_texels, uint32_t depth_or_array_size,
|
|
||||||
bool is_tiled, xenos::TextureFormat format, bool is_mip, uint32_t level,
|
|
||||||
bool is_packed_level);
|
|
||||||
|
|
||||||
struct TextureGuestLayout {
|
|
||||||
TextureGuestLevelLayout base;
|
|
||||||
// If mip_max_level specified at calculation time is at least 1, the stored
|
// If mip_max_level specified at calculation time is at least 1, the stored
|
||||||
// mips are min(1, packed_mip_level) through min(mip_max_level,
|
// mips are min(1, packed_mip_level) through min(mip_max_level,
|
||||||
// packed_mip_level).
|
// packed_mip_level).
|
||||||
TextureGuestLevelLayout mips[xenos::kTexture2DCubeMaxWidthHeightLog2 + 1];
|
Level mips[xenos::kTexture2DCubeMaxWidthHeightLog2 + 1];
|
||||||
uint32_t mip_offsets_bytes[xenos::kTexture2DCubeMaxWidthHeightLog2 + 1];
|
uint32_t mip_offsets_bytes[xenos::kTexture2DCubeMaxWidthHeightLog2 + 1];
|
||||||
uint32_t mips_total_extent_bytes;
|
uint32_t mips_total_extent_bytes;
|
||||||
uint32_t max_level;
|
uint32_t max_level;
|
||||||
// UINT32_MAX if there's no packed mip tail.
|
// UINT32_MAX if there's no packed mip tail.
|
||||||
uint32_t packed_level;
|
uint32_t packed_level;
|
||||||
|
uint32_t array_size;
|
||||||
};
|
};
|
||||||
|
|
||||||
TextureGuestLayout GetGuestTextureLayout(
|
TextureGuestLayout GetGuestTextureLayout(
|
||||||
|
|
Loading…
Reference in New Issue