diff --git a/src/xenia/gpu/d3d12/texture_cache.cc b/src/xenia/gpu/d3d12/texture_cache.cc index 67c9fd2f7..898ee7f4d 100644 --- a/src/xenia/gpu/d3d12/texture_cache.cc +++ b/src/xenia/gpu/d3d12/texture_cache.cc @@ -2093,14 +2093,35 @@ void TextureCache::BindingInfoFromFetchConstant( // No texture data at all. return; } - // TODO(Triang3l): Support long 1D textures. - if (fetch.dimension == xenos::DataDimension::k1D && - width > xenos::kTexture2DCubeMaxWidthHeight) { - XELOGE( - "1D texture is too wide ({}) - ignoring! " - "Report the game to Xenia developers", - width); - return; + if (fetch.dimension == xenos::DataDimension::k1D) { + bool is_invalid_1d = false; + // TODO(Triang3l): Support long 1D textures. + if (width > xenos::kTexture2DCubeMaxWidthHeight) { + XELOGE( + "1D texture is too wide ({}) - ignoring! Report the game to Xenia " + "developers", + width); + is_invalid_1d = true; + } + assert_false(fetch.tiled); + if (fetch.tiled) { + XELOGE( + "1D texture has tiling enabled in the fetch constant, but this " + "appears to be completely wrong - ignoring! Report the game to Xenia " + "developers"); + is_invalid_1d = true; + } + assert_false(fetch.packed_mips); + if (fetch.packed_mips) { + XELOGE( + "1D texture has packed mips enabled in the fetch constant, but this " + "appears to be completely wrong - ignoring! Report the game to Xenia " + "developers"); + is_invalid_1d = true; + } + if (is_invalid_1d) { + return; + } } xenos::TextureFormat format = GetBaseFormat(fetch.format); @@ -2411,7 +2432,7 @@ bool TextureCache::LoadTextureData(Texture* texture) { if (!level_packed) { // Loading the packed tail for the base - load the whole tail to copy // regions out of it. - const texture_util::TextureGuestLevelLayout& guest_layout_base = + const texture_util::TextureGuestLayout::Level& guest_layout_base = texture->guest_layout.base; host_slice_layout_base.Footprint.Width = guest_layout_base.x_extent_blocks * block_width; @@ -2452,7 +2473,7 @@ bool TextureCache::LoadTextureData(Texture* texture) { if (level == level_packed) { // Loading the packed tail for the mips - load the whole tail to copy // regions out of it. - const texture_util::TextureGuestLevelLayout& + const texture_util::TextureGuestLayout::Level& guest_layout_packed_mips = texture->guest_layout.mips[level]; host_slice_layout_mip.Footprint.Width = guest_layout_packed_mips.x_extent_blocks * block_width; @@ -2634,7 +2655,7 @@ bool TextureCache::LoadTextureData(Texture* texture) { load_constants.guest_offset += texture->guest_layout.mip_offsets_bytes[level]; } - const texture_util::TextureGuestLevelLayout& level_guest_layout = + const texture_util::TextureGuestLayout::Level& level_guest_layout = is_base ? texture->guest_layout.base : texture->guest_layout.mips[level]; uint32_t level_guest_pitch = level_guest_layout.row_pitch_bytes; diff --git a/src/xenia/gpu/draw_util.cc b/src/xenia/gpu/draw_util.cc index be6d0826b..8612038a0 100644 --- a/src/xenia/gpu/draw_util.cc +++ b/src/xenia/gpu/draw_util.cc @@ -960,10 +960,10 @@ bool GetResolveInfo(const RegisterFile& regs, const Memory& memory, // Need a subregion size, not the full subresource size - thus not aligning // to xenos::kTextureSubresourceAlignmentBytes. copy_dest_length = - texture_util::GetGuestLevelLayout( + texture_util::GetGuestTextureLayout( dest_dimension, copy_dest_pitch_aligned_div_32, uint32_t(x1 - x0), - dest_height, dest_depth, true, dest_format, false, 0, false) - .level_data_extent_bytes; + dest_height, dest_depth, true, dest_format, false, true, 0) + .base.level_data_extent_bytes; } else { XELOGE("Tried to resolve to format {}, which is not a ColorFormat", dest_format_info.name); diff --git a/src/xenia/gpu/texture_util.cc b/src/xenia/gpu/texture_util.cc index ffe74b8e2..bd7a78c6c 100644 --- a/src/xenia/gpu/texture_util.cc +++ b/src/xenia/gpu/texture_util.cc @@ -204,35 +204,22 @@ bool GetPackedMipOffset(uint32_t width, uint32_t height, uint32_t depth, return true; } -TextureGuestLevelLayout GetGuestLevelLayout( +TextureGuestLayout GetGuestTextureLayout( xenos::DataDimension dimension, uint32_t base_pitch_texels_div_32, uint32_t width_texels, uint32_t height_texels, uint32_t depth_or_array_size, - bool is_tiled, xenos::TextureFormat format, bool is_mip, uint32_t level, - bool is_packed_level) { - // If with packed mips the mips 1... happen to be packed in what's stored as - // mip 0, this mip tail appears to be stored like mips (with power of two size - // rounding) rather than like the base level (with the pitch from the fetch - // constant), so we distinguish between them for mip == 0. - // Base is by definition the level 0. - assert_false(!is_mip && level); - // Level 0 for mips is the special case for a packed mip tail of very small - // textures, where the tail is stored like it's at the level 0. - assert_false(is_mip && !level && !is_packed_level); + bool is_tiled, xenos::TextureFormat format, bool has_packed_levels, + bool has_base, uint32_t max_level) { + TextureGuestLayout layout; - TextureGuestLevelLayout layout; - - // For safety, for instance, with empty resolve regions (extents calculation - // may overflow otherwise due to the assumption of at least one row, for - // example, but an empty texture is empty anyway). - if (!width_texels || - (dimension != xenos::DataDimension::k1D && !height_texels) || - ((dimension == xenos::DataDimension::k2DOrStacked || - dimension == xenos::DataDimension::k3D) && - !depth_or_array_size)) { - std::memset(&layout, 0, sizeof(layout)); - return layout; + if (dimension == xenos::DataDimension::k1D) { + assert_false(is_tiled); + // GetPackedMipOffset may result in packing along Y for `width > height` + // textures. + assert_false(has_packed_levels); + height_texels = 1; } - + uint32_t depth = + dimension == xenos::DataDimension::k3D ? depth_or_array_size : 1; switch (dimension) { case xenos::DataDimension::k2DOrStacked: layout.array_size = depth_or_array_size; @@ -244,158 +231,17 @@ TextureGuestLevelLayout GetGuestLevelLayout( layout.array_size = 1; } - const FormatInfo* format_info = FormatInfo::Get(format); - uint32_t bytes_per_block = format_info->bytes_per_block(); - - // Calculate the strides. - // Mips have row / depth slice strides calculated from a mip of a texture - // whose base size is a power of two. - // The base mip has tightly packed depth slices, and takes the row pitch from - // the fetch constant. - // For stride calculation purposes, mip dimensions are always aligned to - // 32x32x4 blocks (or x1 for the missing dimensions), including for linear - // textures. - // Linear texture rows are 256-byte-aligned. - uint32_t row_pitch_texels_unaligned; - uint32_t z_slice_stride_texel_rows_unaligned; - if (is_mip) { - row_pitch_texels_unaligned = - std::max(xe::next_pow2(width_texels) >> level, uint32_t(1)); - z_slice_stride_texel_rows_unaligned = - std::max(xe::next_pow2(height_texels) >> level, uint32_t(1)); - } else { - row_pitch_texels_unaligned = base_pitch_texels_div_32 << 5; - z_slice_stride_texel_rows_unaligned = height_texels; - } - uint32_t row_pitch_blocks_tile_aligned = xe::align( - xe::align(row_pitch_texels_unaligned, format_info->block_width) / - format_info->block_width, - xenos::kTextureTileWidthHeight); - layout.row_pitch_bytes = row_pitch_blocks_tile_aligned * bytes_per_block; - // Assuming the provided pitch is already 256-byte-aligned for linear, but - // considering the guest-provided pitch more important (no information about - // how the GPU actually handles unaligned rows). - if (!is_tiled && is_mip) { - layout.row_pitch_bytes = xe::align(layout.row_pitch_bytes, - xenos::kTextureLinearRowAlignmentBytes); - } - layout.z_slice_stride_block_rows = - dimension != xenos::DataDimension::k1D - ? xe::align(xe::align(z_slice_stride_texel_rows_unaligned, - format_info->block_height) / - format_info->block_height, - xenos::kTextureTileWidthHeight) - : 1; - layout.array_slice_stride_bytes = - layout.row_pitch_bytes * layout.z_slice_stride_block_rows; - uint32_t z_stride_bytes = layout.array_slice_stride_bytes; - if (dimension == xenos::DataDimension::k3D) { - layout.array_slice_stride_bytes *= - xe::align(depth_or_array_size, xenos::kTextureTiledDepthGranularity); - } - uint32_t array_slice_stride_bytes_non_4kb_aligned = - layout.array_slice_stride_bytes; - layout.array_slice_stride_bytes = - xe::align(array_slice_stride_bytes_non_4kb_aligned, - xenos::kTextureSubresourceAlignmentBytes); - - // Estimate the memory amount actually referenced by the texture, which may be - // smaller (especially in the 2x2 linear k_8_8_8_8 case in Test Drive - // Unlimited, for which 4 KB are allocated, while the stride is 8 KB) or - // bigger than the stride. For tiled textures, this is the dimensions aligned - // to 32x32x4 blocks (or x1 for the missing dimensions). - // For linear, doing almost the same for the mip tail (which can be used for - // both the mips and, if the texture is very small, the base) because it - // stores multiple mips outside the first mip in it in the tile padding - // (though there's no need to align the size to the next power of two for this - // purpose for mips - packed mips are only used when min(width, height) <= 16, - // and packing is first done along the shorter axis - even if the longer axis - // is larger than 32, nothing will be packed beyond the extent of the longer - // axis). "Almost" because for linear textures, we're rounding the size to - // 32x32x4 texels, not blocks - first packed mips start from 16-texel, not - // 16-block, shortest dimension, and are placed in 32x- or x32-texel tiles, - // while 32 blocks for compressed textures are bigger in memory than 32 - // texels. - layout.x_extent_blocks = xe::align(width_texels, format_info->block_width) / - format_info->block_width; - layout.y_extent_blocks = - dimension != xenos::DataDimension::k1D - ? xe::align(height_texels, format_info->block_height) / - format_info->block_height - : 1; - layout.z_extent = - dimension == xenos::DataDimension::k3D ? depth_or_array_size : 1; - if (is_tiled) { - layout.x_extent_blocks = - xe::align(layout.x_extent_blocks, xenos::kTextureTileWidthHeight); - assert_true(dimension != xenos::DataDimension::k1D); - layout.y_extent_blocks = - xe::align(layout.y_extent_blocks, xenos::kTextureTileWidthHeight); - if (dimension == xenos::DataDimension::k3D) { - layout.z_extent = - xe::align(layout.z_extent, xenos::kTextureTiledDepthGranularity); - // 3D texture addressing is pretty complex, so it's hard to determine the - // memory extent of a subregion - just use pitch_tiles * height_tiles * - // depth_tiles * bytes_per_tile at least for now, until we find a case - // where it causes issues. width > pitch is a very weird edge case anyway, - // and is extremely unlikely. - assert_true(layout.x_extent_blocks <= row_pitch_blocks_tile_aligned); - layout.array_slice_data_extent_bytes = - array_slice_stride_bytes_non_4kb_aligned; - } else { - // 2D 32x32-block tiles are laid out linearly in the texture. - // Calculate the extent as ((all rows except for the last * pitch in - // tiles + last row length in tiles) * bytes per tile). - layout.array_slice_data_extent_bytes = - (layout.y_extent_blocks - xenos::kTextureTileWidthHeight) * - layout.row_pitch_bytes + - bytes_per_block * layout.x_extent_blocks * - xenos::kTextureTileWidthHeight; - } - } else { - if (is_packed_level) { - layout.x_extent_blocks = - xe::align(layout.x_extent_blocks, - xenos::kTextureTileWidthHeight / format_info->block_width); - if (dimension != xenos::DataDimension::k1D) { - layout.y_extent_blocks = - xe::align(layout.y_extent_blocks, xenos::kTextureTileWidthHeight / - format_info->block_height); - if (dimension == xenos::DataDimension::k3D) { - layout.z_extent = - xe::align(layout.z_extent, xenos::kTextureTiledDepthGranularity); - } - } - } - layout.array_slice_data_extent_bytes = - z_stride_bytes * (layout.z_extent - 1) + - layout.row_pitch_bytes * (layout.y_extent_blocks - 1) + - bytes_per_block * layout.x_extent_blocks; - } - layout.level_data_extent_bytes = - layout.array_slice_stride_bytes * (layout.array_size - 1) + - layout.array_slice_data_extent_bytes; - - return layout; -} - -TextureGuestLayout GetGuestTextureLayout( - xenos::DataDimension dimension, uint32_t base_pitch_texels_div_32, - uint32_t width_texels, uint32_t height_texels, uint32_t depth_or_array_size, - bool is_tiled, xenos::TextureFormat format, bool has_packed_levels, - bool has_base, uint32_t max_level) { - TextureGuestLayout layout; - - if (dimension == xenos::DataDimension::k1D) { - height_texels = 1; + // For safety, for instance, with empty resolve regions (extents calculation + // may overflow otherwise due to the assumption of at least one row, for + // example, but an empty texture is empty anyway). + if (!width_texels || !height_texels || !depth || !layout.array_size) { + std::memset(&layout, 0, sizeof(layout)); + return layout; } // For safety, clamp the maximum level. - uint32_t longest_axis = std::max(width_texels, height_texels); - if (dimension == xenos::DataDimension::k3D) { - longest_axis = std::max(longest_axis, depth_or_array_size); - } - uint32_t max_level_for_dimensions = xe::log2_floor(longest_axis); + uint32_t max_level_for_dimensions = + xe::log2_floor(std::max(std::max(width_texels, height_texels), depth)); assert_true(max_level <= max_level_for_dimensions); max_level = std::min(max_level, max_level_for_dimensions); layout.max_level = max_level; @@ -404,33 +250,210 @@ TextureGuestLayout GetGuestTextureLayout( ? GetPackedMipLevel(width_texels, height_texels) : UINT32_MAX; - if (has_base) { - layout.base = - GetGuestLevelLayout(dimension, base_pitch_texels_div_32, width_texels, - height_texels, depth_or_array_size, is_tiled, - format, false, 0, layout.packed_level == 0); - } else { + // Clear unused level layouts to zero strides/sizes. + if (!has_base) { std::memset(&layout.base, 0, sizeof(layout.base)); } - - std::memset(layout.mips, 0, sizeof(layout.mips)); - std::memset(layout.mip_offsets_bytes, 0, sizeof(layout.mip_offsets_bytes)); + if (layout.packed_level != 0) { + std::memset(&layout.mips[0], 0, sizeof(layout.mips[0])); + } + uint32_t max_stored_level = std::min(max_level, layout.packed_level); + { + uint32_t mips_end = max_stored_level + 1; + assert_true(mips_end <= xe::countof(layout.mips)); + uint32_t mips_unused_count = uint32_t(xe::countof(layout.mips)) - mips_end; + if (mips_unused_count) { + std::memset(&layout.mips[mips_end], 0, + sizeof(layout.mips[0]) * mips_unused_count); + std::memset(&layout.mip_offsets_bytes[mips_end], 0, + sizeof(layout.mip_offsets_bytes[0]) * mips_unused_count); + } + } layout.mips_total_extent_bytes = 0; - if (max_level) { - uint32_t mip_offset_bytes = 0; - uint32_t max_stored_mip = std::min(max_level, layout.packed_level); - for (uint32_t mip = std::min(uint32_t(1), layout.packed_level); - mip <= max_stored_mip; ++mip) { - layout.mip_offsets_bytes[mip] = mip_offset_bytes; - TextureGuestLevelLayout& mip_layout = layout.mips[mip]; - mip_layout = - GetGuestLevelLayout(dimension, base_pitch_texels_div_32, width_texels, - height_texels, depth_or_array_size, is_tiled, - format, true, mip, mip == layout.packed_level); + + const FormatInfo* format_info = FormatInfo::Get(format); + uint32_t bytes_per_block = format_info->bytes_per_block(); + + // The loop counter can mean two things depending on whether the packed mip + // tail is stored as mip 0, because in this case, it would be ambiguous since + // both the base and the mips would be on "level 0", but stored separately and + // possibly with a different layout. + uint32_t loop_level_last; + if (layout.packed_level == 0) { + // Packed mip tail is the level 0 - may need to load mip tails for the base, + // the mips, or both. + // Loop iteration 0 - base packed mip tail. + // Loop iteration 1 - mips packed mip tail. + loop_level_last = uint32_t(max_level != 0); + } else { + // Packed mip tail is not the level 0. + // Loop iteration is the actual level being loaded. + loop_level_last = max_stored_level; + } + uint32_t mip_offset_bytes = 0; + for (uint32_t loop_level = has_base ? 0 : 1; loop_level <= loop_level_last; + ++loop_level) { + bool is_base = loop_level == 0; + uint32_t level = (layout.packed_level == 0) ? 0 : loop_level; + TextureGuestLayout::Level& level_layout = + is_base ? layout.base : layout.mips[level]; + + // Calculate the strides. + // Mips have row / depth slice strides calculated from a mip of a texture + // whose base size is a power of two. + // The base mip has tightly packed depth slices, and takes the row pitch + // from the fetch constant. + // For stride calculation purposes, mip dimensions are always aligned to + // 32x32x4 blocks (or x1 for the missing dimensions), including for linear + // textures. + // Linear texture rows are 256-byte-aligned. + uint32_t row_pitch_texels_unaligned; + uint32_t z_slice_stride_texel_rows_unaligned; + if (is_base) { + row_pitch_texels_unaligned = base_pitch_texels_div_32 << 5; + z_slice_stride_texel_rows_unaligned = height_texels; + } else { + row_pitch_texels_unaligned = + std::max(xe::next_pow2(width_texels) >> level, uint32_t(1)); + z_slice_stride_texel_rows_unaligned = + std::max(xe::next_pow2(height_texels) >> level, uint32_t(1)); + } + uint32_t row_pitch_blocks_tile_aligned = xe::align( + xe::align(row_pitch_texels_unaligned, format_info->block_width) / + format_info->block_width, + xenos::kTextureTileWidthHeight); + level_layout.row_pitch_bytes = + row_pitch_blocks_tile_aligned * bytes_per_block; + // Assuming the provided pitch is already 256-byte-aligned for linear, but + // considering the guest-provided pitch more important (no information about + // how the GPU actually handles unaligned rows). + if (!is_tiled && !is_base) { + level_layout.row_pitch_bytes = xe::align( + level_layout.row_pitch_bytes, xenos::kTextureLinearRowAlignmentBytes); + } + level_layout.z_slice_stride_block_rows = + dimension != xenos::DataDimension::k1D + ? xe::align(xe::align(z_slice_stride_texel_rows_unaligned, + format_info->block_height) / + format_info->block_height, + xenos::kTextureTileWidthHeight) + : 1; + level_layout.array_slice_stride_bytes = + level_layout.row_pitch_bytes * level_layout.z_slice_stride_block_rows; + uint32_t z_stride_bytes = level_layout.array_slice_stride_bytes; + if (dimension == xenos::DataDimension::k3D) { + level_layout.array_slice_stride_bytes *= + xe::align(depth_or_array_size, xenos::kTextureTiledDepthGranularity); + } + uint32_t array_slice_stride_bytes_non_4kb_aligned = + level_layout.array_slice_stride_bytes; + level_layout.array_slice_stride_bytes = + xe::align(array_slice_stride_bytes_non_4kb_aligned, + xenos::kTextureSubresourceAlignmentBytes); + + // Estimate the memory amount actually referenced by the texture, which may + // be smaller (especially in the 1280x720 linear k_8_8_8_8 case in Ridge + // Racer Unbounded, for which memory exactly for 1280x720 is allocated, and + // aligning the height to 32 would cause access of an unallocated page) or + // bigger than the stride. For tiled textures, this is the dimensions + // aligned to 32x32x4 blocks (or x1 for the missing dimensions). + uint32_t level_width_blocks = + xe::align(std::max(width_texels >> level, uint32_t(1)), + format_info->block_width) / + format_info->block_width; + uint32_t level_height_blocks = + xe::align(std::max(height_texels >> level, uint32_t(1)), + format_info->block_height) / + format_info->block_height; + uint32_t level_depth = std::max(depth >> level, uint32_t(1)); + if (is_tiled) { + level_layout.x_extent_blocks = + xe::align(level_width_blocks, xenos::kTextureTileWidthHeight); + level_layout.y_extent_blocks = + xe::align(level_height_blocks, xenos::kTextureTileWidthHeight); + if (dimension == xenos::DataDimension::k3D) { + level_layout.z_extent = + xe::align(level_depth, xenos::kTextureTiledDepthGranularity); + // 3D texture addressing is pretty complex, so it's hard to determine + // the memory extent of a subregion - just use `pitch_tiles * + // height_tiles * depth_tiles * bytes_per_tile` at least for now, until + // we find a case where it causes issues. `width > pitch` is a very + // weird edge case anyway, and is extremely unlikely. + assert_true(level_layout.x_extent_blocks <= + row_pitch_blocks_tile_aligned); + level_layout.array_slice_data_extent_bytes = + array_slice_stride_bytes_non_4kb_aligned; + } else { + level_layout.z_extent = 1; + // 2D 32x32-block tiles are laid out linearly in the texture. + // Calculate the extent as ((all rows except for the last * pitch in + // tiles + last row length in tiles) * bytes per tile). + level_layout.array_slice_data_extent_bytes = + (level_layout.y_extent_blocks - xenos::kTextureTileWidthHeight) * + level_layout.row_pitch_bytes + + bytes_per_block * level_layout.x_extent_blocks * + xenos::kTextureTileWidthHeight; + } + } else { + if (level == layout.packed_level) { + // Calculate the portion of the mip tail actually used by the needed + // mips. The actually used region may be significantly smaller than the + // full 32x32-texel-aligned tail. A 2x2 texture (for example, in Test + // Drive Unlimited, there's a 2x2 k_8_8_8_8 linear texture with packed + // mips), for instance, would have its 2x2 base at (16, 0) and its 1x1 + // mip at (8, 0) - and we need 2 or 1 rows in these cases, not 32. + level_layout.x_extent_blocks = 0; + level_layout.y_extent_blocks = 0; + level_layout.z_extent = 0; + uint32_t packed_sublevel_last = is_base ? 0 : max_level; + for (uint32_t packed_sublevel = layout.packed_level; + packed_sublevel <= packed_sublevel_last; ++packed_sublevel) { + uint32_t packed_sublevel_x_blocks; + uint32_t packed_sublevel_y_blocks; + uint32_t packed_sublevel_z; + GetPackedMipOffset(width_texels, height_texels, depth, format, + packed_sublevel, packed_sublevel_x_blocks, + packed_sublevel_y_blocks, packed_sublevel_z); + level_layout.x_extent_blocks = std::max( + level_layout.x_extent_blocks, + packed_sublevel_x_blocks + + xe::align( + std::max(width_texels >> packed_sublevel, uint32_t(1)), + format_info->block_width) / + format_info->block_width); + level_layout.y_extent_blocks = std::max( + level_layout.y_extent_blocks, + packed_sublevel_y_blocks + + xe::align( + std::max(height_texels >> packed_sublevel, uint32_t(1)), + format_info->block_height) / + format_info->block_height); + level_layout.z_extent = + std::max(level_layout.z_extent, + packed_sublevel_z + + std::max(depth >> packed_sublevel, uint32_t(1))); + } + } else { + level_layout.x_extent_blocks = level_width_blocks; + level_layout.y_extent_blocks = level_height_blocks; + level_layout.z_extent = level_depth; + } + level_layout.array_slice_data_extent_bytes = + z_stride_bytes * (level_layout.z_extent - 1) + + level_layout.row_pitch_bytes * (level_layout.y_extent_blocks - 1) + + bytes_per_block * level_layout.x_extent_blocks; + } + level_layout.level_data_extent_bytes = + level_layout.array_slice_stride_bytes * (layout.array_size - 1) + + level_layout.array_slice_data_extent_bytes; + + if (!is_base) { + layout.mip_offsets_bytes[level] = mip_offset_bytes; layout.mips_total_extent_bytes = std::max(layout.mips_total_extent_bytes, - mip_offset_bytes + mip_layout.level_data_extent_bytes); - mip_offset_bytes += mip_layout.next_level_distance_bytes(); + mip_offset_bytes + level_layout.level_data_extent_bytes); + mip_offset_bytes += + level_layout.array_slice_stride_bytes * layout.array_size; } } diff --git a/src/xenia/gpu/texture_util.h b/src/xenia/gpu/texture_util.h index 0c8a28644..3d9bc0e99 100644 --- a/src/xenia/gpu/texture_util.h +++ b/src/xenia/gpu/texture_util.h @@ -97,18 +97,20 @@ bool GetPackedMipOffset(uint32_t width, uint32_t height, uint32_t depth, // disassembly, which only checks the flag whether the data is packed passed to // it, not the level, to see if it needs to calculate the offset in the mip // tail, and the offset calculation function doesn't have level == 0 checks in -// it, only early-out if level < packed tail level (which can be 0). +// it, only early-out if level < packed tail level (which can be 0). There are +// examples of textures with packed base, for example, in the intro level of +// Prey (8x8 linear DXT1 - pairs of orange lights in the bottom of gambling +// machines). // // Linear texture rows are aligned to 256 bytes, for both the base and the mips // (for the base, Direct3D 9 writes an already 256-byte-aligned pitch to the // fetch constant). // // However, all the 32x32x4 padding, being just padding, is not necessarily -// being actually accessed, especially for linear textures. Test Drive Unlimited -// has a 2x2 k_8_8_8_8 linear texture, and allocates 4 KB for it (with accessing -// the page beyond it triggering an access violation), while a 32x32 k_8_8_8_8 -// linear texture, with rows aligned to 256 bytes (so stored like 64x32) would -// take 8 KB. So, while for stride calculations all the padding must be +// being actually accessed, especially for linear textures. Ridge Racer +// Unbounded has a 1280x720 k_8_8_8_8 linear texture, and allocates memory for +// exactly 1280x720, so aligning the height to 32 to 1280x736 results in access +// violations. So, while for stride calculations all the padding must be // respected, for actual memory loads it's better to avoid trying to access it // when possible: // - If the pitch is bigger than the width, it's better to calculate the last @@ -116,86 +118,69 @@ bool GetPackedMipOffset(uint32_t width, uint32_t height, uint32_t depth, // in the other direction though - pitch < width is a weird situation, but // probably legal, and may lead to reading data from beyond the calculated // subresource stride). -// - For linear textures (like that 2x2 example from Test Drive Unlimited), it's -// easy to calculate the exact memory extent that may be accessed knowing the -// dimensions (unlike for tiled textures with complex addressing within -// 32x32x4-block tiles), so there's no need to align them to 32x32x4 for -// memory extent calculation - that's what appears to cause that crash in Test -// Drive Unlimited. -// - The exception here is the packed mip tail for linear textures, as smaller -// mips are stored in the 32x32x4-texel padding. However, the packed mip -// tail needs to be aligned only to 32x32 texels, not to 32x32 blocks - so -// for compressed textures, the padding may be smaller, only to 8x8 blocks. +// - For linear textures (like that 1280x720 example from Ridge Racer +// Unbounded), it's easy to calculate the exact memory extent that may be +// accessed knowing the dimensions (unlike for tiled textures with complex +// addressing within 32x32x4-block tiles), so there's no need to align them to +// 32x32x4 for memory extent calculation. +// - For the linear packed mip tail, the extent can be calculated as max of +// (block offsets + block extents) of all levels stored in it. // -// 1D textures are always linear. +// 1D textures are always linear and likely can't have packed mips (for `width > +// height` textures, mip offset calculation may result in packing along Y). // // Array slices are stored within levels (this is different than how Direct3D // 10+ builds subresource indices, for instance). Each array slice or level is // aligned to 4 KB (but this doesn't apply to 3D texture slices within one // level). -struct TextureGuestLevelLayout { - // Number of array slices within the mip. - uint32_t array_size; - - // Distance between each row of blocks in bytes, including all the needed - // power of two (for mips) and 256-byte (for linear textures) alignment. - uint32_t row_pitch_bytes; - // Distance between Z slices in block rows, aligned to power of two for mips, - // and to tile height. - uint32_t z_slice_stride_block_rows; - // Distance between each array slice within the level in bytes, aligned to - // kTextureSubresourceAlignmentBytes. - uint32_t array_slice_stride_bytes; - // Distance from the beginning of the level to the next stored one. - uint32_t next_level_distance_bytes() const { - return array_slice_stride_bytes * array_size; - } - - // Estimated amount of memory this level occupies, and variables involved in - // its calculation. Not aligned to kTextureSubresourceAlignmentBytes. For - // tiled textures, this will be rounded to 32x32x4 blocks (or 32x32x1 - // depending on the dimension), and for the linear packed mip tail, this will - // be rounded to the same amount of texels, but for the linear subresources - // that are not the packed mip tail, this may be significantly (including less - // 4 KB pages) smaller than the aligned size (like for Test Drive Unlimited - // allocating 4 KB for a 2x2 linear k_8_8_8_8 texture that would be stored - // like 64x32 and take 8 KB). If the width is bigger than the pitch, this will - // also be taken into account for the last row so all memory actually used by - // the texture will be loaded, and may be bigger than the distance between - // array slices or levels. The purpose of this parameter is to make the memory - // amount that needs to be resident as close to the real amount as possible, - // to make sure all the needed data will be read, but also, if possible, - // unneeded memory pages won't be accessed (since that may trigger an access - // violation on the CPU). - uint32_t x_extent_blocks; - uint32_t y_extent_blocks; - uint32_t z_extent; - uint32_t array_slice_data_extent_bytes; - uint32_t level_data_extent_bytes; -}; - -// is_base == true - level must be 0 (for the base_address part). -// is_base == false - level may be 0 if is_packed_level is true (for the packed -// tail of mip_address part if the texture is very small so the tail is stored -// like mip 0). -TextureGuestLevelLayout GetGuestLevelLayout( - xenos::DataDimension dimension, uint32_t base_pitch_texels_div_32, - uint32_t width_texels, uint32_t height_texels, uint32_t depth_or_array_size, - bool is_tiled, xenos::TextureFormat format, bool is_mip, uint32_t level, - bool is_packed_level); - struct TextureGuestLayout { - TextureGuestLevelLayout base; + struct Level { + // Distance between each row of blocks in bytes, including all the needed + // power of two (for mips) and 256-byte (for linear textures) alignment. + uint32_t row_pitch_bytes; + // Distance between Z slices in block rows, aligned to power of two for + // mips, and to tile height. + uint32_t z_slice_stride_block_rows; + // Distance between each array slice within the level in bytes, aligned to + // kTextureSubresourceAlignmentBytes. The distance to the next level is this + // multiplied by the array slice count. + uint32_t array_slice_stride_bytes; + + // Estimated amount of memory this level occupies, and variables involved in + // its calculation. Not aligned to kTextureSubresourceAlignmentBytes. For + // tiled textures, this will be rounded to 32x32x4 blocks (or 32x32x1 + // depending on the dimension), but for the linear subresources, this may be + // significantly (including less 4 KB pages) smaller than the aligned size + // (like for Ridge Racer Unbounded where aligning the height of a 1280x720 + // linear texture results in access violations). For the linear mip tail, + // this includes all the mip levels stored in it. If the width is bigger + // than the pitch, this will also be taken into account for the last row so + // all memory actually used by the texture will be loaded, and may be bigger + // than the distance between array slices or levels. The purpose of this + // parameter is to make the memory amount that needs to be resident as close + // to the real amount as possible, to make sure all the needed data will be + // read, but also, if possible, unneeded memory pages won't be accessed + // (since that may trigger an access violation on the CPU). + uint32_t x_extent_blocks; + uint32_t y_extent_blocks; + uint32_t z_extent; + uint32_t array_slice_data_extent_bytes; + // Including all array slices. + uint32_t level_data_extent_bytes; + }; + + Level base; // If mip_max_level specified at calculation time is at least 1, the stored // mips are min(1, packed_mip_level) through min(mip_max_level, // packed_mip_level). - TextureGuestLevelLayout mips[xenos::kTexture2DCubeMaxWidthHeightLog2 + 1]; + Level mips[xenos::kTexture2DCubeMaxWidthHeightLog2 + 1]; uint32_t mip_offsets_bytes[xenos::kTexture2DCubeMaxWidthHeightLog2 + 1]; uint32_t mips_total_extent_bytes; uint32_t max_level; // UINT32_MAX if there's no packed mip tail. uint32_t packed_level; + uint32_t array_size; }; TextureGuestLayout GetGuestTextureLayout(