diff --git a/src/xenia/gpu/texture_conversion.cc b/src/xenia/gpu/texture_conversion.cc new file mode 100644 index 000000000..81bd6b955 --- /dev/null +++ b/src/xenia/gpu/texture_conversion.cc @@ -0,0 +1,156 @@ +/** + ****************************************************************************** + * Xenia : Xbox 360 Emulator Research Project * + ****************************************************************************** + * Copyright 2014 Ben Vanik. All rights reserved. * + * Released under the BSD license - see LICENSE in the root for more details. * + ****************************************************************************** + */ + +#include "xenia/gpu/texture_conversion.h" + +#include +#include +#include +#include + +#include "xenia/base/logging.h" +#include "xenia/base/math.h" +#include "xenia/base/memory.h" + +#include "third_party/xxhash/xxhash.h" + +namespace xe { +namespace gpu { +namespace texture_conversion { + +using namespace xe::gpu::xenos; + +void CopySwapBlock(Endian endian, void* output, const void* input, + size_t length) { + switch (endian) { + case Endian::k8in16: + xe::copy_and_swap_16_unaligned(output, input, length / 2); + break; + case Endian::k8in32: + xe::copy_and_swap_32_unaligned(output, input, length / 4); + break; + case Endian::k16in32: // Swap high and low 16 bits within a 32 bit word + xe::copy_and_swap_16_in_32_unaligned(output, input, length); + break; + default: + case Endian::kUnspecified: + std::memcpy(output, input, length); + break; + } +} + +void ConvertTexelCTX1ToR8G8(Endian endian, void* output, const void* input, + size_t length) { + // http://fileadmin.cs.lth.se/cs/Personal/Michael_Doggett/talks/unc-xenos-doggett.pdf + union { + uint8_t data[8]; + struct { + uint8_t r0, g0, r1, g1; + uint32_t xx; + }; + } block; + static_assert(sizeof(block) == 8, "CTX1 block mismatch"); + + const uint32_t bytes_per_block = 8; + CopySwapBlock(endian, block.data, input, bytes_per_block); + + uint8_t cr[4] = { + block.r0, block.r1, + static_cast(2.f / 3.f * block.r0 + 1.f / 3.f * block.r1), + static_cast(1.f / 3.f * block.r0 + 2.f / 3.f * block.r1)}; + uint8_t cg[4] = { + block.g0, block.g1, + static_cast(2.f / 3.f * block.g0 + 1.f / 3.f * block.g1), + static_cast(1.f / 3.f * block.g0 + 2.f / 3.f * block.g1)}; + + auto output_bytes = static_cast(output); + for (uint32_t oy = 0; oy < 4; ++oy) { + for (uint32_t ox = 0; ox < 4; ++ox) { + uint8_t xx = (block.xx >> (((ox + (oy * 4)) * 2))) & 3; + output_bytes[(oy * length) + (ox * 2) + 0] = cr[xx]; + output_bytes[(oy * length) + (ox * 2) + 1] = cg[xx]; + } + } +} + +void ConvertTexelDXT3AToDXT3(Endian endian, void* output, const void* input, + size_t length) { + const uint32_t bytes_per_block = 8; + auto output_bytes = static_cast(output); + std::memset(&output_bytes[0], 0, 8); + CopySwapBlock(endian, &output_bytes[8], input, bytes_per_block); +} + +// https://github.com/BinomialLLC/crunch/blob/ea9b8d8c00c8329791256adafa8cf11e4e7942a2/inc/crn_decomp.h#L4108 +static uint32_t TiledOffset2DOuter(uint32_t y, uint32_t width, + uint32_t log2_bpp) { + uint32_t macro = ((y / 32) * (width / 32)) << (log2_bpp + 7); + uint32_t micro = ((y & 6) << 2) << log2_bpp; + return macro + ((micro & ~0xF) << 1) + (micro & 0xF) + + ((y & 8) << (3 + log2_bpp)) + ((y & 1) << 4); +} + +static uint32_t TiledOffset2DInner(uint32_t x, uint32_t y, uint32_t log2_bpp, + uint32_t base_offset) { + uint32_t macro = (x / 32) << (log2_bpp + 7); + uint32_t micro = (x & 7) << log2_bpp; + uint32_t offset = + base_offset + (macro + ((micro & ~0xF) << 1) + (micro & 0xF)); + return ((offset & ~0x1FF) << 3) + ((offset & 0x1C0) << 2) + (offset & 0x3F) + + ((y & 16) << 7) + (((((y & 8) >> 2) + (x >> 3)) & 3) << 6); +} + +void Untile(uint8_t* output_buffer, const uint8_t* input_buffer, + const UntileInfo* untile_info) { + assert_not_null(untile_info); + assert_not_null(untile_info->input_format_info); + assert_not_null(untile_info->output_format_info); + assert_true(untile_info->width <= untile_info->input_pitch); + assert_true(untile_info->width <= untile_info->output_pitch); + + uint32_t input_bytes_per_block = + untile_info->input_format_info->bytes_per_block(); + uint32_t output_bytes_per_block = + untile_info->output_format_info->bytes_per_block(); + uint32_t output_pitch = untile_info->output_pitch * output_bytes_per_block; + + // Bytes per pixel + auto log2_bpp = (input_bytes_per_block / 4) + + ((input_bytes_per_block / 2) >> (input_bytes_per_block / 4)); + + // Offset to the current row, in bytes. + uint32_t output_row_offset = 0; + for (uint32_t y = 0; y < untile_info->height; y++) { + auto input_row_offset = TiledOffset2DOuter( + untile_info->offset_y + y, untile_info->input_pitch, log2_bpp); + + // Go block-by-block on this row. + uint32_t output_offset = output_row_offset; + + for (uint32_t x = 0; x < untile_info->width; x++) { + auto input_offset = TiledOffset2DInner(untile_info->offset_x + x, + untile_info->offset_y + y, + log2_bpp, input_row_offset); + input_offset >>= log2_bpp; + + untile_info->copy_callback( + &output_buffer[output_offset], + &input_buffer[input_offset * input_bytes_per_block], + output_bytes_per_block); + + output_offset += output_bytes_per_block; + } + + output_row_offset += output_pitch; + } +} + +} // namespace texture_conversion +} // namespace gpu +} // namespace xe diff --git a/src/xenia/gpu/texture_conversion.h b/src/xenia/gpu/texture_conversion.h new file mode 100644 index 000000000..28bf398be --- /dev/null +++ b/src/xenia/gpu/texture_conversion.h @@ -0,0 +1,56 @@ +/** + ****************************************************************************** + * Xenia : Xbox 360 Emulator Research Project * + ****************************************************************************** + * Copyright 2014 Ben Vanik. All rights reserved. * + * Released under the BSD license - see LICENSE in the root for more details. * + ****************************************************************************** + */ + +#ifndef XENIA_GPU_TEXTURE_CONVERSION_H_ +#define XENIA_GPU_TEXTURE_CONVERSION_H_ + +#include +#include +#include + +#include "xenia/base/assert.h" +#include "xenia/gpu/texture_info.h" +#include "xenia/gpu/xenos.h" + +namespace xe { +namespace gpu { +namespace texture_conversion { + +typedef std::function + CopyBlockCallback; + +void CopySwapBlock(Endian endian, void* output, const void* input, + size_t length); +void ConvertTexelCTX1ToR8G8(Endian endian, void* output, const void* input, + size_t length); +void ConvertTexelDXT3AToDXT3(Endian endian, void* output, const void* input, + size_t length); + +typedef std::function UntileCopyBlockCallback; + +typedef struct UntileInfo { + uint32_t offset_x; + uint32_t offset_y; + uint32_t width; + uint32_t height; + uint32_t input_pitch; + uint32_t output_pitch; + const FormatInfo* input_format_info; + const FormatInfo* output_format_info; + UntileCopyBlockCallback copy_callback; +} UntileInfo; + +void Untile(uint8_t* output_buffer, const uint8_t* input_buffer, + const UntileInfo* untile_info); + +} // namespace texture_conversion +} // namespace gpu +} // namespace xe + +#endif // XENIA_GPU_TEXTURE_CONVERSION_H_ diff --git a/src/xenia/gpu/texture_info.cc b/src/xenia/gpu/texture_info.cc index 56f85823e..defc3bcee 100644 --- a/src/xenia/gpu/texture_info.cc +++ b/src/xenia/gpu/texture_info.cc @@ -26,21 +26,22 @@ using namespace xe::gpu::xenos; bool TextureInfo::Prepare(const xe_gpu_texture_fetch_t& fetch, TextureInfo* out_info) { - std::memset(out_info, 0, sizeof(TextureInfo)); - // http://msdn.microsoft.com/en-us/library/windows/desktop/cc308051(v=vs.85).aspx // a2xx_sq_surfaceformat + + std::memset(out_info, 0, sizeof(TextureInfo)); + auto& info = *out_info; - info.guest_address = fetch.address << 12; + + info.format = static_cast(fetch.format); + info.endianness = static_cast(fetch.endianness); info.dimension = static_cast(fetch.dimension); - info.pitch = fetch.pitch << 5; info.width = info.height = info.depth = 0; switch (info.dimension) { case Dimension::k1D: - info.dimension = Dimension::k2D; + info.dimension = Dimension::k2D; // we treat 1D textures as 2D info.width = fetch.size_1d.width; - info.height = 1; break; case Dimension::k2D: info.width = fetch.size_2d.width; @@ -56,228 +57,62 @@ bool TextureInfo::Prepare(const xe_gpu_texture_fetch_t& fetch, info.height = fetch.size_stack.height; info.depth = fetch.size_stack.depth; break; + default: + assert_unhandled_case(info.dimension); + break; } - info.texture_format = static_cast(fetch.format); - info.endianness = static_cast(fetch.endianness); + info.pitch = fetch.pitch << 5; + info.mip_levels = fetch.packed_mips ? fetch.mip_max_level + 1 : 1; + info.is_tiled = fetch.tiled; info.has_packed_mips = fetch.packed_mips; + + info.guest_address = fetch.address << 12; info.mip_address = fetch.mip_address << 12; - info.mip_levels = fetch.packed_mips ? fetch.mip_max_level + 1 : 1; - info.input_length = 0; // Populated below. if (info.format_info()->format == TextureFormat::kUnknown) { XELOGE("Attempting to fetch from unsupported texture format %d", - info.texture_format); + info.format); return false; } - // Must be called here when we know the format. - switch (info.dimension) { - case Dimension::k1D: { - info.CalculateTextureSizes1D(fetch.size_1d.width + 1); - } break; - case Dimension::k2D: { - info.CalculateTextureSizes2D(fetch.size_2d.width + 1, - fetch.size_2d.height + 1); - } break; - case Dimension::k3D: { - info.CalculateTextureSizes3D(fetch.size_3d.width + 1, - fetch.size_3d.height + 1, - fetch.size_3d.depth + 1); - break; - } - case Dimension::kCube: { - info.CalculateTextureSizesCube(fetch.size_stack.width + 1, - fetch.size_stack.height + 1, - fetch.size_stack.depth + 1); - } break; - } - + info.memory_usage = TextureMemoryUsage::Calculate(out_info, true); return true; } bool TextureInfo::PrepareResolve(uint32_t physical_address, - TextureFormat texture_format, Endian endian, + TextureFormat format, Endian endian, uint32_t pitch, uint32_t width, uint32_t height, TextureInfo* out_info) { - std::memset(out_info, 0, sizeof(TextureInfo)); - auto& info = *out_info; - info.guest_address = physical_address; - info.dimension = Dimension::k2D; assert_true(width > 0); assert_true(height > 0); - info.pitch = pitch; + + std::memset(out_info, 0, sizeof(TextureInfo)); + + auto& info = *out_info; + info.format = format; + info.dimension = Dimension::k2D; info.width = width - 1; info.height = height - 1; - info.texture_format = texture_format; + info.mip_levels = 1; + info.depth = 0; + info.pitch = pitch; + info.endianness = endian; info.is_tiled = true; + + info.guest_address = physical_address; info.mip_address = 0; - info.mip_levels = 1; - info.input_length = 0; if (info.format_info()->format == TextureFormat::kUnknown) { assert_true("Unsupported texture format"); return false; } - info.CalculateTextureSizes2D(width, height); + info.memory_usage = TextureMemoryUsage::Calculate(out_info, true); return true; } -void TextureInfo::CalculateTextureSizes1D(uint32_t width) { - size.logical_width = width; - - auto format = format_info(); - - // width in blocks. - uint32_t block_width = - xe::round_up(pitch, format->block_width) / format->block_width; - - // Texture dimensions must be a multiple of tile - // dimensions (32x32 blocks). - size.block_width = xe::round_up(block_width, 32); - - uint32_t bytes_per_block = format->block_width * format->bits_per_pixel / 8; - uint32_t byte_pitch = size.block_width * bytes_per_block; - - uint32_t texel_width; - if (!is_tiled) { - // Each row must be a multiple of 256 in linear textures. - byte_pitch = xe::round_up(byte_pitch, 256); - texel_width = (byte_pitch / bytes_per_block) * format->block_width; - } else { - texel_width = size.block_width * format->block_width; - } - - size.input_width = texel_width; - - // Set some reasonable defaults for unused fields. - size.logical_height = 1; - size.block_height = format->block_height; - size.input_height = 1; - size.input_face_length = pitch * bytes_per_block; - - input_length = size.input_face_length; -} - -void TextureInfo::CalculateTextureSizes2D(uint32_t width, uint32_t height) { - size.logical_width = width; - size.logical_height = height; - - auto format = format_info(); - - // w/h in blocks. - uint32_t block_width = - xe::round_up(pitch, format->block_width) / format->block_width; - uint32_t block_height = - xe::round_up(size.logical_height, format->block_height) / - format->block_height; - - // Texture dimensions must be a multiple of tile - // dimensions (32x32 blocks). - size.block_width = xe::round_up(block_width, 32); - size.block_height = xe::round_up(block_height, 32); - - uint32_t bytes_per_block = - format->block_width * format->block_height * format->bits_per_pixel / 8; - uint32_t byte_pitch = size.block_width * bytes_per_block; - - uint32_t texel_width; - if (!is_tiled) { - // Each row must be a multiple of 256 in linear textures. - byte_pitch = xe::round_up(byte_pitch, 256); - texel_width = (byte_pitch / bytes_per_block) * format->block_width; - } else { - texel_width = size.block_width * format->block_width; - } - - size.input_width = texel_width; - size.input_height = size.block_height * format->block_height; - size.input_face_length = pitch * bytes_per_block * size.block_height; - - input_length = size.input_face_length; -} - -void TextureInfo::CalculateTextureSizes3D(uint32_t width, uint32_t height, - uint32_t depth) { - size.logical_width = width; - size.logical_height = height; - - auto format = format_info(); - - // w/h in blocks must be a multiple of block size. - uint32_t block_width = - xe::round_up(pitch, format->block_width) / format->block_width; - uint32_t block_height = - xe::round_up(size.logical_height, format->block_height) / - format->block_height; - - // Texture dimensions must be a multiple of tile - // dimensions (32x32 blocks). - size.block_width = xe::round_up(block_width, 32); - size.block_height = xe::round_up(block_height, 32); - - uint32_t bytes_per_block = - format->block_width * format->block_height * format->bits_per_pixel / 8; - uint32_t byte_pitch = size.block_width * bytes_per_block; - - uint32_t texel_width; - if (!is_tiled) { - // Each row must be a multiple of 256 in linear textures. - byte_pitch = xe::round_up(byte_pitch, 256); - texel_width = (byte_pitch / bytes_per_block) * format->block_width; - } else { - texel_width = size.block_width * format->block_width; - } - - size.input_width = texel_width; - size.input_height = size.block_height * format->block_height; - size.input_face_length = pitch * bytes_per_block * size.block_height; - - input_length = size.input_face_length * depth; -} - -void TextureInfo::CalculateTextureSizesCube(uint32_t width, uint32_t height, - uint32_t depth) { - assert_true(depth == 6); - size.logical_width = width; - size.logical_height = height; - - auto format = format_info(); - - // w/h in blocks must be a multiple of block size. - uint32_t block_width = - xe::round_up(pitch, format->block_width) / format->block_width; - uint32_t block_height = - xe::round_up(size.logical_height, format->block_height) / - format->block_height; - - // Texture dimensions must be a multiple of tile - // dimensions (32x32 blocks). - size.block_width = xe::round_up(block_width, 32); - size.block_height = xe::round_up(block_height, 32); - - uint32_t bytes_per_block = - format->block_width * format->block_height * format->bits_per_pixel / 8; - uint32_t byte_pitch = size.block_width * bytes_per_block; - - uint32_t texel_width; - if (!is_tiled) { - // Each row must be a multiple of 256 in linear textures. - byte_pitch = xe::round_up(byte_pitch, 256); - texel_width = (byte_pitch / bytes_per_block) * format->block_width; - } else { - texel_width = size.block_width * format->block_width; - } - - size.input_width = texel_width; - size.input_height = size.block_height * format->block_height; - size.input_face_length = pitch * bytes_per_block * size.block_height; - - input_length = size.input_face_length * depth; -} - static void TextureSwap(Endian endianness, void* dest, const void* src, size_t length) { switch (endianness) { @@ -330,167 +165,104 @@ static void ConvertTexelCTX1(uint8_t* dest, size_t dest_pitch, } } -void TextureInfo::ConvertTiled(uint8_t* dest, const uint8_t* src, Endian endian, - const FormatInfo* format_info, uint32_t offset_x, - uint32_t offset_y, uint32_t block_pitch, - uint32_t width, uint32_t height, - uint32_t output_width) { - // TODO(benvanik): optimize this inner loop (or work by tiles). - uint32_t bytes_per_block = format_info->block_width * - format_info->block_height * - format_info->bits_per_pixel / 8; - - uint32_t output_pitch = - output_width * format_info->block_width * format_info->bits_per_pixel / 8; - - uint32_t output_row_height = 1; - if (format_info->format == TextureFormat::k_CTX1) { - // TODO: Can we calculate this? - output_row_height = 4; - } - - // logical w/h in blocks. - uint32_t block_width = - xe::round_up(width, format_info->block_width) / format_info->block_width; - uint32_t block_height = xe::round_up(height, format_info->block_height) / - format_info->block_height; - - // Bytes per pixel - auto log2_bpp = - (bytes_per_block / 4) + ((bytes_per_block / 2) >> (bytes_per_block / 4)); - - // Offset to the current row, in bytes. - uint32_t output_row_offset = 0; - for (uint32_t y = 0; y < block_height; y++) { - auto input_row_offset = - TextureInfo::TiledOffset2DOuter(offset_y + y, block_pitch, log2_bpp); - - // Go block-by-block on this row. - uint32_t output_offset = output_row_offset; - for (uint32_t x = 0; x < block_width; x++) { - auto input_offset = TextureInfo::TiledOffset2DInner( - offset_x + x, offset_y + y, log2_bpp, input_row_offset); - input_offset >>= log2_bpp; - - if (format_info->format == TextureFormat::k_CTX1) { - // Convert to R8G8. - ConvertTexelCTX1(&dest[output_offset], output_pitch, src, endian); - } else { - // Generic swap to destination. - TextureSwap(endian, dest + output_offset, - src + input_offset * bytes_per_block, bytes_per_block); - } - - output_offset += bytes_per_block; - } - - output_row_offset += output_pitch * output_row_height; - } +uint32_t TextureInfo::GetMaxMipLevels() const { + return 1 + xe::log2_floor(std::max({width + 1, height + 1, depth + 1})); } -uint32_t TextureInfo::GetMaxMipLevels(uint32_t width, uint32_t height, - uint32_t depth) { - return 1 + xe::log2_floor(std::max({width, height, depth})); +const TextureMemoryUsage TextureInfo::GetMipMemoryUsage(uint32_t mip, + bool is_guest) const { + if (mip == 0) { + return memory_usage; + } + uint32_t mip_width = xe::next_pow2(width + 1) >> mip; + uint32_t mip_height = xe::next_pow2(height + 1) >> mip; + return TextureMemoryUsage::Calculate(format_info(), mip_width, mip_height, + depth + 1, is_tiled, is_guest); } -uint32_t TextureInfo::GetMipLocation(const TextureInfo& src, uint32_t mip, - uint32_t* offset_x, uint32_t* offset_y) { +void TextureInfo::GetMipSize(uint32_t mip, uint32_t* out_width, + uint32_t* out_height) const { + assert_not_null(out_width); + assert_not_null(out_height); + if (mip == 0) { + *out_width = width + 1; + *out_height = height + 1; + return; + } + uint32_t width_pow2 = xe::next_pow2(width + 1); + uint32_t height_pow2 = xe::next_pow2(height + 1); + *out_width = std::max(width_pow2 >> mip, 1u); + *out_height = std::max(height_pow2 >> mip, 1u); +} + +uint32_t TextureInfo::GetMipLocation(uint32_t mip, uint32_t* offset_x, + uint32_t* offset_y, bool is_guest) const { if (mip == 0) { // Short-circuit. Mip 0 is always stored in guest_address. - if (!src.has_packed_mips) { + if (!has_packed_mips) { *offset_x = 0; *offset_y = 0; } else { - GetPackedTileOffset(src, 0, offset_x, offset_y); + GetPackedTileOffset(0, offset_x, offset_y); } - return src.guest_address; + return guest_address; } // If the texture is <= 16 pixels w/h, the mips are packed with the base // texture. Otherwise, they're stored beginning from mip_address. - uint32_t address_base = std::min(src.width, src.height) < 16 - ? src.guest_address - : src.mip_address; + uint32_t address_base = + std::min(width, height) < 16 ? guest_address : mip_address; uint32_t address_offset = 0; - if (!src.has_packed_mips) { + if (!has_packed_mips) { for (uint32_t i = 1; i < mip; i++) { - address_offset += GetMipByteSize(src, i); + address_offset += GetMipByteSize(i, is_guest); } *offset_x = 0; *offset_y = 0; return address_base + address_offset; } + uint32_t width_pow2 = xe::next_pow2(width + 1); + uint32_t height_pow2 = xe::next_pow2(height + 1); + // Walk forward to find the address of the mip. uint32_t packed_mip_base = 1; for (uint32_t i = packed_mip_base; i < mip; i++, packed_mip_base++) { - uint32_t logical_width = std::max(xe::next_pow2(src.width + 1) >> i, 1u); - uint32_t logical_height = std::max(xe::next_pow2(src.height + 1) >> i, 1u); - if (std::min(logical_width, logical_height) <= 16) { + uint32_t mip_width = std::max(width_pow2 >> i, 1u); + uint32_t mip_height = std::max(height_pow2 >> i, 1u); + if (std::min(mip_width, mip_height) <= 16) { // We've reached the point where the mips are packed into a single tile. break; } - address_offset += GetMipByteSize(src, i); + address_offset += GetMipByteSize(i, is_guest); } // Now, check if the mip is packed at an offset. - GetPackedTileOffset(xe::next_pow2(src.width + 1) >> mip, - xe::next_pow2(src.height + 1) >> mip, src.format_info(), + GetPackedTileOffset(width_pow2 >> mip, height_pow2 >> mip, format_info(), mip - packed_mip_base, offset_x, offset_y); return address_base + address_offset; } -uint32_t TextureInfo::GetMipByteSize(const TextureInfo& src, uint32_t mip) { - if (mip == 0) { - return src.input_length; - } - - uint32_t bytes_per_block = src.format_info()->block_width * - src.format_info()->block_height * - src.format_info()->bits_per_pixel / 8; - - uint32_t logical_width = xe::next_pow2(src.width + 1) >> mip; - uint32_t logical_height = xe::next_pow2(src.height + 1) >> mip; - - // w/h in blocks - uint32_t block_width = - xe::round_up(logical_width, src.format_info()->block_width) / - src.format_info()->block_width; - uint32_t block_height = - xe::round_up(logical_height, src.format_info()->block_height) / - src.format_info()->block_height; - - // Texture dimensions must be a multiple of tile - // dimensions (32x32 blocks). - block_width = xe::round_up(block_width, 32); - block_height = xe::round_up(block_height, 32); - - uint32_t byte_pitch = block_width * bytes_per_block; - - if (!src.is_tiled) { - // Each row must be a multiple of 256 in linear textures. - byte_pitch = xe::round_up(byte_pitch, 256); - } - - return byte_pitch * block_height * (src.depth + 1); +uint32_t TextureInfo::GetMipByteSize(uint32_t mip, bool is_guest) const { + uint32_t bytes_per_block = format_info()->bytes_per_block(); + auto mip_usage = GetMipMemoryUsage(mip, is_guest); + return mip_usage.blocks() * bytes_per_block; } -uint32_t TextureInfo::GetMipLinearSize(const TextureInfo& src, uint32_t mip) { - uint32_t bytes_per_block = src.format_info()->block_width * - src.format_info()->block_height * - src.format_info()->bits_per_pixel / 8; - uint32_t size = src.input_length >> (mip * 2); - - // The size is a multiple of the block size. - return xe::round_up(size, bytes_per_block) * (src.depth + 1); +uint32_t TextureInfo::GetByteSize(bool is_guest) const { + uint32_t length = 0; + for (uint32_t mip = 0; mip < mip_levels; ++mip) { + length += GetMipByteSize(mip, is_guest); + } + return length; } bool TextureInfo::GetPackedTileOffset(uint32_t width, uint32_t height, const FormatInfo* format_info, - int packed_tile, uint32_t* out_offset_x, - uint32_t* out_offset_y) { + int packed_tile, uint32_t* offset_x, + uint32_t* offset_y) { // Tile size is 32x32, and once textures go <=16 they are packed into a // single tile together. The math here is insane. Most sourced // from graph paper and looking at dds dumps. @@ -530,8 +302,8 @@ bool TextureInfo::GetPackedTileOffset(uint32_t width, uint32_t height, uint32_t log2_height = xe::log2_ceil(height); if (std::min(log2_width, log2_height) > 4) { // Too big, not packed. - *out_offset_x = 0; - *out_offset_y = 0; + *offset_x = 0; + *offset_y = 0; return false; } @@ -539,62 +311,40 @@ bool TextureInfo::GetPackedTileOffset(uint32_t width, uint32_t height, if (packed_tile < 3) { if (log2_width > log2_height) { // Wider than tall. Laid out vertically. - *out_offset_x = 0; - *out_offset_y = 16 >> packed_tile; + *offset_x = 0; + *offset_y = 16 >> packed_tile; } else { // Taller than wide. Laid out horizontally. - *out_offset_x = 16 >> packed_tile; - *out_offset_y = 0; + *offset_x = 16 >> packed_tile; + *offset_y = 0; } } else { if (log2_width > log2_height) { // Wider than tall. Laid out vertically. - *out_offset_x = 16 >> (packed_tile - 2); - *out_offset_y = 0; + *offset_x = 16 >> (packed_tile - 2); + *offset_y = 0; } else { // Taller than wide. Laid out horizontally. - *out_offset_x = 0; - *out_offset_y = 16 >> (packed_tile - 2); + *offset_x = 0; + *offset_y = 16 >> (packed_tile - 2); } } - *out_offset_x /= format_info->block_width; - *out_offset_y /= format_info->block_height; + *offset_x /= format_info->block_width; + *offset_y /= format_info->block_height; return true; } -bool TextureInfo::GetPackedTileOffset(const TextureInfo& texture_info, - int packed_tile, uint32_t* out_offset_x, - uint32_t* out_offset_y) { - if (!texture_info.has_packed_mips) { - *out_offset_x = 0; - *out_offset_y = 0; +bool TextureInfo::GetPackedTileOffset(int packed_tile, uint32_t* offset_x, + uint32_t* offset_y) const { + if (!has_packed_mips) { + *offset_x = 0; + *offset_y = 0; return false; } - return GetPackedTileOffset(xe::next_pow2(texture_info.size.logical_width), - xe::next_pow2(texture_info.size.logical_height), - texture_info.format_info(), packed_tile, - out_offset_x, out_offset_y); -} - -// https://github.com/BinomialLLC/crunch/blob/ea9b8d8c00c8329791256adafa8cf11e4e7942a2/inc/crn_decomp.h#L4108 -uint32_t TextureInfo::TiledOffset2DOuter(uint32_t y, uint32_t width, - uint32_t log2_bpp) { - uint32_t macro = ((y / 32) * (width / 32)) << (log2_bpp + 7); - uint32_t micro = ((y & 6) << 2) << log2_bpp; - return macro + ((micro & ~0xF) << 1) + (micro & 0xF) + - ((y & 8) << (3 + log2_bpp)) + ((y & 1) << 4); -} - -uint32_t TextureInfo::TiledOffset2DInner(uint32_t x, uint32_t y, - uint32_t log2_bpp, - uint32_t base_offset) { - uint32_t macro = (x / 32) << (log2_bpp + 7); - uint32_t micro = (x & 7) << log2_bpp; - uint32_t offset = - base_offset + (macro + ((micro & ~0xF) << 1) + (micro & 0xF)); - return ((offset & ~0x1FF) << 3) + ((offset & 0x1C0) << 2) + (offset & 0x3F) + - ((y & 16) << 7) + (((((y & 8) >> 2) + (x >> 3)) & 3) << 6); + return GetPackedTileOffset(xe::next_pow2(width + 1), + xe::next_pow2(height + 1), format_info(), + packed_tile, offset_x, offset_y); } uint64_t TextureInfo::hash() const { diff --git a/src/xenia/gpu/texture_info.h b/src/xenia/gpu/texture_info.h index 2db3865e5..89ae085a3 100644 --- a/src/xenia/gpu/texture_info.h +++ b/src/xenia/gpu/texture_info.h @@ -279,36 +279,56 @@ struct FormatInfo { uint32_t block_height; uint32_t bits_per_pixel; + uint32_t bytes_per_block() const { + return block_width * block_height * bits_per_pixel / 8; + } + static const FormatInfo* Get(uint32_t gpu_format); + + static const FormatInfo* Get(TextureFormat format) { + return Get(static_cast(format)); + } +}; + +struct TextureInfo; + +struct TextureMemoryUsage { + uint32_t pitch; // texel pitch + uint32_t height; // texel height + uint32_t block_pitch; // # of horizontal pitch blocks + uint32_t block_height; // # of vertical blocks + uint32_t depth; + + uint32_t blocks() const { return block_pitch * block_height * depth; } + + static TextureMemoryUsage Calculate(const FormatInfo* format_info, + uint32_t pitch, uint32_t height, + uint32_t depth, bool is_tiled, + bool is_guest); + static TextureMemoryUsage Calculate(const TextureInfo* texture_info, + bool is_guest); }; struct TextureInfo { - uint32_t guest_address; - TextureFormat texture_format; + TextureFormat format; + Endian endianness; + Dimension dimension; - uint32_t pitch; // pitch in blocks uint32_t width; // width in pixels uint32_t height; // height in pixels uint32_t depth; // depth in layers - Endian endianness; + uint32_t pitch; // pitch in blocks + uint32_t mip_levels; bool is_tiled; bool has_packed_mips; - uint32_t mip_address; - uint32_t mip_levels; - uint32_t input_length; - struct Size { - uint32_t logical_width; - uint32_t logical_height; - uint32_t block_width; // # of horizontal blocks - uint32_t block_height; // # of vertical blocks - uint32_t input_width; // (full) texel pitch - uint32_t input_height; // (full) texel height - uint32_t input_face_length; // byte length of face - } size; + TextureMemoryUsage memory_usage; + + uint32_t guest_address; + uint32_t mip_address; const FormatInfo* format_info() const { - return FormatInfo::Get(static_cast(texture_format)); + return FormatInfo::Get(static_cast(format)); } bool is_compressed() const { @@ -323,47 +343,32 @@ struct TextureInfo { uint32_t pitch, uint32_t width, uint32_t height, TextureInfo* out_info); - static void ConvertTiled(uint8_t* dest, const uint8_t* src, Endian endian, - const FormatInfo* format_info, uint32_t offset_x, - uint32_t offset_y, uint32_t block_pitch, - uint32_t width, uint32_t height, - uint32_t output_width); + uint32_t GetMaxMipLevels() const; - static uint32_t GetMaxMipLevels(uint32_t width, uint32_t height, - uint32_t depth); + const TextureMemoryUsage GetMipMemoryUsage(uint32_t mip, bool is_guest) const; + + void GetMipSize(uint32_t mip, uint32_t* width, uint32_t* height) const; // Get the memory location of a mip. offset_x and offset_y are in blocks. - static uint32_t GetMipLocation(const TextureInfo& src, uint32_t mip, - uint32_t* offset_x, uint32_t* offset_y); - static uint32_t GetMipByteSize(const TextureInfo& src, uint32_t mip); - static uint32_t GetMipSizes(const TextureInfo& src, uint32_t mip); + uint32_t GetMipLocation(uint32_t mip, uint32_t* offset_x, uint32_t* offset_y, + bool is_guest) const; - // Get the byte size of a MIP when stored linearly. - static uint32_t GetMipLinearSize(const TextureInfo& src, uint32_t mip); + uint32_t GetMipByteSize(uint32_t mip, bool is_guest) const; + + uint32_t GetByteSize(bool is_guest) const; static bool GetPackedTileOffset(uint32_t width, uint32_t height, const FormatInfo* format_info, - int packed_tile, uint32_t* out_offset_x, - uint32_t* out_offset_y); - static bool GetPackedTileOffset(const TextureInfo& texture_info, - int packed_tile, uint32_t* out_offset_x, - uint32_t* out_offset_y); - static uint32_t TiledOffset2DOuter(uint32_t y, uint32_t width, - uint32_t log2_bpp); - static uint32_t TiledOffset2DInner(uint32_t x, uint32_t y, uint32_t log2_bpp, - uint32_t base_offset); + int packed_tile, uint32_t* offset_x, + uint32_t* offset_y); + + bool GetPackedTileOffset(int packed_tile, uint32_t* offset_x, + uint32_t* offset_y) const; uint64_t hash() const; bool operator==(const TextureInfo& other) const { return std::memcmp(this, &other, sizeof(TextureInfo)) == 0; } - - private: - void CalculateTextureSizes1D(uint32_t width); - void CalculateTextureSizes2D(uint32_t width, uint32_t height); - void CalculateTextureSizes3D(uint32_t width, uint32_t height, uint32_t depth); - void CalculateTextureSizesCube(uint32_t width, uint32_t height, - uint32_t depth); }; } // namespace gpu diff --git a/src/xenia/gpu/texture_memory_usage.cc b/src/xenia/gpu/texture_memory_usage.cc new file mode 100644 index 000000000..d464c4f76 --- /dev/null +++ b/src/xenia/gpu/texture_memory_usage.cc @@ -0,0 +1,80 @@ +/** + ****************************************************************************** + * Xenia : Xbox 360 Emulator Research Project * + ****************************************************************************** + * Copyright 2014 Ben Vanik. All rights reserved. * + * Released under the BSD license - see LICENSE in the root for more details. * + ****************************************************************************** + */ + +#include "xenia/gpu/texture_info.h" + +#include +#include + +#include "xenia/base/math.h" + +namespace xe { +namespace gpu { + +using namespace xe::gpu::xenos; + +static TextureMemoryUsage CalculateMemoryUsage(const FormatInfo* format_info, + uint32_t pitch, uint32_t height, + uint32_t depth, bool is_tiled, + bool is_guest) { + TextureMemoryUsage usage; + + usage.pitch = pitch; + usage.height = height; + usage.block_pitch = xe::round_up(usage.pitch, format_info->block_width) / + format_info->block_width; + usage.block_height = xe::round_up(usage.height, format_info->block_height) / + format_info->block_height; + usage.depth = depth; + + if (is_guest) { + // Texture dimensions must be a multiple of tile + // dimensions (32x32 blocks). + usage.block_pitch = xe::round_up(usage.block_pitch, 32); + usage.block_height = xe::round_up(usage.block_height, 32); + + usage.pitch = usage.block_pitch * format_info->block_width; + usage.height = usage.block_height * format_info->block_height; + + uint32_t bytes_per_block = format_info->bytes_per_block(); + uint32_t byte_pitch = usage.block_pitch * bytes_per_block; + + if (!is_tiled) { + // Each row must be a multiple of 256 bytes in linear textures. + byte_pitch = xe::round_up(byte_pitch, 256); + usage.block_pitch = byte_pitch / bytes_per_block; + usage.pitch = usage.block_pitch * format_info->block_width; + } + + // Is depth special? + usage.depth = usage.depth; + } + + return usage; +} + +TextureMemoryUsage TextureMemoryUsage::Calculate(const FormatInfo* format_info, + uint32_t pitch, + uint32_t height, + uint32_t depth, bool is_tiled, + bool is_guest) { + return CalculateMemoryUsage(format_info, pitch, height, depth, is_tiled, + is_guest); +} + +TextureMemoryUsage TextureMemoryUsage::Calculate(const TextureInfo* info, + bool is_guest) { + assert_not_null(info); + return CalculateMemoryUsage(info->format_info(), info->pitch, + info->height + 1, info->depth + 1, info->is_tiled, + is_guest); +} + +} // namespace gpu +} // namespace xe diff --git a/src/xenia/gpu/vulkan/texture_cache.cc b/src/xenia/gpu/vulkan/texture_cache.cc index 92eccd432..67d655f28 100644 --- a/src/xenia/gpu/vulkan/texture_cache.cc +++ b/src/xenia/gpu/vulkan/texture_cache.cc @@ -15,6 +15,7 @@ #include "xenia/base/profiling.h" #include "xenia/gpu/gpu_flags.h" #include "xenia/gpu/sampler_info.h" +#include "xenia/gpu/texture_conversion.h" #include "xenia/gpu/texture_info.h" #include "xenia/gpu/vulkan/vulkan_gpu_flags.h" #include "xenia/ui/vulkan/vulkan_mem_alloc.h" @@ -38,6 +39,7 @@ struct TextureConfig { #define SWIZ(r, g, b, a) r, g, b, a #define ___R SWIZ(-7, -7, -7, 0) +#define ___A SWIZ(-7, -7, -7, 3) #define RRRR SWIZ(0, 0, 0, 0) #define RRRA SWIZ(0, 0, 0, 3) #define RGBA SWIZ(0, 1, 2, 3) @@ -117,8 +119,8 @@ static const TextureConfig texture_configs[64] = { /* k_10_11_11_AS_16_16_16_16 */ {VK_FORMAT_B10G11R11_UFLOAT_PACK32}, // ? /* k_11_11_10_AS_16_16_16_16 */ {VK_FORMAT_B10G11R11_UFLOAT_PACK32}, // ? /* k_32_32_32_FLOAT */ {VK_FORMAT_R32G32B32_SFLOAT}, - /* k_DXT3A */ {VK_FORMAT_UNDEFINED}, - /* k_DXT5A */ {VK_FORMAT_BC4_UNORM_BLOCK, RRRR}, // ATI1N + /* k_DXT3A */ {VK_FORMAT_BC2_UNORM_BLOCK, ___A}, + /* k_DXT5A */ {VK_FORMAT_BC4_UNORM_BLOCK, ___R}, // ATI1N // http://fileadmin.cs.lth.se/cs/Personal/Michael_Doggett/talks/unc-xenos-doggett.pdf /* k_CTX1 */ {VK_FORMAT_R8G8_UINT}, @@ -137,6 +139,7 @@ static const TextureConfig texture_configs[64] = { #undef RGBA #undef RRRA #undef RRRR +#undef ___A #undef ___R #undef SWIZ @@ -438,15 +441,15 @@ TextureCache::Texture* TextureCache::DemandResolveTexture( // Tell the trace writer to "cache" this memory (but not read it) trace_writer_->WriteMemoryReadCachedNop(texture_info.guest_address, - texture_info.input_length); + texture_info.GetByteSize(true)); return it->second; } } VkFormatFeatureFlags required_flags = VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT; - if (texture_info.texture_format == TextureFormat::k_24_8 || - texture_info.texture_format == TextureFormat::k_24_8_FLOAT) { + if (texture_info.format == TextureFormat::k_24_8 || + texture_info.format == TextureFormat::k_24_8_FLOAT) { required_flags |= VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT; } else { required_flags |= VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT; @@ -466,11 +469,11 @@ TextureCache::Texture* TextureCache::DemandResolveTexture( VK_DEBUG_REPORT_OBJECT_TYPE_IMAGE_EXT, xe::format_string( "RT: 0x%.8X - 0x%.8X", texture_info.guest_address, - texture_info.guest_address + texture_info.input_length)); + texture_info.guest_address + texture_info.GetByteSize(true))); // Setup an access watch. If this texture is touched, it is destroyed. texture->access_watch_handle = memory_->AddPhysicalAccessWatch( - texture_info.guest_address, texture_info.input_length, + texture_info.guest_address, texture_info.GetByteSize(true), cpu::MMIOHandler::kWatchWrite, &WatchCallback, this, texture); textures_[texture_hash] = texture; @@ -492,7 +495,7 @@ TextureCache::Texture* TextureCache::Demand(const TextureInfo& texture_info, } trace_writer_->WriteMemoryReadCached(texture_info.guest_address, - texture_info.input_length); + texture_info.GetByteSize(true)); return it->second; } @@ -525,12 +528,12 @@ TextureCache::Texture* TextureCache::Demand(const TextureInfo& texture_info, } trace_writer_->WriteMemoryRead(texture_info.guest_address, - texture_info.input_length); + texture_info.GetByteSize(true)); // Okay. Put a writewatch on it to tell us if it's been modified from the // guest. texture->access_watch_handle = memory_->AddPhysicalAccessWatch( - texture_info.guest_address, texture_info.input_length, + texture_info.guest_address, texture_info.GetByteSize(true), cpu::MMIOHandler::kWatchWrite, &WatchCallback, this, texture); if (!UploadTexture(command_buffer, completion_fence, texture, texture_info)) { @@ -542,11 +545,11 @@ TextureCache::Texture* TextureCache::Demand(const TextureInfo& texture_info, device_->DbgSetObjectName( reinterpret_cast(texture->image), VK_DEBUG_REPORT_OBJECT_TYPE_IMAGE_EXT, - xe::format_string("T: 0x%.8X - 0x%.8X (%s, %s)", - texture_info.guest_address, - texture_info.guest_address + texture_info.input_length, - texture_info.format_info()->name, - get_dimension_name(texture_info.dimension))); + xe::format_string( + "T: 0x%.8X - 0x%.8X (%s, %s)", texture_info.guest_address, + texture_info.guest_address + texture_info.GetByteSize(true), + texture_info.format_info()->name, + get_dimension_name(texture_info.dimension))); textures_[texture_hash] = texture; COUNT_profile_set("gpu/texture_cache/textures", textures_.size()); @@ -561,8 +564,7 @@ TextureCache::TextureView* TextureCache::DemandView(Texture* texture, } } - auto& config = - texture_configs[uint32_t(texture->texture_info.texture_format)]; + auto& config = texture_configs[uint32_t(texture->texture_info.format)]; VkImageViewCreateInfo view_info; view_info.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO; @@ -821,10 +823,10 @@ TextureCache::Texture* TextureCache::Lookup(const TextureInfo& texture_info) { COMPARE_FIELD(depth); COMPARE_FIELD(endianness); COMPARE_FIELD(is_tiled); - COMPARE_FIELD(input_length); + COMPARE_FIELD(GetByteSize(true)); #undef COMPARE_FIELD - if (!TextureFormatIsSimilar(texture_info.texture_format, - other_texture_info.texture_format)) { + if (!TextureFormatIsSimilar(texture_info.format, + other_texture_info.format)) { continue; } /*const auto format_info = texture_info.format_info(); @@ -850,9 +852,9 @@ TextureCache::Texture* TextureCache::LookupAddress(uint32_t guest_address, const auto& texture_info = it->second->texture_info; if (guest_address >= texture_info.guest_address && guest_address < - texture_info.guest_address + texture_info.input_length && - texture_info.size.input_width >= width && - texture_info.size.input_height >= height && out_offset) { + texture_info.guest_address + texture_info.GetByteSize(true) && + texture_info.pitch >= width && texture_info.height >= height && + out_offset) { auto offset_bytes = guest_address - texture_info.guest_address; if (texture_info.dimension == Dimension::k2D) { @@ -868,8 +870,7 @@ TextureCache::Texture* TextureCache::LookupAddress(uint32_t guest_address, if (texture_info.guest_address == guest_address && texture_info.dimension == Dimension::k2D && - texture_info.size.input_width == width && - texture_info.size.input_height == height) { + texture_info.pitch == width && texture_info.height == height) { if (out_offset) { out_offset->x = 0; out_offset->y = 0; @@ -882,25 +883,6 @@ TextureCache::Texture* TextureCache::LookupAddress(uint32_t guest_address, return nullptr; } -void TextureSwap(Endian endianness, void* dest, const void* src, - size_t length) { - switch (endianness) { - case Endian::k8in16: - xe::copy_and_swap_16_unaligned(dest, src, length / 2); - break; - case Endian::k8in32: - xe::copy_and_swap_32_unaligned(dest, src, length / 4); - break; - case Endian::k16in32: // Swap high and low 16 bits within a 32 bit word - xe::copy_and_swap_16_in_32_unaligned(dest, src, length); - break; - default: - case Endian::kUnspecified: - std::memcpy(dest, src, length); - break; - } -} - void TextureCache::FlushPendingCommands(VkCommandBuffer command_buffer, VkFence completion_fence) { auto status = vkEndCommandBuffer(command_buffer); @@ -942,60 +924,55 @@ bool TextureCache::ConvertTexture2D(uint8_t* dest, uint32_t mip, const TextureInfo& src) { uint32_t offset_x = 0; uint32_t offset_y = 0; - uint32_t address = - TextureInfo::GetMipLocation(src, mip, &offset_x, &offset_y); + uint32_t address = src.GetMipLocation(mip, &offset_x, &offset_y, true); void* host_address = memory_->TranslatePhysical(address); - // Pitch of the source texture in blocks. - uint32_t block_width; - if (mip == 0) { - block_width = src.size.block_width; - } else { - block_width = xe::next_pow2(src.size.block_width) >> mip; - block_width = xe::round_up(block_width, 32); - } + auto src_usage = src.GetMipMemoryUsage(mip, true); + auto dst_usage = GetMipMemoryUsage(src, mip); - uint32_t logical_width = src.size.logical_width >> mip; - uint32_t logical_height = src.size.logical_height >> mip; - uint32_t input_width = src.size.input_width >> mip; - uint32_t input_height = src.size.input_height >> mip; + uint32_t mip_width, mip_height; + src.GetMipSize(mip, &mip_width, &mip_height); - // All dimensions must be a multiple of block w/h - logical_width = xe::round_up(logical_width, src.format_info()->block_width); - logical_height = - xe::round_up(logical_height, src.format_info()->block_height); - input_width = xe::round_up(input_width, src.format_info()->block_width); - input_height = xe::round_up(input_height, src.format_info()->block_height); + auto copy_block = GetFormatCopyBlock(src.format); if (!src.is_tiled) { - uint32_t bytes_per_block = src.format_info()->block_width * - src.format_info()->block_height * - src.format_info()->bits_per_pixel / 8; - uint32_t src_pitch = xe::round_up(block_width * bytes_per_block, 256); + uint32_t src_pitch = + src_usage.block_pitch * src.format_info()->bytes_per_block(); uint32_t dst_pitch = - (input_width / src.format_info()->block_width) * bytes_per_block; - assert_true(dst_pitch <= src_pitch); + dst_usage.block_pitch * GetFormatInfo(src.format)->bytes_per_block(); const uint8_t* src_mem = reinterpret_cast(host_address); src_mem += offset_y * src_pitch; - src_mem += offset_x * bytes_per_block; - for (uint32_t y = 0; y < src.size.block_height; y++) { - TextureSwap(src.endianness, dest + y * dst_pitch, src_mem + y * src_pitch, - dst_pitch); + src_mem += offset_x * src.format_info()->bytes_per_block(); + for (uint32_t y = 0; y < dst_usage.block_height; y++) { + copy_block(src.endianness, dest + y * dst_pitch, src_mem + y * src_pitch, + dst_pitch); } } else { // Untile image. - // We could do this in a shader to speed things up, as this is pretty - // slow. + // We could do this in a shader to speed things up, as this is pretty slow. + const uint8_t* src_mem = reinterpret_cast(host_address); - TextureInfo::ConvertTiled(dest, src_mem, src.endianness, src.format_info(), - offset_x, offset_y, block_width, logical_width, - logical_height, input_width); + + texture_conversion::UntileInfo untile_info; + std::memset(&untile_info, 0, sizeof(untile_info)); + untile_info.offset_x = offset_x; + untile_info.offset_y = offset_y; + untile_info.width = dst_usage.block_pitch; + untile_info.height = dst_usage.block_height; + untile_info.input_pitch = src_usage.block_pitch; + untile_info.output_pitch = dst_usage.block_pitch; + untile_info.input_format_info = src.format_info(); + untile_info.output_format_info = GetFormatInfo(src.format); + untile_info.copy_callback = [=](auto o, auto i, auto l) { + copy_block(src.endianness, o, i, l); + }; + texture_conversion::Untile(dest, src_mem, &untile_info); } - copy_region->bufferRowLength = input_width; - copy_region->bufferImageHeight = input_height; + copy_region->bufferRowLength = dst_usage.pitch; + copy_region->bufferImageHeight = dst_usage.height; copy_region->imageSubresource = {VK_IMAGE_ASPECT_COLOR_BIT, mip, 0, 1}; - copy_region->imageExtent = {logical_width, logical_height, 1}; + copy_region->imageExtent = {mip_width, mip_height, 1}; return true; } @@ -1004,77 +981,60 @@ bool TextureCache::ConvertTextureCube(uint8_t* dest, uint32_t mip, const TextureInfo& src) { uint32_t offset_x = 0; uint32_t offset_y = 0; - uint32_t address = - TextureInfo::GetMipLocation(src, mip, &offset_x, &offset_y); + uint32_t address = src.GetMipLocation(mip, &offset_x, &offset_y, true); void* host_address = memory_->TranslatePhysical(address); - // Pitch of the source texture in blocks. - uint32_t block_width, block_height, input_block_height; - if (mip == 0) { - block_width = src.size.block_width; - input_block_height = block_height = src.size.block_height; - } else { - block_width = xe::next_pow2(src.size.block_width) >> mip; - block_width = xe::round_up(block_width, 32); - block_height = xe::next_pow2(src.size.block_height) >> mip; - input_block_height = block_height; - block_height = xe::round_up(block_height, 32); - } + auto src_usage = src.GetMipMemoryUsage(mip, true); + auto dst_usage = GetMipMemoryUsage(src, mip); - uint32_t logical_width = src.size.logical_width >> mip; - uint32_t logical_height = src.size.logical_height >> mip; - uint32_t input_width = src.size.input_width >> mip; - uint32_t input_height = src.size.input_height >> mip; + uint32_t src_pitch = + src_usage.block_pitch * src.format_info()->bytes_per_block(); + uint32_t dst_pitch = + dst_usage.block_pitch * GetFormatInfo(src.format)->bytes_per_block(); - // All dimensions must be a multiple of block w/h - logical_width = xe::round_up(logical_width, src.format_info()->block_width); - logical_height = - xe::round_up(logical_height, src.format_info()->block_height); - input_width = xe::round_up(input_width, src.format_info()->block_width); - input_height = xe::round_up(input_height, src.format_info()->block_height); + uint32_t mip_width, mip_height; + src.GetMipSize(mip, &mip_width, &mip_height); + + auto copy_block = GetFormatCopyBlock(src.format); if (!src.is_tiled) { - uint32_t bytes_per_block = src.format_info()->block_width * - src.format_info()->block_height * - src.format_info()->bits_per_pixel / 8; - uint32_t src_pitch = xe::round_up(block_width * bytes_per_block, 256); - uint32_t dst_pitch = - (input_width / src.format_info()->block_width) * bytes_per_block; - assert_true(dst_pitch <= src_pitch); const uint8_t* src_mem = reinterpret_cast(host_address); for (int face = 0; face < 6; face++) { src_mem += offset_y * src_pitch; - src_mem += offset_x * bytes_per_block; - for (uint32_t y = 0; y < block_height; y++) { - TextureSwap(src.endianness, dest + y * dst_pitch, - src_mem + y * src_pitch, dst_pitch); + src_mem += offset_x * src.format_info()->bytes_per_block(); + for (uint32_t y = 0; y < dst_usage.block_height; y++) { + copy_block(src.endianness, dest + y * dst_pitch, + src_mem + y * src_pitch, dst_pitch); } - src_mem += src_pitch * block_height; - dest += dst_pitch * input_block_height; + src_mem += src_pitch * src_usage.block_height; + dest += dst_pitch * dst_usage.block_height; } } else { - // TODO(benvanik): optimize this inner loop (or work by tiles). - uint32_t bytes_per_block = src.format_info()->block_width * - src.format_info()->block_height * - src.format_info()->bits_per_pixel / 8; - uint32_t src_pitch = block_width * bytes_per_block; - uint32_t dst_pitch = - (input_width / src.format_info()->block_width) * bytes_per_block; - assert_true(dst_pitch <= src_pitch); const uint8_t* src_mem = reinterpret_cast(host_address); for (int face = 0; face < 6; face++) { - TextureInfo::ConvertTiled( - dest, src_mem, src.endianness, src.format_info(), offset_x, offset_y, - block_width, logical_width, logical_height, input_width); - src_mem += src_pitch * block_height; - dest += dst_pitch * input_block_height; + texture_conversion::UntileInfo untile_info; + std::memset(&untile_info, 0, sizeof(untile_info)); + untile_info.offset_x = offset_x; + untile_info.offset_y = offset_y; + untile_info.width = dst_usage.block_pitch; + untile_info.height = dst_usage.block_height; + untile_info.input_pitch = src_usage.block_pitch; + untile_info.output_pitch = dst_usage.block_pitch; + untile_info.input_format_info = src.format_info(); + untile_info.output_format_info = GetFormatInfo(src.format); + untile_info.copy_callback = [=](auto o, auto i, auto l) { + copy_block(src.endianness, o, i, l); + }; + + src_mem += src_pitch * src_usage.block_height; + dest += dst_pitch * dst_usage.block_height; } } - copy_region->bufferRowLength = input_width; - copy_region->bufferImageHeight = input_height; + copy_region->bufferRowLength = dst_usage.pitch; + copy_region->bufferImageHeight = dst_usage.height; copy_region->imageSubresource = {VK_IMAGE_ASPECT_COLOR_BIT, mip, 0, 6}; - copy_region->imageExtent = {logical_width, logical_height, 1}; + copy_region->imageExtent = {mip_width, mip_height, 1}; return true; } @@ -1102,53 +1062,49 @@ bool TextureCache::UploadTexture(VkCommandBuffer command_buffer, SCOPE_profile_cpu_f("gpu"); #endif // FINE_GRAINED_DRAW_SCOPES + size_t unpack_length = ComputeTextureStorage(src); + XELOGGPU( "Uploading texture @ 0x%.8X (%dx%d, length: 0x%.8X, format: %s, dim: %s, " "levels: %d, tiled: %s)", - src.guest_address, src.width + 1, src.height + 1, src.input_length, + src.guest_address, src.width + 1, src.height + 1, unpack_length, src.format_info()->name, get_dimension_name(src.dimension), src.mip_levels, src.is_tiled ? "yes" : "no"); - size_t unpack_length; - if (!ComputeTextureStorage(&unpack_length, src)) { - XELOGW("Failed to compute texture storage"); + + if (!unpack_length) { + XELOGW("Failed to compute texture storage!"); return false; } - size_t total_unpack_length = unpack_length; - for (uint32_t i = 1; i < src.mip_levels; i++) { - // Add in more space for mips. - total_unpack_length += TextureInfo::GetMipLinearSize(src, i); - } - - if (!staging_buffer_.CanAcquire(total_unpack_length)) { + if (!staging_buffer_.CanAcquire(unpack_length)) { // Need to have unique memory for every upload for at least one frame. If we // run out of memory, we need to flush all queued upload commands to the // GPU. FlushPendingCommands(command_buffer, completion_fence); // Uploads have been flushed. Continue. - if (!staging_buffer_.CanAcquire(total_unpack_length)) { + if (!staging_buffer_.CanAcquire(unpack_length)) { // The staging buffer isn't big enough to hold this texture. XELOGE( "TextureCache staging buffer is too small! (uploading 0x%.8X bytes)", - total_unpack_length); + unpack_length); assert_always(); return false; } } // Grab some temporary memory for staging. - auto alloc = staging_buffer_.Acquire(total_unpack_length, completion_fence); + auto alloc = staging_buffer_.Acquire(unpack_length, completion_fence); assert_not_null(alloc); if (!alloc) { - XELOGE("%s: Failed to acquire staging memory", __func__); + XELOGE("%s: Failed to acquire staging memory!", __func__); return false; } // DEBUG: Check the source address. If it's completely zero'd out, print it. bool valid = false; auto src_data = memory_->TranslatePhysical(src.guest_address); - for (uint32_t i = 0; i < src.input_length; i++) { + for (uint32_t i = 0; i < unpack_length; i++) { if (src_data[i] != 0) { valid = true; break; @@ -1175,7 +1131,7 @@ bool TextureCache::UploadTexture(VkCommandBuffer command_buffer, copy_regions[0].imageOffset = {0, 0, 0}; // Now upload all the MIPs - VkDeviceSize buffer_offset = unpack_length; + VkDeviceSize buffer_offset = ComputeMipStorage(src, 0); for (uint32_t mip = 1; mip < src.mip_levels; mip++) { uint8_t* dest = reinterpret_cast(alloc->host_ptr) + buffer_offset; if (!ConvertTexture(dest, ©_regions[mip], mip, src)) { @@ -1186,7 +1142,7 @@ bool TextureCache::UploadTexture(VkCommandBuffer command_buffer, copy_regions[mip].imageOffset = {0, 0, 0}; // With each mip, the length is divided by 4. - buffer_offset += TextureInfo::GetMipLinearSize(src, mip); + buffer_offset += ComputeMipStorage(src, mip); } // Transition the texture into a transfer destination layout. @@ -1240,32 +1196,83 @@ bool TextureCache::UploadTexture(VkCommandBuffer command_buffer, return true; } -bool TextureCache::ComputeTextureStorage(size_t* output_length, - const TextureInfo& src) { - if (src.texture_format == TextureFormat::k_CTX1) { - switch (src.dimension) { - case Dimension::k1D: { - assert_always(); - } break; - case Dimension::k2D: { - *output_length = src.size.input_width * src.size.input_height * 2; - return true; - } - case Dimension::k3D: { - assert_always(); - } break; - case Dimension::kCube: { - *output_length = src.size.input_width * src.size.input_height * 2 * 6; - return true; - } - } - return false; - } else { - *output_length = src.input_length; - return true; +const FormatInfo* TextureCache::GetFormatInfo(TextureFormat format) { + switch (format) { + case TextureFormat::k_CTX1: + return FormatInfo::Get(TextureFormat::k_8_8); + case TextureFormat::k_DXT3A: + return FormatInfo::Get(TextureFormat::k_DXT2_3); + default: + return FormatInfo::Get(format); } } +texture_conversion::CopyBlockCallback TextureCache::GetFormatCopyBlock( + TextureFormat format) { + switch (format) { + case TextureFormat::k_CTX1: + return texture_conversion::ConvertTexelCTX1ToR8G8; + case TextureFormat::k_DXT3A: + return texture_conversion::ConvertTexelDXT3AToDXT3; + default: + return texture_conversion::CopySwapBlock; + } +} + +TextureMemoryUsage TextureCache::GetMipMemoryUsage(const TextureInfo& src, + uint32_t mip) { + auto format_info = GetFormatInfo(src.format); + uint32_t width = src.width + 1; + uint32_t height = src.height + 1; + uint32_t depth = src.depth + 1; + TextureMemoryUsage usage; + if (mip == 0) { + usage = TextureMemoryUsage::Calculate(format_info, width, height, depth, + width, false); + } else { + uint32_t mip_width = xe::next_pow2(width) >> mip; + uint32_t mip_height = xe::next_pow2(height) >> mip; + usage = TextureMemoryUsage::Calculate(format_info, mip_width, mip_height, + depth, mip_width, false); + } + return usage; +} + +uint32_t TextureCache::ComputeMipStorage(const FormatInfo* format_info, + uint32_t width, uint32_t height, + uint32_t depth, uint32_t mip) { + assert_not_null(format_info); + TextureMemoryUsage usage; + if (mip == 0) { + usage = TextureMemoryUsage::Calculate(format_info, width, height, depth, + false, false); + } else { + uint32_t mip_width = xe::next_pow2(width) >> mip; + uint32_t mip_height = xe::next_pow2(height) >> mip; + usage = TextureMemoryUsage::Calculate(format_info, mip_width, mip_height, + depth, false, false); + } + uint32_t bytes_per_block = format_info->bytes_per_block(); + return usage.blocks() * bytes_per_block; +} + +uint32_t TextureCache::ComputeMipStorage(const TextureInfo& src, uint32_t mip) { + return ComputeMipStorage(GetFormatInfo(src.format), src.width + 1, + src.height + 1, src.depth + 1, mip); +} + +uint32_t TextureCache::ComputeTextureStorage(const TextureInfo& src) { + auto format_info = GetFormatInfo(src.format); + uint32_t width = src.width + 1; + uint32_t height = src.height + 1; + uint32_t depth = src.depth + 1; + uint32_t length = 0; + for (uint32_t mip = 0; mip < src.mip_levels; mip++) { + length += ComputeMipStorage(format_info, width, height, depth, mip); + } + return length; +} + void TextureCache::WritebackTexture(Texture* texture) { VkResult status = VK_SUCCESS; VkFence fence = wb_command_pool_->BeginBatch(); @@ -1332,7 +1339,8 @@ void TextureCache::WritebackTexture(Texture* texture) { auto dest = memory_->TranslatePhysical(texture->texture_info.guest_address); if (status == VK_SUCCESS) { - std::memcpy(dest, alloc->host_ptr, texture->texture_info.input_length); + std::memcpy(dest, alloc->host_ptr, + texture->texture_info.GetByteSize(false)); } wb_staging_buffer_.Scavenge(); @@ -1473,7 +1481,7 @@ bool TextureCache::SetupTextureBinding(VkCommandBuffer command_buffer, } // Search via the base format. - texture_info.texture_format = GetBaseFormat(texture_info.texture_format); + texture_info.format = GetBaseFormat(texture_info.format); auto texture = Demand(texture_info, command_buffer, completion_fence); auto sampler = Demand(sampler_info); diff --git a/src/xenia/gpu/vulkan/texture_cache.h b/src/xenia/gpu/vulkan/texture_cache.h index c15a05fa6..397b0b776 100644 --- a/src/xenia/gpu/vulkan/texture_cache.h +++ b/src/xenia/gpu/vulkan/texture_cache.h @@ -16,6 +16,7 @@ #include "xenia/gpu/register_file.h" #include "xenia/gpu/sampler_info.h" #include "xenia/gpu/shader.h" +#include "xenia/gpu/texture_conversion.h" #include "xenia/gpu/texture_info.h" #include "xenia/gpu/trace_writer.h" #include "xenia/gpu/vulkan/vulkan_command_processor.h" @@ -155,7 +156,17 @@ class TextureCache { uint32_t mip, const TextureInfo& src); bool ConvertTexture(uint8_t* dest, VkBufferImageCopy* copy_region, uint32_t mip, const TextureInfo& src); - bool ComputeTextureStorage(size_t* output_length, const TextureInfo& src); + + static const FormatInfo* GetFormatInfo(TextureFormat format); + static texture_conversion::CopyBlockCallback GetFormatCopyBlock( + TextureFormat format); + static TextureMemoryUsage GetMipMemoryUsage(const TextureInfo& src, + uint32_t mip); + static uint32_t ComputeMipStorage(const FormatInfo* format_info, + uint32_t width, uint32_t height, + uint32_t depth, uint32_t mip); + static uint32_t ComputeMipStorage(const TextureInfo& src, uint32_t mip); + static uint32_t ComputeTextureStorage(const TextureInfo& src); // Writes a texture back into guest memory. This call is (mostly) asynchronous // but the texture must not be flagged for destruction.