[Vulkan] Reimplemented texture conversions. Here be dragons! Probably breaks everything. Also bonus DXT3A support.

This commit is contained in:
gibbed 2018-05-26 06:57:16 -05:00
parent 926464cb90
commit 7116b5fc82
7 changed files with 647 additions and 581 deletions

View File

@ -0,0 +1,156 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2014 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#include "xenia/gpu/texture_conversion.h"
#include <algorithm>
#include <cmath>
#include <cstring>
#include <functional>
#include "xenia/base/logging.h"
#include "xenia/base/math.h"
#include "xenia/base/memory.h"
#include "third_party/xxhash/xxhash.h"
namespace xe {
namespace gpu {
namespace texture_conversion {
using namespace xe::gpu::xenos;
// Copies |length| bytes from |input| to |output|, applying the swap selected
// by |endian|.
//
// |length| is a byte count. The xe::copy_and_swap_* helpers are handed a count
// of elements instead, so it is divided by the element size of the swap mode.
// Unknown endian values fall through to a plain copy.
void CopySwapBlock(Endian endian, void* output, const void* input,
                   size_t length) {
  switch (endian) {
    case Endian::k8in16:
      // 16-bit elements.
      xe::copy_and_swap_16_unaligned(output, input, length / 2);
      break;
    case Endian::k8in32:
      // 32-bit elements.
      xe::copy_and_swap_32_unaligned(output, input, length / 4);
      break;
    case Endian::k16in32:
      // Swap high and low 16 bits within a 32 bit word. The elements are
      // 32-bit words, so the count is length / 4 exactly as for k8in32;
      // passing the raw byte length would read and write four times the
      // requested amount of memory.
      // NOTE(review): assumes the helper counts 32-bit words like its
      // siblings - confirm against xenia/base/memory.h.
      xe::copy_and_swap_16_in_32_unaligned(output, input, length / 4);
      break;
    default:
    case Endian::kUnspecified:
      std::memcpy(output, input, length);
      break;
  }
}
// Decodes one 8-byte CTX1 block into a 4x4 grid of two-byte texels.
//
// The block is first copied (and byte swapped according to |endian|) into a
// local buffer. It holds two endpoint pairs (r0, g0) and (r1, g1) followed by
// a 32-bit word carrying one 2-bit palette selector per texel.
//
// |length| is used as the distance in bytes between consecutive output rows.
void ConvertTexelCTX1ToR8G8(Endian endian, void* output, const void* input,
                            size_t length) {
  // http://fileadmin.cs.lth.se/cs/Personal/Michael_Doggett/talks/unc-xenos-doggett.pdf
  union {
    uint8_t data[8];
    struct {
      uint8_t r0, g0, r1, g1;
      uint32_t xx;
    };
  } block;
  static_assert(sizeof(block) == 8, "CTX1 block mismatch");

  CopySwapBlock(endian, block.data, input, sizeof(block.data));

  // Palette entries 0 and 1 are the endpoints; entries 2 and 3 are the 2/3-1/3
  // and 1/3-2/3 blends of them, truncated to 8 bits.
  const uint8_t palette[4][2] = {
      {block.r0, block.g0},
      {block.r1, block.g1},
      {static_cast<uint8_t>(2.f / 3.f * block.r0 + 1.f / 3.f * block.r1),
       static_cast<uint8_t>(2.f / 3.f * block.g0 + 1.f / 3.f * block.g1)},
      {static_cast<uint8_t>(1.f / 3.f * block.r0 + 2.f / 3.f * block.r1),
       static_cast<uint8_t>(1.f / 3.f * block.g0 + 2.f / 3.f * block.g1)},
  };

  auto* dest = static_cast<uint8_t*>(output);
  for (uint32_t texel = 0; texel < 16; ++texel) {
    const uint32_t row = texel / 4;
    const uint32_t column = texel % 4;
    // Selectors are packed two bits per texel, row by row, lowest bits first.
    const uint8_t selector = (block.xx >> (texel * 2)) & 3;
    uint8_t* pixel = dest + (row * length) + (column * 2);
    pixel[0] = palette[selector][0];
    pixel[1] = palette[selector][1];
  }
}
// Expands one 8-byte DXT3A block into a 16-byte DXT3 (BC2) block.
//
// A BC2 block stores its 64 bits of explicit alpha first and its 64 bits of
// color second. DXT3A carries only the alpha data, so the (byte swapped)
// payload is written to bytes 0-7 and the color half, bytes 8-15, is
// zero-filled. Writing the payload into the color half instead would make the
// alpha channel sample as all zero.
//
// |length| is not used; the block sizes are fixed.
void ConvertTexelDXT3AToDXT3(Endian endian, void* output, const void* input,
                             size_t length) {
  const uint32_t alpha_bytes = 8;
  const uint32_t color_bytes = 8;
  auto output_bytes = static_cast<uint8_t*>(output);
  CopySwapBlock(endian, &output_bytes[0], input, alpha_bytes);
  std::memset(&output_bytes[alpha_bytes], 0, color_bytes);
}
// https://github.com/BinomialLLC/crunch/blob/ea9b8d8c00c8329791256adafa8cf11e4e7942a2/inc/crn_decomp.h#L4108
// Computes the part of a tiled 2D offset that depends only on the row.
//
// |y| is the row, |width| the pitch of the surface and |log2_bpp| the base-2
// logarithm of the element size. The result is meant to be passed to
// TiledOffset2DInner() as its |base_offset|.
static uint32_t TiledOffset2DOuter(uint32_t y, uint32_t width,
                                   uint32_t log2_bpp) {
  // Contribution of whole groups of 32 rows, each spanning width / 32 groups
  // of 32 columns.
  const uint32_t group_row = y / 32;
  const uint32_t groups_per_row = width / 32;
  const uint32_t macro_term = (group_row * groups_per_row) << (log2_bpp + 7);

  // Bits 1-2 of y; everything above the low nibble is shifted up by one.
  const uint32_t micro_term = ((y & 6) << 2) << log2_bpp;
  const uint32_t micro_high = (micro_term & ~0xF) << 1;
  const uint32_t micro_low = micro_term & 0xF;

  // Bits 3 and 0 of y are placed separately.
  const uint32_t y_bit3_term = (y & 8) << (3 + log2_bpp);
  const uint32_t y_bit0_term = (y & 1) << 4;

  return macro_term + micro_high + micro_low + y_bit3_term + y_bit0_term;
}
// Completes a tiled 2D offset for column |x| of row |y|.
//
// |base_offset| is the value TiledOffset2DOuter() produced for the same row
// and |log2_bpp| is the base-2 logarithm of the element size.
static uint32_t TiledOffset2DInner(uint32_t x, uint32_t y, uint32_t log2_bpp,
                                   uint32_t base_offset) {
  // Column contribution, built the same way as the row contribution.
  const uint32_t macro_term = (x / 32) << (log2_bpp + 7);
  const uint32_t micro_term = (x & 7) << log2_bpp;
  const uint32_t combined =
      base_offset +
      (macro_term + ((micro_term & ~0xF) << 1) + (micro_term & 0xF));

  // Spread the combined offset: bits 9 and up move up by three, bits 6-8 move
  // up by two and the low six bits stay in place.
  const uint32_t high_bits = (combined & ~0x1FF) << 3;
  const uint32_t middle_bits = (combined & 0x1C0) << 2;
  const uint32_t low_bits = combined & 0x3F;

  // Bit 4 of y and a two-bit value mixed from y bit 3 and x >> 3 fill the
  // positions that were opened up.
  const uint32_t y_bit4_term = (y & 16) << 7;
  const uint32_t mixed_select = (((y & 8) >> 2) + (x >> 3)) & 3;

  return high_bits + middle_bits + low_bits + y_bit4_term +
         (mixed_select << 6);
}
void Untile(uint8_t* output_buffer, const uint8_t* input_buffer,
const UntileInfo* untile_info) {
assert_not_null(untile_info);
assert_not_null(untile_info->input_format_info);
assert_not_null(untile_info->output_format_info);
assert_true(untile_info->width <= untile_info->input_pitch);
assert_true(untile_info->width <= untile_info->output_pitch);
uint32_t input_bytes_per_block =
untile_info->input_format_info->bytes_per_block();
uint32_t output_bytes_per_block =
untile_info->output_format_info->bytes_per_block();
uint32_t output_pitch = untile_info->output_pitch * output_bytes_per_block;
// Bytes per pixel
auto log2_bpp = (input_bytes_per_block / 4) +
((input_bytes_per_block / 2) >> (input_bytes_per_block / 4));
// Offset to the current row, in bytes.
uint32_t output_row_offset = 0;
for (uint32_t y = 0; y < untile_info->height; y++) {
auto input_row_offset = TiledOffset2DOuter(
untile_info->offset_y + y, untile_info->input_pitch, log2_bpp);
// Go block-by-block on this row.
uint32_t output_offset = output_row_offset;
for (uint32_t x = 0; x < untile_info->width; x++) {
auto input_offset = TiledOffset2DInner(untile_info->offset_x + x,
untile_info->offset_y + y,
log2_bpp, input_row_offset);
input_offset >>= log2_bpp;
untile_info->copy_callback(
&output_buffer[output_offset],
&input_buffer[input_offset * input_bytes_per_block],
output_bytes_per_block);
output_offset += output_bytes_per_block;
}
output_row_offset += output_pitch;
}
}
} // namespace texture_conversion
} // namespace gpu
} // namespace xe

View File

@ -0,0 +1,56 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2014 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#ifndef XENIA_GPU_TEXTURE_CONVERSION_H_
#define XENIA_GPU_TEXTURE_CONVERSION_H_
#include <cstring>
#include <functional>
#include <memory>
#include "xenia/base/assert.h"
#include "xenia/gpu/texture_info.h"
#include "xenia/gpu/xenos.h"
namespace xe {
namespace gpu {
namespace texture_conversion {
// Signature shared by the block copy/convert routines declared below:
// (endian, output, input, length).
typedef std::function<void(Endian, void*, const void*, size_t)>
CopyBlockCallback;
// Copies |length| bytes from |input| to |output|, swapping according to
// |endian|. An unspecified or unknown endian results in a plain copy.
void CopySwapBlock(Endian endian, void* output, const void* input,
size_t length);
// Decodes one 8-byte CTX1 block into a 4x4 grid of two-byte texels.
// |length| is used as the distance in bytes between output rows.
void ConvertTexelCTX1ToR8G8(Endian endian, void* output, const void* input,
size_t length);
// Expands one 8-byte DXT3A block into a 16-byte block at |output|; the
// 8 bytes not covered by the input are zero-filled. |length| is not used.
void ConvertTexelDXT3AToDXT3(Endian endian, void* output, const void* input,
size_t length);
// Callback invoked by Untile() once per block: (output, input, length).
using UntileCopyBlockCallback =
    std::function<void(void*, const void*, size_t)>;

// Describes one Untile() pass.
struct UntileInfo {
  // Position of the region's first block inside the tiled input.
  uint32_t offset_x;
  uint32_t offset_y;
  // Size of the region; Untile() performs one callback per unit, so these are
  // counted in blocks.
  uint32_t width;
  uint32_t height;
  // Pitch of the tiled input; must not be smaller than |width|.
  uint32_t input_pitch;
  // Blocks per output row; must not be smaller than |width|.
  uint32_t output_pitch;
  // Supply the input and output block sizes via bytes_per_block().
  const FormatInfo* input_format_info;
  const FormatInfo* output_format_info;
  // Performs the actual copy or conversion of a single block.
  UntileCopyBlockCallback copy_callback;
};
// Copies the region described by |untile_info| from the tiled |input_buffer|
// into |output_buffer|, which is written row by row. The copy callback in
// |untile_info| is invoked once per block.
void Untile(uint8_t* output_buffer, const uint8_t* input_buffer,
const UntileInfo* untile_info);
} // namespace texture_conversion
} // namespace gpu
} // namespace xe
#endif // XENIA_GPU_TEXTURE_CONVERSION_H_

View File

@ -26,21 +26,22 @@ using namespace xe::gpu::xenos;
bool TextureInfo::Prepare(const xe_gpu_texture_fetch_t& fetch,
TextureInfo* out_info) {
std::memset(out_info, 0, sizeof(TextureInfo));
// http://msdn.microsoft.com/en-us/library/windows/desktop/cc308051(v=vs.85).aspx
// a2xx_sq_surfaceformat
std::memset(out_info, 0, sizeof(TextureInfo));
auto& info = *out_info;
info.guest_address = fetch.address << 12;
info.format = static_cast<TextureFormat>(fetch.format);
info.endianness = static_cast<Endian>(fetch.endianness);
info.dimension = static_cast<Dimension>(fetch.dimension);
info.pitch = fetch.pitch << 5;
info.width = info.height = info.depth = 0;
switch (info.dimension) {
case Dimension::k1D:
info.dimension = Dimension::k2D;
info.dimension = Dimension::k2D; // we treat 1D textures as 2D
info.width = fetch.size_1d.width;
info.height = 1;
break;
case Dimension::k2D:
info.width = fetch.size_2d.width;
@ -56,228 +57,62 @@ bool TextureInfo::Prepare(const xe_gpu_texture_fetch_t& fetch,
info.height = fetch.size_stack.height;
info.depth = fetch.size_stack.depth;
break;
default:
assert_unhandled_case(info.dimension);
break;
}
info.texture_format = static_cast<TextureFormat>(fetch.format);
info.endianness = static_cast<Endian>(fetch.endianness);
info.pitch = fetch.pitch << 5;
info.mip_levels = fetch.packed_mips ? fetch.mip_max_level + 1 : 1;
info.is_tiled = fetch.tiled;
info.has_packed_mips = fetch.packed_mips;
info.guest_address = fetch.address << 12;
info.mip_address = fetch.mip_address << 12;
info.mip_levels = fetch.packed_mips ? fetch.mip_max_level + 1 : 1;
info.input_length = 0; // Populated below.
if (info.format_info()->format == TextureFormat::kUnknown) {
XELOGE("Attempting to fetch from unsupported texture format %d",
info.texture_format);
info.format);
return false;
}
// Must be called here when we know the format.
switch (info.dimension) {
case Dimension::k1D: {
info.CalculateTextureSizes1D(fetch.size_1d.width + 1);
} break;
case Dimension::k2D: {
info.CalculateTextureSizes2D(fetch.size_2d.width + 1,
fetch.size_2d.height + 1);
} break;
case Dimension::k3D: {
info.CalculateTextureSizes3D(fetch.size_3d.width + 1,
fetch.size_3d.height + 1,
fetch.size_3d.depth + 1);
break;
}
case Dimension::kCube: {
info.CalculateTextureSizesCube(fetch.size_stack.width + 1,
fetch.size_stack.height + 1,
fetch.size_stack.depth + 1);
} break;
}
info.memory_usage = TextureMemoryUsage::Calculate(out_info, true);
return true;
}
bool TextureInfo::PrepareResolve(uint32_t physical_address,
TextureFormat texture_format, Endian endian,
TextureFormat format, Endian endian,
uint32_t pitch, uint32_t width,
uint32_t height, TextureInfo* out_info) {
std::memset(out_info, 0, sizeof(TextureInfo));
auto& info = *out_info;
info.guest_address = physical_address;
info.dimension = Dimension::k2D;
assert_true(width > 0);
assert_true(height > 0);
info.pitch = pitch;
std::memset(out_info, 0, sizeof(TextureInfo));
auto& info = *out_info;
info.format = format;
info.dimension = Dimension::k2D;
info.width = width - 1;
info.height = height - 1;
info.texture_format = texture_format;
info.mip_levels = 1;
info.depth = 0;
info.pitch = pitch;
info.endianness = endian;
info.is_tiled = true;
info.guest_address = physical_address;
info.mip_address = 0;
info.mip_levels = 1;
info.input_length = 0;
if (info.format_info()->format == TextureFormat::kUnknown) {
assert_true("Unsupported texture format");
return false;
}
info.CalculateTextureSizes2D(width, height);
info.memory_usage = TextureMemoryUsage::Calculate(out_info, true);
return true;
}
// Fills |size| and |input_length| for a 1D texture of |width| texels.
// The block count is derived from |pitch|, not from |width|; |width| is only
// recorded as the logical width.
void TextureInfo::CalculateTextureSizes1D(uint32_t width) {
size.logical_width = width;
auto format = format_info();
// width in blocks.
uint32_t block_width =
xe::round_up(pitch, format->block_width) / format->block_width;
// Texture dimensions must be a multiple of tile
// dimensions (32x32 blocks).
size.block_width = xe::round_up(block_width, 32);
// Only the block width enters the block size here (single row).
uint32_t bytes_per_block = format->block_width * format->bits_per_pixel / 8;
uint32_t byte_pitch = size.block_width * bytes_per_block;
uint32_t texel_width;
if (!is_tiled) {
// Each row must be a multiple of 256 in linear textures.
byte_pitch = xe::round_up(byte_pitch, 256);
texel_width = (byte_pitch / bytes_per_block) * format->block_width;
} else {
texel_width = size.block_width * format->block_width;
}
size.input_width = texel_width;
// Set some reasonable defaults for unused fields.
size.logical_height = 1;
size.block_height = format->block_height;
size.input_height = 1;
// NOTE(review): uses the raw |pitch| rather than the rounded block width -
// confirm this is intended.
size.input_face_length = pitch * bytes_per_block;
input_length = size.input_face_length;
}
// Fills |size| and |input_length| for a 2D texture of |width| x |height|
// texels. The horizontal block count is derived from |pitch|; the vertical
// block count from |height|.
void TextureInfo::CalculateTextureSizes2D(uint32_t width, uint32_t height) {
size.logical_width = width;
size.logical_height = height;
auto format = format_info();
// w/h in blocks.
uint32_t block_width =
xe::round_up(pitch, format->block_width) / format->block_width;
uint32_t block_height =
xe::round_up(size.logical_height, format->block_height) /
format->block_height;
// Texture dimensions must be a multiple of tile
// dimensions (32x32 blocks).
size.block_width = xe::round_up(block_width, 32);
size.block_height = xe::round_up(block_height, 32);
uint32_t bytes_per_block =
format->block_width * format->block_height * format->bits_per_pixel / 8;
uint32_t byte_pitch = size.block_width * bytes_per_block;
uint32_t texel_width;
if (!is_tiled) {
// Each row must be a multiple of 256 in linear textures.
byte_pitch = xe::round_up(byte_pitch, 256);
texel_width = (byte_pitch / bytes_per_block) * format->block_width;
} else {
texel_width = size.block_width * format->block_width;
}
size.input_width = texel_width;
size.input_height = size.block_height * format->block_height;
// Byte length of the single face; also the total input length.
size.input_face_length = pitch * bytes_per_block * size.block_height;
input_length = size.input_face_length;
}
// Fills |size| and |input_length| for a 3D texture. Each slice is sized like
// a 2D texture and the total input length is the slice length times |depth|.
void TextureInfo::CalculateTextureSizes3D(uint32_t width, uint32_t height,
uint32_t depth) {
size.logical_width = width;
size.logical_height = height;
auto format = format_info();
// w/h in blocks must be a multiple of block size.
uint32_t block_width =
xe::round_up(pitch, format->block_width) / format->block_width;
uint32_t block_height =
xe::round_up(size.logical_height, format->block_height) /
format->block_height;
// Texture dimensions must be a multiple of tile
// dimensions (32x32 blocks).
size.block_width = xe::round_up(block_width, 32);
size.block_height = xe::round_up(block_height, 32);
uint32_t bytes_per_block =
format->block_width * format->block_height * format->bits_per_pixel / 8;
uint32_t byte_pitch = size.block_width * bytes_per_block;
uint32_t texel_width;
if (!is_tiled) {
// Each row must be a multiple of 256 in linear textures.
byte_pitch = xe::round_up(byte_pitch, 256);
texel_width = (byte_pitch / bytes_per_block) * format->block_width;
} else {
texel_width = size.block_width * format->block_width;
}
size.input_width = texel_width;
size.input_height = size.block_height * format->block_height;
size.input_face_length = pitch * bytes_per_block * size.block_height;
// One face length per depth slice.
input_length = size.input_face_length * depth;
}
// Fills |size| and |input_length| for a cube texture. |depth| is the number
// of faces and is asserted to be 6; each face is sized like a 2D texture.
void TextureInfo::CalculateTextureSizesCube(uint32_t width, uint32_t height,
uint32_t depth) {
assert_true(depth == 6);
size.logical_width = width;
size.logical_height = height;
auto format = format_info();
// w/h in blocks must be a multiple of block size.
uint32_t block_width =
xe::round_up(pitch, format->block_width) / format->block_width;
uint32_t block_height =
xe::round_up(size.logical_height, format->block_height) /
format->block_height;
// Texture dimensions must be a multiple of tile
// dimensions (32x32 blocks).
size.block_width = xe::round_up(block_width, 32);
size.block_height = xe::round_up(block_height, 32);
uint32_t bytes_per_block =
format->block_width * format->block_height * format->bits_per_pixel / 8;
uint32_t byte_pitch = size.block_width * bytes_per_block;
uint32_t texel_width;
if (!is_tiled) {
// Each row must be a multiple of 256 in linear textures.
byte_pitch = xe::round_up(byte_pitch, 256);
texel_width = (byte_pitch / bytes_per_block) * format->block_width;
} else {
texel_width = size.block_width * format->block_width;
}
size.input_width = texel_width;
size.input_height = size.block_height * format->block_height;
size.input_face_length = pitch * bytes_per_block * size.block_height;
// One face length per cube face.
input_length = size.input_face_length * depth;
}
static void TextureSwap(Endian endianness, void* dest, const void* src,
size_t length) {
switch (endianness) {
@ -330,167 +165,104 @@ static void ConvertTexelCTX1(uint8_t* dest, size_t dest_pitch,
}
}
void TextureInfo::ConvertTiled(uint8_t* dest, const uint8_t* src, Endian endian,
const FormatInfo* format_info, uint32_t offset_x,
uint32_t offset_y, uint32_t block_pitch,
uint32_t width, uint32_t height,
uint32_t output_width) {
// TODO(benvanik): optimize this inner loop (or work by tiles).
uint32_t bytes_per_block = format_info->block_width *
format_info->block_height *
format_info->bits_per_pixel / 8;
uint32_t output_pitch =
output_width * format_info->block_width * format_info->bits_per_pixel / 8;
uint32_t output_row_height = 1;
if (format_info->format == TextureFormat::k_CTX1) {
// TODO: Can we calculate this?
output_row_height = 4;
}
// logical w/h in blocks.
uint32_t block_width =
xe::round_up(width, format_info->block_width) / format_info->block_width;
uint32_t block_height = xe::round_up(height, format_info->block_height) /
format_info->block_height;
// Bytes per pixel
auto log2_bpp =
(bytes_per_block / 4) + ((bytes_per_block / 2) >> (bytes_per_block / 4));
// Offset to the current row, in bytes.
uint32_t output_row_offset = 0;
for (uint32_t y = 0; y < block_height; y++) {
auto input_row_offset =
TextureInfo::TiledOffset2DOuter(offset_y + y, block_pitch, log2_bpp);
// Go block-by-block on this row.
uint32_t output_offset = output_row_offset;
for (uint32_t x = 0; x < block_width; x++) {
auto input_offset = TextureInfo::TiledOffset2DInner(
offset_x + x, offset_y + y, log2_bpp, input_row_offset);
input_offset >>= log2_bpp;
if (format_info->format == TextureFormat::k_CTX1) {
// Convert to R8G8.
ConvertTexelCTX1(&dest[output_offset], output_pitch, src, endian);
} else {
// Generic swap to destination.
TextureSwap(endian, dest + output_offset,
src + input_offset * bytes_per_block, bytes_per_block);
}
output_offset += bytes_per_block;
}
output_row_offset += output_pitch * output_row_height;
}
// Returns 1 + floor(log2(largest extent)), where each extent is the stored
// field plus one.
uint32_t TextureInfo::GetMaxMipLevels() const {
  const uint32_t largest_extent =
      std::max({width + 1, height + 1, depth + 1});
  return 1 + xe::log2_floor(largest_extent);
}
uint32_t TextureInfo::GetMaxMipLevels(uint32_t width, uint32_t height,
uint32_t depth) {
return 1 + xe::log2_floor(std::max({width, height, depth}));
// Returns the memory usage of mip level |mip|.
//
// Level 0 returns the cached |memory_usage| member regardless of |is_guest|.
// Other levels are computed from the power-of-two rounded base dimensions
// shifted right by |mip|.
// NOTE(review): the shifted dimensions are not clamped to 1 here, unlike in
// GetMipSize() - confirm that very small mips are handled as intended.
const TextureMemoryUsage TextureInfo::GetMipMemoryUsage(uint32_t mip,
                                                        bool is_guest) const {
  if (mip != 0) {
    const uint32_t level_width = xe::next_pow2(width + 1) >> mip;
    const uint32_t level_height = xe::next_pow2(height + 1) >> mip;
    return TextureMemoryUsage::Calculate(format_info(), level_width,
                                         level_height, depth + 1, is_tiled,
                                         is_guest);
  }
  return memory_usage;
}
uint32_t TextureInfo::GetMipLocation(const TextureInfo& src, uint32_t mip,
uint32_t* offset_x, uint32_t* offset_y) {
// Writes the dimensions of mip level |mip| to |out_width| / |out_height|.
//
// Level 0 reports the stored dimensions plus one. Higher levels report the
// power-of-two rounded dimensions shifted right by |mip|, never less than 1.
void TextureInfo::GetMipSize(uint32_t mip, uint32_t* out_width,
                             uint32_t* out_height) const {
  assert_not_null(out_width);
  assert_not_null(out_height);
  if (mip != 0) {
    const uint32_t level_width =
        std::max(xe::next_pow2(width + 1) >> mip, 1u);
    const uint32_t level_height =
        std::max(xe::next_pow2(height + 1) >> mip, 1u);
    *out_width = level_width;
    *out_height = level_height;
    return;
  }
  *out_width = width + 1;
  *out_height = height + 1;
}
uint32_t TextureInfo::GetMipLocation(uint32_t mip, uint32_t* offset_x,
uint32_t* offset_y, bool is_guest) const {
if (mip == 0) {
// Short-circuit. Mip 0 is always stored in guest_address.
if (!src.has_packed_mips) {
if (!has_packed_mips) {
*offset_x = 0;
*offset_y = 0;
} else {
GetPackedTileOffset(src, 0, offset_x, offset_y);
GetPackedTileOffset(0, offset_x, offset_y);
}
return src.guest_address;
return guest_address;
}
// If the texture is <= 16 pixels w/h, the mips are packed with the base
// texture. Otherwise, they're stored beginning from mip_address.
uint32_t address_base = std::min(src.width, src.height) < 16
? src.guest_address
: src.mip_address;
uint32_t address_base =
std::min(width, height) < 16 ? guest_address : mip_address;
uint32_t address_offset = 0;
if (!src.has_packed_mips) {
if (!has_packed_mips) {
for (uint32_t i = 1; i < mip; i++) {
address_offset += GetMipByteSize(src, i);
address_offset += GetMipByteSize(i, is_guest);
}
*offset_x = 0;
*offset_y = 0;
return address_base + address_offset;
}
uint32_t width_pow2 = xe::next_pow2(width + 1);
uint32_t height_pow2 = xe::next_pow2(height + 1);
// Walk forward to find the address of the mip.
uint32_t packed_mip_base = 1;
for (uint32_t i = packed_mip_base; i < mip; i++, packed_mip_base++) {
uint32_t logical_width = std::max(xe::next_pow2(src.width + 1) >> i, 1u);
uint32_t logical_height = std::max(xe::next_pow2(src.height + 1) >> i, 1u);
if (std::min(logical_width, logical_height) <= 16) {
uint32_t mip_width = std::max(width_pow2 >> i, 1u);
uint32_t mip_height = std::max(height_pow2 >> i, 1u);
if (std::min(mip_width, mip_height) <= 16) {
// We've reached the point where the mips are packed into a single tile.
break;
}
address_offset += GetMipByteSize(src, i);
address_offset += GetMipByteSize(i, is_guest);
}
// Now, check if the mip is packed at an offset.
GetPackedTileOffset(xe::next_pow2(src.width + 1) >> mip,
xe::next_pow2(src.height + 1) >> mip, src.format_info(),
GetPackedTileOffset(width_pow2 >> mip, height_pow2 >> mip, format_info(),
mip - packed_mip_base, offset_x, offset_y);
return address_base + address_offset;
}
uint32_t TextureInfo::GetMipByteSize(const TextureInfo& src, uint32_t mip) {
if (mip == 0) {
return src.input_length;
}
uint32_t bytes_per_block = src.format_info()->block_width *
src.format_info()->block_height *
src.format_info()->bits_per_pixel / 8;
uint32_t logical_width = xe::next_pow2(src.width + 1) >> mip;
uint32_t logical_height = xe::next_pow2(src.height + 1) >> mip;
// w/h in blocks
uint32_t block_width =
xe::round_up(logical_width, src.format_info()->block_width) /
src.format_info()->block_width;
uint32_t block_height =
xe::round_up(logical_height, src.format_info()->block_height) /
src.format_info()->block_height;
// Texture dimensions must be a multiple of tile
// dimensions (32x32 blocks).
block_width = xe::round_up(block_width, 32);
block_height = xe::round_up(block_height, 32);
uint32_t byte_pitch = block_width * bytes_per_block;
if (!src.is_tiled) {
// Each row must be a multiple of 256 in linear textures.
byte_pitch = xe::round_up(byte_pitch, 256);
}
return byte_pitch * block_height * (src.depth + 1);
// Returns the size in bytes of mip level |mip|: its block count multiplied by
// the format's bytes per block.
uint32_t TextureInfo::GetMipByteSize(uint32_t mip, bool is_guest) const {
  const auto level_usage = GetMipMemoryUsage(mip, is_guest);
  return level_usage.blocks() * format_info()->bytes_per_block();
}
uint32_t TextureInfo::GetMipLinearSize(const TextureInfo& src, uint32_t mip) {
uint32_t bytes_per_block = src.format_info()->block_width *
src.format_info()->block_height *
src.format_info()->bits_per_pixel / 8;
uint32_t size = src.input_length >> (mip * 2);
// The size is a multiple of the block size.
return xe::round_up(size, bytes_per_block) * (src.depth + 1);
// Returns the combined byte size of all |mip_levels| mip levels.
uint32_t TextureInfo::GetByteSize(bool is_guest) const {
  uint32_t total_bytes = 0;
  uint32_t level = 0;
  while (level < mip_levels) {
    total_bytes += GetMipByteSize(level, is_guest);
    ++level;
  }
  return total_bytes;
}
bool TextureInfo::GetPackedTileOffset(uint32_t width, uint32_t height,
const FormatInfo* format_info,
int packed_tile, uint32_t* out_offset_x,
uint32_t* out_offset_y) {
int packed_tile, uint32_t* offset_x,
uint32_t* offset_y) {
// Tile size is 32x32, and once textures go <=16 they are packed into a
// single tile together. The math here is insane. Most sourced
// from graph paper and looking at dds dumps.
@ -530,8 +302,8 @@ bool TextureInfo::GetPackedTileOffset(uint32_t width, uint32_t height,
uint32_t log2_height = xe::log2_ceil(height);
if (std::min(log2_width, log2_height) > 4) {
// Too big, not packed.
*out_offset_x = 0;
*out_offset_y = 0;
*offset_x = 0;
*offset_y = 0;
return false;
}
@ -539,62 +311,40 @@ bool TextureInfo::GetPackedTileOffset(uint32_t width, uint32_t height,
if (packed_tile < 3) {
if (log2_width > log2_height) {
// Wider than tall. Laid out vertically.
*out_offset_x = 0;
*out_offset_y = 16 >> packed_tile;
*offset_x = 0;
*offset_y = 16 >> packed_tile;
} else {
// Taller than wide. Laid out horizontally.
*out_offset_x = 16 >> packed_tile;
*out_offset_y = 0;
*offset_x = 16 >> packed_tile;
*offset_y = 0;
}
} else {
if (log2_width > log2_height) {
// Wider than tall. Laid out vertically.
*out_offset_x = 16 >> (packed_tile - 2);
*out_offset_y = 0;
*offset_x = 16 >> (packed_tile - 2);
*offset_y = 0;
} else {
// Taller than wide. Laid out horizontally.
*out_offset_x = 0;
*out_offset_y = 16 >> (packed_tile - 2);
*offset_x = 0;
*offset_y = 16 >> (packed_tile - 2);
}
}
*out_offset_x /= format_info->block_width;
*out_offset_y /= format_info->block_height;
*offset_x /= format_info->block_width;
*offset_y /= format_info->block_height;
return true;
}
bool TextureInfo::GetPackedTileOffset(const TextureInfo& texture_info,
int packed_tile, uint32_t* out_offset_x,
uint32_t* out_offset_y) {
if (!texture_info.has_packed_mips) {
*out_offset_x = 0;
*out_offset_y = 0;
bool TextureInfo::GetPackedTileOffset(int packed_tile, uint32_t* offset_x,
uint32_t* offset_y) const {
if (!has_packed_mips) {
*offset_x = 0;
*offset_y = 0;
return false;
}
return GetPackedTileOffset(xe::next_pow2(texture_info.size.logical_width),
xe::next_pow2(texture_info.size.logical_height),
texture_info.format_info(), packed_tile,
out_offset_x, out_offset_y);
}
// https://github.com/BinomialLLC/crunch/blob/ea9b8d8c00c8329791256adafa8cf11e4e7942a2/inc/crn_decomp.h#L4108
uint32_t TextureInfo::TiledOffset2DOuter(uint32_t y, uint32_t width,
uint32_t log2_bpp) {
uint32_t macro = ((y / 32) * (width / 32)) << (log2_bpp + 7);
uint32_t micro = ((y & 6) << 2) << log2_bpp;
return macro + ((micro & ~0xF) << 1) + (micro & 0xF) +
((y & 8) << (3 + log2_bpp)) + ((y & 1) << 4);
}
uint32_t TextureInfo::TiledOffset2DInner(uint32_t x, uint32_t y,
uint32_t log2_bpp,
uint32_t base_offset) {
uint32_t macro = (x / 32) << (log2_bpp + 7);
uint32_t micro = (x & 7) << log2_bpp;
uint32_t offset =
base_offset + (macro + ((micro & ~0xF) << 1) + (micro & 0xF));
return ((offset & ~0x1FF) << 3) + ((offset & 0x1C0) << 2) + (offset & 0x3F) +
((y & 16) << 7) + (((((y & 8) >> 2) + (x >> 3)) & 3) << 6);
return GetPackedTileOffset(xe::next_pow2(width + 1),
xe::next_pow2(height + 1), format_info(),
packed_tile, offset_x, offset_y);
}
uint64_t TextureInfo::hash() const {

View File

@ -279,36 +279,56 @@ struct FormatInfo {
uint32_t block_height;
uint32_t bits_per_pixel;
uint32_t bytes_per_block() const {
return block_width * block_height * bits_per_pixel / 8;
}
static const FormatInfo* Get(uint32_t gpu_format);
static const FormatInfo* Get(TextureFormat format) {
return Get(static_cast<uint32_t>(format));
}
};
struct TextureInfo;
// Footprint of one texture level, expressed both in texels and in format
// blocks.
struct TextureMemoryUsage {
  uint32_t pitch;         // Row pitch, in texels.
  uint32_t height;        // Height, in texels.
  uint32_t block_pitch;   // Row pitch, in blocks.
  uint32_t block_height;  // Height, in blocks.
  uint32_t depth;

  // Total number of blocks over all |depth| slices.
  uint32_t blocks() const {
    const uint32_t blocks_per_slice = block_pitch * block_height;
    return blocks_per_slice * depth;
  }

  // Computes the usage from explicit dimensions.
  static TextureMemoryUsage Calculate(const FormatInfo* format_info,
                                      uint32_t pitch, uint32_t height,
                                      uint32_t depth, bool is_tiled,
                                      bool is_guest);
  // Computes the usage of the base level described by |texture_info|.
  static TextureMemoryUsage Calculate(const TextureInfo* texture_info,
                                      bool is_guest);
};
struct TextureInfo {
uint32_t guest_address;
TextureFormat texture_format;
TextureFormat format;
Endian endianness;
Dimension dimension;
uint32_t pitch; // pitch in blocks
uint32_t width; // width in pixels
uint32_t height; // height in pixels
uint32_t depth; // depth in layers
Endian endianness;
uint32_t pitch; // pitch in blocks
uint32_t mip_levels;
bool is_tiled;
bool has_packed_mips;
uint32_t mip_address;
uint32_t mip_levels;
uint32_t input_length;
struct Size {
uint32_t logical_width;
uint32_t logical_height;
uint32_t block_width; // # of horizontal blocks
uint32_t block_height; // # of vertical blocks
uint32_t input_width; // (full) texel pitch
uint32_t input_height; // (full) texel height
uint32_t input_face_length; // byte length of face
} size;
TextureMemoryUsage memory_usage;
uint32_t guest_address;
uint32_t mip_address;
const FormatInfo* format_info() const {
return FormatInfo::Get(static_cast<uint32_t>(texture_format));
return FormatInfo::Get(static_cast<uint32_t>(format));
}
bool is_compressed() const {
@ -323,47 +343,32 @@ struct TextureInfo {
uint32_t pitch, uint32_t width, uint32_t height,
TextureInfo* out_info);
static void ConvertTiled(uint8_t* dest, const uint8_t* src, Endian endian,
const FormatInfo* format_info, uint32_t offset_x,
uint32_t offset_y, uint32_t block_pitch,
uint32_t width, uint32_t height,
uint32_t output_width);
uint32_t GetMaxMipLevels() const;
static uint32_t GetMaxMipLevels(uint32_t width, uint32_t height,
uint32_t depth);
const TextureMemoryUsage GetMipMemoryUsage(uint32_t mip, bool is_guest) const;
void GetMipSize(uint32_t mip, uint32_t* width, uint32_t* height) const;
// Get the memory location of a mip. offset_x and offset_y are in blocks.
static uint32_t GetMipLocation(const TextureInfo& src, uint32_t mip,
uint32_t* offset_x, uint32_t* offset_y);
static uint32_t GetMipByteSize(const TextureInfo& src, uint32_t mip);
static uint32_t GetMipSizes(const TextureInfo& src, uint32_t mip);
uint32_t GetMipLocation(uint32_t mip, uint32_t* offset_x, uint32_t* offset_y,
bool is_guest) const;
// Get the byte size of a MIP when stored linearly.
static uint32_t GetMipLinearSize(const TextureInfo& src, uint32_t mip);
uint32_t GetMipByteSize(uint32_t mip, bool is_guest) const;
uint32_t GetByteSize(bool is_guest) const;
static bool GetPackedTileOffset(uint32_t width, uint32_t height,
const FormatInfo* format_info,
int packed_tile, uint32_t* out_offset_x,
uint32_t* out_offset_y);
static bool GetPackedTileOffset(const TextureInfo& texture_info,
int packed_tile, uint32_t* out_offset_x,
uint32_t* out_offset_y);
static uint32_t TiledOffset2DOuter(uint32_t y, uint32_t width,
uint32_t log2_bpp);
static uint32_t TiledOffset2DInner(uint32_t x, uint32_t y, uint32_t log2_bpp,
uint32_t base_offset);
int packed_tile, uint32_t* offset_x,
uint32_t* offset_y);
bool GetPackedTileOffset(int packed_tile, uint32_t* offset_x,
uint32_t* offset_y) const;
uint64_t hash() const;
bool operator==(const TextureInfo& other) const {
return std::memcmp(this, &other, sizeof(TextureInfo)) == 0;
}
private:
void CalculateTextureSizes1D(uint32_t width);
void CalculateTextureSizes2D(uint32_t width, uint32_t height);
void CalculateTextureSizes3D(uint32_t width, uint32_t height, uint32_t depth);
void CalculateTextureSizesCube(uint32_t width, uint32_t height,
uint32_t depth);
};
} // namespace gpu

View File

@ -0,0 +1,80 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2014 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#include "xenia/gpu/texture_info.h"
#include <algorithm>
#include <cmath>
#include "xenia/base/math.h"
namespace xe {
namespace gpu {
using namespace xe::gpu::xenos;
// Computes the texel and block dimensions of a texture level.
//
// Without |is_guest| the block counts are simply |pitch| and |height| rounded
// up to whole blocks. With |is_guest| both block counts are additionally
// rounded up to a multiple of 32, and for non-tiled textures the row is
// padded to a multiple of 256 bytes; the texel pitch and height are updated
// to match. |depth| is passed through unchanged.
static TextureMemoryUsage CalculateMemoryUsage(const FormatInfo* format_info,
                                               uint32_t pitch, uint32_t height,
                                               uint32_t depth, bool is_tiled,
                                               bool is_guest) {
  const uint32_t texels_per_block_x = format_info->block_width;
  const uint32_t texels_per_block_y = format_info->block_height;

  TextureMemoryUsage usage;
  usage.pitch = pitch;
  usage.height = height;
  // Is depth special? For now it is taken as-is in both modes.
  usage.depth = depth;
  usage.block_pitch =
      xe::round_up(pitch, texels_per_block_x) / texels_per_block_x;
  usage.block_height =
      xe::round_up(height, texels_per_block_y) / texels_per_block_y;
  if (!is_guest) {
    return usage;
  }

  // Texture dimensions must be a multiple of tile
  // dimensions (32x32 blocks).
  usage.block_pitch = xe::round_up(usage.block_pitch, 32);
  usage.block_height = xe::round_up(usage.block_height, 32);
  usage.pitch = usage.block_pitch * texels_per_block_x;
  usage.height = usage.block_height * texels_per_block_y;

  if (!is_tiled) {
    // Each row must be a multiple of 256 bytes in linear textures.
    const uint32_t bytes_per_block = format_info->bytes_per_block();
    const uint32_t byte_pitch =
        xe::round_up(usage.block_pitch * bytes_per_block, 256);
    usage.block_pitch = byte_pitch / bytes_per_block;
    usage.pitch = usage.block_pitch * texels_per_block_x;
  }
  return usage;
}
// Public entry point taking explicit dimensions; forwards every argument to
// the file-local CalculateMemoryUsage().
TextureMemoryUsage TextureMemoryUsage::Calculate(const FormatInfo* format_info,
                                                 uint32_t pitch,
                                                 uint32_t height,
                                                 uint32_t depth, bool is_tiled,
                                                 bool is_guest) {
  TextureMemoryUsage usage = CalculateMemoryUsage(format_info, pitch, height,
                                                  depth, is_tiled, is_guest);
  return usage;
}
// Computes the usage of the texture described by |info|, using its pitch and
// its stored height and depth, each plus one.
TextureMemoryUsage TextureMemoryUsage::Calculate(const TextureInfo* info,
                                                 bool is_guest) {
  assert_not_null(info);
  const uint32_t texel_height = info->height + 1;
  const uint32_t slice_count = info->depth + 1;
  return CalculateMemoryUsage(info->format_info(), info->pitch, texel_height,
                              slice_count, info->is_tiled, is_guest);
}
} // namespace gpu
} // namespace xe

View File

@ -15,6 +15,7 @@
#include "xenia/base/profiling.h"
#include "xenia/gpu/gpu_flags.h"
#include "xenia/gpu/sampler_info.h"
#include "xenia/gpu/texture_conversion.h"
#include "xenia/gpu/texture_info.h"
#include "xenia/gpu/vulkan/vulkan_gpu_flags.h"
#include "xenia/ui/vulkan/vulkan_mem_alloc.h"
@ -38,6 +39,7 @@ struct TextureConfig {
#define SWIZ(r, g, b, a) r, g, b, a
#define ___R SWIZ(-7, -7, -7, 0)
#define ___A SWIZ(-7, -7, -7, 3)
#define RRRR SWIZ(0, 0, 0, 0)
#define RRRA SWIZ(0, 0, 0, 3)
#define RGBA SWIZ(0, 1, 2, 3)
@ -117,8 +119,8 @@ static const TextureConfig texture_configs[64] = {
/* k_10_11_11_AS_16_16_16_16 */ {VK_FORMAT_B10G11R11_UFLOAT_PACK32}, // ?
/* k_11_11_10_AS_16_16_16_16 */ {VK_FORMAT_B10G11R11_UFLOAT_PACK32}, // ?
/* k_32_32_32_FLOAT */ {VK_FORMAT_R32G32B32_SFLOAT},
/* k_DXT3A */ {VK_FORMAT_UNDEFINED},
/* k_DXT5A */ {VK_FORMAT_BC4_UNORM_BLOCK, RRRR}, // ATI1N
/* k_DXT3A */ {VK_FORMAT_BC2_UNORM_BLOCK, ___A},
/* k_DXT5A */ {VK_FORMAT_BC4_UNORM_BLOCK, ___R}, // ATI1N
// http://fileadmin.cs.lth.se/cs/Personal/Michael_Doggett/talks/unc-xenos-doggett.pdf
/* k_CTX1 */ {VK_FORMAT_R8G8_UINT},
@ -137,6 +139,7 @@ static const TextureConfig texture_configs[64] = {
#undef RGBA
#undef RRRA
#undef RRRR
#undef ___A
#undef ___R
#undef SWIZ
@ -438,15 +441,15 @@ TextureCache::Texture* TextureCache::DemandResolveTexture(
// Tell the trace writer to "cache" this memory (but not read it)
trace_writer_->WriteMemoryReadCachedNop(texture_info.guest_address,
texture_info.input_length);
texture_info.GetByteSize(true));
return it->second;
}
}
VkFormatFeatureFlags required_flags = VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT;
if (texture_info.texture_format == TextureFormat::k_24_8 ||
texture_info.texture_format == TextureFormat::k_24_8_FLOAT) {
if (texture_info.format == TextureFormat::k_24_8 ||
texture_info.format == TextureFormat::k_24_8_FLOAT) {
required_flags |= VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT;
} else {
required_flags |= VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT;
@ -466,11 +469,11 @@ TextureCache::Texture* TextureCache::DemandResolveTexture(
VK_DEBUG_REPORT_OBJECT_TYPE_IMAGE_EXT,
xe::format_string(
"RT: 0x%.8X - 0x%.8X", texture_info.guest_address,
texture_info.guest_address + texture_info.input_length));
texture_info.guest_address + texture_info.GetByteSize(true)));
// Setup an access watch. If this texture is touched, it is destroyed.
texture->access_watch_handle = memory_->AddPhysicalAccessWatch(
texture_info.guest_address, texture_info.input_length,
texture_info.guest_address, texture_info.GetByteSize(true),
cpu::MMIOHandler::kWatchWrite, &WatchCallback, this, texture);
textures_[texture_hash] = texture;
@ -492,7 +495,7 @@ TextureCache::Texture* TextureCache::Demand(const TextureInfo& texture_info,
}
trace_writer_->WriteMemoryReadCached(texture_info.guest_address,
texture_info.input_length);
texture_info.GetByteSize(true));
return it->second;
}
@ -525,12 +528,12 @@ TextureCache::Texture* TextureCache::Demand(const TextureInfo& texture_info,
}
trace_writer_->WriteMemoryRead(texture_info.guest_address,
texture_info.input_length);
texture_info.GetByteSize(true));
// Okay. Put a writewatch on it to tell us if it's been modified from the
// guest.
texture->access_watch_handle = memory_->AddPhysicalAccessWatch(
texture_info.guest_address, texture_info.input_length,
texture_info.guest_address, texture_info.GetByteSize(true),
cpu::MMIOHandler::kWatchWrite, &WatchCallback, this, texture);
if (!UploadTexture(command_buffer, completion_fence, texture, texture_info)) {
@ -542,9 +545,9 @@ TextureCache::Texture* TextureCache::Demand(const TextureInfo& texture_info,
device_->DbgSetObjectName(
reinterpret_cast<uint64_t>(texture->image),
VK_DEBUG_REPORT_OBJECT_TYPE_IMAGE_EXT,
xe::format_string("T: 0x%.8X - 0x%.8X (%s, %s)",
texture_info.guest_address,
texture_info.guest_address + texture_info.input_length,
xe::format_string(
"T: 0x%.8X - 0x%.8X (%s, %s)", texture_info.guest_address,
texture_info.guest_address + texture_info.GetByteSize(true),
texture_info.format_info()->name,
get_dimension_name(texture_info.dimension)));
@ -561,8 +564,7 @@ TextureCache::TextureView* TextureCache::DemandView(Texture* texture,
}
}
auto& config =
texture_configs[uint32_t(texture->texture_info.texture_format)];
auto& config = texture_configs[uint32_t(texture->texture_info.format)];
VkImageViewCreateInfo view_info;
view_info.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO;
@ -821,10 +823,10 @@ TextureCache::Texture* TextureCache::Lookup(const TextureInfo& texture_info) {
COMPARE_FIELD(depth);
COMPARE_FIELD(endianness);
COMPARE_FIELD(is_tiled);
COMPARE_FIELD(input_length);
COMPARE_FIELD(GetByteSize(true));
#undef COMPARE_FIELD
if (!TextureFormatIsSimilar(texture_info.texture_format,
other_texture_info.texture_format)) {
if (!TextureFormatIsSimilar(texture_info.format,
other_texture_info.format)) {
continue;
}
/*const auto format_info = texture_info.format_info();
@ -850,9 +852,9 @@ TextureCache::Texture* TextureCache::LookupAddress(uint32_t guest_address,
const auto& texture_info = it->second->texture_info;
if (guest_address >= texture_info.guest_address &&
guest_address <
texture_info.guest_address + texture_info.input_length &&
texture_info.size.input_width >= width &&
texture_info.size.input_height >= height && out_offset) {
texture_info.guest_address + texture_info.GetByteSize(true) &&
texture_info.pitch >= width && texture_info.height >= height &&
out_offset) {
auto offset_bytes = guest_address - texture_info.guest_address;
if (texture_info.dimension == Dimension::k2D) {
@ -868,8 +870,7 @@ TextureCache::Texture* TextureCache::LookupAddress(uint32_t guest_address,
if (texture_info.guest_address == guest_address &&
texture_info.dimension == Dimension::k2D &&
texture_info.size.input_width == width &&
texture_info.size.input_height == height) {
texture_info.pitch == width && texture_info.height == height) {
if (out_offset) {
out_offset->x = 0;
out_offset->y = 0;
@ -882,25 +883,6 @@ TextureCache::Texture* TextureCache::LookupAddress(uint32_t guest_address,
return nullptr;
}
void TextureSwap(Endian endianness, void* dest, const void* src,
size_t length) {
switch (endianness) {
case Endian::k8in16:
xe::copy_and_swap_16_unaligned(dest, src, length / 2);
break;
case Endian::k8in32:
xe::copy_and_swap_32_unaligned(dest, src, length / 4);
break;
case Endian::k16in32: // Swap high and low 16 bits within a 32 bit word
xe::copy_and_swap_16_in_32_unaligned(dest, src, length);
break;
default:
case Endian::kUnspecified:
std::memcpy(dest, src, length);
break;
}
}
void TextureCache::FlushPendingCommands(VkCommandBuffer command_buffer,
VkFence completion_fence) {
auto status = vkEndCommandBuffer(command_buffer);
@ -942,60 +924,55 @@ bool TextureCache::ConvertTexture2D(uint8_t* dest,
uint32_t mip, const TextureInfo& src) {
uint32_t offset_x = 0;
uint32_t offset_y = 0;
uint32_t address =
TextureInfo::GetMipLocation(src, mip, &offset_x, &offset_y);
uint32_t address = src.GetMipLocation(mip, &offset_x, &offset_y, true);
void* host_address = memory_->TranslatePhysical(address);
// Pitch of the source texture in blocks.
uint32_t block_width;
if (mip == 0) {
block_width = src.size.block_width;
} else {
block_width = xe::next_pow2(src.size.block_width) >> mip;
block_width = xe::round_up(block_width, 32);
}
auto src_usage = src.GetMipMemoryUsage(mip, true);
auto dst_usage = GetMipMemoryUsage(src, mip);
uint32_t logical_width = src.size.logical_width >> mip;
uint32_t logical_height = src.size.logical_height >> mip;
uint32_t input_width = src.size.input_width >> mip;
uint32_t input_height = src.size.input_height >> mip;
uint32_t mip_width, mip_height;
src.GetMipSize(mip, &mip_width, &mip_height);
// All dimensions must be a multiple of block w/h
logical_width = xe::round_up(logical_width, src.format_info()->block_width);
logical_height =
xe::round_up(logical_height, src.format_info()->block_height);
input_width = xe::round_up(input_width, src.format_info()->block_width);
input_height = xe::round_up(input_height, src.format_info()->block_height);
auto copy_block = GetFormatCopyBlock(src.format);
if (!src.is_tiled) {
uint32_t bytes_per_block = src.format_info()->block_width *
src.format_info()->block_height *
src.format_info()->bits_per_pixel / 8;
uint32_t src_pitch = xe::round_up(block_width * bytes_per_block, 256);
uint32_t src_pitch =
src_usage.block_pitch * src.format_info()->bytes_per_block();
uint32_t dst_pitch =
(input_width / src.format_info()->block_width) * bytes_per_block;
assert_true(dst_pitch <= src_pitch);
dst_usage.block_pitch * GetFormatInfo(src.format)->bytes_per_block();
const uint8_t* src_mem = reinterpret_cast<const uint8_t*>(host_address);
src_mem += offset_y * src_pitch;
src_mem += offset_x * bytes_per_block;
for (uint32_t y = 0; y < src.size.block_height; y++) {
TextureSwap(src.endianness, dest + y * dst_pitch, src_mem + y * src_pitch,
src_mem += offset_x * src.format_info()->bytes_per_block();
for (uint32_t y = 0; y < dst_usage.block_height; y++) {
copy_block(src.endianness, dest + y * dst_pitch, src_mem + y * src_pitch,
dst_pitch);
}
} else {
// Untile image.
// We could do this in a shader to speed things up, as this is pretty
// slow.
// We could do this in a shader to speed things up, as this is pretty slow.
const uint8_t* src_mem = reinterpret_cast<const uint8_t*>(host_address);
TextureInfo::ConvertTiled(dest, src_mem, src.endianness, src.format_info(),
offset_x, offset_y, block_width, logical_width,
logical_height, input_width);
texture_conversion::UntileInfo untile_info;
std::memset(&untile_info, 0, sizeof(untile_info));
untile_info.offset_x = offset_x;
untile_info.offset_y = offset_y;
untile_info.width = dst_usage.block_pitch;
untile_info.height = dst_usage.block_height;
untile_info.input_pitch = src_usage.block_pitch;
untile_info.output_pitch = dst_usage.block_pitch;
untile_info.input_format_info = src.format_info();
untile_info.output_format_info = GetFormatInfo(src.format);
untile_info.copy_callback = [=](auto o, auto i, auto l) {
copy_block(src.endianness, o, i, l);
};
texture_conversion::Untile(dest, src_mem, &untile_info);
}
copy_region->bufferRowLength = input_width;
copy_region->bufferImageHeight = input_height;
copy_region->bufferRowLength = dst_usage.pitch;
copy_region->bufferImageHeight = dst_usage.height;
copy_region->imageSubresource = {VK_IMAGE_ASPECT_COLOR_BIT, mip, 0, 1};
copy_region->imageExtent = {logical_width, logical_height, 1};
copy_region->imageExtent = {mip_width, mip_height, 1};
return true;
}
@ -1004,77 +981,60 @@ bool TextureCache::ConvertTextureCube(uint8_t* dest,
uint32_t mip, const TextureInfo& src) {
uint32_t offset_x = 0;
uint32_t offset_y = 0;
uint32_t address =
TextureInfo::GetMipLocation(src, mip, &offset_x, &offset_y);
uint32_t address = src.GetMipLocation(mip, &offset_x, &offset_y, true);
void* host_address = memory_->TranslatePhysical(address);
// Pitch of the source texture in blocks.
uint32_t block_width, block_height, input_block_height;
if (mip == 0) {
block_width = src.size.block_width;
input_block_height = block_height = src.size.block_height;
} else {
block_width = xe::next_pow2(src.size.block_width) >> mip;
block_width = xe::round_up(block_width, 32);
block_height = xe::next_pow2(src.size.block_height) >> mip;
input_block_height = block_height;
block_height = xe::round_up(block_height, 32);
}
auto src_usage = src.GetMipMemoryUsage(mip, true);
auto dst_usage = GetMipMemoryUsage(src, mip);
uint32_t logical_width = src.size.logical_width >> mip;
uint32_t logical_height = src.size.logical_height >> mip;
uint32_t input_width = src.size.input_width >> mip;
uint32_t input_height = src.size.input_height >> mip;
uint32_t src_pitch =
src_usage.block_pitch * src.format_info()->bytes_per_block();
uint32_t dst_pitch =
dst_usage.block_pitch * GetFormatInfo(src.format)->bytes_per_block();
// All dimensions must be a multiple of block w/h
logical_width = xe::round_up(logical_width, src.format_info()->block_width);
logical_height =
xe::round_up(logical_height, src.format_info()->block_height);
input_width = xe::round_up(input_width, src.format_info()->block_width);
input_height = xe::round_up(input_height, src.format_info()->block_height);
uint32_t mip_width, mip_height;
src.GetMipSize(mip, &mip_width, &mip_height);
auto copy_block = GetFormatCopyBlock(src.format);
if (!src.is_tiled) {
uint32_t bytes_per_block = src.format_info()->block_width *
src.format_info()->block_height *
src.format_info()->bits_per_pixel / 8;
uint32_t src_pitch = xe::round_up(block_width * bytes_per_block, 256);
uint32_t dst_pitch =
(input_width / src.format_info()->block_width) * bytes_per_block;
assert_true(dst_pitch <= src_pitch);
const uint8_t* src_mem = reinterpret_cast<const uint8_t*>(host_address);
for (int face = 0; face < 6; face++) {
src_mem += offset_y * src_pitch;
src_mem += offset_x * bytes_per_block;
for (uint32_t y = 0; y < block_height; y++) {
TextureSwap(src.endianness, dest + y * dst_pitch,
src_mem += offset_x * src.format_info()->bytes_per_block();
for (uint32_t y = 0; y < dst_usage.block_height; y++) {
copy_block(src.endianness, dest + y * dst_pitch,
src_mem + y * src_pitch, dst_pitch);
}
src_mem += src_pitch * block_height;
dest += dst_pitch * input_block_height;
src_mem += src_pitch * src_usage.block_height;
dest += dst_pitch * dst_usage.block_height;
}
} else {
// TODO(benvanik): optimize this inner loop (or work by tiles).
uint32_t bytes_per_block = src.format_info()->block_width *
src.format_info()->block_height *
src.format_info()->bits_per_pixel / 8;
uint32_t src_pitch = block_width * bytes_per_block;
uint32_t dst_pitch =
(input_width / src.format_info()->block_width) * bytes_per_block;
assert_true(dst_pitch <= src_pitch);
const uint8_t* src_mem = reinterpret_cast<const uint8_t*>(host_address);
for (int face = 0; face < 6; face++) {
TextureInfo::ConvertTiled(
dest, src_mem, src.endianness, src.format_info(), offset_x, offset_y,
block_width, logical_width, logical_height, input_width);
src_mem += src_pitch * block_height;
dest += dst_pitch * input_block_height;
texture_conversion::UntileInfo untile_info;
std::memset(&untile_info, 0, sizeof(untile_info));
untile_info.offset_x = offset_x;
untile_info.offset_y = offset_y;
untile_info.width = dst_usage.block_pitch;
untile_info.height = dst_usage.block_height;
untile_info.input_pitch = src_usage.block_pitch;
untile_info.output_pitch = dst_usage.block_pitch;
untile_info.input_format_info = src.format_info();
untile_info.output_format_info = GetFormatInfo(src.format);
untile_info.copy_callback = [=](auto o, auto i, auto l) {
copy_block(src.endianness, o, i, l);
};
src_mem += src_pitch * src_usage.block_height;
dest += dst_pitch * dst_usage.block_height;
}
}
copy_region->bufferRowLength = input_width;
copy_region->bufferImageHeight = input_height;
copy_region->bufferRowLength = dst_usage.pitch;
copy_region->bufferImageHeight = dst_usage.height;
copy_region->imageSubresource = {VK_IMAGE_ASPECT_COLOR_BIT, mip, 0, 6};
copy_region->imageExtent = {logical_width, logical_height, 1};
copy_region->imageExtent = {mip_width, mip_height, 1};
return true;
}
@ -1102,53 +1062,49 @@ bool TextureCache::UploadTexture(VkCommandBuffer command_buffer,
SCOPE_profile_cpu_f("gpu");
#endif // FINE_GRAINED_DRAW_SCOPES
size_t unpack_length = ComputeTextureStorage(src);
XELOGGPU(
"Uploading texture @ 0x%.8X (%dx%d, length: 0x%.8X, format: %s, dim: %s, "
"levels: %d, tiled: %s)",
src.guest_address, src.width + 1, src.height + 1, src.input_length,
src.guest_address, src.width + 1, src.height + 1, unpack_length,
src.format_info()->name, get_dimension_name(src.dimension),
src.mip_levels, src.is_tiled ? "yes" : "no");
size_t unpack_length;
if (!ComputeTextureStorage(&unpack_length, src)) {
XELOGW("Failed to compute texture storage");
if (!unpack_length) {
XELOGW("Failed to compute texture storage!");
return false;
}
size_t total_unpack_length = unpack_length;
for (uint32_t i = 1; i < src.mip_levels; i++) {
// Add in more space for mips.
total_unpack_length += TextureInfo::GetMipLinearSize(src, i);
}
if (!staging_buffer_.CanAcquire(total_unpack_length)) {
if (!staging_buffer_.CanAcquire(unpack_length)) {
// Need to have unique memory for every upload for at least one frame. If we
// run out of memory, we need to flush all queued upload commands to the
// GPU.
FlushPendingCommands(command_buffer, completion_fence);
// Uploads have been flushed. Continue.
if (!staging_buffer_.CanAcquire(total_unpack_length)) {
if (!staging_buffer_.CanAcquire(unpack_length)) {
// The staging buffer isn't big enough to hold this texture.
XELOGE(
"TextureCache staging buffer is too small! (uploading 0x%.8X bytes)",
total_unpack_length);
unpack_length);
assert_always();
return false;
}
}
// Grab some temporary memory for staging.
auto alloc = staging_buffer_.Acquire(total_unpack_length, completion_fence);
auto alloc = staging_buffer_.Acquire(unpack_length, completion_fence);
assert_not_null(alloc);
if (!alloc) {
XELOGE("%s: Failed to acquire staging memory", __func__);
XELOGE("%s: Failed to acquire staging memory!", __func__);
return false;
}
// DEBUG: Check the source address. If it's completely zero'd out, print it.
bool valid = false;
auto src_data = memory_->TranslatePhysical(src.guest_address);
for (uint32_t i = 0; i < src.input_length; i++) {
for (uint32_t i = 0; i < unpack_length; i++) {
if (src_data[i] != 0) {
valid = true;
break;
@ -1175,7 +1131,7 @@ bool TextureCache::UploadTexture(VkCommandBuffer command_buffer,
copy_regions[0].imageOffset = {0, 0, 0};
// Now upload all the MIPs
VkDeviceSize buffer_offset = unpack_length;
VkDeviceSize buffer_offset = ComputeMipStorage(src, 0);
for (uint32_t mip = 1; mip < src.mip_levels; mip++) {
uint8_t* dest = reinterpret_cast<uint8_t*>(alloc->host_ptr) + buffer_offset;
if (!ConvertTexture(dest, &copy_regions[mip], mip, src)) {
@ -1186,7 +1142,7 @@ bool TextureCache::UploadTexture(VkCommandBuffer command_buffer,
copy_regions[mip].imageOffset = {0, 0, 0};
// With each mip, the length is divided by 4.
buffer_offset += TextureInfo::GetMipLinearSize(src, mip);
buffer_offset += ComputeMipStorage(src, mip);
}
// Transition the texture into a transfer destination layout.
@ -1240,30 +1196,81 @@ bool TextureCache::UploadTexture(VkCommandBuffer command_buffer,
return true;
}
bool TextureCache::ComputeTextureStorage(size_t* output_length,
const TextureInfo& src) {
if (src.texture_format == TextureFormat::k_CTX1) {
switch (src.dimension) {
case Dimension::k1D: {
assert_always();
} break;
case Dimension::k2D: {
*output_length = src.size.input_width * src.size.input_height * 2;
return true;
const FormatInfo* TextureCache::GetFormatInfo(TextureFormat format) {
switch (format) {
case TextureFormat::k_CTX1:
return FormatInfo::Get(TextureFormat::k_8_8);
case TextureFormat::k_DXT3A:
return FormatInfo::Get(TextureFormat::k_DXT2_3);
default:
return FormatInfo::Get(format);
}
case Dimension::k3D: {
assert_always();
} break;
case Dimension::kCube: {
*output_length = src.size.input_width * src.size.input_height * 2 * 6;
return true;
}
// Selects the per-block copy routine used when moving texture data for the
// given format: CTX1 and DXT3A each get a dedicated conversion callback, and
// every other format uses the plain endian-swapping copy.
texture_conversion::CopyBlockCallback TextureCache::GetFormatCopyBlock(
    TextureFormat format) {
  if (format == TextureFormat::k_CTX1) {
    return texture_conversion::ConvertTexelCTX1ToR8G8;
  }
  if (format == TextureFormat::k_DXT3A) {
    return texture_conversion::ConvertTexelDXT3AToDXT3;
  }
  return texture_conversion::CopySwapBlock;
}
return false;
}
// Computes the non-guest (is_guest = false) memory usage of one mip level of
// `src`, using the format returned by GetFormatInfo() for src.format.
//
// Mip 0 uses the texture's own dimensions (the stored width/height/depth plus
// one). Any other level uses the power-of-two-rounded base width/height
// shifted right by `mip`; the depth is not scaled.
TextureMemoryUsage TextureCache::GetMipMemoryUsage(const TextureInfo& src,
                                                   uint32_t mip) {
  auto format_info = GetFormatInfo(src.format);
  uint32_t width = src.width + 1;
  uint32_t height = src.height + 1;
  uint32_t depth = src.depth + 1;
  if (mip != 0) {
    width = xe::next_pow2(width) >> mip;
    height = xe::next_pow2(height) >> mip;
  }
  // The fifth parameter of Calculate() is `bool is_tiled`, not a pitch; it is
  // ignored when is_guest is false, so pass false explicitly (as
  // ComputeMipStorage does) instead of implicitly converting a width to bool.
  return TextureMemoryUsage::Calculate(format_info, width, height, depth,
                                       /* is_tiled */ false,
                                       /* is_guest */ false);
}
// Returns the number of bytes one mip level occupies in the non-guest layout:
// the block count reported by TextureMemoryUsage (is_tiled = false,
// is_guest = false) multiplied by the format's bytes per block.
//
// Mip 0 uses width/height as given; other levels use the power-of-two-rounded
// width/height shifted right by `mip`. depth is passed through unscaled.
uint32_t TextureCache::ComputeMipStorage(const FormatInfo* format_info,
                                         uint32_t width, uint32_t height,
                                         uint32_t depth, uint32_t mip) {
  assert_not_null(format_info);

  uint32_t level_width = width;
  uint32_t level_height = height;
  if (mip != 0) {
    level_width = xe::next_pow2(width) >> mip;
    level_height = xe::next_pow2(height) >> mip;
  }

  TextureMemoryUsage usage = TextureMemoryUsage::Calculate(
      format_info, level_width, level_height, depth, false, false);
  uint32_t bytes_per_block = format_info->bytes_per_block();
  return usage.blocks() * bytes_per_block;
}
uint32_t TextureCache::ComputeMipStorage(const TextureInfo& src, uint32_t mip) {
return ComputeMipStorage(GetFormatInfo(src.format), src.width + 1,
src.height + 1, src.depth + 1, mip);
}
uint32_t TextureCache::ComputeTextureStorage(const TextureInfo& src) {
auto format_info = GetFormatInfo(src.format);
uint32_t width = src.width + 1;
uint32_t height = src.height + 1;
uint32_t depth = src.depth + 1;
uint32_t length = 0;
for (uint32_t mip = 0; mip < src.mip_levels; mip++) {
length += ComputeMipStorage(format_info, width, height, depth, mip);
}
return length;
}
void TextureCache::WritebackTexture(Texture* texture) {
@ -1332,7 +1339,8 @@ void TextureCache::WritebackTexture(Texture* texture) {
auto dest = memory_->TranslatePhysical(texture->texture_info.guest_address);
if (status == VK_SUCCESS) {
std::memcpy(dest, alloc->host_ptr, texture->texture_info.input_length);
std::memcpy(dest, alloc->host_ptr,
texture->texture_info.GetByteSize(false));
}
wb_staging_buffer_.Scavenge();
@ -1473,7 +1481,7 @@ bool TextureCache::SetupTextureBinding(VkCommandBuffer command_buffer,
}
// Search via the base format.
texture_info.texture_format = GetBaseFormat(texture_info.texture_format);
texture_info.format = GetBaseFormat(texture_info.format);
auto texture = Demand(texture_info, command_buffer, completion_fence);
auto sampler = Demand(sampler_info);

View File

@ -16,6 +16,7 @@
#include "xenia/gpu/register_file.h"
#include "xenia/gpu/sampler_info.h"
#include "xenia/gpu/shader.h"
#include "xenia/gpu/texture_conversion.h"
#include "xenia/gpu/texture_info.h"
#include "xenia/gpu/trace_writer.h"
#include "xenia/gpu/vulkan/vulkan_command_processor.h"
@ -155,7 +156,17 @@ class TextureCache {
uint32_t mip, const TextureInfo& src);
bool ConvertTexture(uint8_t* dest, VkBufferImageCopy* copy_region,
uint32_t mip, const TextureInfo& src);
bool ComputeTextureStorage(size_t* output_length, const TextureInfo& src);
static const FormatInfo* GetFormatInfo(TextureFormat format);
static texture_conversion::CopyBlockCallback GetFormatCopyBlock(
TextureFormat format);
static TextureMemoryUsage GetMipMemoryUsage(const TextureInfo& src,
uint32_t mip);
static uint32_t ComputeMipStorage(const FormatInfo* format_info,
uint32_t width, uint32_t height,
uint32_t depth, uint32_t mip);
static uint32_t ComputeMipStorage(const TextureInfo& src, uint32_t mip);
static uint32_t ComputeTextureStorage(const TextureInfo& src);
// Writes a texture back into guest memory. This call is (mostly) asynchronous
// but the texture must not be flagged for destruction.