[Vulkan] Initial support for mipmaps

DrChat 2018-05-03 10:02:07 -05:00
parent f7c7cc54ed
commit 1f157f35f4
6 changed files with 401 additions and 272 deletions

View File: src/xenia/gpu/sampler_info.cc

@ -44,6 +44,8 @@ bool SamplerInfo::Prepare(const xenos::xe_gpu_texture_fetch_t& fetch,
out_info->border_color = static_cast<BorderColor>(fetch.border_color);
out_info->lod_bias = (fetch.lod_bias) / 32.f;
out_info->mip_min_level = fetch.mip_min_level;
out_info->mip_max_level = fetch.mip_max_level;
return true;
}
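The divide by 32 converts the fetch constant's fixed-point LOD bias into the float the host sampler expects. A minimal standalone sketch, assuming the Xenos field is signed with five fractional bits (steps of 1/32 of a mip level):

#include <cstdint>

// Sketch: convert a raw fixed-point LOD bias (assumed signed, 5 fractional
// bits) to the float bias handed to the host sampler.
float LodBiasToFloat(int32_t raw_bias) {
  return static_cast<float>(raw_bias) / 32.f;  // e.g. -16 -> -0.5 levels
}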

View File: src/xenia/gpu/sampler_info.h

@ -26,6 +26,8 @@ struct SamplerInfo {
AnisoFilter aniso_filter;
BorderColor border_color;
float lod_bias;
uint32_t mip_min_level;
uint32_t mip_max_level;
static bool Prepare(const xenos::xe_gpu_texture_fetch_t& fetch,
const ParsedTextureFetchInstruction& fetch_instr,
@ -36,7 +38,9 @@ struct SamplerInfo {
return min_filter == other.min_filter && mag_filter == other.mag_filter &&
mip_filter == other.mip_filter && clamp_u == other.clamp_u &&
clamp_v == other.clamp_v && clamp_w == other.clamp_w &&
aniso_filter == other.aniso_filter;
aniso_filter == other.aniso_filter && lod_bias == other.lod_bias &&
mip_min_level == other.mip_min_level &&
mip_max_level == other.mip_max_level;
}
};

View File: src/xenia/gpu/texture_info.cc

@ -15,6 +15,7 @@
#include "xenia/base/logging.h"
#include "xenia/base/math.h"
#include "xenia/base/memory.h"
#include "third_party/xxhash/xxhash.h"
@ -59,6 +60,8 @@ bool TextureInfo::Prepare(const xe_gpu_texture_fetch_t& fetch,
info.endianness = static_cast<Endian>(fetch.endianness);
info.is_tiled = fetch.tiled;
info.has_packed_mips = fetch.packed_mips;
info.mip_address = fetch.mip_address << 12;
info.mip_levels = fetch.packed_mips ? fetch.mip_max_level + 1 : 1;
info.input_length = 0; // Populated below.
if (info.format_info()->format == TextureFormat::kUnknown) {
@ -78,6 +81,7 @@ bool TextureInfo::Prepare(const xe_gpu_texture_fetch_t& fetch,
} break;
case Dimension::k3D: {
// TODO(benvanik): calculate size.
assert_always();
return false;
}
case Dimension::kCube: {
@ -106,6 +110,8 @@ bool TextureInfo::PrepareResolve(uint32_t physical_address,
info.endianness = endian;
info.is_tiled = true;
info.has_packed_mips = false;
info.mip_address = 0;
info.mip_levels = 1;
info.input_length = 0;
if (info.format_info()->format == TextureFormat::kUnknown) {
@ -121,10 +127,6 @@ void TextureInfo::CalculateTextureSizes2D(uint32_t width, uint32_t height) {
size_2d.logical_width = width;
size_2d.logical_height = height;
// Here be dragons. The values here are used in texture_cache.cc to copy
// images and create GL textures. Changes here will impact that code.
// TODO(benvanik): generic texture copying utility.
auto format = format_info();
// w/h in blocks.
@ -135,11 +137,15 @@ void TextureInfo::CalculateTextureSizes2D(uint32_t width, uint32_t height) {
xe::round_up(size_2d.logical_height, format->block_height) /
format->block_height;
// Tiles are 32x32 blocks. The pitch of all textures must be a multiple of
// tile dimensions.
uint32_t tile_width = xe::round_up(block_width, 32) / 32;
size_2d.block_width = tile_width * 32;
size_2d.block_height = block_height;
if (is_tiled) {
// If the texture is tiled, its dimensions must be a multiple of tile
// dimensions (32x32 blocks).
size_2d.block_width = xe::round_up(block_width, 32);
size_2d.block_height = xe::round_up(block_height, 32);
} else {
size_2d.block_width = block_width;
size_2d.block_height = block_height;
}
uint32_t bytes_per_block =
format->block_width * format->block_height * format->bits_per_pixel / 8;
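A worked example of the alignment above, as a sketch (xe::round_up is assumed to round up to the next multiple):

#include <cstdint>

// Assumed behavior of xe::round_up: round `value` up to the next multiple
// of `multiple`.
uint32_t round_up(uint32_t value, uint32_t multiple) {
  return (value + multiple - 1) / multiple * multiple;
}

// A tiled 100x40-block texture pads to round_up(100, 32) x round_up(40, 32)
// = 128x64 blocks in guest memory; untiled, it stays 100x40.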
@ -177,11 +183,15 @@ void TextureInfo::CalculateTextureSizesCube(uint32_t width, uint32_t height,
xe::round_up(size_cube.logical_height, format->block_height) /
format->block_height;
// Tiles are 32x32 blocks. All textures must be multiples of tile dimensions.
uint32_t tile_width = xe::round_up(block_width, 32) / 32;
uint32_t tile_height = xe::round_up(block_height, 32) / 32;
size_cube.block_width = tile_width * 32;
size_cube.block_height = tile_height * 32;
if (is_tiled) {
// If the texture is tiled, its dimensions must be a multiple of tile
// dimensions (32x32 blocks).
size_cube.block_width = xe::round_up(block_width, 32);
size_cube.block_height = xe::round_up(block_height, 32);
} else {
size_cube.block_width = block_width;
size_cube.block_height = block_height;
}
uint32_t bytes_per_block =
format->block_width * format->block_height * format->bits_per_pixel / 8;
@ -204,12 +214,154 @@ void TextureInfo::CalculateTextureSizesCube(uint32_t width, uint32_t height,
input_length = size_cube.input_face_length * 6;
}
static void TextureSwap(Endian endianness, void* dest, const void* src,
size_t length) {
switch (endianness) {
case Endian::k8in16:
xe::copy_and_swap_16_unaligned(dest, src, length / 2);
break;
case Endian::k8in32:
xe::copy_and_swap_32_unaligned(dest, src, length / 4);
break;
case Endian::k16in32:  // Swap high and low 16 bits within a 32-bit word
xe::copy_and_swap_16_in_32_unaligned(dest, src, length);
break;
default:
case Endian::kUnspecified:
std::memcpy(dest, src, length);
break;
}
}
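For reference, a sketch of the k16in32 mode on a single 32-bit word; the unaligned copy_and_swap_16_in_32 variant is assumed to apply this across the whole buffer, per the comment above:

#include <cstdint>

// Swap the high and low 16-bit halves of one 32-bit word.
uint32_t Swap16In32(uint32_t v) { return (v >> 16) | (v << 16); }

// Swap16In32(0xAABBCCDD) == 0xCCDDAABB. By contrast, k8in16 byte-swaps
// each 16-bit unit and k8in32 byte-swaps each 32-bit unit.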
static void ConvertTexelCTX1(uint8_t* dest, size_t dest_pitch,
const uint8_t* src, Endian src_endianness) {
// http://fileadmin.cs.lth.se/cs/Personal/Michael_Doggett/talks/unc-xenos-doggett.pdf
union {
uint8_t data[8];
struct {
uint8_t r0, g0, r1, g1;
uint32_t xx;
};
} block;
static_assert(sizeof(block) == 8, "CTX1 block mismatch");
const uint32_t bytes_per_block = 8;
TextureSwap(src_endianness, block.data, src, bytes_per_block);
uint8_t cr[4] = {
block.r0, block.r1,
static_cast<uint8_t>(2.f / 3.f * block.r0 + 1.f / 3.f * block.r1),
static_cast<uint8_t>(1.f / 3.f * block.r0 + 2.f / 3.f * block.r1)};
uint8_t cg[4] = {
block.g0, block.g1,
static_cast<uint8_t>(2.f / 3.f * block.g0 + 1.f / 3.f * block.g1),
static_cast<uint8_t>(1.f / 3.f * block.g0 + 2.f / 3.f * block.g1)};
for (uint32_t oy = 0; oy < 4; ++oy) {
for (uint32_t ox = 0; ox < 4; ++ox) {
uint8_t xx = (block.xx >> (((ox + (oy * 4)) * 2))) & 3;
dest[(oy * dest_pitch) + (ox * 2) + 0] = cr[xx];
dest[(oy * dest_pitch) + (ox * 2) + 1] = cg[xx];
}
}
}
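CTX1 is a two-channel, DXT-style format: each 8-byte block stores two R8G8 endpoints plus sixteen 2-bit indices that select between the endpoints and two interpolated values. A hypothetical call decoding one block into a tightly packed 4x4 R8G8 tile (`block` is assumed to point at the 8 source bytes):

uint8_t rg[4 * 4 * 2];  // 4x4 texels at 2 bytes each -> 8-byte row pitch
ConvertTexelCTX1(rg, /*dest_pitch=*/8, block, Endian::kUnspecified);
// rg[0] and rg[1] now hold the R and G of texel (0, 0).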
void TextureInfo::ConvertTiled(uint8_t* dest, const uint8_t* src, Endian endian,
const FormatInfo* format_info, uint32_t offset_x,
uint32_t offset_y, uint32_t block_pitch,
uint32_t width, uint32_t height,
uint32_t output_width) {
// TODO(benvanik): optimize this inner loop (or work by tiles).
uint32_t bytes_per_block = format_info->block_width *
format_info->block_height *
format_info->bits_per_pixel / 8;
uint32_t output_pitch =
output_width * format_info->block_width * format_info->bits_per_pixel / 8;
uint32_t output_row_height = 1;
if (format_info->format == TextureFormat::k_CTX1) {
// TODO: Can we calculate this?
output_row_height = 4;
}
// logical w/h in blocks.
uint32_t block_width =
xe::round_up(width, format_info->block_width) / format_info->block_width;
uint32_t block_height = xe::round_up(height, format_info->block_height) /
format_info->block_height;
// Log2 of bytes per block (1/2/4/8/16 bytes -> 0/1/2/3/4).
auto log2_bpp =
(bytes_per_block / 4) + ((bytes_per_block / 2) >> (bytes_per_block / 4));
// Offset to the current row, in bytes.
uint32_t output_row_offset = 0;
for (uint32_t y = 0; y < block_height; y++) {
auto input_row_offset =
TextureInfo::TiledOffset2DOuter(offset_y + y, block_pitch, log2_bpp);
// Go block-by-block on this row.
uint32_t output_offset = output_row_offset;
for (uint32_t x = 0; x < block_width; x++) {
auto input_offset = TextureInfo::TiledOffset2DInner(
offset_x + x, offset_y + y, log2_bpp, input_row_offset);
input_offset >>= log2_bpp;
if (format_info->format == TextureFormat::k_CTX1) {
// Convert to R8G8.
ConvertTexelCTX1(&dest[output_offset], output_pitch, src, endian);
} else {
// Generic swap to destination.
TextureSwap(endian, dest + output_offset,
src + input_offset * bytes_per_block, bytes_per_block);
}
output_offset += bytes_per_block;
}
output_row_offset += output_pitch * output_row_height;
}
}
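The log2_bpp expression is a branchless log2 over the block sizes that actually occur. A quick sketch checking it, valid for bytes_per_block in {1, 2, 4, 8, 16}:

#include <cstdint>

// Same branchless expression as above; for power-of-two block sizes up to
// 16 bytes it matches a true log2: 1->0, 2->1, 4->2, 8->3, 16->4.
uint32_t Log2Bpp(uint32_t bytes_per_block) {
  return (bytes_per_block / 4) +
         ((bytes_per_block / 2) >> (bytes_per_block / 4));
}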
uint32_t TextureInfo::GetMaxMipLevels(uint32_t width, uint32_t height,
uint32_t depth) {
return 1 + xe::log2_floor(std::max({width, height, depth}));
}
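So a 256x128 texture has a full chain of 1 + log2_floor(256) = 9 levels (256x128 down to 1x1), and a 640x480 one has 1 + log2_floor(640) = 10.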
bool TextureInfo::GetPackedTileOffset(const TextureInfo& texture_info,
uint32_t TextureInfo::GetMipLocation(const TextureInfo& src, uint32_t mip,
uint32_t* offset_x, uint32_t* offset_y) {
if (mip == 0) {
// Short-circuit. Mip 0 is always stored in guest_address.
GetPackedTileOffset(src, offset_x, offset_y);
return src.guest_address;
}
// Walk forward to find the address of the mip.
// If the texture is <= 16 pixels w/h, the mips are packed with the base
// texture. Otherwise, they're stored beginning from mip_address.
uint32_t address_base = std::min(src.width, src.height) < 16
? src.guest_address
: src.mip_address;
uint32_t address_offset = 0;
for (uint32_t i = 1; i < mip; i++) {
uint32_t logical_width = std::max((src.width + 1) >> i, 1u);
uint32_t logical_height = std::max((src.height + 1) >> i, 1u);
if (std::min(logical_width, logical_height) <= 16) {
// We've reached the point where the mips are packed into a single tile.
// TODO(DrChat): Figure out how to calculate the packed tile offset.
continue;
}
address_offset += src.input_length >> (i * 2);
}
return address_base + address_offset;
}
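Each mip level is a quarter the size of the previous one, so the walk above adds input_length / 4^i per level skipped. A worked example, ignoring the packed-mip cutoff: with input_length = 0x100000, mip 3 resolves to address_base + 0x40000 + 0x10000 = address_base + 0x50000.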
bool TextureInfo::GetPackedTileOffset(uint32_t width, uint32_t height,
const FormatInfo* format_info,
uint32_t* out_offset_x,
uint32_t* out_offset_y) {
// Tile size is 32x32, and once textures go <=16 they are packed into a
@ -231,6 +383,13 @@ bool TextureInfo::GetPackedTileOffset(const TextureInfo& texture_info,
// This only works for square textures, or textures that are some non-pot
// <= square. As soon as the aspect ratio goes weird, the textures start to
// stretch across tiles.
//
// The 2x2 and 1x1 squares are packed in their specific positions because
// each square is the size of at least one block (which is 4x4 pixels max)
// 4x4: x = 4
// 2x2: y = (x & 0x3) << 2
// 1x1: y = (x & 0x3) << 2
//
// if (tile_aligned(w) > tile_aligned(h)) {
// // wider than tall, so packed horizontally
// } else if (tile_aligned(w) < tile_aligned(h)) {
@ -243,16 +402,14 @@ bool TextureInfo::GetPackedTileOffset(const TextureInfo& texture_info,
// The minimum dimension is what matters most: if either width or height
// is <= 16 this mode kicks in.
if (std::min(texture_info.size_2d.logical_width,
texture_info.size_2d.logical_height) > 16) {
if (std::min(width, height) > 16) {
// Too big, not packed.
*out_offset_x = 0;
*out_offset_y = 0;
return false;
}
if (xe::log2_ceil(texture_info.size_2d.logical_width) >
xe::log2_ceil(texture_info.size_2d.logical_height)) {
if (xe::log2_ceil(width) > xe::log2_ceil(height)) {
// Wider than tall. Laid out vertically.
*out_offset_x = 0;
*out_offset_y = 16;
@ -261,26 +418,37 @@ bool TextureInfo::GetPackedTileOffset(const TextureInfo& texture_info,
*out_offset_x = 16;
*out_offset_y = 0;
}
*out_offset_x /= texture_info.format_info()->block_width;
*out_offset_y /= texture_info.format_info()->block_height;
*out_offset_x /= format_info->block_width;
*out_offset_y /= format_info->block_height;
return true;
}
bool TextureInfo::GetPackedTileOffset(const TextureInfo& texture_info,
uint32_t* out_offset_x,
uint32_t* out_offset_y) {
return GetPackedTileOffset(
texture_info.size_2d.logical_width, texture_info.size_2d.logical_height,
texture_info.format_info(), out_offset_x, out_offset_y);
}
// https://github.com/BinomialLLC/crunch/blob/ea9b8d8c00c8329791256adafa8cf11e4e7942a2/inc/crn_decomp.h#L4108
uint32_t TextureInfo::TiledOffset2DOuter(uint32_t y, uint32_t width,
uint32_t log_bpp) {
uint32_t macro = ((y >> 5) * (width >> 5)) << (log_bpp + 7);
uint32_t micro = ((y & 6) << 2) << log_bpp;
return macro + ((micro & ~15) << 1) + (micro & 15) +
((y & 8) << (3 + log_bpp)) + ((y & 1) << 4);
uint32_t log2_bpp) {
uint32_t macro = ((y / 32) * (width / 32)) << (log2_bpp + 7);
uint32_t micro = ((y & 6) << 2) << log2_bpp;
return macro + ((micro & ~0xF) << 1) + (micro & 0xF) +
((y & 8) << (3 + log2_bpp)) + ((y & 1) << 4);
}
uint32_t TextureInfo::TiledOffset2DInner(uint32_t x, uint32_t y, uint32_t bpp,
uint32_t TextureInfo::TiledOffset2DInner(uint32_t x, uint32_t y,
uint32_t log2_bpp,
uint32_t base_offset) {
uint32_t macro = (x >> 5) << (bpp + 7);
uint32_t micro = (x & 7) << bpp;
uint32_t offset = base_offset + (macro + ((micro & ~15) << 1) + (micro & 15));
return ((offset & ~511) << 3) + ((offset & 448) << 2) + (offset & 63) +
uint32_t macro = (x / 32) << (log2_bpp + 7);
uint32_t micro = (x & 7) << log2_bpp;
uint32_t offset =
base_offset + (macro + ((micro & ~0xF) << 1) + (micro & 0xF));
return ((offset & ~0x1FF) << 3) + ((offset & 0x1C0) << 2) + (offset & 0x3F) +
((y & 16) << 7) + (((((y & 8) >> 2) + (x >> 3)) & 3) << 6);
}
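A sketch of how the two halves combine, mirroring the loop in ConvertTiled above:

// Byte offset of the block at (x, y) in a tiled image with a row pitch of
// `pitch` blocks and 2^log2_bpp bytes per block (assumed power of two).
uint32_t TiledBlockByteOffset(uint32_t x, uint32_t y, uint32_t pitch,
                              uint32_t log2_bpp, uint32_t bytes_per_block) {
  uint32_t row = TextureInfo::TiledOffset2DOuter(y, pitch, log2_bpp);
  uint32_t addr = TextureInfo::TiledOffset2DInner(x, y, log2_bpp, row);
  // ConvertTiled shifts the inner result back down to block units before
  // scaling by the real bytes-per-block.
  return (addr >> log2_bpp) * bytes_per_block;
}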

View File: src/xenia/gpu/texture_info.h

@ -256,6 +256,8 @@ struct TextureInfo {
Endian endianness;
bool is_tiled;
bool has_packed_mips;
uint32_t mip_address;
uint32_t mip_levels;
uint32_t input_length;
const FormatInfo* format_info() const {
@ -304,14 +306,26 @@ struct TextureInfo {
uint32_t width, uint32_t height,
TextureInfo* out_info);
static void ConvertTiled(uint8_t* dest, const uint8_t* src, Endian endian,
const FormatInfo* format_info, uint32_t offset_x,
uint32_t offset_y, uint32_t block_pitch,
uint32_t width, uint32_t height,
uint32_t output_width);
static uint32_t GetMaxMipLevels(uint32_t width, uint32_t height,
uint32_t depth);
static uint32_t GetMipLocation(const TextureInfo& src, uint32_t mip,
uint32_t* offset_x, uint32_t* offset_y);
static bool GetPackedTileOffset(uint32_t width, uint32_t height,
const FormatInfo* format_info,
uint32_t* out_offset_x,
uint32_t* out_offset_y);
static bool GetPackedTileOffset(const TextureInfo& texture_info,
uint32_t* out_offset_x,
uint32_t* out_offset_y);
static uint32_t TiledOffset2DOuter(uint32_t y, uint32_t width,
uint32_t log_bpp);
static uint32_t TiledOffset2DInner(uint32_t x, uint32_t y, uint32_t bpp,
uint32_t log2_bpp);
static uint32_t TiledOffset2DInner(uint32_t x, uint32_t y, uint32_t log2_bpp,
uint32_t base_offset);
uint64_t hash() const;

View File: src/xenia/gpu/vulkan/texture_cache.cc

@ -258,7 +258,13 @@ TextureCache::Texture* TextureCache::AllocateTexture(
assert_not_null(texture_info.format_info());
auto& config = texture_configs[int(texture_info.format_info()->format)];
VkFormat format = config.host_format;
assert(format != VK_FORMAT_UNDEFINED);
if (format == VK_FORMAT_UNDEFINED) {
XELOGE(
"Texture Cache: Attempted to allocate texture format %s, which is "
"defined as VK_FORMAT_UNDEFINED!",
texture_info.format_info()->name);
return nullptr;
}
image_info.tiling = VK_IMAGE_TILING_OPTIMAL;
image_info.usage =
@ -302,7 +308,7 @@ TextureCache::Texture* TextureCache::AllocateTexture(
image_info.format = format;
image_info.extent = {texture_info.width + 1, texture_info.height + 1, 1};
image_info.mipLevels = 1;
image_info.mipLevels = texture_info.mip_levels;
image_info.arrayLayers = texture_info.depth + 1;
image_info.samples = VK_SAMPLE_COUNT_1_BIT;
image_info.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
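One caveat: Vulkan requires VkImageCreateInfo::mipLevels to be at most floor(log2(max(width, height, depth))) + 1 for the image's extent, which is exactly what GetMaxMipLevels computes. A hypothetical defensive clamp, not part of this commit:

// Hypothetical: never request more levels than the extent supports.
uint32_t max_levels = TextureInfo::GetMaxMipLevels(
    texture_info.width + 1, texture_info.height + 1, texture_info.depth + 1);
image_info.mipLevels = std::min(texture_info.mip_levels, max_levels);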
@ -664,8 +670,6 @@ TextureCache::Sampler* TextureCache::Demand(const SamplerInfo& sampler_info) {
sampler_create_info.addressModeW =
address_mode_map[static_cast<int>(sampler_info.clamp_w)];
sampler_create_info.mipLodBias = sampler_info.lod_bias;
float aniso = 0.f;
switch (sampler_info.aniso_filter) {
case AnisoFilter::kDisabled:
@ -697,8 +701,9 @@ TextureCache::Sampler* TextureCache::Demand(const SamplerInfo& sampler_info) {
sampler_create_info.compareEnable = VK_FALSE;
sampler_create_info.compareOp = VK_COMPARE_OP_NEVER;
sampler_create_info.minLod = 0.0f;
sampler_create_info.maxLod = 0.0f;
sampler_create_info.mipLodBias = sampler_info.lod_bias;
sampler_create_info.minLod = float(sampler_info.mip_min_level);
sampler_create_info.maxLod = float(sampler_info.mip_max_level);
sampler_create_info.borderColor = VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK;
sampler_create_info.unnormalizedCoordinates = VK_FALSE;
VkSampler vk_sampler;
@ -816,13 +821,13 @@ void TextureSwap(Endian endianness, void* dest, const void* src,
size_t length) {
switch (endianness) {
case Endian::k8in16:
xe::copy_and_swap_16_aligned(dest, src, length / 2);
xe::copy_and_swap_16_unaligned(dest, src, length / 2);
break;
case Endian::k8in32:
xe::copy_and_swap_32_aligned(dest, src, length / 4);
xe::copy_and_swap_32_unaligned(dest, src, length / 4);
break;
case Endian::k16in32:  // Swap high and low 16 bits within a 32-bit word
xe::copy_and_swap_16_in_32_aligned(dest, src, length);
xe::copy_and_swap_16_in_32_unaligned(dest, src, length);
break;
default:
case Endian::kUnspecified:
@ -867,43 +872,21 @@ void TextureCache::FlushPendingCommands(VkCommandBuffer command_buffer,
vkBeginCommandBuffer(command_buffer, &begin_info);
}
void TextureCache::ConvertTexelCTX1(uint8_t* dest, size_t dest_pitch,
const uint8_t* src, Endian src_endianness) {
// http://fileadmin.cs.lth.se/cs/Personal/Michael_Doggett/talks/unc-xenos-doggett.pdf
union {
uint8_t data[8];
struct {
uint8_t r0, g0, r1, g1;
uint32_t xx;
};
} block;
static_assert(sizeof(block) == 8, "CTX1 block mismatch");
const uint32_t bytes_per_block = 8;
TextureSwap(src_endianness, block.data, src, bytes_per_block);
uint8_t cr[4] = {
block.r0, block.r1,
static_cast<uint8_t>(2.f / 3.f * block.r0 + 1.f / 3.f * block.r1),
static_cast<uint8_t>(1.f / 3.f * block.r0 + 2.f / 3.f * block.r1)};
uint8_t cg[4] = {
block.g0, block.g1,
static_cast<uint8_t>(2.f / 3.f * block.g0 + 1.f / 3.f * block.g1),
static_cast<uint8_t>(1.f / 3.f * block.g0 + 2.f / 3.f * block.g1)};
for (uint32_t oy = 0; oy < 4; ++oy) {
for (uint32_t ox = 0; ox < 4; ++ox) {
uint8_t xx = (block.xx >> (((ox + (oy * 4)) * 2))) & 3;
dest[(oy * dest_pitch) + (ox * 2) + 0] = cr[xx];
dest[(oy * dest_pitch) + (ox * 2) + 1] = cg[xx];
}
}
}
bool TextureCache::ConvertTexture2D(uint8_t* dest,
VkBufferImageCopy* copy_region,
const TextureInfo& src) {
void* host_address = memory_->TranslatePhysical(src.guest_address);
uint32_t mip, const TextureInfo& src) {
uint32_t offset_x = 0;
uint32_t offset_y = 0;
uint32_t address =
TextureInfo::GetMipLocation(src, mip, &offset_x, &offset_y);
void* host_address = memory_->TranslatePhysical(address);
uint32_t logical_width = src.size_2d.logical_width >> mip;
uint32_t logical_height = src.size_2d.logical_height >> mip;
uint32_t block_width = src.size_2d.block_width >> mip;
uint32_t input_width = src.size_2d.input_width >> mip;
uint32_t input_height = src.size_2d.input_height >> mip;
if (!src.is_tiled) {
uint32_t offset_x, offset_y;
if (src.has_packed_mips &&
@ -922,89 +905,25 @@ bool TextureCache::ConvertTexture2D(uint8_t* dest,
src_mem += src.size_2d.input_pitch;
dest += src.size_2d.input_pitch;
}
copy_region->bufferRowLength = src.size_2d.input_width;
copy_region->bufferImageHeight = src.size_2d.input_height;
copy_region->imageSubresource = {VK_IMAGE_ASPECT_COLOR_BIT, 0, 0, 1};
copy_region->imageExtent = {src.size_2d.logical_width,
src.size_2d.logical_height, 1};
return true;
} else {
// Fast path copy entire image.
TextureSwap(src.endianness, dest, host_address, src.input_length);
copy_region->bufferRowLength = src.size_2d.input_width;
copy_region->bufferImageHeight = src.size_2d.input_height;
copy_region->imageSubresource = {VK_IMAGE_ASPECT_COLOR_BIT, 0, 0, 1};
copy_region->imageExtent = {src.size_2d.logical_width,
src.size_2d.logical_height, 1};
return true;
}
} else {
// Untile image.
// We could do this in a shader to speed things up, as this is pretty
// slow.
// TODO(benvanik): optimize this inner loop (or work by tiles).
const uint8_t* src_mem = reinterpret_cast<const uint8_t*>(host_address);
uint32_t bytes_per_block = src.format_info()->block_width *
src.format_info()->block_height *
src.format_info()->bits_per_pixel / 8;
uint32_t output_pitch = src.size_2d.input_width *
src.format_info()->block_width *
src.format_info()->bits_per_pixel / 8;
uint32_t output_row_height = 1;
if (src.texture_format == TextureFormat::k_CTX1) {
// TODO: Can we calculate this?
output_row_height = 4;
}
// Tiled textures can be packed; get the offset into the packed texture.
uint32_t offset_x;
uint32_t offset_y;
TextureInfo::GetPackedTileOffset(src, &offset_x, &offset_y);
auto log2_bpp = (bytes_per_block >> 2) +
((bytes_per_block >> 1) >> (bytes_per_block >> 2));
// Offset to the current row, in bytes.
uint32_t output_row_offset = 0;
for (uint32_t y = 0; y < src.size_2d.block_height; y++) {
auto input_row_offset = TextureInfo::TiledOffset2DOuter(
offset_y + y, src.size_2d.block_width, log2_bpp);
// Go block-by-block on this row.
uint32_t output_offset = output_row_offset;
for (uint32_t x = 0; x < src.size_2d.block_width; x++) {
auto input_offset = TextureInfo::TiledOffset2DInner(
offset_x + x, offset_y + y, log2_bpp, input_row_offset);
input_offset >>= log2_bpp;
if (src.texture_format == TextureFormat::k_CTX1) {
// Convert to R8G8.
ConvertTexelCTX1(&dest[output_offset], output_pitch, src_mem,
src.endianness);
} else {
// Generic swap to destination.
TextureSwap(src.endianness, dest + output_offset,
src_mem + input_offset * bytes_per_block,
bytes_per_block);
}
output_offset += bytes_per_block;
}
output_row_offset += output_pitch * output_row_height;
}
copy_region->bufferRowLength = src.size_2d.input_width;
copy_region->bufferImageHeight = src.size_2d.input_height;
copy_region->imageSubresource = {VK_IMAGE_ASPECT_COLOR_BIT, 0, 0, 1};
copy_region->imageExtent = {src.size_2d.logical_width,
src.size_2d.logical_height, 1};
return true;
TextureInfo::ConvertTiled(dest, src_mem, src.endianness, src.format_info(),
offset_x, offset_y, block_width, logical_width,
logical_height, input_width);
}
return false;
copy_region->bufferRowLength = input_width;
copy_region->bufferImageHeight = input_height;
copy_region->imageSubresource = {VK_IMAGE_ASPECT_COLOR_BIT, mip, 0, 1};
copy_region->imageExtent = {logical_width, logical_height, 1};
return true;
}
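The per-mip dimensions are just the base sizes shifted down by the level: for a 256x128 base, mip 3 converts and uploads a 32x16 region. bufferRowLength/bufferImageHeight give the texel pitch of the converted data in the staging buffer, while imageExtent is the region actually written into that image level.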
bool TextureCache::ConvertTextureCube(uint8_t* dest,
@ -1067,13 +986,13 @@ bool TextureCache::ConvertTextureCube(uint8_t* dest,
}
bool TextureCache::ConvertTexture(uint8_t* dest, VkBufferImageCopy* copy_region,
const TextureInfo& src) {
uint32_t mip, const TextureInfo& src) {
switch (src.dimension) {
case Dimension::k1D:
assert_always();
break;
case Dimension::k2D:
return ConvertTexture2D(dest, copy_region, src);
return ConvertTexture2D(dest, copy_region, mip, src);
case Dimension::k3D:
assert_always();
break;
@ -1083,6 +1002,145 @@ bool TextureCache::ConvertTexture(uint8_t* dest, VkBufferImageCopy* copy_region,
return false;
}
bool TextureCache::UploadTexture(VkCommandBuffer command_buffer,
VkFence completion_fence, Texture* dest,
const TextureInfo& src) {
#if FINE_GRAINED_DRAW_SCOPES
SCOPE_profile_cpu_f("gpu");
#endif // FINE_GRAINED_DRAW_SCOPES
size_t unpack_length;
if (!ComputeTextureStorage(&unpack_length, src)) {
XELOGW("Failed to compute texture storage");
return false;
}
size_t total_unpack_length = unpack_length;
for (uint32_t i = 1; i < src.mip_levels; i++) {
// Add in more space for mips.
total_unpack_length += unpack_length >> (2 * i);
}
if (!staging_buffer_.CanAcquire(total_unpack_length)) {
// Need to have unique memory for every upload for at least one frame. If we
// run out of memory, we need to flush all queued upload commands to the
// GPU.
FlushPendingCommands(command_buffer, completion_fence);
// Uploads have been flushed. Continue.
if (!staging_buffer_.CanAcquire(total_unpack_length)) {
// The staging buffer isn't big enough to hold this texture.
XELOGE(
"TextureCache staging buffer is too small! (uploading 0x%.8X bytes)",
total_unpack_length);
assert_always();
return false;
}
}
// Grab some temporary memory for staging.
auto alloc = staging_buffer_.Acquire(total_unpack_length, completion_fence);
assert_not_null(alloc);
if (!alloc) {
XELOGE("%s: Failed to acquire staging memory", __func__);
return false;
}
// DEBUG: Check the source address. If it's completely zero'd out, print it.
bool valid = false;
auto src_data = memory_->TranslatePhysical(src.guest_address);
for (uint32_t i = 0; i < src.input_length; i++) {
if (src_data[i] != 0) {
valid = true;
break;
}
}
if (!valid) {
XELOGW(
"Warning: Uploading blank texture at address 0x%.8X "
"(length: 0x%.8X, format: %s)",
src.guest_address, src.input_length, src.format_info()->name);
}
// Upload texture into GPU memory.
// TODO: If the GPU supports it, we can submit a compute batch to convert the
// texture and copy it to its destination. Otherwise, fallback to conversion
// on the CPU.
std::vector<VkBufferImageCopy> copy_regions(src.mip_levels);
// Base MIP
if (!ConvertTexture(reinterpret_cast<uint8_t*>(alloc->host_ptr),
&copy_regions[0], 0, src)) {
XELOGW("Failed to convert texture");
return false;
}
copy_regions[0].bufferOffset = alloc->offset;
copy_regions[0].imageOffset = {0, 0, 0};
// Now upload all the MIPs
VkDeviceSize buffer_offset = unpack_length;
for (uint32_t mip = 1; mip < src.mip_levels; mip++) {
uint8_t* dest = reinterpret_cast<uint8_t*>(alloc->host_ptr) + buffer_offset;
ConvertTexture(dest, &copy_regions[mip], mip, src);
copy_regions[mip].bufferOffset = alloc->offset + buffer_offset;
copy_regions[mip].imageOffset = {0, 0, 0};
// With each mip, the length is divided by 4.
buffer_offset += unpack_length >> (2 * mip);
}
// Transition the texture into a transfer destination layout.
VkImageMemoryBarrier barrier;
barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
barrier.pNext = nullptr;
barrier.srcAccessMask = 0;
barrier.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
barrier.oldLayout = dest->image_layout;
barrier.newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
barrier.image = dest->image;
barrier.subresourceRange = {VK_IMAGE_ASPECT_COLOR_BIT, 0, src.mip_levels,
copy_regions[0].imageSubresource.baseArrayLayer,
copy_regions[0].imageSubresource.layerCount};
if (dest->format == VK_FORMAT_D16_UNORM_S8_UINT ||
dest->format == VK_FORMAT_D24_UNORM_S8_UINT ||
dest->format == VK_FORMAT_D32_SFLOAT_S8_UINT) {
barrier.subresourceRange.aspectMask =
VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT;
}
vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 0, nullptr, 0,
nullptr, 1, &barrier);
// Now move the converted texture into the destination.
if (dest->format == VK_FORMAT_D16_UNORM_S8_UINT ||
dest->format == VK_FORMAT_D24_UNORM_S8_UINT ||
dest->format == VK_FORMAT_D32_SFLOAT_S8_UINT) {
// Do just a depth upload (for now).
// This assumes depth buffers don't have mips (hopefully they don't)
copy_regions[0].imageSubresource.aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT;
}
vkCmdCopyBufferToImage(command_buffer, staging_buffer_.gpu_buffer(),
dest->image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
src.mip_levels, copy_regions.data());
// Now transition the texture into a shader readonly source.
barrier.srcAccessMask = barrier.dstAccessMask;
barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT;
barrier.oldLayout = barrier.newLayout;
barrier.newLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_TRANSFER_BIT,
VK_PIPELINE_STAGE_VERTEX_SHADER_BIT |
VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT,
0, 0, nullptr, 0, nullptr, 1, &barrier);
dest->image_layout = barrier.newLayout;
return true;
}
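The staging allocation is a truncated geometric series: each level adds a quarter of the previous one, so a full chain costs at most a third more than the base level alone. For example, with unpack_length = 0x100000 and 4 mip levels, the total is 0x100000 + 0x40000 + 0x10000 + 0x4000 = 0x154000 bytes.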
bool TextureCache::ComputeTextureStorage(size_t* output_length,
const TextureInfo& src) {
if (src.texture_format == TextureFormat::k_CTX1) {
@ -1182,120 +1240,6 @@ void TextureCache::WritebackTexture(Texture* texture) {
wb_staging_buffer_.Scavenge();
}
bool TextureCache::UploadTexture(VkCommandBuffer command_buffer,
VkFence completion_fence, Texture* dest,
const TextureInfo& src) {
#if FINE_GRAINED_DRAW_SCOPES
SCOPE_profile_cpu_f("gpu");
#endif // FINE_GRAINED_DRAW_SCOPES
size_t unpack_length;
if (!ComputeTextureStorage(&unpack_length, src)) {
XELOGW("Failed to compute texture storage");
return false;
}
if (!staging_buffer_.CanAcquire(unpack_length)) {
// Need to have unique memory for every upload for at least one frame. If we
// run out of memory, we need to flush all queued upload commands to the
// GPU.
FlushPendingCommands(command_buffer, completion_fence);
// Uploads have been flushed. Continue.
if (!staging_buffer_.CanAcquire(unpack_length)) {
// The staging buffer isn't big enough to hold this texture.
XELOGE(
"TextureCache staging buffer is too small! (uploading 0x%.8X bytes)",
unpack_length);
assert_always();
return false;
}
}
// Grab some temporary memory for staging.
auto alloc = staging_buffer_.Acquire(unpack_length, completion_fence);
assert_not_null(alloc);
// DEBUG: Check the source address. If it's completely zero'd out, print it.
bool valid = false;
auto src_data = memory_->TranslatePhysical(src.guest_address);
for (uint32_t i = 0; i < src.input_length; i++) {
if (src_data[i] != 0) {
valid = true;
break;
}
}
if (!valid) {
XELOGW(
"Warning: Uploading blank texture at address 0x%.8X "
"(length: 0x%.8X, format: %d)",
src.guest_address, src.input_length, src.texture_format);
}
// Upload texture into GPU memory.
// TODO: If the GPU supports it, we can submit a compute batch to convert the
// texture and copy it to its destination. Otherwise, fallback to conversion
// on the CPU.
VkBufferImageCopy copy_region;
if (!ConvertTexture(reinterpret_cast<uint8_t*>(alloc->host_ptr), &copy_region,
src)) {
XELOGW("Failed to convert texture");
return false;
}
// Transition the texture into a transfer destination layout.
VkImageMemoryBarrier barrier;
barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
barrier.pNext = nullptr;
barrier.srcAccessMask = 0;
barrier.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
barrier.oldLayout = dest->image_layout;
barrier.newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
barrier.image = dest->image;
barrier.subresourceRange = {VK_IMAGE_ASPECT_COLOR_BIT, 0, 1,
copy_region.imageSubresource.baseArrayLayer,
copy_region.imageSubresource.layerCount};
if (dest->format == VK_FORMAT_D16_UNORM_S8_UINT ||
dest->format == VK_FORMAT_D24_UNORM_S8_UINT ||
dest->format == VK_FORMAT_D32_SFLOAT_S8_UINT) {
barrier.subresourceRange.aspectMask =
VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT;
}
vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 0, nullptr, 0,
nullptr, 1, &barrier);
// Now move the converted texture into the destination.
copy_region.bufferOffset = alloc->offset;
copy_region.imageOffset = {0, 0, 0};
if (dest->format == VK_FORMAT_D16_UNORM_S8_UINT ||
dest->format == VK_FORMAT_D24_UNORM_S8_UINT ||
dest->format == VK_FORMAT_D32_SFLOAT_S8_UINT) {
// Do just a depth upload (for now).
copy_region.imageSubresource.aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT;
}
vkCmdCopyBufferToImage(command_buffer, staging_buffer_.gpu_buffer(),
dest->image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1,
&copy_region);
// Now transition the texture into a shader readonly source.
barrier.srcAccessMask = barrier.dstAccessMask;
barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT;
barrier.oldLayout = barrier.newLayout;
barrier.newLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_TRANSFER_BIT,
VK_PIPELINE_STAGE_VERTEX_SHADER_BIT |
VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT,
0, 0, nullptr, 0, nullptr, 1, &barrier);
dest->image_layout = barrier.newLayout;
return true;
}
void TextureCache::HashTextureBindings(
XXH64_state_t* hash_state, uint32_t& fetch_mask,
const std::vector<Shader::TextureBinding>& bindings) {

View File: src/xenia/gpu/vulkan/texture_cache.h

@ -149,15 +149,12 @@ class TextureCache {
void FlushPendingCommands(VkCommandBuffer command_buffer,
VkFence completion_fence);
static void ConvertTexelCTX1(uint8_t* dest, size_t dest_pitch,
const uint8_t* src, Endian src_endianness);
bool ConvertTexture2D(uint8_t* dest, VkBufferImageCopy* copy_region,
const TextureInfo& src);
bool ConvertTextureCube(uint8_t* dest, VkBufferImageCopy* copy_region,
uint32_t mip, const TextureInfo& src);
bool ConvertTextureCube(uint8_t* dest, VkBufferImageCopy* copy_regions,
const TextureInfo& src);
bool ConvertTexture(uint8_t* dest, VkBufferImageCopy* copy_region,
const TextureInfo& src);
uint32_t mip, const TextureInfo& src);
bool ComputeTextureStorage(size_t* output_length, const TextureInfo& src);
// Writes a texture back into guest memory. This call is (mostly) asynchronous