[Vulkan] Reimplemented texture conversions. Here be dragons! Probably breaks everything. Also bonus DXT3A support.
This commit is contained in:
parent
926464cb90
commit
7116b5fc82
|
@ -0,0 +1,156 @@
|
|||
/**
|
||||
******************************************************************************
|
||||
* Xenia : Xbox 360 Emulator Research Project *
|
||||
******************************************************************************
|
||||
* Copyright 2014 Ben Vanik. All rights reserved. *
|
||||
* Released under the BSD license - see LICENSE in the root for more details. *
|
||||
******************************************************************************
|
||||
*/
|
||||
|
||||
#include "xenia/gpu/texture_conversion.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <cmath>
|
||||
#include <cstring>
|
||||
#include <functional>
|
||||
|
||||
#include "xenia/base/logging.h"
|
||||
#include "xenia/base/math.h"
|
||||
#include "xenia/base/memory.h"
|
||||
|
||||
#include "third_party/xxhash/xxhash.h"
|
||||
|
||||
namespace xe {
|
||||
namespace gpu {
|
||||
namespace texture_conversion {
|
||||
|
||||
using namespace xe::gpu::xenos;
|
||||
|
||||
// Copies |length| bytes from |input| to |output|, applying the swap selected
// by |endian|. Unknown or unspecified endianness falls back to a plain copy.
//
// The swapping helpers are handed element counts rather than byte counts, so
// |length| is divided by the element width for each case.
void CopySwapBlock(Endian endian, void* output, const void* input,
                   size_t length) {
  switch (endian) {
    case Endian::k8in16:
      // Swap the bytes within each 16-bit element.
      xe::copy_and_swap_16_unaligned(output, input, length / 2);
      break;
    case Endian::k8in32:
      // Swap the bytes within each 32-bit element.
      xe::copy_and_swap_32_unaligned(output, input, length / 4);
      break;
    case Endian::k16in32:
      // Swap high and low 16 bits within a 32 bit word. The count is given in
      // 32-bit words, matching the k8in32 case; passing the raw byte length
      // would touch four times more memory than the block holds.
      // NOTE(review): assumes copy_and_swap_16_in_32_unaligned counts 32-bit
      // words like its siblings - confirm against its declaration.
      xe::copy_and_swap_16_in_32_unaligned(output, input, length / 4);
      break;
    default:
    case Endian::kUnspecified:
      std::memcpy(output, input, length);
      break;
  }
}
|
||||
|
||||
// Decodes one 8-byte CTX1 block read from |input| into a 4x4 grid of
// two-byte (R, G) texels written to |output|.
//
// |length| is used as the distance in bytes between successive output rows.
// Format reference:
// http://fileadmin.cs.lth.se/cs/Personal/Michael_Doggett/talks/unc-xenos-doggett.pdf
void ConvertTexelCTX1ToR8G8(Endian endian, void* output, const void* input,
                            size_t length) {
  // Block layout: two (R, G) endpoint pairs followed by sixteen 2-bit palette
  // selectors packed into one 32-bit word.
  union {
    uint8_t data[8];
    struct {
      uint8_t r0, g0, r1, g1;
      uint32_t xx;
    };
  } ctx1;
  static_assert(sizeof(ctx1) == 8, "CTX1 block mismatch");

  const uint32_t kBlockBytes = 8;
  CopySwapBlock(endian, ctx1.data, input, kBlockBytes);

  // Four-entry palettes per channel: the two endpoints plus two values
  // interpolated at 1/3 and 2/3 between them.
  const uint8_t red_palette[4] = {
      ctx1.r0, ctx1.r1,
      static_cast<uint8_t>(2.f / 3.f * ctx1.r0 + 1.f / 3.f * ctx1.r1),
      static_cast<uint8_t>(1.f / 3.f * ctx1.r0 + 2.f / 3.f * ctx1.r1)};
  const uint8_t green_palette[4] = {
      ctx1.g0, ctx1.g1,
      static_cast<uint8_t>(2.f / 3.f * ctx1.g0 + 1.f / 3.f * ctx1.g1),
      static_cast<uint8_t>(1.f / 3.f * ctx1.g0 + 2.f / 3.f * ctx1.g1)};

  uint8_t* const dest = static_cast<uint8_t*>(output);
  // Walk the 16 texels in row-major order; texel i uses selector bits
  // [2i, 2i + 1].
  for (uint32_t texel = 0; texel < 16; ++texel) {
    const uint32_t row = texel / 4;
    const uint32_t column = texel % 4;
    const uint8_t selector = (ctx1.xx >> (texel * 2)) & 3;
    uint8_t* const pixel = dest + (row * length) + (column * 2);
    pixel[0] = red_palette[selector];
    pixel[1] = green_palette[selector];
  }
}
|
||||
|
||||
// Expands one 8-byte DXT3A block from |input| into a 16-byte block at
// |output|: the first 8 output bytes are zeroed and the source block
// (swapped according to |endian|) is written to the following 8 bytes.
// |length| is not used.
//
// NOTE(review): a standard DXT3 block stores explicit alpha in its first 8
// bytes and color in the last 8 - confirm that placing the source data in
// the second half is the intended layout.
void ConvertTexelDXT3AToDXT3(Endian endian, void* output, const void* input,
                             size_t length) {
  const uint32_t kHalfBlockBytes = 8;
  uint8_t* const first_half = static_cast<uint8_t*>(output);
  uint8_t* const second_half = first_half + 8;

  std::memset(first_half, 0, 8);
  CopySwapBlock(endian, second_half, input, kHalfBlockBytes);
}
|
||||
|
||||
// https://github.com/BinomialLLC/crunch/blob/ea9b8d8c00c8329791256adafa8cf11e4e7942a2/inc/crn_decomp.h#L4108
|
||||
// Computes the row-dependent part of a 2D tiled address. The result is fed
// to TiledOffset2DInner as its base offset.
//
// |y| is the row index, |width| the surface pitch (divided by 32 to count
// tiles per row) and |log2_bpp| the log2 of the element size in bytes.
static uint32_t TiledOffset2DOuter(uint32_t y, uint32_t width,
                                   uint32_t log2_bpp) {
  // Contribution of the whole 32-row tile rows above |y|.
  const uint32_t tiles_per_row = width / 32;
  const uint32_t macro_term = ((y / 32) * tiles_per_row) << (log2_bpp + 7);

  // Bits 1-2 of |y| scaled by the element size; everything above the low
  // nibble is shifted up one extra bit.
  const uint32_t micro_term = ((y & 6) << 2) << log2_bpp;
  const uint32_t micro_high = (micro_term & ~0xF) << 1;
  const uint32_t micro_low = micro_term & 0xF;

  // Bits 3 and 0 of |y| land at fixed positions of their own.
  const uint32_t y_bit3_term = (y & 8) << (3 + log2_bpp);
  const uint32_t y_bit0_term = (y & 1) << 4;

  return macro_term + micro_high + micro_low + y_bit3_term + y_bit0_term;
}
|
||||
|
||||
// Completes a 2D tiled address for column |x| of row |y|, starting from the
// per-row |base_offset| produced by TiledOffset2DOuter. |log2_bpp| is the
// log2 of the element size in bytes.
static uint32_t TiledOffset2DInner(uint32_t x, uint32_t y, uint32_t log2_bpp,
                                   uint32_t base_offset) {
  // Column contribution, built the same way as the row contribution: a
  // 32-column tile term plus the low three bits of |x| scaled by element
  // size.
  const uint32_t macro_term = (x / 32) << (log2_bpp + 7);
  const uint32_t micro_term = (x & 7) << log2_bpp;
  const uint32_t linear =
      base_offset +
      (macro_term + ((micro_term & ~0xF) << 1) + (micro_term & 0xF));

  // Spread the combined offset apart to open up the bit positions that the
  // remaining x/y bits are inserted into.
  const uint32_t upper_bits = (linear & ~0x1FF) << 3;
  const uint32_t middle_bits = (linear & 0x1C0) << 2;
  const uint32_t lower_bits = linear & 0x3F;

  const uint32_t y_bit4_term = (y & 16) << 7;
  const uint32_t interleave_term = ((((y & 8) >> 2) + (x >> 3)) & 3) << 6;

  return upper_bits + middle_bits + lower_bits + y_bit4_term +
         interleave_term;
}
|
||||
|
||||
void Untile(uint8_t* output_buffer, const uint8_t* input_buffer,
|
||||
const UntileInfo* untile_info) {
|
||||
assert_not_null(untile_info);
|
||||
assert_not_null(untile_info->input_format_info);
|
||||
assert_not_null(untile_info->output_format_info);
|
||||
assert_true(untile_info->width <= untile_info->input_pitch);
|
||||
assert_true(untile_info->width <= untile_info->output_pitch);
|
||||
|
||||
uint32_t input_bytes_per_block =
|
||||
untile_info->input_format_info->bytes_per_block();
|
||||
uint32_t output_bytes_per_block =
|
||||
untile_info->output_format_info->bytes_per_block();
|
||||
uint32_t output_pitch = untile_info->output_pitch * output_bytes_per_block;
|
||||
|
||||
// Bytes per pixel
|
||||
auto log2_bpp = (input_bytes_per_block / 4) +
|
||||
((input_bytes_per_block / 2) >> (input_bytes_per_block / 4));
|
||||
|
||||
// Offset to the current row, in bytes.
|
||||
uint32_t output_row_offset = 0;
|
||||
for (uint32_t y = 0; y < untile_info->height; y++) {
|
||||
auto input_row_offset = TiledOffset2DOuter(
|
||||
untile_info->offset_y + y, untile_info->input_pitch, log2_bpp);
|
||||
|
||||
// Go block-by-block on this row.
|
||||
uint32_t output_offset = output_row_offset;
|
||||
|
||||
for (uint32_t x = 0; x < untile_info->width; x++) {
|
||||
auto input_offset = TiledOffset2DInner(untile_info->offset_x + x,
|
||||
untile_info->offset_y + y,
|
||||
log2_bpp, input_row_offset);
|
||||
input_offset >>= log2_bpp;
|
||||
|
||||
untile_info->copy_callback(
|
||||
&output_buffer[output_offset],
|
||||
&input_buffer[input_offset * input_bytes_per_block],
|
||||
output_bytes_per_block);
|
||||
|
||||
output_offset += output_bytes_per_block;
|
||||
}
|
||||
|
||||
output_row_offset += output_pitch;
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace texture_conversion
|
||||
} // namespace gpu
|
||||
} // namespace xe
|
|
@ -0,0 +1,56 @@
|
|||
/**
|
||||
******************************************************************************
|
||||
* Xenia : Xbox 360 Emulator Research Project *
|
||||
******************************************************************************
|
||||
* Copyright 2014 Ben Vanik. All rights reserved. *
|
||||
* Released under the BSD license - see LICENSE in the root for more details. *
|
||||
******************************************************************************
|
||||
*/
|
||||
|
||||
#ifndef XENIA_GPU_TEXTURE_CONVERSION_H_
|
||||
#define XENIA_GPU_TEXTURE_CONVERSION_H_
|
||||
|
||||
#include <cstring>
|
||||
#include <functional>
|
||||
#include <memory>
|
||||
|
||||
#include "xenia/base/assert.h"
|
||||
#include "xenia/gpu/texture_info.h"
|
||||
#include "xenia/gpu/xenos.h"
|
||||
|
||||
namespace xe {
|
||||
namespace gpu {
|
||||
namespace texture_conversion {
|
||||
|
||||
typedef std::function<void(Endian, void*, const void*, size_t)>
|
||||
CopyBlockCallback;
|
||||
|
||||
void CopySwapBlock(Endian endian, void* output, const void* input,
|
||||
size_t length);
|
||||
void ConvertTexelCTX1ToR8G8(Endian endian, void* output, const void* input,
|
||||
size_t length);
|
||||
void ConvertTexelDXT3AToDXT3(Endian endian, void* output, const void* input,
|
||||
size_t length);
|
||||
|
||||
typedef std::function<void(void*, const void*, size_t)> UntileCopyBlockCallback;
|
||||
|
||||
typedef struct UntileInfo {
|
||||
uint32_t offset_x;
|
||||
uint32_t offset_y;
|
||||
uint32_t width;
|
||||
uint32_t height;
|
||||
uint32_t input_pitch;
|
||||
uint32_t output_pitch;
|
||||
const FormatInfo* input_format_info;
|
||||
const FormatInfo* output_format_info;
|
||||
UntileCopyBlockCallback copy_callback;
|
||||
} UntileInfo;
|
||||
|
||||
void Untile(uint8_t* output_buffer, const uint8_t* input_buffer,
|
||||
const UntileInfo* untile_info);
|
||||
|
||||
} // namespace texture_conversion
|
||||
} // namespace gpu
|
||||
} // namespace xe
|
||||
|
||||
#endif // XENIA_GPU_TEXTURE_CONVERSION_H_
|
|
@ -26,21 +26,22 @@ using namespace xe::gpu::xenos;
|
|||
|
||||
bool TextureInfo::Prepare(const xe_gpu_texture_fetch_t& fetch,
|
||||
TextureInfo* out_info) {
|
||||
std::memset(out_info, 0, sizeof(TextureInfo));
|
||||
|
||||
// http://msdn.microsoft.com/en-us/library/windows/desktop/cc308051(v=vs.85).aspx
|
||||
// a2xx_sq_surfaceformat
|
||||
|
||||
std::memset(out_info, 0, sizeof(TextureInfo));
|
||||
|
||||
auto& info = *out_info;
|
||||
info.guest_address = fetch.address << 12;
|
||||
|
||||
info.format = static_cast<TextureFormat>(fetch.format);
|
||||
info.endianness = static_cast<Endian>(fetch.endianness);
|
||||
|
||||
info.dimension = static_cast<Dimension>(fetch.dimension);
|
||||
info.pitch = fetch.pitch << 5;
|
||||
info.width = info.height = info.depth = 0;
|
||||
switch (info.dimension) {
|
||||
case Dimension::k1D:
|
||||
info.dimension = Dimension::k2D;
|
||||
info.dimension = Dimension::k2D; // we treat 1D textures as 2D
|
||||
info.width = fetch.size_1d.width;
|
||||
info.height = 1;
|
||||
break;
|
||||
case Dimension::k2D:
|
||||
info.width = fetch.size_2d.width;
|
||||
|
@ -56,228 +57,62 @@ bool TextureInfo::Prepare(const xe_gpu_texture_fetch_t& fetch,
|
|||
info.height = fetch.size_stack.height;
|
||||
info.depth = fetch.size_stack.depth;
|
||||
break;
|
||||
default:
|
||||
assert_unhandled_case(info.dimension);
|
||||
break;
|
||||
}
|
||||
info.texture_format = static_cast<TextureFormat>(fetch.format);
|
||||
info.endianness = static_cast<Endian>(fetch.endianness);
|
||||
info.pitch = fetch.pitch << 5;
|
||||
info.mip_levels = fetch.packed_mips ? fetch.mip_max_level + 1 : 1;
|
||||
|
||||
info.is_tiled = fetch.tiled;
|
||||
info.has_packed_mips = fetch.packed_mips;
|
||||
|
||||
info.guest_address = fetch.address << 12;
|
||||
info.mip_address = fetch.mip_address << 12;
|
||||
info.mip_levels = fetch.packed_mips ? fetch.mip_max_level + 1 : 1;
|
||||
info.input_length = 0; // Populated below.
|
||||
|
||||
if (info.format_info()->format == TextureFormat::kUnknown) {
|
||||
XELOGE("Attempting to fetch from unsupported texture format %d",
|
||||
info.texture_format);
|
||||
info.format);
|
||||
return false;
|
||||
}
|
||||
|
||||
// Must be called here when we know the format.
|
||||
switch (info.dimension) {
|
||||
case Dimension::k1D: {
|
||||
info.CalculateTextureSizes1D(fetch.size_1d.width + 1);
|
||||
} break;
|
||||
case Dimension::k2D: {
|
||||
info.CalculateTextureSizes2D(fetch.size_2d.width + 1,
|
||||
fetch.size_2d.height + 1);
|
||||
} break;
|
||||
case Dimension::k3D: {
|
||||
info.CalculateTextureSizes3D(fetch.size_3d.width + 1,
|
||||
fetch.size_3d.height + 1,
|
||||
fetch.size_3d.depth + 1);
|
||||
break;
|
||||
}
|
||||
case Dimension::kCube: {
|
||||
info.CalculateTextureSizesCube(fetch.size_stack.width + 1,
|
||||
fetch.size_stack.height + 1,
|
||||
fetch.size_stack.depth + 1);
|
||||
} break;
|
||||
}
|
||||
|
||||
info.memory_usage = TextureMemoryUsage::Calculate(out_info, true);
|
||||
return true;
|
||||
}
|
||||
|
||||
bool TextureInfo::PrepareResolve(uint32_t physical_address,
|
||||
TextureFormat texture_format, Endian endian,
|
||||
TextureFormat format, Endian endian,
|
||||
uint32_t pitch, uint32_t width,
|
||||
uint32_t height, TextureInfo* out_info) {
|
||||
std::memset(out_info, 0, sizeof(TextureInfo));
|
||||
auto& info = *out_info;
|
||||
info.guest_address = physical_address;
|
||||
info.dimension = Dimension::k2D;
|
||||
assert_true(width > 0);
|
||||
assert_true(height > 0);
|
||||
info.pitch = pitch;
|
||||
|
||||
std::memset(out_info, 0, sizeof(TextureInfo));
|
||||
|
||||
auto& info = *out_info;
|
||||
info.format = format;
|
||||
info.dimension = Dimension::k2D;
|
||||
info.width = width - 1;
|
||||
info.height = height - 1;
|
||||
info.texture_format = texture_format;
|
||||
info.mip_levels = 1;
|
||||
info.depth = 0;
|
||||
info.pitch = pitch;
|
||||
|
||||
info.endianness = endian;
|
||||
info.is_tiled = true;
|
||||
|
||||
info.guest_address = physical_address;
|
||||
info.mip_address = 0;
|
||||
info.mip_levels = 1;
|
||||
info.input_length = 0;
|
||||
|
||||
if (info.format_info()->format == TextureFormat::kUnknown) {
|
||||
assert_true("Unsupported texture format");
|
||||
return false;
|
||||
}
|
||||
|
||||
info.CalculateTextureSizes2D(width, height);
|
||||
info.memory_usage = TextureMemoryUsage::Calculate(out_info, true);
|
||||
return true;
|
||||
}
|
||||
|
||||
void TextureInfo::CalculateTextureSizes1D(uint32_t width) {
|
||||
size.logical_width = width;
|
||||
|
||||
auto format = format_info();
|
||||
|
||||
// width in blocks.
|
||||
uint32_t block_width =
|
||||
xe::round_up(pitch, format->block_width) / format->block_width;
|
||||
|
||||
// Texture dimensions must be a multiple of tile
|
||||
// dimensions (32x32 blocks).
|
||||
size.block_width = xe::round_up(block_width, 32);
|
||||
|
||||
uint32_t bytes_per_block = format->block_width * format->bits_per_pixel / 8;
|
||||
uint32_t byte_pitch = size.block_width * bytes_per_block;
|
||||
|
||||
uint32_t texel_width;
|
||||
if (!is_tiled) {
|
||||
// Each row must be a multiple of 256 in linear textures.
|
||||
byte_pitch = xe::round_up(byte_pitch, 256);
|
||||
texel_width = (byte_pitch / bytes_per_block) * format->block_width;
|
||||
} else {
|
||||
texel_width = size.block_width * format->block_width;
|
||||
}
|
||||
|
||||
size.input_width = texel_width;
|
||||
|
||||
// Set some reasonable defaults for unused fields.
|
||||
size.logical_height = 1;
|
||||
size.block_height = format->block_height;
|
||||
size.input_height = 1;
|
||||
size.input_face_length = pitch * bytes_per_block;
|
||||
|
||||
input_length = size.input_face_length;
|
||||
}
|
||||
|
||||
void TextureInfo::CalculateTextureSizes2D(uint32_t width, uint32_t height) {
|
||||
size.logical_width = width;
|
||||
size.logical_height = height;
|
||||
|
||||
auto format = format_info();
|
||||
|
||||
// w/h in blocks.
|
||||
uint32_t block_width =
|
||||
xe::round_up(pitch, format->block_width) / format->block_width;
|
||||
uint32_t block_height =
|
||||
xe::round_up(size.logical_height, format->block_height) /
|
||||
format->block_height;
|
||||
|
||||
// Texture dimensions must be a multiple of tile
|
||||
// dimensions (32x32 blocks).
|
||||
size.block_width = xe::round_up(block_width, 32);
|
||||
size.block_height = xe::round_up(block_height, 32);
|
||||
|
||||
uint32_t bytes_per_block =
|
||||
format->block_width * format->block_height * format->bits_per_pixel / 8;
|
||||
uint32_t byte_pitch = size.block_width * bytes_per_block;
|
||||
|
||||
uint32_t texel_width;
|
||||
if (!is_tiled) {
|
||||
// Each row must be a multiple of 256 in linear textures.
|
||||
byte_pitch = xe::round_up(byte_pitch, 256);
|
||||
texel_width = (byte_pitch / bytes_per_block) * format->block_width;
|
||||
} else {
|
||||
texel_width = size.block_width * format->block_width;
|
||||
}
|
||||
|
||||
size.input_width = texel_width;
|
||||
size.input_height = size.block_height * format->block_height;
|
||||
size.input_face_length = pitch * bytes_per_block * size.block_height;
|
||||
|
||||
input_length = size.input_face_length;
|
||||
}
|
||||
|
||||
void TextureInfo::CalculateTextureSizes3D(uint32_t width, uint32_t height,
|
||||
uint32_t depth) {
|
||||
size.logical_width = width;
|
||||
size.logical_height = height;
|
||||
|
||||
auto format = format_info();
|
||||
|
||||
// w/h in blocks must be a multiple of block size.
|
||||
uint32_t block_width =
|
||||
xe::round_up(pitch, format->block_width) / format->block_width;
|
||||
uint32_t block_height =
|
||||
xe::round_up(size.logical_height, format->block_height) /
|
||||
format->block_height;
|
||||
|
||||
// Texture dimensions must be a multiple of tile
|
||||
// dimensions (32x32 blocks).
|
||||
size.block_width = xe::round_up(block_width, 32);
|
||||
size.block_height = xe::round_up(block_height, 32);
|
||||
|
||||
uint32_t bytes_per_block =
|
||||
format->block_width * format->block_height * format->bits_per_pixel / 8;
|
||||
uint32_t byte_pitch = size.block_width * bytes_per_block;
|
||||
|
||||
uint32_t texel_width;
|
||||
if (!is_tiled) {
|
||||
// Each row must be a multiple of 256 in linear textures.
|
||||
byte_pitch = xe::round_up(byte_pitch, 256);
|
||||
texel_width = (byte_pitch / bytes_per_block) * format->block_width;
|
||||
} else {
|
||||
texel_width = size.block_width * format->block_width;
|
||||
}
|
||||
|
||||
size.input_width = texel_width;
|
||||
size.input_height = size.block_height * format->block_height;
|
||||
size.input_face_length = pitch * bytes_per_block * size.block_height;
|
||||
|
||||
input_length = size.input_face_length * depth;
|
||||
}
|
||||
|
||||
void TextureInfo::CalculateTextureSizesCube(uint32_t width, uint32_t height,
|
||||
uint32_t depth) {
|
||||
assert_true(depth == 6);
|
||||
size.logical_width = width;
|
||||
size.logical_height = height;
|
||||
|
||||
auto format = format_info();
|
||||
|
||||
// w/h in blocks must be a multiple of block size.
|
||||
uint32_t block_width =
|
||||
xe::round_up(pitch, format->block_width) / format->block_width;
|
||||
uint32_t block_height =
|
||||
xe::round_up(size.logical_height, format->block_height) /
|
||||
format->block_height;
|
||||
|
||||
// Texture dimensions must be a multiple of tile
|
||||
// dimensions (32x32 blocks).
|
||||
size.block_width = xe::round_up(block_width, 32);
|
||||
size.block_height = xe::round_up(block_height, 32);
|
||||
|
||||
uint32_t bytes_per_block =
|
||||
format->block_width * format->block_height * format->bits_per_pixel / 8;
|
||||
uint32_t byte_pitch = size.block_width * bytes_per_block;
|
||||
|
||||
uint32_t texel_width;
|
||||
if (!is_tiled) {
|
||||
// Each row must be a multiple of 256 in linear textures.
|
||||
byte_pitch = xe::round_up(byte_pitch, 256);
|
||||
texel_width = (byte_pitch / bytes_per_block) * format->block_width;
|
||||
} else {
|
||||
texel_width = size.block_width * format->block_width;
|
||||
}
|
||||
|
||||
size.input_width = texel_width;
|
||||
size.input_height = size.block_height * format->block_height;
|
||||
size.input_face_length = pitch * bytes_per_block * size.block_height;
|
||||
|
||||
input_length = size.input_face_length * depth;
|
||||
}
|
||||
|
||||
static void TextureSwap(Endian endianness, void* dest, const void* src,
|
||||
size_t length) {
|
||||
switch (endianness) {
|
||||
|
@ -330,167 +165,104 @@ static void ConvertTexelCTX1(uint8_t* dest, size_t dest_pitch,
|
|||
}
|
||||
}
|
||||
|
||||
void TextureInfo::ConvertTiled(uint8_t* dest, const uint8_t* src, Endian endian,
|
||||
const FormatInfo* format_info, uint32_t offset_x,
|
||||
uint32_t offset_y, uint32_t block_pitch,
|
||||
uint32_t width, uint32_t height,
|
||||
uint32_t output_width) {
|
||||
// TODO(benvanik): optimize this inner loop (or work by tiles).
|
||||
uint32_t bytes_per_block = format_info->block_width *
|
||||
format_info->block_height *
|
||||
format_info->bits_per_pixel / 8;
|
||||
|
||||
uint32_t output_pitch =
|
||||
output_width * format_info->block_width * format_info->bits_per_pixel / 8;
|
||||
|
||||
uint32_t output_row_height = 1;
|
||||
if (format_info->format == TextureFormat::k_CTX1) {
|
||||
// TODO: Can we calculate this?
|
||||
output_row_height = 4;
|
||||
uint32_t TextureInfo::GetMaxMipLevels() const {
|
||||
return 1 + xe::log2_floor(std::max({width + 1, height + 1, depth + 1}));
|
||||
}
|
||||
|
||||
// logical w/h in blocks.
|
||||
uint32_t block_width =
|
||||
xe::round_up(width, format_info->block_width) / format_info->block_width;
|
||||
uint32_t block_height = xe::round_up(height, format_info->block_height) /
|
||||
format_info->block_height;
|
||||
|
||||
// Bytes per pixel
|
||||
auto log2_bpp =
|
||||
(bytes_per_block / 4) + ((bytes_per_block / 2) >> (bytes_per_block / 4));
|
||||
|
||||
// Offset to the current row, in bytes.
|
||||
uint32_t output_row_offset = 0;
|
||||
for (uint32_t y = 0; y < block_height; y++) {
|
||||
auto input_row_offset =
|
||||
TextureInfo::TiledOffset2DOuter(offset_y + y, block_pitch, log2_bpp);
|
||||
|
||||
// Go block-by-block on this row.
|
||||
uint32_t output_offset = output_row_offset;
|
||||
for (uint32_t x = 0; x < block_width; x++) {
|
||||
auto input_offset = TextureInfo::TiledOffset2DInner(
|
||||
offset_x + x, offset_y + y, log2_bpp, input_row_offset);
|
||||
input_offset >>= log2_bpp;
|
||||
|
||||
if (format_info->format == TextureFormat::k_CTX1) {
|
||||
// Convert to R8G8.
|
||||
ConvertTexelCTX1(&dest[output_offset], output_pitch, src, endian);
|
||||
} else {
|
||||
// Generic swap to destination.
|
||||
TextureSwap(endian, dest + output_offset,
|
||||
src + input_offset * bytes_per_block, bytes_per_block);
|
||||
// Returns the memory usage of mip level |mip|.
//
// Mip 0 returns the cached |memory_usage| as-is, regardless of |is_guest|.
// Other levels are computed from the power-of-two-rounded base size shifted
// down by |mip|.
const TextureMemoryUsage TextureInfo::GetMipMemoryUsage(uint32_t mip,
                                                        bool is_guest) const {
  if (mip == 0) {
    return memory_usage;
  }
  // Clamp each extent to at least one texel, as GetMipSize and
  // GetMipLocation do. On a non-square texture the smaller dimension would
  // otherwise shift down to zero while the larger one still has levels left,
  // and a zero extent would be handed to Calculate.
  uint32_t mip_width = std::max(xe::next_pow2(width + 1) >> mip, 1u);
  uint32_t mip_height = std::max(xe::next_pow2(height + 1) >> mip, 1u);
  return TextureMemoryUsage::Calculate(format_info(), mip_width, mip_height,
                                       depth + 1, is_tiled, is_guest);
}
|
||||
|
||||
output_offset += bytes_per_block;
|
||||
void TextureInfo::GetMipSize(uint32_t mip, uint32_t* out_width,
|
||||
uint32_t* out_height) const {
|
||||
assert_not_null(out_width);
|
||||
assert_not_null(out_height);
|
||||
if (mip == 0) {
|
||||
*out_width = width + 1;
|
||||
*out_height = height + 1;
|
||||
return;
|
||||
}
|
||||
uint32_t width_pow2 = xe::next_pow2(width + 1);
|
||||
uint32_t height_pow2 = xe::next_pow2(height + 1);
|
||||
*out_width = std::max(width_pow2 >> mip, 1u);
|
||||
*out_height = std::max(height_pow2 >> mip, 1u);
|
||||
}
|
||||
|
||||
output_row_offset += output_pitch * output_row_height;
|
||||
}
|
||||
}
|
||||
|
||||
uint32_t TextureInfo::GetMaxMipLevels(uint32_t width, uint32_t height,
|
||||
uint32_t depth) {
|
||||
return 1 + xe::log2_floor(std::max({width, height, depth}));
|
||||
}
|
||||
|
||||
uint32_t TextureInfo::GetMipLocation(const TextureInfo& src, uint32_t mip,
|
||||
uint32_t* offset_x, uint32_t* offset_y) {
|
||||
uint32_t TextureInfo::GetMipLocation(uint32_t mip, uint32_t* offset_x,
|
||||
uint32_t* offset_y, bool is_guest) const {
|
||||
if (mip == 0) {
|
||||
// Short-circuit. Mip 0 is always stored in guest_address.
|
||||
if (!src.has_packed_mips) {
|
||||
if (!has_packed_mips) {
|
||||
*offset_x = 0;
|
||||
*offset_y = 0;
|
||||
} else {
|
||||
GetPackedTileOffset(src, 0, offset_x, offset_y);
|
||||
GetPackedTileOffset(0, offset_x, offset_y);
|
||||
}
|
||||
return src.guest_address;
|
||||
return guest_address;
|
||||
}
|
||||
|
||||
// If the texture is <= 16 pixels w/h, the mips are packed with the base
|
||||
// texture. Otherwise, they're stored beginning from mip_address.
|
||||
uint32_t address_base = std::min(src.width, src.height) < 16
|
||||
? src.guest_address
|
||||
: src.mip_address;
|
||||
uint32_t address_base =
|
||||
std::min(width, height) < 16 ? guest_address : mip_address;
|
||||
uint32_t address_offset = 0;
|
||||
|
||||
if (!src.has_packed_mips) {
|
||||
if (!has_packed_mips) {
|
||||
for (uint32_t i = 1; i < mip; i++) {
|
||||
address_offset += GetMipByteSize(src, i);
|
||||
address_offset += GetMipByteSize(i, is_guest);
|
||||
}
|
||||
*offset_x = 0;
|
||||
*offset_y = 0;
|
||||
return address_base + address_offset;
|
||||
}
|
||||
|
||||
uint32_t width_pow2 = xe::next_pow2(width + 1);
|
||||
uint32_t height_pow2 = xe::next_pow2(height + 1);
|
||||
|
||||
// Walk forward to find the address of the mip.
|
||||
uint32_t packed_mip_base = 1;
|
||||
for (uint32_t i = packed_mip_base; i < mip; i++, packed_mip_base++) {
|
||||
uint32_t logical_width = std::max(xe::next_pow2(src.width + 1) >> i, 1u);
|
||||
uint32_t logical_height = std::max(xe::next_pow2(src.height + 1) >> i, 1u);
|
||||
if (std::min(logical_width, logical_height) <= 16) {
|
||||
uint32_t mip_width = std::max(width_pow2 >> i, 1u);
|
||||
uint32_t mip_height = std::max(height_pow2 >> i, 1u);
|
||||
if (std::min(mip_width, mip_height) <= 16) {
|
||||
// We've reached the point where the mips are packed into a single tile.
|
||||
break;
|
||||
}
|
||||
|
||||
address_offset += GetMipByteSize(src, i);
|
||||
address_offset += GetMipByteSize(i, is_guest);
|
||||
}
|
||||
|
||||
// Now, check if the mip is packed at an offset.
|
||||
GetPackedTileOffset(xe::next_pow2(src.width + 1) >> mip,
|
||||
xe::next_pow2(src.height + 1) >> mip, src.format_info(),
|
||||
GetPackedTileOffset(width_pow2 >> mip, height_pow2 >> mip, format_info(),
|
||||
mip - packed_mip_base, offset_x, offset_y);
|
||||
return address_base + address_offset;
|
||||
}
|
||||
|
||||
uint32_t TextureInfo::GetMipByteSize(const TextureInfo& src, uint32_t mip) {
|
||||
if (mip == 0) {
|
||||
return src.input_length;
|
||||
uint32_t TextureInfo::GetMipByteSize(uint32_t mip, bool is_guest) const {
|
||||
uint32_t bytes_per_block = format_info()->bytes_per_block();
|
||||
auto mip_usage = GetMipMemoryUsage(mip, is_guest);
|
||||
return mip_usage.blocks() * bytes_per_block;
|
||||
}
|
||||
|
||||
uint32_t bytes_per_block = src.format_info()->block_width *
|
||||
src.format_info()->block_height *
|
||||
src.format_info()->bits_per_pixel / 8;
|
||||
|
||||
uint32_t logical_width = xe::next_pow2(src.width + 1) >> mip;
|
||||
uint32_t logical_height = xe::next_pow2(src.height + 1) >> mip;
|
||||
|
||||
// w/h in blocks
|
||||
uint32_t block_width =
|
||||
xe::round_up(logical_width, src.format_info()->block_width) /
|
||||
src.format_info()->block_width;
|
||||
uint32_t block_height =
|
||||
xe::round_up(logical_height, src.format_info()->block_height) /
|
||||
src.format_info()->block_height;
|
||||
|
||||
// Texture dimensions must be a multiple of tile
|
||||
// dimensions (32x32 blocks).
|
||||
block_width = xe::round_up(block_width, 32);
|
||||
block_height = xe::round_up(block_height, 32);
|
||||
|
||||
uint32_t byte_pitch = block_width * bytes_per_block;
|
||||
|
||||
if (!src.is_tiled) {
|
||||
// Each row must be a multiple of 256 in linear textures.
|
||||
byte_pitch = xe::round_up(byte_pitch, 256);
|
||||
uint32_t TextureInfo::GetByteSize(bool is_guest) const {
|
||||
uint32_t length = 0;
|
||||
for (uint32_t mip = 0; mip < mip_levels; ++mip) {
|
||||
length += GetMipByteSize(mip, is_guest);
|
||||
}
|
||||
|
||||
return byte_pitch * block_height * (src.depth + 1);
|
||||
}
|
||||
|
||||
uint32_t TextureInfo::GetMipLinearSize(const TextureInfo& src, uint32_t mip) {
|
||||
uint32_t bytes_per_block = src.format_info()->block_width *
|
||||
src.format_info()->block_height *
|
||||
src.format_info()->bits_per_pixel / 8;
|
||||
uint32_t size = src.input_length >> (mip * 2);
|
||||
|
||||
// The size is a multiple of the block size.
|
||||
return xe::round_up(size, bytes_per_block) * (src.depth + 1);
|
||||
return length;
|
||||
}
|
||||
|
||||
bool TextureInfo::GetPackedTileOffset(uint32_t width, uint32_t height,
|
||||
const FormatInfo* format_info,
|
||||
int packed_tile, uint32_t* out_offset_x,
|
||||
uint32_t* out_offset_y) {
|
||||
int packed_tile, uint32_t* offset_x,
|
||||
uint32_t* offset_y) {
|
||||
// Tile size is 32x32, and once textures go <=16 they are packed into a
|
||||
// single tile together. The math here is insane. Most sourced
|
||||
// from graph paper and looking at dds dumps.
|
||||
|
@ -530,8 +302,8 @@ bool TextureInfo::GetPackedTileOffset(uint32_t width, uint32_t height,
|
|||
uint32_t log2_height = xe::log2_ceil(height);
|
||||
if (std::min(log2_width, log2_height) > 4) {
|
||||
// Too big, not packed.
|
||||
*out_offset_x = 0;
|
||||
*out_offset_y = 0;
|
||||
*offset_x = 0;
|
||||
*offset_y = 0;
|
||||
return false;
|
||||
}
|
||||
|
||||
|
@ -539,62 +311,40 @@ bool TextureInfo::GetPackedTileOffset(uint32_t width, uint32_t height,
|
|||
if (packed_tile < 3) {
|
||||
if (log2_width > log2_height) {
|
||||
// Wider than tall. Laid out vertically.
|
||||
*out_offset_x = 0;
|
||||
*out_offset_y = 16 >> packed_tile;
|
||||
*offset_x = 0;
|
||||
*offset_y = 16 >> packed_tile;
|
||||
} else {
|
||||
// Taller than wide. Laid out horizontally.
|
||||
*out_offset_x = 16 >> packed_tile;
|
||||
*out_offset_y = 0;
|
||||
*offset_x = 16 >> packed_tile;
|
||||
*offset_y = 0;
|
||||
}
|
||||
} else {
|
||||
if (log2_width > log2_height) {
|
||||
// Wider than tall. Laid out vertically.
|
||||
*out_offset_x = 16 >> (packed_tile - 2);
|
||||
*out_offset_y = 0;
|
||||
*offset_x = 16 >> (packed_tile - 2);
|
||||
*offset_y = 0;
|
||||
} else {
|
||||
// Taller than wide. Laid out horizontally.
|
||||
*out_offset_x = 0;
|
||||
*out_offset_y = 16 >> (packed_tile - 2);
|
||||
*offset_x = 0;
|
||||
*offset_y = 16 >> (packed_tile - 2);
|
||||
}
|
||||
}
|
||||
|
||||
*out_offset_x /= format_info->block_width;
|
||||
*out_offset_y /= format_info->block_height;
|
||||
*offset_x /= format_info->block_width;
|
||||
*offset_y /= format_info->block_height;
|
||||
return true;
|
||||
}
|
||||
|
||||
bool TextureInfo::GetPackedTileOffset(const TextureInfo& texture_info,
|
||||
int packed_tile, uint32_t* out_offset_x,
|
||||
uint32_t* out_offset_y) {
|
||||
if (!texture_info.has_packed_mips) {
|
||||
*out_offset_x = 0;
|
||||
*out_offset_y = 0;
|
||||
bool TextureInfo::GetPackedTileOffset(int packed_tile, uint32_t* offset_x,
|
||||
uint32_t* offset_y) const {
|
||||
if (!has_packed_mips) {
|
||||
*offset_x = 0;
|
||||
*offset_y = 0;
|
||||
return false;
|
||||
}
|
||||
return GetPackedTileOffset(xe::next_pow2(texture_info.size.logical_width),
|
||||
xe::next_pow2(texture_info.size.logical_height),
|
||||
texture_info.format_info(), packed_tile,
|
||||
out_offset_x, out_offset_y);
|
||||
}
|
||||
|
||||
// https://github.com/BinomialLLC/crunch/blob/ea9b8d8c00c8329791256adafa8cf11e4e7942a2/inc/crn_decomp.h#L4108
|
||||
// Computes the row-dependent part of a 2D tiled address. The result is fed
// to TiledOffset2DInner as its base offset.
//
// |y| is the row index, |width| the surface pitch (divided by 32 to count
// tiles per row) and |log2_bpp| the log2 of the element size in bytes.
uint32_t TextureInfo::TiledOffset2DOuter(uint32_t y, uint32_t width,
                                         uint32_t log2_bpp) {
  // Contribution of the whole 32-row tile rows above |y|.
  const uint32_t tiles_per_row = width / 32;
  const uint32_t macro_term = ((y / 32) * tiles_per_row) << (log2_bpp + 7);

  // Bits 1-2 of |y| scaled by the element size; everything above the low
  // nibble is shifted up one extra bit.
  const uint32_t micro_term = ((y & 6) << 2) << log2_bpp;
  const uint32_t micro_high = (micro_term & ~0xF) << 1;
  const uint32_t micro_low = micro_term & 0xF;

  // Bits 3 and 0 of |y| land at fixed positions of their own.
  const uint32_t y_bit3_term = (y & 8) << (3 + log2_bpp);
  const uint32_t y_bit0_term = (y & 1) << 4;

  return macro_term + micro_high + micro_low + y_bit3_term + y_bit0_term;
}
|
||||
|
||||
uint32_t TextureInfo::TiledOffset2DInner(uint32_t x, uint32_t y,
|
||||
uint32_t log2_bpp,
|
||||
uint32_t base_offset) {
|
||||
uint32_t macro = (x / 32) << (log2_bpp + 7);
|
||||
uint32_t micro = (x & 7) << log2_bpp;
|
||||
uint32_t offset =
|
||||
base_offset + (macro + ((micro & ~0xF) << 1) + (micro & 0xF));
|
||||
return ((offset & ~0x1FF) << 3) + ((offset & 0x1C0) << 2) + (offset & 0x3F) +
|
||||
((y & 16) << 7) + (((((y & 8) >> 2) + (x >> 3)) & 3) << 6);
|
||||
return GetPackedTileOffset(xe::next_pow2(width + 1),
|
||||
xe::next_pow2(height + 1), format_info(),
|
||||
packed_tile, offset_x, offset_y);
|
||||
}
|
||||
|
||||
uint64_t TextureInfo::hash() const {
|
||||
|
|
|
@ -279,36 +279,56 @@ struct FormatInfo {
|
|||
uint32_t block_height;
|
||||
uint32_t bits_per_pixel;
|
||||
|
||||
uint32_t bytes_per_block() const {
|
||||
return block_width * block_height * bits_per_pixel / 8;
|
||||
}
|
||||
|
||||
static const FormatInfo* Get(uint32_t gpu_format);
|
||||
|
||||
static const FormatInfo* Get(TextureFormat format) {
|
||||
return Get(static_cast<uint32_t>(format));
|
||||
}
|
||||
};
|
||||
|
||||
struct TextureInfo;
|
||||
|
||||
struct TextureMemoryUsage {
|
||||
uint32_t pitch; // texel pitch
|
||||
uint32_t height; // texel height
|
||||
uint32_t block_pitch; // # of horizontal pitch blocks
|
||||
uint32_t block_height; // # of vertical blocks
|
||||
uint32_t depth;
|
||||
|
||||
uint32_t blocks() const { return block_pitch * block_height * depth; }
|
||||
|
||||
static TextureMemoryUsage Calculate(const FormatInfo* format_info,
|
||||
uint32_t pitch, uint32_t height,
|
||||
uint32_t depth, bool is_tiled,
|
||||
bool is_guest);
|
||||
static TextureMemoryUsage Calculate(const TextureInfo* texture_info,
|
||||
bool is_guest);
|
||||
};
|
||||
|
||||
struct TextureInfo {
|
||||
uint32_t guest_address;
|
||||
TextureFormat texture_format;
|
||||
TextureFormat format;
|
||||
Endian endianness;
|
||||
|
||||
Dimension dimension;
|
||||
uint32_t pitch; // pitch in blocks
|
||||
uint32_t width; // width in pixels
|
||||
uint32_t height; // height in pixels
|
||||
uint32_t depth; // depth in layers
|
||||
Endian endianness;
|
||||
uint32_t pitch; // pitch in blocks
|
||||
uint32_t mip_levels;
|
||||
bool is_tiled;
|
||||
bool has_packed_mips;
|
||||
uint32_t mip_address;
|
||||
uint32_t mip_levels;
|
||||
uint32_t input_length;
|
||||
|
||||
struct Size {
|
||||
uint32_t logical_width;
|
||||
uint32_t logical_height;
|
||||
uint32_t block_width; // # of horizontal blocks
|
||||
uint32_t block_height; // # of vertical blocks
|
||||
uint32_t input_width; // (full) texel pitch
|
||||
uint32_t input_height; // (full) texel height
|
||||
uint32_t input_face_length; // byte length of face
|
||||
} size;
|
||||
TextureMemoryUsage memory_usage;
|
||||
|
||||
uint32_t guest_address;
|
||||
uint32_t mip_address;
|
||||
|
||||
const FormatInfo* format_info() const {
|
||||
return FormatInfo::Get(static_cast<uint32_t>(texture_format));
|
||||
return FormatInfo::Get(static_cast<uint32_t>(format));
|
||||
}
|
||||
|
||||
bool is_compressed() const {
|
||||
|
@ -323,47 +343,32 @@ struct TextureInfo {
|
|||
uint32_t pitch, uint32_t width, uint32_t height,
|
||||
TextureInfo* out_info);
|
||||
|
||||
static void ConvertTiled(uint8_t* dest, const uint8_t* src, Endian endian,
|
||||
const FormatInfo* format_info, uint32_t offset_x,
|
||||
uint32_t offset_y, uint32_t block_pitch,
|
||||
uint32_t width, uint32_t height,
|
||||
uint32_t output_width);
|
||||
uint32_t GetMaxMipLevels() const;
|
||||
|
||||
static uint32_t GetMaxMipLevels(uint32_t width, uint32_t height,
|
||||
uint32_t depth);
|
||||
const TextureMemoryUsage GetMipMemoryUsage(uint32_t mip, bool is_guest) const;
|
||||
|
||||
void GetMipSize(uint32_t mip, uint32_t* width, uint32_t* height) const;
|
||||
|
||||
// Get the memory location of a mip. offset_x and offset_y are in blocks.
|
||||
static uint32_t GetMipLocation(const TextureInfo& src, uint32_t mip,
|
||||
uint32_t* offset_x, uint32_t* offset_y);
|
||||
static uint32_t GetMipByteSize(const TextureInfo& src, uint32_t mip);
|
||||
static uint32_t GetMipSizes(const TextureInfo& src, uint32_t mip);
|
||||
uint32_t GetMipLocation(uint32_t mip, uint32_t* offset_x, uint32_t* offset_y,
|
||||
bool is_guest) const;
|
||||
|
||||
// Get the byte size of a MIP when stored linearly.
|
||||
static uint32_t GetMipLinearSize(const TextureInfo& src, uint32_t mip);
|
||||
uint32_t GetMipByteSize(uint32_t mip, bool is_guest) const;
|
||||
|
||||
uint32_t GetByteSize(bool is_guest) const;
|
||||
|
||||
static bool GetPackedTileOffset(uint32_t width, uint32_t height,
|
||||
const FormatInfo* format_info,
|
||||
int packed_tile, uint32_t* out_offset_x,
|
||||
uint32_t* out_offset_y);
|
||||
static bool GetPackedTileOffset(const TextureInfo& texture_info,
|
||||
int packed_tile, uint32_t* out_offset_x,
|
||||
uint32_t* out_offset_y);
|
||||
static uint32_t TiledOffset2DOuter(uint32_t y, uint32_t width,
|
||||
uint32_t log2_bpp);
|
||||
static uint32_t TiledOffset2DInner(uint32_t x, uint32_t y, uint32_t log2_bpp,
|
||||
uint32_t base_offset);
|
||||
int packed_tile, uint32_t* offset_x,
|
||||
uint32_t* offset_y);
|
||||
|
||||
bool GetPackedTileOffset(int packed_tile, uint32_t* offset_x,
|
||||
uint32_t* offset_y) const;
|
||||
|
||||
uint64_t hash() const;
|
||||
// Equality is a raw byte-for-byte comparison of the two structs.
// NOTE(review): this also compares any padding bytes, so it presumably relies
// on instances being zero-filled before their fields are set - confirm.
bool operator==(const TextureInfo& other) const {
  const int difference = std::memcmp(this, &other, sizeof(*this));
  return difference == 0;
}
|
||||
|
||||
private:
|
||||
void CalculateTextureSizes1D(uint32_t width);
|
||||
void CalculateTextureSizes2D(uint32_t width, uint32_t height);
|
||||
void CalculateTextureSizes3D(uint32_t width, uint32_t height, uint32_t depth);
|
||||
void CalculateTextureSizesCube(uint32_t width, uint32_t height,
|
||||
uint32_t depth);
|
||||
};
|
||||
|
||||
} // namespace gpu
|
||||
|
|
|
@ -0,0 +1,80 @@
|
|||
/**
|
||||
******************************************************************************
|
||||
* Xenia : Xbox 360 Emulator Research Project *
|
||||
******************************************************************************
|
||||
* Copyright 2014 Ben Vanik. All rights reserved. *
|
||||
* Released under the BSD license - see LICENSE in the root for more details. *
|
||||
******************************************************************************
|
||||
*/
|
||||
|
||||
#include "xenia/gpu/texture_info.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <cmath>
|
||||
|
||||
#include "xenia/base/math.h"
|
||||
|
||||
namespace xe {
|
||||
namespace gpu {
|
||||
|
||||
using namespace xe::gpu::xenos;
|
||||
|
||||
// Derives the texel and block extents of a texture level.
//
// format_info supplies the block dimensions and the bytes per block. pitch
// and height are in texels; depth is stored unchanged.
//
// When is_guest is set, the block counts are padded to whole 32x32-block
// tiles and, for non-tiled textures, each row is additionally padded to a
// multiple of 256 bytes; pitch and height are then recomputed from the padded
// block counts. When is_guest is not set, is_tiled is not consulted.
static TextureMemoryUsage CalculateMemoryUsage(const FormatInfo* format_info,
                                               uint32_t pitch, uint32_t height,
                                               uint32_t depth, bool is_tiled,
                                               bool is_guest) {
  assert_not_null(format_info);

  TextureMemoryUsage usage;

  usage.pitch = pitch;
  usage.height = height;
  // Number of blocks needed to cover the texel extents (rounded up).
  usage.block_pitch = xe::round_up(usage.pitch, format_info->block_width) /
                      format_info->block_width;
  usage.block_height = xe::round_up(usage.height, format_info->block_height) /
                       format_info->block_height;
  usage.depth = depth;

  if (is_guest) {
    // Texture dimensions must be a multiple of tile
    // dimensions (32x32 blocks).
    usage.block_pitch = xe::round_up(usage.block_pitch, 32);
    usage.block_height = xe::round_up(usage.block_height, 32);

    usage.pitch = usage.block_pitch * format_info->block_width;
    usage.height = usage.block_height * format_info->block_height;

    if (!is_tiled) {
      // Each row must be a multiple of 256 bytes in linear textures.
      const uint32_t bytes_per_block = format_info->bytes_per_block();
      const uint32_t byte_pitch =
          xe::round_up(usage.block_pitch * bytes_per_block, 256);
      usage.block_pitch = byte_pitch / bytes_per_block;
      usage.pitch = usage.block_pitch * format_info->block_width;
    }

    // TODO: Is depth special? It is currently left exactly as passed in.
  }

  return usage;
}
|
||||
|
||||
// Public entry point for explicit dimensions; all arguments are forwarded
// unchanged to the file-local CalculateMemoryUsage helper.
TextureMemoryUsage TextureMemoryUsage::Calculate(const FormatInfo* format_info,
                                                 uint32_t pitch,
                                                 uint32_t height,
                                                 uint32_t depth, bool is_tiled,
                                                 bool is_guest) {
  TextureMemoryUsage result = CalculateMemoryUsage(
      format_info, pitch, height, depth, is_tiled, is_guest);
  return result;
}
|
||||
|
||||
// Convenience overload that takes its inputs from a TextureInfo. The stored
// height and depth are passed on with 1 added; pitch is passed as stored.
// NOTE(review): TextureInfo::pitch is commented as "pitch in blocks" while
// CalculateMemoryUsage divides its pitch by the block width - confirm the
// units agree.
TextureMemoryUsage TextureMemoryUsage::Calculate(const TextureInfo* info,
                                                 bool is_guest) {
  assert_not_null(info);

  const uint32_t full_height = info->height + 1;
  const uint32_t full_depth = info->depth + 1;
  return CalculateMemoryUsage(info->format_info(), info->pitch, full_height,
                              full_depth, info->is_tiled, is_guest);
}
|
||||
|
||||
} // namespace gpu
|
||||
} // namespace xe
|
|
@ -15,6 +15,7 @@
|
|||
#include "xenia/base/profiling.h"
|
||||
#include "xenia/gpu/gpu_flags.h"
|
||||
#include "xenia/gpu/sampler_info.h"
|
||||
#include "xenia/gpu/texture_conversion.h"
|
||||
#include "xenia/gpu/texture_info.h"
|
||||
#include "xenia/gpu/vulkan/vulkan_gpu_flags.h"
|
||||
#include "xenia/ui/vulkan/vulkan_mem_alloc.h"
|
||||
|
@ -38,6 +39,7 @@ struct TextureConfig {
|
|||
|
||||
#define SWIZ(r, g, b, a) r, g, b, a
|
||||
#define ___R SWIZ(-7, -7, -7, 0)
|
||||
#define ___A SWIZ(-7, -7, -7, 3)
|
||||
#define RRRR SWIZ(0, 0, 0, 0)
|
||||
#define RRRA SWIZ(0, 0, 0, 3)
|
||||
#define RGBA SWIZ(0, 1, 2, 3)
|
||||
|
@ -117,8 +119,8 @@ static const TextureConfig texture_configs[64] = {
|
|||
/* k_10_11_11_AS_16_16_16_16 */ {VK_FORMAT_B10G11R11_UFLOAT_PACK32}, // ?
|
||||
/* k_11_11_10_AS_16_16_16_16 */ {VK_FORMAT_B10G11R11_UFLOAT_PACK32}, // ?
|
||||
/* k_32_32_32_FLOAT */ {VK_FORMAT_R32G32B32_SFLOAT},
|
||||
/* k_DXT3A */ {VK_FORMAT_UNDEFINED},
|
||||
/* k_DXT5A */ {VK_FORMAT_BC4_UNORM_BLOCK, RRRR}, // ATI1N
|
||||
/* k_DXT3A */ {VK_FORMAT_BC2_UNORM_BLOCK, ___A},
|
||||
/* k_DXT5A */ {VK_FORMAT_BC4_UNORM_BLOCK, ___R}, // ATI1N
|
||||
|
||||
// http://fileadmin.cs.lth.se/cs/Personal/Michael_Doggett/talks/unc-xenos-doggett.pdf
|
||||
/* k_CTX1 */ {VK_FORMAT_R8G8_UINT},
|
||||
|
@ -137,6 +139,7 @@ static const TextureConfig texture_configs[64] = {
|
|||
#undef RGBA
|
||||
#undef RRRA
|
||||
#undef RRRR
|
||||
#undef ___A
|
||||
#undef ___R
|
||||
#undef SWIZ
|
||||
|
||||
|
@ -438,15 +441,15 @@ TextureCache::Texture* TextureCache::DemandResolveTexture(
|
|||
|
||||
// Tell the trace writer to "cache" this memory (but not read it)
|
||||
trace_writer_->WriteMemoryReadCachedNop(texture_info.guest_address,
|
||||
texture_info.input_length);
|
||||
texture_info.GetByteSize(true));
|
||||
|
||||
return it->second;
|
||||
}
|
||||
}
|
||||
|
||||
VkFormatFeatureFlags required_flags = VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT;
|
||||
if (texture_info.texture_format == TextureFormat::k_24_8 ||
|
||||
texture_info.texture_format == TextureFormat::k_24_8_FLOAT) {
|
||||
if (texture_info.format == TextureFormat::k_24_8 ||
|
||||
texture_info.format == TextureFormat::k_24_8_FLOAT) {
|
||||
required_flags |= VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT;
|
||||
} else {
|
||||
required_flags |= VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT;
|
||||
|
@ -466,11 +469,11 @@ TextureCache::Texture* TextureCache::DemandResolveTexture(
|
|||
VK_DEBUG_REPORT_OBJECT_TYPE_IMAGE_EXT,
|
||||
xe::format_string(
|
||||
"RT: 0x%.8X - 0x%.8X", texture_info.guest_address,
|
||||
texture_info.guest_address + texture_info.input_length));
|
||||
texture_info.guest_address + texture_info.GetByteSize(true)));
|
||||
|
||||
// Setup an access watch. If this texture is touched, it is destroyed.
|
||||
texture->access_watch_handle = memory_->AddPhysicalAccessWatch(
|
||||
texture_info.guest_address, texture_info.input_length,
|
||||
texture_info.guest_address, texture_info.GetByteSize(true),
|
||||
cpu::MMIOHandler::kWatchWrite, &WatchCallback, this, texture);
|
||||
|
||||
textures_[texture_hash] = texture;
|
||||
|
@ -492,7 +495,7 @@ TextureCache::Texture* TextureCache::Demand(const TextureInfo& texture_info,
|
|||
}
|
||||
|
||||
trace_writer_->WriteMemoryReadCached(texture_info.guest_address,
|
||||
texture_info.input_length);
|
||||
texture_info.GetByteSize(true));
|
||||
|
||||
return it->second;
|
||||
}
|
||||
|
@ -525,12 +528,12 @@ TextureCache::Texture* TextureCache::Demand(const TextureInfo& texture_info,
|
|||
}
|
||||
|
||||
trace_writer_->WriteMemoryRead(texture_info.guest_address,
|
||||
texture_info.input_length);
|
||||
texture_info.GetByteSize(true));
|
||||
|
||||
// Okay. Put a writewatch on it to tell us if it's been modified from the
|
||||
// guest.
|
||||
texture->access_watch_handle = memory_->AddPhysicalAccessWatch(
|
||||
texture_info.guest_address, texture_info.input_length,
|
||||
texture_info.guest_address, texture_info.GetByteSize(true),
|
||||
cpu::MMIOHandler::kWatchWrite, &WatchCallback, this, texture);
|
||||
|
||||
if (!UploadTexture(command_buffer, completion_fence, texture, texture_info)) {
|
||||
|
@ -542,9 +545,9 @@ TextureCache::Texture* TextureCache::Demand(const TextureInfo& texture_info,
|
|||
device_->DbgSetObjectName(
|
||||
reinterpret_cast<uint64_t>(texture->image),
|
||||
VK_DEBUG_REPORT_OBJECT_TYPE_IMAGE_EXT,
|
||||
xe::format_string("T: 0x%.8X - 0x%.8X (%s, %s)",
|
||||
texture_info.guest_address,
|
||||
texture_info.guest_address + texture_info.input_length,
|
||||
xe::format_string(
|
||||
"T: 0x%.8X - 0x%.8X (%s, %s)", texture_info.guest_address,
|
||||
texture_info.guest_address + texture_info.GetByteSize(true),
|
||||
texture_info.format_info()->name,
|
||||
get_dimension_name(texture_info.dimension)));
|
||||
|
||||
|
@ -561,8 +564,7 @@ TextureCache::TextureView* TextureCache::DemandView(Texture* texture,
|
|||
}
|
||||
}
|
||||
|
||||
auto& config =
|
||||
texture_configs[uint32_t(texture->texture_info.texture_format)];
|
||||
auto& config = texture_configs[uint32_t(texture->texture_info.format)];
|
||||
|
||||
VkImageViewCreateInfo view_info;
|
||||
view_info.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO;
|
||||
|
@ -821,10 +823,10 @@ TextureCache::Texture* TextureCache::Lookup(const TextureInfo& texture_info) {
|
|||
COMPARE_FIELD(depth);
|
||||
COMPARE_FIELD(endianness);
|
||||
COMPARE_FIELD(is_tiled);
|
||||
COMPARE_FIELD(input_length);
|
||||
COMPARE_FIELD(GetByteSize(true));
|
||||
#undef COMPARE_FIELD
|
||||
if (!TextureFormatIsSimilar(texture_info.texture_format,
|
||||
other_texture_info.texture_format)) {
|
||||
if (!TextureFormatIsSimilar(texture_info.format,
|
||||
other_texture_info.format)) {
|
||||
continue;
|
||||
}
|
||||
/*const auto format_info = texture_info.format_info();
|
||||
|
@ -850,9 +852,9 @@ TextureCache::Texture* TextureCache::LookupAddress(uint32_t guest_address,
|
|||
const auto& texture_info = it->second->texture_info;
|
||||
if (guest_address >= texture_info.guest_address &&
|
||||
guest_address <
|
||||
texture_info.guest_address + texture_info.input_length &&
|
||||
texture_info.size.input_width >= width &&
|
||||
texture_info.size.input_height >= height && out_offset) {
|
||||
texture_info.guest_address + texture_info.GetByteSize(true) &&
|
||||
texture_info.pitch >= width && texture_info.height >= height &&
|
||||
out_offset) {
|
||||
auto offset_bytes = guest_address - texture_info.guest_address;
|
||||
|
||||
if (texture_info.dimension == Dimension::k2D) {
|
||||
|
@ -868,8 +870,7 @@ TextureCache::Texture* TextureCache::LookupAddress(uint32_t guest_address,
|
|||
|
||||
if (texture_info.guest_address == guest_address &&
|
||||
texture_info.dimension == Dimension::k2D &&
|
||||
texture_info.size.input_width == width &&
|
||||
texture_info.size.input_height == height) {
|
||||
texture_info.pitch == width && texture_info.height == height) {
|
||||
if (out_offset) {
|
||||
out_offset->x = 0;
|
||||
out_offset->y = 0;
|
||||
|
@ -882,25 +883,6 @@ TextureCache::Texture* TextureCache::LookupAddress(uint32_t guest_address,
|
|||
return nullptr;
|
||||
}
|
||||
|
||||
// Copies a block from src to dest, applying the swap selected by endianness.
// Any value other than the three swap modes falls back to a plain memcpy of
// length bytes.
// NOTE(review): the k8in16 and k8in32 cases pass length / 2 and length / 4,
// while the k16in32 case passes length undivided - confirm which unit
// copy_and_swap_16_in_32_unaligned expects.
void TextureSwap(Endian endianness, void* dest, const void* src,
                 size_t length) {
  if (endianness == Endian::k8in16) {
    xe::copy_and_swap_16_unaligned(dest, src, length / 2);
  } else if (endianness == Endian::k8in32) {
    xe::copy_and_swap_32_unaligned(dest, src, length / 4);
  } else if (endianness == Endian::k16in32) {
    // Swap high and low 16 bits within a 32 bit word
    xe::copy_and_swap_16_in_32_unaligned(dest, src, length);
  } else {
    // Endian::kUnspecified and anything unrecognized: no swapping.
    std::memcpy(dest, src, length);
  }
}
|
||||
|
||||
void TextureCache::FlushPendingCommands(VkCommandBuffer command_buffer,
|
||||
VkFence completion_fence) {
|
||||
auto status = vkEndCommandBuffer(command_buffer);
|
||||
|
@ -942,60 +924,55 @@ bool TextureCache::ConvertTexture2D(uint8_t* dest,
|
|||
uint32_t mip, const TextureInfo& src) {
|
||||
uint32_t offset_x = 0;
|
||||
uint32_t offset_y = 0;
|
||||
uint32_t address =
|
||||
TextureInfo::GetMipLocation(src, mip, &offset_x, &offset_y);
|
||||
uint32_t address = src.GetMipLocation(mip, &offset_x, &offset_y, true);
|
||||
void* host_address = memory_->TranslatePhysical(address);
|
||||
|
||||
// Pitch of the source texture in blocks.
|
||||
uint32_t block_width;
|
||||
if (mip == 0) {
|
||||
block_width = src.size.block_width;
|
||||
} else {
|
||||
block_width = xe::next_pow2(src.size.block_width) >> mip;
|
||||
block_width = xe::round_up(block_width, 32);
|
||||
}
|
||||
auto src_usage = src.GetMipMemoryUsage(mip, true);
|
||||
auto dst_usage = GetMipMemoryUsage(src, mip);
|
||||
|
||||
uint32_t logical_width = src.size.logical_width >> mip;
|
||||
uint32_t logical_height = src.size.logical_height >> mip;
|
||||
uint32_t input_width = src.size.input_width >> mip;
|
||||
uint32_t input_height = src.size.input_height >> mip;
|
||||
uint32_t mip_width, mip_height;
|
||||
src.GetMipSize(mip, &mip_width, &mip_height);
|
||||
|
||||
// All dimensions must be a multiple of block w/h
|
||||
logical_width = xe::round_up(logical_width, src.format_info()->block_width);
|
||||
logical_height =
|
||||
xe::round_up(logical_height, src.format_info()->block_height);
|
||||
input_width = xe::round_up(input_width, src.format_info()->block_width);
|
||||
input_height = xe::round_up(input_height, src.format_info()->block_height);
|
||||
auto copy_block = GetFormatCopyBlock(src.format);
|
||||
|
||||
if (!src.is_tiled) {
|
||||
uint32_t bytes_per_block = src.format_info()->block_width *
|
||||
src.format_info()->block_height *
|
||||
src.format_info()->bits_per_pixel / 8;
|
||||
uint32_t src_pitch = xe::round_up(block_width * bytes_per_block, 256);
|
||||
uint32_t src_pitch =
|
||||
src_usage.block_pitch * src.format_info()->bytes_per_block();
|
||||
uint32_t dst_pitch =
|
||||
(input_width / src.format_info()->block_width) * bytes_per_block;
|
||||
assert_true(dst_pitch <= src_pitch);
|
||||
dst_usage.block_pitch * GetFormatInfo(src.format)->bytes_per_block();
|
||||
const uint8_t* src_mem = reinterpret_cast<const uint8_t*>(host_address);
|
||||
src_mem += offset_y * src_pitch;
|
||||
src_mem += offset_x * bytes_per_block;
|
||||
for (uint32_t y = 0; y < src.size.block_height; y++) {
|
||||
TextureSwap(src.endianness, dest + y * dst_pitch, src_mem + y * src_pitch,
|
||||
src_mem += offset_x * src.format_info()->bytes_per_block();
|
||||
for (uint32_t y = 0; y < dst_usage.block_height; y++) {
|
||||
copy_block(src.endianness, dest + y * dst_pitch, src_mem + y * src_pitch,
|
||||
dst_pitch);
|
||||
}
|
||||
} else {
|
||||
// Untile image.
|
||||
// We could do this in a shader to speed things up, as this is pretty
|
||||
// slow.
|
||||
// We could do this in a shader to speed things up, as this is pretty slow.
|
||||
|
||||
const uint8_t* src_mem = reinterpret_cast<const uint8_t*>(host_address);
|
||||
TextureInfo::ConvertTiled(dest, src_mem, src.endianness, src.format_info(),
|
||||
offset_x, offset_y, block_width, logical_width,
|
||||
logical_height, input_width);
|
||||
|
||||
texture_conversion::UntileInfo untile_info;
|
||||
std::memset(&untile_info, 0, sizeof(untile_info));
|
||||
untile_info.offset_x = offset_x;
|
||||
untile_info.offset_y = offset_y;
|
||||
untile_info.width = dst_usage.block_pitch;
|
||||
untile_info.height = dst_usage.block_height;
|
||||
untile_info.input_pitch = src_usage.block_pitch;
|
||||
untile_info.output_pitch = dst_usage.block_pitch;
|
||||
untile_info.input_format_info = src.format_info();
|
||||
untile_info.output_format_info = GetFormatInfo(src.format);
|
||||
untile_info.copy_callback = [=](auto o, auto i, auto l) {
|
||||
copy_block(src.endianness, o, i, l);
|
||||
};
|
||||
texture_conversion::Untile(dest, src_mem, &untile_info);
|
||||
}
|
||||
|
||||
copy_region->bufferRowLength = input_width;
|
||||
copy_region->bufferImageHeight = input_height;
|
||||
copy_region->bufferRowLength = dst_usage.pitch;
|
||||
copy_region->bufferImageHeight = dst_usage.height;
|
||||
copy_region->imageSubresource = {VK_IMAGE_ASPECT_COLOR_BIT, mip, 0, 1};
|
||||
copy_region->imageExtent = {logical_width, logical_height, 1};
|
||||
copy_region->imageExtent = {mip_width, mip_height, 1};
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -1004,77 +981,60 @@ bool TextureCache::ConvertTextureCube(uint8_t* dest,
|
|||
uint32_t mip, const TextureInfo& src) {
|
||||
uint32_t offset_x = 0;
|
||||
uint32_t offset_y = 0;
|
||||
uint32_t address =
|
||||
TextureInfo::GetMipLocation(src, mip, &offset_x, &offset_y);
|
||||
uint32_t address = src.GetMipLocation(mip, &offset_x, &offset_y, true);
|
||||
void* host_address = memory_->TranslatePhysical(address);
|
||||
|
||||
// Pitch of the source texture in blocks.
|
||||
uint32_t block_width, block_height, input_block_height;
|
||||
if (mip == 0) {
|
||||
block_width = src.size.block_width;
|
||||
input_block_height = block_height = src.size.block_height;
|
||||
} else {
|
||||
block_width = xe::next_pow2(src.size.block_width) >> mip;
|
||||
block_width = xe::round_up(block_width, 32);
|
||||
block_height = xe::next_pow2(src.size.block_height) >> mip;
|
||||
input_block_height = block_height;
|
||||
block_height = xe::round_up(block_height, 32);
|
||||
}
|
||||
auto src_usage = src.GetMipMemoryUsage(mip, true);
|
||||
auto dst_usage = GetMipMemoryUsage(src, mip);
|
||||
|
||||
uint32_t logical_width = src.size.logical_width >> mip;
|
||||
uint32_t logical_height = src.size.logical_height >> mip;
|
||||
uint32_t input_width = src.size.input_width >> mip;
|
||||
uint32_t input_height = src.size.input_height >> mip;
|
||||
uint32_t src_pitch =
|
||||
src_usage.block_pitch * src.format_info()->bytes_per_block();
|
||||
uint32_t dst_pitch =
|
||||
dst_usage.block_pitch * GetFormatInfo(src.format)->bytes_per_block();
|
||||
|
||||
// All dimensions must be a multiple of block w/h
|
||||
logical_width = xe::round_up(logical_width, src.format_info()->block_width);
|
||||
logical_height =
|
||||
xe::round_up(logical_height, src.format_info()->block_height);
|
||||
input_width = xe::round_up(input_width, src.format_info()->block_width);
|
||||
input_height = xe::round_up(input_height, src.format_info()->block_height);
|
||||
uint32_t mip_width, mip_height;
|
||||
src.GetMipSize(mip, &mip_width, &mip_height);
|
||||
|
||||
auto copy_block = GetFormatCopyBlock(src.format);
|
||||
|
||||
if (!src.is_tiled) {
|
||||
uint32_t bytes_per_block = src.format_info()->block_width *
|
||||
src.format_info()->block_height *
|
||||
src.format_info()->bits_per_pixel / 8;
|
||||
uint32_t src_pitch = xe::round_up(block_width * bytes_per_block, 256);
|
||||
uint32_t dst_pitch =
|
||||
(input_width / src.format_info()->block_width) * bytes_per_block;
|
||||
assert_true(dst_pitch <= src_pitch);
|
||||
const uint8_t* src_mem = reinterpret_cast<const uint8_t*>(host_address);
|
||||
for (int face = 0; face < 6; face++) {
|
||||
src_mem += offset_y * src_pitch;
|
||||
src_mem += offset_x * bytes_per_block;
|
||||
for (uint32_t y = 0; y < block_height; y++) {
|
||||
TextureSwap(src.endianness, dest + y * dst_pitch,
|
||||
src_mem += offset_x * src.format_info()->bytes_per_block();
|
||||
for (uint32_t y = 0; y < dst_usage.block_height; y++) {
|
||||
copy_block(src.endianness, dest + y * dst_pitch,
|
||||
src_mem + y * src_pitch, dst_pitch);
|
||||
}
|
||||
src_mem += src_pitch * block_height;
|
||||
dest += dst_pitch * input_block_height;
|
||||
src_mem += src_pitch * src_usage.block_height;
|
||||
dest += dst_pitch * dst_usage.block_height;
|
||||
}
|
||||
} else {
|
||||
// TODO(benvanik): optimize this inner loop (or work by tiles).
|
||||
uint32_t bytes_per_block = src.format_info()->block_width *
|
||||
src.format_info()->block_height *
|
||||
src.format_info()->bits_per_pixel / 8;
|
||||
uint32_t src_pitch = block_width * bytes_per_block;
|
||||
uint32_t dst_pitch =
|
||||
(input_width / src.format_info()->block_width) * bytes_per_block;
|
||||
assert_true(dst_pitch <= src_pitch);
|
||||
const uint8_t* src_mem = reinterpret_cast<const uint8_t*>(host_address);
|
||||
for (int face = 0; face < 6; face++) {
|
||||
TextureInfo::ConvertTiled(
|
||||
dest, src_mem, src.endianness, src.format_info(), offset_x, offset_y,
|
||||
block_width, logical_width, logical_height, input_width);
|
||||
src_mem += src_pitch * block_height;
|
||||
dest += dst_pitch * input_block_height;
|
||||
texture_conversion::UntileInfo untile_info;
|
||||
std::memset(&untile_info, 0, sizeof(untile_info));
|
||||
untile_info.offset_x = offset_x;
|
||||
untile_info.offset_y = offset_y;
|
||||
untile_info.width = dst_usage.block_pitch;
|
||||
untile_info.height = dst_usage.block_height;
|
||||
untile_info.input_pitch = src_usage.block_pitch;
|
||||
untile_info.output_pitch = dst_usage.block_pitch;
|
||||
untile_info.input_format_info = src.format_info();
|
||||
untile_info.output_format_info = GetFormatInfo(src.format);
|
||||
untile_info.copy_callback = [=](auto o, auto i, auto l) {
|
||||
copy_block(src.endianness, o, i, l);
|
||||
};
|
||||
|
||||
src_mem += src_pitch * src_usage.block_height;
|
||||
dest += dst_pitch * dst_usage.block_height;
|
||||
}
|
||||
}
|
||||
|
||||
copy_region->bufferRowLength = input_width;
|
||||
copy_region->bufferImageHeight = input_height;
|
||||
copy_region->bufferRowLength = dst_usage.pitch;
|
||||
copy_region->bufferImageHeight = dst_usage.height;
|
||||
copy_region->imageSubresource = {VK_IMAGE_ASPECT_COLOR_BIT, mip, 0, 6};
|
||||
copy_region->imageExtent = {logical_width, logical_height, 1};
|
||||
copy_region->imageExtent = {mip_width, mip_height, 1};
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -1102,53 +1062,49 @@ bool TextureCache::UploadTexture(VkCommandBuffer command_buffer,
|
|||
SCOPE_profile_cpu_f("gpu");
|
||||
#endif // FINE_GRAINED_DRAW_SCOPES
|
||||
|
||||
size_t unpack_length = ComputeTextureStorage(src);
|
||||
|
||||
XELOGGPU(
|
||||
"Uploading texture @ 0x%.8X (%dx%d, length: 0x%.8X, format: %s, dim: %s, "
|
||||
"levels: %d, tiled: %s)",
|
||||
src.guest_address, src.width + 1, src.height + 1, src.input_length,
|
||||
src.guest_address, src.width + 1, src.height + 1, unpack_length,
|
||||
src.format_info()->name, get_dimension_name(src.dimension),
|
||||
src.mip_levels, src.is_tiled ? "yes" : "no");
|
||||
size_t unpack_length;
|
||||
if (!ComputeTextureStorage(&unpack_length, src)) {
|
||||
XELOGW("Failed to compute texture storage");
|
||||
|
||||
if (!unpack_length) {
|
||||
XELOGW("Failed to compute texture storage!");
|
||||
return false;
|
||||
}
|
||||
|
||||
size_t total_unpack_length = unpack_length;
|
||||
for (uint32_t i = 1; i < src.mip_levels; i++) {
|
||||
// Add in more space for mips.
|
||||
total_unpack_length += TextureInfo::GetMipLinearSize(src, i);
|
||||
}
|
||||
|
||||
if (!staging_buffer_.CanAcquire(total_unpack_length)) {
|
||||
if (!staging_buffer_.CanAcquire(unpack_length)) {
|
||||
// Need to have unique memory for every upload for at least one frame. If we
|
||||
// run out of memory, we need to flush all queued upload commands to the
|
||||
// GPU.
|
||||
FlushPendingCommands(command_buffer, completion_fence);
|
||||
|
||||
// Uploads have been flushed. Continue.
|
||||
if (!staging_buffer_.CanAcquire(total_unpack_length)) {
|
||||
if (!staging_buffer_.CanAcquire(unpack_length)) {
|
||||
// The staging buffer isn't big enough to hold this texture.
|
||||
XELOGE(
|
||||
"TextureCache staging buffer is too small! (uploading 0x%.8X bytes)",
|
||||
total_unpack_length);
|
||||
unpack_length);
|
||||
assert_always();
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
// Grab some temporary memory for staging.
|
||||
auto alloc = staging_buffer_.Acquire(total_unpack_length, completion_fence);
|
||||
auto alloc = staging_buffer_.Acquire(unpack_length, completion_fence);
|
||||
assert_not_null(alloc);
|
||||
if (!alloc) {
|
||||
XELOGE("%s: Failed to acquire staging memory", __func__);
|
||||
XELOGE("%s: Failed to acquire staging memory!", __func__);
|
||||
return false;
|
||||
}
|
||||
|
||||
// DEBUG: Check the source address. If it's completely zero'd out, print it.
|
||||
bool valid = false;
|
||||
auto src_data = memory_->TranslatePhysical(src.guest_address);
|
||||
for (uint32_t i = 0; i < src.input_length; i++) {
|
||||
for (uint32_t i = 0; i < unpack_length; i++) {
|
||||
if (src_data[i] != 0) {
|
||||
valid = true;
|
||||
break;
|
||||
|
@ -1175,7 +1131,7 @@ bool TextureCache::UploadTexture(VkCommandBuffer command_buffer,
|
|||
copy_regions[0].imageOffset = {0, 0, 0};
|
||||
|
||||
// Now upload all the MIPs
|
||||
VkDeviceSize buffer_offset = unpack_length;
|
||||
VkDeviceSize buffer_offset = ComputeMipStorage(src, 0);
|
||||
for (uint32_t mip = 1; mip < src.mip_levels; mip++) {
|
||||
uint8_t* dest = reinterpret_cast<uint8_t*>(alloc->host_ptr) + buffer_offset;
|
||||
if (!ConvertTexture(dest, ©_regions[mip], mip, src)) {
|
||||
|
@ -1186,7 +1142,7 @@ bool TextureCache::UploadTexture(VkCommandBuffer command_buffer,
|
|||
copy_regions[mip].imageOffset = {0, 0, 0};
|
||||
|
||||
// With each mip, the length is divided by 4.
|
||||
buffer_offset += TextureInfo::GetMipLinearSize(src, mip);
|
||||
buffer_offset += ComputeMipStorage(src, mip);
|
||||
}
|
||||
|
||||
// Transition the texture into a transfer destination layout.
|
||||
|
@ -1240,30 +1196,81 @@ bool TextureCache::UploadTexture(VkCommandBuffer command_buffer,
|
|||
return true;
|
||||
}
|
||||
|
||||
bool TextureCache::ComputeTextureStorage(size_t* output_length,
|
||||
const TextureInfo& src) {
|
||||
if (src.texture_format == TextureFormat::k_CTX1) {
|
||||
switch (src.dimension) {
|
||||
case Dimension::k1D: {
|
||||
assert_always();
|
||||
} break;
|
||||
case Dimension::k2D: {
|
||||
*output_length = src.size.input_width * src.size.input_height * 2;
|
||||
return true;
|
||||
}
|
||||
case Dimension::k3D: {
|
||||
assert_always();
|
||||
} break;
|
||||
case Dimension::kCube: {
|
||||
*output_length = src.size.input_width * src.size.input_height * 2 * 6;
|
||||
return true;
|
||||
const FormatInfo* TextureCache::GetFormatInfo(TextureFormat format) {
|
||||
switch (format) {
|
||||
case TextureFormat::k_CTX1:
|
||||
return FormatInfo::Get(TextureFormat::k_8_8);
|
||||
case TextureFormat::k_DXT3A:
|
||||
return FormatInfo::Get(TextureFormat::k_DXT2_3);
|
||||
default:
|
||||
return FormatInfo::Get(format);
|
||||
}
|
||||
}
|
||||
return false;
|
||||
|
||||
// Selects the per-block copy routine for a guest texture format: k_CTX1 and
// k_DXT3A get dedicated conversion routines, every other format uses the
// plain copy-and-swap routine.
texture_conversion::CopyBlockCallback TextureCache::GetFormatCopyBlock(
    TextureFormat format) {
  if (format == TextureFormat::k_CTX1) {
    return texture_conversion::ConvertTexelCTX1ToR8G8;
  }
  if (format == TextureFormat::k_DXT3A) {
    return texture_conversion::ConvertTexelDXT3AToDXT3;
  }
  return texture_conversion::CopySwapBlock;
}
|
||||
|
||||
TextureMemoryUsage TextureCache::GetMipMemoryUsage(const TextureInfo& src,
|
||||
uint32_t mip) {
|
||||
auto format_info = GetFormatInfo(src.format);
|
||||
uint32_t width = src.width + 1;
|
||||
uint32_t height = src.height + 1;
|
||||
uint32_t depth = src.depth + 1;
|
||||
TextureMemoryUsage usage;
|
||||
if (mip == 0) {
|
||||
usage = TextureMemoryUsage::Calculate(format_info, width, height, depth,
|
||||
width, false);
|
||||
} else {
|
||||
*output_length = src.input_length;
|
||||
return true;
|
||||
uint32_t mip_width = xe::next_pow2(width) >> mip;
|
||||
uint32_t mip_height = xe::next_pow2(height) >> mip;
|
||||
usage = TextureMemoryUsage::Calculate(format_info, mip_width, mip_height,
|
||||
depth, mip_width, false);
|
||||
}
|
||||
return usage;
|
||||
}
|
||||
|
||||
uint32_t TextureCache::ComputeMipStorage(const FormatInfo* format_info,
|
||||
uint32_t width, uint32_t height,
|
||||
uint32_t depth, uint32_t mip) {
|
||||
assert_not_null(format_info);
|
||||
TextureMemoryUsage usage;
|
||||
if (mip == 0) {
|
||||
usage = TextureMemoryUsage::Calculate(format_info, width, height, depth,
|
||||
false, false);
|
||||
} else {
|
||||
uint32_t mip_width = xe::next_pow2(width) >> mip;
|
||||
uint32_t mip_height = xe::next_pow2(height) >> mip;
|
||||
usage = TextureMemoryUsage::Calculate(format_info, mip_width, mip_height,
|
||||
depth, false, false);
|
||||
}
|
||||
uint32_t bytes_per_block = format_info->bytes_per_block();
|
||||
return usage.blocks() * bytes_per_block;
|
||||
}
|
||||
|
||||
uint32_t TextureCache::ComputeMipStorage(const TextureInfo& src, uint32_t mip) {
|
||||
return ComputeMipStorage(GetFormatInfo(src.format), src.width + 1,
|
||||
src.height + 1, src.depth + 1, mip);
|
||||
}
|
||||
|
||||
uint32_t TextureCache::ComputeTextureStorage(const TextureInfo& src) {
|
||||
auto format_info = GetFormatInfo(src.format);
|
||||
uint32_t width = src.width + 1;
|
||||
uint32_t height = src.height + 1;
|
||||
uint32_t depth = src.depth + 1;
|
||||
uint32_t length = 0;
|
||||
for (uint32_t mip = 0; mip < src.mip_levels; mip++) {
|
||||
length += ComputeMipStorage(format_info, width, height, depth, mip);
|
||||
}
|
||||
return length;
|
||||
}
|
||||
|
||||
void TextureCache::WritebackTexture(Texture* texture) {
|
||||
|
@ -1332,7 +1339,8 @@ void TextureCache::WritebackTexture(Texture* texture) {
|
|||
|
||||
auto dest = memory_->TranslatePhysical(texture->texture_info.guest_address);
|
||||
if (status == VK_SUCCESS) {
|
||||
std::memcpy(dest, alloc->host_ptr, texture->texture_info.input_length);
|
||||
std::memcpy(dest, alloc->host_ptr,
|
||||
texture->texture_info.GetByteSize(false));
|
||||
}
|
||||
|
||||
wb_staging_buffer_.Scavenge();
|
||||
|
@ -1473,7 +1481,7 @@ bool TextureCache::SetupTextureBinding(VkCommandBuffer command_buffer,
|
|||
}
|
||||
|
||||
// Search via the base format.
|
||||
texture_info.texture_format = GetBaseFormat(texture_info.texture_format);
|
||||
texture_info.format = GetBaseFormat(texture_info.format);
|
||||
|
||||
auto texture = Demand(texture_info, command_buffer, completion_fence);
|
||||
auto sampler = Demand(sampler_info);
|
||||
|
|
|
@ -16,6 +16,7 @@
|
|||
#include "xenia/gpu/register_file.h"
|
||||
#include "xenia/gpu/sampler_info.h"
|
||||
#include "xenia/gpu/shader.h"
|
||||
#include "xenia/gpu/texture_conversion.h"
|
||||
#include "xenia/gpu/texture_info.h"
|
||||
#include "xenia/gpu/trace_writer.h"
|
||||
#include "xenia/gpu/vulkan/vulkan_command_processor.h"
|
||||
|
@ -155,7 +156,17 @@ class TextureCache {
|
|||
uint32_t mip, const TextureInfo& src);
|
||||
bool ConvertTexture(uint8_t* dest, VkBufferImageCopy* copy_region,
|
||||
uint32_t mip, const TextureInfo& src);
|
||||
bool ComputeTextureStorage(size_t* output_length, const TextureInfo& src);
|
||||
|
||||
// Returns a pointer to the FormatInfo for |format|.
static const FormatInfo* GetFormatInfo(TextureFormat format);
|
||||
// Returns the texture_conversion block-copy callback to use for |format|.
static texture_conversion::CopyBlockCallback GetFormatCopyBlock(
|
||||
TextureFormat format);
|
||||
// Returns the TextureMemoryUsage calculated for mip level |mip| of |src|.
static TextureMemoryUsage GetMipMemoryUsage(const TextureInfo& src,
|
||||
uint32_t mip);
|
||||
// Returns the byte size of mip level |mip| for the given base dimensions
// (block count times the format's bytes per block).
static uint32_t ComputeMipStorage(const FormatInfo* format_info,
|
||||
uint32_t width, uint32_t height,
|
||||
uint32_t depth, uint32_t mip);
|
||||
// Same as above, taking the format and dimensions from |src|.
static uint32_t ComputeMipStorage(const TextureInfo& src, uint32_t mip);
|
||||
// Returns the sum of ComputeMipStorage over all src.mip_levels levels.
static uint32_t ComputeTextureStorage(const TextureInfo& src);
|
||||
|
||||
// Writes a texture back into guest memory. This call is (mostly) asynchronous
|
||||
// but the texture must not be flagged for destruction.
|
||||
|
|
Loading…
Reference in New Issue