Fixing small textures. This was painful.

Fixes #173.
This commit is contained in:
Ben Vanik 2015-03-05 22:22:34 -08:00
parent 4b5f77bde4
commit b19c98fd9a
5 changed files with 168 additions and 79 deletions

View File

@ -145,6 +145,15 @@ inline bool bit_scan_forward(int64_t v, uint32_t* out_first_set_index) {
return bit_scan_forward(static_cast<uint64_t>(v), out_first_set_index);
}
// Returns floor(log2(v)), i.e. the index of the highest set bit of v:
// (bit width of T) - 1 - lzcnt(v), where lzcnt is the leading-zero count.
// NOTE(review): v == 0 has no logarithm; the result then depends on what
// lzcnt returns for zero - confirm callers never pass 0.
template <typename T>
inline T log2_floor(T v) {
  const auto bit_count = sizeof(T) * 8;
  const auto leading_zeros = lzcnt(v);
  return static_cast<T>(bit_count - 1 - leading_zeros);
}
// Returns ceil(log2(v)): the bit width of T minus the leading-zero count of
// (v - 1), so an exact power of two maps to its own exponent and anything in
// between rounds up.
//
// `v - 1` is cast back to T before it is handed to lzcnt. For T narrower than
// int the subtraction is promoted to int, which may select a wider lzcnt
// overload and make the count inconsistent with the `sizeof(T) * 8` it is
// subtracted from. For int-sized and wider T the cast is a no-op.
//
// NOTE(review): v == 0 makes (v - 1) wrap to all-ones for unsigned T -
// confirm callers never pass 0.
template <typename T>
inline T log2_ceil(T v) {
  const T predecessor = static_cast<T>(v - 1);
  return static_cast<T>(sizeof(T) * 8 - lzcnt(predecessor));
}
template <typename T>
inline T rotate_left(T v, uint8_t sh) {
return (T(v) << sh) | (T(v) >> ((sizeof(T) * 8) - sh));

View File

@ -547,7 +547,6 @@ bool TextureCache::UploadTexture2D(GLuint texture,
glTextureStorage2D(texture, 1, config.internal_format,
texture_info.size_2d.output_width,
texture_info.size_2d.output_height);
assert_true(unpack_length % 4 == 0);
auto allocation = scratch_buffer_->Acquire(unpack_length);
@ -573,26 +572,34 @@ bool TextureCache::UploadTexture2D(GLuint texture,
} else {
// Untile image.
// We could do this in a shader to speed things up, as this is pretty slow.
// TODO(benvanik): optimize this inner loop (or work by tiles).
const uint8_t* src = host_address;
uint8_t* dest = reinterpret_cast<uint8_t*>(allocation.host_ptr);
uint32_t bytes_per_block = texture_info.format_info->block_width *
texture_info.format_info->block_height *
texture_info.format_info->bits_per_pixel / 8;
// Tiled textures can be packed; get the offset into the packed texture.
uint32_t offset_x;
uint32_t offset_y;
TextureInfo::GetPackedTileOffset(texture_info, &offset_x, &offset_y);
auto bpp = (bytes_per_block >> 2) +
((bytes_per_block >> 1) >> (bytes_per_block >> 2));
for (uint32_t y = 0, output_base_offset = 0;
y < texture_info.size_2d.block_height;
y++, output_base_offset += texture_info.size_2d.output_pitch) {
auto input_base_offset = TextureInfo::TiledOffset2DOuter(
y, (texture_info.size_2d.input_width /
texture_info.format_info->block_width),
offset_y + y, (texture_info.size_2d.input_width /
texture_info.format_info->block_width),
bpp);
for (uint32_t x = 0, output_offset = output_base_offset;
x < texture_info.size_2d.block_width;
x++, output_offset += bytes_per_block) {
auto input_offset =
TextureInfo::TiledOffset2DInner(x, y, bpp, input_base_offset) >>
TextureInfo::TiledOffset2DInner(offset_x + x, offset_y + y, bpp,
input_base_offset) >>
bpp;
TextureSwap(texture_info.endianness, dest + output_offset,
src + input_offset * bytes_per_block, bytes_per_block);

View File

@ -19,77 +19,86 @@ namespace gpu {
using namespace xe::gpu::ucode;
using namespace xe::gpu::xenos;
// Per-format layout table. The position of an entry is significant: the table
// is indexed directly with the raw GPU texture format value (see the
// `&format_infos[fetch.format]` lookup in TextureInfo::Prepare).
// Each entry lists a TextureFormat, a FormatType and then three numbers that
// presumably map to FormatInfo's block_width, block_height and bits_per_pixel
// members, in that order - confirm against the struct declaration.
// Entries written with only four initializers leave the last value
// value-initialized to 0.
static const FormatInfo format_infos[64] = {
// Indices 0-7.
{TextureFormat::k_1_REVERSE, FormatType::kUncompressed, 1, 1, 1},
{TextureFormat::k_1, FormatType::kUncompressed, 1, 1, 1},
{TextureFormat::k_8, FormatType::kUncompressed, 1, 1, 8},
{TextureFormat::k_1_5_5_5, FormatType::kUncompressed, 1, 1, 16},
{TextureFormat::k_5_6_5, FormatType::kUncompressed, 1, 1, 16},
{TextureFormat::k_6_5_5, FormatType::kUncompressed, 1, 1, 16},
{TextureFormat::k_8_8_8_8, FormatType::kUncompressed, 1, 1, 32},
{TextureFormat::k_2_10_10_10, FormatType::kUncompressed, 1, 1, 32},
// Indices 8-15 (13 is an unused slot).
{TextureFormat::k_8_A, FormatType::kUncompressed, 1, 1, 8},
{TextureFormat::k_8_B, FormatType::kUncompressed, 1, 1, 8},
{TextureFormat::k_8_8, FormatType::kUncompressed, 1, 1, 16},
{TextureFormat::k_Cr_Y1_Cb_Y0, FormatType::kCompressed, 2, 1, 16},
{TextureFormat::k_Y1_Cr_Y0_Cb, FormatType::kCompressed, 2, 1, 16},
{TextureFormat::kUnknown, FormatType::kUncompressed, 0, 0},
{TextureFormat::k_8_8_8_8_A, FormatType::kUncompressed, 1, 1, 32},
{TextureFormat::k_4_4_4_4, FormatType::kUncompressed, 1, 1, 16},
// Indices 16-23 (21 is an unused slot).
{TextureFormat::k_10_11_11, FormatType::kUncompressed, 1, 1, 32},
{TextureFormat::k_11_11_10, FormatType::kUncompressed, 1, 1, 32},
{TextureFormat::k_DXT1, FormatType::kCompressed, 4, 4, 4},
{TextureFormat::k_DXT2_3, FormatType::kCompressed, 4, 4, 8},
{TextureFormat::k_DXT4_5, FormatType::kCompressed, 4, 4, 8},
{TextureFormat::kUnknown, FormatType::kUncompressed, 0, 0},
{TextureFormat::k_24_8, FormatType::kUncompressed, 1, 1, 32},
{TextureFormat::k_24_8_FLOAT, FormatType::kUncompressed, 1, 1, 32},
// Indices 24-31.
{TextureFormat::k_16, FormatType::kUncompressed, 1, 1, 16},
{TextureFormat::k_16_16, FormatType::kUncompressed, 1, 1, 32},
{TextureFormat::k_16_16_16_16, FormatType::kUncompressed, 1, 1, 64},
{TextureFormat::k_16_EXPAND, FormatType::kUncompressed, 1, 1, 16},
{TextureFormat::k_16_16_EXPAND, FormatType::kUncompressed, 1, 1, 32},
{TextureFormat::k_16_16_16_16_EXPAND, FormatType::kUncompressed, 1, 1, 64},
{TextureFormat::k_16_FLOAT, FormatType::kUncompressed, 1, 1, 16},
{TextureFormat::k_16_16_FLOAT, FormatType::kUncompressed, 1, 1, 32},
// Indices 32-39.
{TextureFormat::k_16_16_16_16_FLOAT, FormatType::kUncompressed, 1, 1, 64},
{TextureFormat::k_32, FormatType::kUncompressed, 1, 1, 32},
{TextureFormat::k_32_32, FormatType::kUncompressed, 1, 1, 64},
{TextureFormat::k_32_32_32_32, FormatType::kUncompressed, 1, 1, 128},
{TextureFormat::k_32_FLOAT, FormatType::kUncompressed, 1, 1, 32},
{TextureFormat::k_32_32_FLOAT, FormatType::kUncompressed, 1, 1, 64},
{TextureFormat::k_32_32_32_32_FLOAT, FormatType::kUncompressed, 1, 1, 128},
{TextureFormat::k_32_AS_8, FormatType::kCompressed, 4, 1, 8},
// Indices 40-47.
{TextureFormat::k_32_AS_8_8, FormatType::kCompressed, 2, 1, 16},
{TextureFormat::k_16_MPEG, FormatType::kUncompressed, 1, 1, 16},
{TextureFormat::k_16_16_MPEG, FormatType::kUncompressed, 1, 1, 32},
{TextureFormat::k_8_INTERLACED, FormatType::kUncompressed, 1, 1, 8},
{TextureFormat::k_32_AS_8_INTERLACED, FormatType::kCompressed, 4, 1, 8},
// NOTE(review): marked kCompressed but with a 1x1 block, unlike the 2x1 block
// of k_32_AS_8_8 above - confirm this is intended.
{TextureFormat::k_32_AS_8_8_INTERLACED, FormatType::kCompressed, 1, 1, 16},
{TextureFormat::k_16_INTERLACED, FormatType::kUncompressed, 1, 1, 16},
{TextureFormat::k_16_MPEG_INTERLACED, FormatType::kUncompressed, 1, 1, 16},
// Indices 48-55.
{TextureFormat::k_16_16_MPEG_INTERLACED, FormatType::kUncompressed, 1, 1,
32},
{TextureFormat::k_DXN, FormatType::kCompressed, 4, 4, 8},
{TextureFormat::k_8_8_8_8_AS_16_16_16_16, FormatType::kUncompressed, 1, 1,
32},
{TextureFormat::k_DXT1_AS_16_16_16_16, FormatType::kCompressed, 4, 4, 4},
{TextureFormat::k_DXT2_3_AS_16_16_16_16, FormatType::kCompressed, 4, 4, 8},
{TextureFormat::k_DXT4_5_AS_16_16_16_16, FormatType::kCompressed, 4, 4, 8},
{TextureFormat::k_2_10_10_10_AS_16_16_16_16, FormatType::kUncompressed, 1,
1, 32},
{TextureFormat::k_10_11_11_AS_16_16_16_16, FormatType::kUncompressed, 1, 1,
32},
// Indices 56-63 (62 and 63 are unused slots).
{TextureFormat::k_11_11_10_AS_16_16_16_16, FormatType::kUncompressed, 1, 1,
32},
{TextureFormat::k_32_32_32_FLOAT, FormatType::kUncompressed, 1, 1, 96},
{TextureFormat::k_DXT3A, FormatType::kCompressed, 4, 4, 4},
{TextureFormat::k_DXT5A, FormatType::kCompressed, 4, 4, 4},
{TextureFormat::k_CTX1, FormatType::kCompressed, 4, 4, 4},
{TextureFormat::k_DXT3A_AS_1_1_1_1, FormatType::kCompressed, 4, 4, 4},
{TextureFormat::kUnknown, FormatType::kUncompressed, 0, 0},
{TextureFormat::kUnknown, FormatType::kUncompressed, 0, 0},
};
// Looks up the static layout description for a raw GPU texture format value.
//
// The table is positional: entry N describes the format whose GPU value is N.
// Each entry lists the TextureFormat, the FormatType and three numbers that
// presumably map to block_width, block_height and bits_per_pixel, in that
// order - confirm against the FormatInfo declaration.
// Slots with no known format are filled with kUnknown and all-zero dimensions.
//
// Returns a pointer into the function-local static table (never freed), or
// nullptr when gpu_format does not name a slot in the table. The range check
// prevents an out-of-bounds read for values >= 64.
const FormatInfo* FormatInfo::Get(uint32_t gpu_format) {
  static const FormatInfo format_infos[64] = {
      // Indices 0-7.
      {TextureFormat::k_1_REVERSE, FormatType::kUncompressed, 1, 1, 1},
      {TextureFormat::k_1, FormatType::kUncompressed, 1, 1, 1},
      {TextureFormat::k_8, FormatType::kUncompressed, 1, 1, 8},
      {TextureFormat::k_1_5_5_5, FormatType::kUncompressed, 1, 1, 16},
      {TextureFormat::k_5_6_5, FormatType::kUncompressed, 1, 1, 16},
      {TextureFormat::k_6_5_5, FormatType::kUncompressed, 1, 1, 16},
      {TextureFormat::k_8_8_8_8, FormatType::kUncompressed, 1, 1, 32},
      {TextureFormat::k_2_10_10_10, FormatType::kUncompressed, 1, 1, 32},
      // Indices 8-15.
      {TextureFormat::k_8_A, FormatType::kUncompressed, 1, 1, 8},
      {TextureFormat::k_8_B, FormatType::kUncompressed, 1, 1, 8},
      {TextureFormat::k_8_8, FormatType::kUncompressed, 1, 1, 16},
      {TextureFormat::k_Cr_Y1_Cb_Y0, FormatType::kCompressed, 2, 1, 16},
      {TextureFormat::k_Y1_Cr_Y0_Cb, FormatType::kCompressed, 2, 1, 16},
      {TextureFormat::kUnknown, FormatType::kUncompressed, 0, 0, 0},
      {TextureFormat::k_8_8_8_8_A, FormatType::kUncompressed, 1, 1, 32},
      {TextureFormat::k_4_4_4_4, FormatType::kUncompressed, 1, 1, 16},
      // Indices 16-23.
      {TextureFormat::k_10_11_11, FormatType::kUncompressed, 1, 1, 32},
      {TextureFormat::k_11_11_10, FormatType::kUncompressed, 1, 1, 32},
      {TextureFormat::k_DXT1, FormatType::kCompressed, 4, 4, 4},
      {TextureFormat::k_DXT2_3, FormatType::kCompressed, 4, 4, 8},
      {TextureFormat::k_DXT4_5, FormatType::kCompressed, 4, 4, 8},
      {TextureFormat::kUnknown, FormatType::kUncompressed, 0, 0, 0},
      {TextureFormat::k_24_8, FormatType::kUncompressed, 1, 1, 32},
      {TextureFormat::k_24_8_FLOAT, FormatType::kUncompressed, 1, 1, 32},
      // Indices 24-31.
      {TextureFormat::k_16, FormatType::kUncompressed, 1, 1, 16},
      {TextureFormat::k_16_16, FormatType::kUncompressed, 1, 1, 32},
      {TextureFormat::k_16_16_16_16, FormatType::kUncompressed, 1, 1, 64},
      {TextureFormat::k_16_EXPAND, FormatType::kUncompressed, 1, 1, 16},
      {TextureFormat::k_16_16_EXPAND, FormatType::kUncompressed, 1, 1, 32},
      {TextureFormat::k_16_16_16_16_EXPAND, FormatType::kUncompressed, 1, 1,
       64},
      {TextureFormat::k_16_FLOAT, FormatType::kUncompressed, 1, 1, 16},
      {TextureFormat::k_16_16_FLOAT, FormatType::kUncompressed, 1, 1, 32},
      // Indices 32-39.
      {TextureFormat::k_16_16_16_16_FLOAT, FormatType::kUncompressed, 1, 1, 64},
      {TextureFormat::k_32, FormatType::kUncompressed, 1, 1, 32},
      {TextureFormat::k_32_32, FormatType::kUncompressed, 1, 1, 64},
      {TextureFormat::k_32_32_32_32, FormatType::kUncompressed, 1, 1, 128},
      {TextureFormat::k_32_FLOAT, FormatType::kUncompressed, 1, 1, 32},
      {TextureFormat::k_32_32_FLOAT, FormatType::kUncompressed, 1, 1, 64},
      {TextureFormat::k_32_32_32_32_FLOAT, FormatType::kUncompressed, 1, 1,
       128},
      {TextureFormat::k_32_AS_8, FormatType::kCompressed, 4, 1, 8},
      // Indices 40-47.
      {TextureFormat::k_32_AS_8_8, FormatType::kCompressed, 2, 1, 16},
      {TextureFormat::k_16_MPEG, FormatType::kUncompressed, 1, 1, 16},
      {TextureFormat::k_16_16_MPEG, FormatType::kUncompressed, 1, 1, 32},
      {TextureFormat::k_8_INTERLACED, FormatType::kUncompressed, 1, 1, 8},
      {TextureFormat::k_32_AS_8_INTERLACED, FormatType::kCompressed, 4, 1, 8},
      // NOTE(review): marked kCompressed but with a 1x1 block, unlike the 2x1
      // block of k_32_AS_8_8 above - confirm this is intended.
      {TextureFormat::k_32_AS_8_8_INTERLACED, FormatType::kCompressed, 1, 1,
       16},
      {TextureFormat::k_16_INTERLACED, FormatType::kUncompressed, 1, 1, 16},
      {TextureFormat::k_16_MPEG_INTERLACED, FormatType::kUncompressed, 1, 1,
       16},
      // Indices 48-55.
      {TextureFormat::k_16_16_MPEG_INTERLACED, FormatType::kUncompressed, 1, 1,
       32},
      {TextureFormat::k_DXN, FormatType::kCompressed, 4, 4, 8},
      {TextureFormat::k_8_8_8_8_AS_16_16_16_16, FormatType::kUncompressed, 1, 1,
       32},
      {TextureFormat::k_DXT1_AS_16_16_16_16, FormatType::kCompressed, 4, 4, 4},
      {TextureFormat::k_DXT2_3_AS_16_16_16_16, FormatType::kCompressed, 4, 4,
       8},
      {TextureFormat::k_DXT4_5_AS_16_16_16_16, FormatType::kCompressed, 4, 4,
       8},
      {TextureFormat::k_2_10_10_10_AS_16_16_16_16, FormatType::kUncompressed, 1,
       1, 32},
      {TextureFormat::k_10_11_11_AS_16_16_16_16, FormatType::kUncompressed, 1,
       1, 32},
      // Indices 56-63.
      {TextureFormat::k_11_11_10_AS_16_16_16_16, FormatType::kUncompressed, 1,
       1, 32},
      {TextureFormat::k_32_32_32_FLOAT, FormatType::kUncompressed, 1, 1, 96},
      {TextureFormat::k_DXT3A, FormatType::kCompressed, 4, 4, 4},
      {TextureFormat::k_DXT5A, FormatType::kCompressed, 4, 4, 4},
      {TextureFormat::k_CTX1, FormatType::kCompressed, 4, 4, 4},
      {TextureFormat::k_DXT3A_AS_1_1_1_1, FormatType::kCompressed, 4, 4, 4},
      {TextureFormat::kUnknown, FormatType::kUncompressed, 0, 0, 0},
      {TextureFormat::kUnknown, FormatType::kUncompressed, 0, 0, 0},
  };
  // Derive the bound from the array itself so the guard cannot drift from the
  // table size.
  const auto format_count = sizeof(format_infos) / sizeof(format_infos[0]);
  if (gpu_format >= format_count) {
    return nullptr;
  }
  return &format_infos[gpu_format];
}
bool TextureInfo::Prepare(const xe_gpu_texture_fetch_t& fetch,
TextureInfo* out_info) {
@ -118,7 +127,7 @@ bool TextureInfo::Prepare(const xe_gpu_texture_fetch_t& fetch,
info.depth = fetch.size_3d.depth;
break;
}
info.format_info = &format_infos[fetch.format];
info.format_info = FormatInfo::Get(fetch.format);
info.endianness = static_cast<Endian>(fetch.endianness);
info.is_tiled = fetch.tiled;
info.input_length = 0; // Populated below.
@ -199,6 +208,62 @@ void TextureInfo::CalculateTextureSizes2D(const xe_gpu_texture_fetch_t& fetch) {
output_length = size_2d.output_pitch * block_height;
}
// Computes where a small texture sits inside the packed tile it shares with
// other small textures, and writes that position (in format blocks) to
// *out_offset_x / *out_offset_y. Textures that are not packed get (0, 0).
void TextureInfo::GetPackedTileOffset(const TextureInfo& texture_info,
                                      uint32_t* out_offset_x,
                                      uint32_t* out_offset_y) {
  // Tiles are 32x32; textures of size <= 16 get packed together into a single
  // tile. The layout below was worked out from graph paper and dds dumps.
  // 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
  // 0 +.4x4.+ +.....8x8.....+ +............16x16............+
  // 1 +.4x4.+ +.....8x8.....+ +............16x16............+
  // 2 +.4x4.+ +.....8x8.....+ +............16x16............+
  // 3 +.4x4.+ +.....8x8.....+ +............16x16............+
  // 4 x +.....8x8.....+ +............16x16............+
  // 5 +.....8x8.....+ +............16x16............+
  // 6 +.....8x8.....+ +............16x16............+
  // 7 +.....8x8.....+ +............16x16............+
  // 8 2x2 +............16x16............+
  // 9 2x2 +............16x16............+
  // 0 +............16x16............+
  // ... .....
  // The scheme holds for square textures and for non-power-of-two textures
  // that round up to a square; with more extreme aspect ratios the data
  // starts to stretch across tiles.
  //
  // Logical sizes are used on purpose: the input sizes describe the whole
  // packed tile set rather than this particular texture.
  const auto& size = texture_info.size_2d;

  // Packing is driven by the smaller dimension: it only applies when either
  // the width or the height is <= 16.
  const auto smallest_side = std::min(size.logical_width, size.logical_height);
  if (smallest_side > 16) {
    // Too big, not packed.
    *out_offset_x = 0;
    *out_offset_y = 0;
    return;
  }

  // Compare the power-of-two-rounded dimensions. A texture that is wider than
  // tall is placed 16 texels down; anything else (taller than wide, or
  // square) is placed 16 texels to the right.
  const bool wider_than_tall = poly::log2_ceil(size.logical_width) >
                               poly::log2_ceil(size.logical_height);
  const uint32_t texel_offset_x = wider_than_tall ? 0 : 16;
  const uint32_t texel_offset_y = wider_than_tall ? 16 : 0;

  // Convert from texels to blocks of the texture's format.
  *out_offset_x = texel_offset_x / texture_info.format_info->block_width;
  *out_offset_y = texel_offset_y / texture_info.format_info->block_height;
}
// https://code.google.com/p/crunch/source/browse/trunk/inc/crn_decomp.h#4104
uint32_t TextureInfo::TiledOffset2DOuter(uint32_t y, uint32_t width,
uint32_t log_bpp) {

View File

@ -95,6 +95,8 @@ struct FormatInfo {
uint32_t block_width;
uint32_t block_height;
uint32_t bits_per_pixel;
static const FormatInfo* Get(uint32_t gpu_format);
};
struct TextureInfo {
@ -139,6 +141,9 @@ struct TextureInfo {
static bool Prepare(const xenos::xe_gpu_texture_fetch_t& fetch,
TextureInfo* out_info);
static void GetPackedTileOffset(const TextureInfo& texture_info,
uint32_t* out_offset_x,
uint32_t* out_offset_y);
static uint32_t TiledOffset2DOuter(uint32_t y, uint32_t width,
uint32_t log_bpp);
static uint32_t TiledOffset2DInner(uint32_t x, uint32_t y, uint32_t bpp,

View File

@ -318,9 +318,12 @@ XEPACKEDUNION(xe_gpu_texture_fetch_t, {
uint32_t mip_filter : 2;
uint32_t unk3_2 : 6;
uint32_t border : 1;
uint32_t unk4; // dword_4
uint32_t unk5 : 9; // dword_5
uint32_t dimension : 2;
uint32_t unk4_0 : 2; // dword_4
uint32_t mip_min_level : 4;
uint32_t mip_max_level : 4;
uint32_t unk4_1 : 22;
uint32_t unk5 : 9; // dword_5
uint32_t dimension : 2;
uint32_t unk5b : 21;
});
XEPACKEDSTRUCTANONYMOUS({