parent
4b5f77bde4
commit
b19c98fd9a
|
@ -145,6 +145,15 @@ inline bool bit_scan_forward(int64_t v, uint32_t* out_first_set_index) {
|
||||||
return bit_scan_forward(static_cast<uint64_t>(v), out_first_set_index);
|
return bit_scan_forward(static_cast<uint64_t>(v), out_first_set_index);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Returns (bit width of T) - 1 - lzcnt(v), converted to T.
// Presumably this is floor(log2(v)) for v > 0, assuming lzcnt yields the
// number of leading zero bits of its argument - TODO confirm against lzcnt.
template <typename T>
inline T log2_floor(T v) {
  const auto bit_count = sizeof(T) * 8;
  return bit_count - 1 - lzcnt(v);
}
|
||||||
|
// Returns (bit width of T) - lzcnt(v - 1), converted to T.
// Presumably this is ceil(log2(v)) for v >= 1, assuming lzcnt yields the
// number of leading zero bits of its argument - TODO confirm against lzcnt.
// NOTE(review): v == 0 makes v - 1 wrap (unsigned) or go negative (signed);
// confirm callers never pass 0.
template <typename T>
inline T log2_ceil(T v) {
  const auto bit_count = sizeof(T) * 8;
  return bit_count - lzcnt(v - 1);
}
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
inline T rotate_left(T v, uint8_t sh) {
|
inline T rotate_left(T v, uint8_t sh) {
|
||||||
return (T(v) << sh) | (T(v) >> ((sizeof(T) * 8) - sh));
|
return (T(v) << sh) | (T(v) >> ((sizeof(T) * 8) - sh));
|
||||||
|
|
|
@ -547,7 +547,6 @@ bool TextureCache::UploadTexture2D(GLuint texture,
|
||||||
glTextureStorage2D(texture, 1, config.internal_format,
|
glTextureStorage2D(texture, 1, config.internal_format,
|
||||||
texture_info.size_2d.output_width,
|
texture_info.size_2d.output_width,
|
||||||
texture_info.size_2d.output_height);
|
texture_info.size_2d.output_height);
|
||||||
assert_true(unpack_length % 4 == 0);
|
|
||||||
|
|
||||||
auto allocation = scratch_buffer_->Acquire(unpack_length);
|
auto allocation = scratch_buffer_->Acquire(unpack_length);
|
||||||
|
|
||||||
|
@ -573,26 +572,34 @@ bool TextureCache::UploadTexture2D(GLuint texture,
|
||||||
} else {
|
} else {
|
||||||
// Untile image.
|
// Untile image.
|
||||||
// We could do this in a shader to speed things up, as this is pretty slow.
|
// We could do this in a shader to speed things up, as this is pretty slow.
|
||||||
|
|
||||||
// TODO(benvanik): optimize this inner loop (or work by tiles).
|
// TODO(benvanik): optimize this inner loop (or work by tiles).
|
||||||
const uint8_t* src = host_address;
|
const uint8_t* src = host_address;
|
||||||
uint8_t* dest = reinterpret_cast<uint8_t*>(allocation.host_ptr);
|
uint8_t* dest = reinterpret_cast<uint8_t*>(allocation.host_ptr);
|
||||||
uint32_t bytes_per_block = texture_info.format_info->block_width *
|
uint32_t bytes_per_block = texture_info.format_info->block_width *
|
||||||
texture_info.format_info->block_height *
|
texture_info.format_info->block_height *
|
||||||
texture_info.format_info->bits_per_pixel / 8;
|
texture_info.format_info->bits_per_pixel / 8;
|
||||||
|
|
||||||
|
// Tiled textures can be packed; get the offset into the packed texture.
|
||||||
|
uint32_t offset_x;
|
||||||
|
uint32_t offset_y;
|
||||||
|
TextureInfo::GetPackedTileOffset(texture_info, &offset_x, &offset_y);
|
||||||
|
|
||||||
auto bpp = (bytes_per_block >> 2) +
|
auto bpp = (bytes_per_block >> 2) +
|
||||||
((bytes_per_block >> 1) >> (bytes_per_block >> 2));
|
((bytes_per_block >> 1) >> (bytes_per_block >> 2));
|
||||||
for (uint32_t y = 0, output_base_offset = 0;
|
for (uint32_t y = 0, output_base_offset = 0;
|
||||||
y < texture_info.size_2d.block_height;
|
y < texture_info.size_2d.block_height;
|
||||||
y++, output_base_offset += texture_info.size_2d.output_pitch) {
|
y++, output_base_offset += texture_info.size_2d.output_pitch) {
|
||||||
auto input_base_offset = TextureInfo::TiledOffset2DOuter(
|
auto input_base_offset = TextureInfo::TiledOffset2DOuter(
|
||||||
y, (texture_info.size_2d.input_width /
|
offset_y + y, (texture_info.size_2d.input_width /
|
||||||
texture_info.format_info->block_width),
|
texture_info.format_info->block_width),
|
||||||
bpp);
|
bpp);
|
||||||
for (uint32_t x = 0, output_offset = output_base_offset;
|
for (uint32_t x = 0, output_offset = output_base_offset;
|
||||||
x < texture_info.size_2d.block_width;
|
x < texture_info.size_2d.block_width;
|
||||||
x++, output_offset += bytes_per_block) {
|
x++, output_offset += bytes_per_block) {
|
||||||
auto input_offset =
|
auto input_offset =
|
||||||
TextureInfo::TiledOffset2DInner(x, y, bpp, input_base_offset) >>
|
TextureInfo::TiledOffset2DInner(offset_x + x, offset_y + y, bpp,
|
||||||
|
input_base_offset) >>
|
||||||
bpp;
|
bpp;
|
||||||
TextureSwap(texture_info.endianness, dest + output_offset,
|
TextureSwap(texture_info.endianness, dest + output_offset,
|
||||||
src + input_offset * bytes_per_block, bytes_per_block);
|
src + input_offset * bytes_per_block, bytes_per_block);
|
||||||
|
|
|
@ -19,7 +19,8 @@ namespace gpu {
|
||||||
using namespace xe::gpu::ucode;
|
using namespace xe::gpu::ucode;
|
||||||
using namespace xe::gpu::xenos;
|
using namespace xe::gpu::xenos;
|
||||||
|
|
||||||
static const FormatInfo format_infos[64] = {
|
const FormatInfo* FormatInfo::Get(uint32_t gpu_format) {
|
||||||
|
static const FormatInfo format_infos[64] = {
|
||||||
{TextureFormat::k_1_REVERSE, FormatType::kUncompressed, 1, 1, 1},
|
{TextureFormat::k_1_REVERSE, FormatType::kUncompressed, 1, 1, 1},
|
||||||
{TextureFormat::k_1, FormatType::kUncompressed, 1, 1, 1},
|
{TextureFormat::k_1, FormatType::kUncompressed, 1, 1, 1},
|
||||||
{TextureFormat::k_8, FormatType::kUncompressed, 1, 1, 8},
|
{TextureFormat::k_8, FormatType::kUncompressed, 1, 1, 8},
|
||||||
|
@ -49,7 +50,8 @@ static const FormatInfo format_infos[64] = {
|
||||||
{TextureFormat::k_16_16_16_16, FormatType::kUncompressed, 1, 1, 64},
|
{TextureFormat::k_16_16_16_16, FormatType::kUncompressed, 1, 1, 64},
|
||||||
{TextureFormat::k_16_EXPAND, FormatType::kUncompressed, 1, 1, 16},
|
{TextureFormat::k_16_EXPAND, FormatType::kUncompressed, 1, 1, 16},
|
||||||
{TextureFormat::k_16_16_EXPAND, FormatType::kUncompressed, 1, 1, 32},
|
{TextureFormat::k_16_16_EXPAND, FormatType::kUncompressed, 1, 1, 32},
|
||||||
{TextureFormat::k_16_16_16_16_EXPAND, FormatType::kUncompressed, 1, 1, 64},
|
{TextureFormat::k_16_16_16_16_EXPAND, FormatType::kUncompressed, 1, 1,
|
||||||
|
64},
|
||||||
{TextureFormat::k_16_FLOAT, FormatType::kUncompressed, 1, 1, 16},
|
{TextureFormat::k_16_FLOAT, FormatType::kUncompressed, 1, 1, 16},
|
||||||
{TextureFormat::k_16_16_FLOAT, FormatType::kUncompressed, 1, 1, 32},
|
{TextureFormat::k_16_16_FLOAT, FormatType::kUncompressed, 1, 1, 32},
|
||||||
{TextureFormat::k_16_16_16_16_FLOAT, FormatType::kUncompressed, 1, 1, 64},
|
{TextureFormat::k_16_16_16_16_FLOAT, FormatType::kUncompressed, 1, 1, 64},
|
||||||
|
@ -58,30 +60,35 @@ static const FormatInfo format_infos[64] = {
|
||||||
{TextureFormat::k_32_32_32_32, FormatType::kUncompressed, 1, 1, 128},
|
{TextureFormat::k_32_32_32_32, FormatType::kUncompressed, 1, 1, 128},
|
||||||
{TextureFormat::k_32_FLOAT, FormatType::kUncompressed, 1, 1, 32},
|
{TextureFormat::k_32_FLOAT, FormatType::kUncompressed, 1, 1, 32},
|
||||||
{TextureFormat::k_32_32_FLOAT, FormatType::kUncompressed, 1, 1, 64},
|
{TextureFormat::k_32_32_FLOAT, FormatType::kUncompressed, 1, 1, 64},
|
||||||
{TextureFormat::k_32_32_32_32_FLOAT, FormatType::kUncompressed, 1, 1, 128},
|
{TextureFormat::k_32_32_32_32_FLOAT, FormatType::kUncompressed, 1, 1,
|
||||||
|
128},
|
||||||
{TextureFormat::k_32_AS_8, FormatType::kCompressed, 4, 1, 8},
|
{TextureFormat::k_32_AS_8, FormatType::kCompressed, 4, 1, 8},
|
||||||
{TextureFormat::k_32_AS_8_8, FormatType::kCompressed, 2, 1, 16},
|
{TextureFormat::k_32_AS_8_8, FormatType::kCompressed, 2, 1, 16},
|
||||||
{TextureFormat::k_16_MPEG, FormatType::kUncompressed, 1, 1, 16},
|
{TextureFormat::k_16_MPEG, FormatType::kUncompressed, 1, 1, 16},
|
||||||
{TextureFormat::k_16_16_MPEG, FormatType::kUncompressed, 1, 1, 32},
|
{TextureFormat::k_16_16_MPEG, FormatType::kUncompressed, 1, 1, 32},
|
||||||
{TextureFormat::k_8_INTERLACED, FormatType::kUncompressed, 1, 1, 8},
|
{TextureFormat::k_8_INTERLACED, FormatType::kUncompressed, 1, 1, 8},
|
||||||
{TextureFormat::k_32_AS_8_INTERLACED, FormatType::kCompressed, 4, 1, 8},
|
{TextureFormat::k_32_AS_8_INTERLACED, FormatType::kCompressed, 4, 1, 8},
|
||||||
{TextureFormat::k_32_AS_8_8_INTERLACED, FormatType::kCompressed, 1, 1, 16},
|
{TextureFormat::k_32_AS_8_8_INTERLACED, FormatType::kCompressed, 1, 1,
|
||||||
|
16},
|
||||||
{TextureFormat::k_16_INTERLACED, FormatType::kUncompressed, 1, 1, 16},
|
{TextureFormat::k_16_INTERLACED, FormatType::kUncompressed, 1, 1, 16},
|
||||||
{TextureFormat::k_16_MPEG_INTERLACED, FormatType::kUncompressed, 1, 1, 16},
|
{TextureFormat::k_16_MPEG_INTERLACED, FormatType::kUncompressed, 1, 1,
|
||||||
|
16},
|
||||||
{TextureFormat::k_16_16_MPEG_INTERLACED, FormatType::kUncompressed, 1, 1,
|
{TextureFormat::k_16_16_MPEG_INTERLACED, FormatType::kUncompressed, 1, 1,
|
||||||
32},
|
32},
|
||||||
{TextureFormat::k_DXN, FormatType::kCompressed, 4, 4, 8},
|
{TextureFormat::k_DXN, FormatType::kCompressed, 4, 4, 8},
|
||||||
{TextureFormat::k_8_8_8_8_AS_16_16_16_16, FormatType::kUncompressed, 1, 1,
|
{TextureFormat::k_8_8_8_8_AS_16_16_16_16, FormatType::kUncompressed, 1, 1,
|
||||||
32},
|
32},
|
||||||
{TextureFormat::k_DXT1_AS_16_16_16_16, FormatType::kCompressed, 4, 4, 4},
|
{TextureFormat::k_DXT1_AS_16_16_16_16, FormatType::kCompressed, 4, 4, 4},
|
||||||
{TextureFormat::k_DXT2_3_AS_16_16_16_16, FormatType::kCompressed, 4, 4, 8},
|
{TextureFormat::k_DXT2_3_AS_16_16_16_16, FormatType::kCompressed, 4, 4,
|
||||||
{TextureFormat::k_DXT4_5_AS_16_16_16_16, FormatType::kCompressed, 4, 4, 8},
|
8},
|
||||||
|
{TextureFormat::k_DXT4_5_AS_16_16_16_16, FormatType::kCompressed, 4, 4,
|
||||||
|
8},
|
||||||
{TextureFormat::k_2_10_10_10_AS_16_16_16_16, FormatType::kUncompressed, 1,
|
{TextureFormat::k_2_10_10_10_AS_16_16_16_16, FormatType::kUncompressed, 1,
|
||||||
1, 32},
|
1, 32},
|
||||||
{TextureFormat::k_10_11_11_AS_16_16_16_16, FormatType::kUncompressed, 1, 1,
|
{TextureFormat::k_10_11_11_AS_16_16_16_16, FormatType::kUncompressed, 1,
|
||||||
32},
|
1, 32},
|
||||||
{TextureFormat::k_11_11_10_AS_16_16_16_16, FormatType::kUncompressed, 1, 1,
|
{TextureFormat::k_11_11_10_AS_16_16_16_16, FormatType::kUncompressed, 1,
|
||||||
32},
|
1, 32},
|
||||||
{TextureFormat::k_32_32_32_FLOAT, FormatType::kUncompressed, 1, 1, 96},
|
{TextureFormat::k_32_32_32_FLOAT, FormatType::kUncompressed, 1, 1, 96},
|
||||||
{TextureFormat::k_DXT3A, FormatType::kCompressed, 4, 4, 4},
|
{TextureFormat::k_DXT3A, FormatType::kCompressed, 4, 4, 4},
|
||||||
{TextureFormat::k_DXT5A, FormatType::kCompressed, 4, 4, 4},
|
{TextureFormat::k_DXT5A, FormatType::kCompressed, 4, 4, 4},
|
||||||
|
@ -89,7 +96,9 @@ static const FormatInfo format_infos[64] = {
|
||||||
{TextureFormat::k_DXT3A_AS_1_1_1_1, FormatType::kCompressed, 4, 4, 4},
|
{TextureFormat::k_DXT3A_AS_1_1_1_1, FormatType::kCompressed, 4, 4, 4},
|
||||||
{TextureFormat::kUnknown, FormatType::kUncompressed, 0, 0},
|
{TextureFormat::kUnknown, FormatType::kUncompressed, 0, 0},
|
||||||
{TextureFormat::kUnknown, FormatType::kUncompressed, 0, 0},
|
{TextureFormat::kUnknown, FormatType::kUncompressed, 0, 0},
|
||||||
};
|
};
|
||||||
|
return &format_infos[gpu_format];
|
||||||
|
}
|
||||||
|
|
||||||
bool TextureInfo::Prepare(const xe_gpu_texture_fetch_t& fetch,
|
bool TextureInfo::Prepare(const xe_gpu_texture_fetch_t& fetch,
|
||||||
TextureInfo* out_info) {
|
TextureInfo* out_info) {
|
||||||
|
@ -118,7 +127,7 @@ bool TextureInfo::Prepare(const xe_gpu_texture_fetch_t& fetch,
|
||||||
info.depth = fetch.size_3d.depth;
|
info.depth = fetch.size_3d.depth;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
info.format_info = &format_infos[fetch.format];
|
info.format_info = FormatInfo::Get(fetch.format);
|
||||||
info.endianness = static_cast<Endian>(fetch.endianness);
|
info.endianness = static_cast<Endian>(fetch.endianness);
|
||||||
info.is_tiled = fetch.tiled;
|
info.is_tiled = fetch.tiled;
|
||||||
info.input_length = 0; // Populated below.
|
info.input_length = 0; // Populated below.
|
||||||
|
@ -199,6 +208,62 @@ void TextureInfo::CalculateTextureSizes2D(const xe_gpu_texture_fetch_t& fetch) {
|
||||||
output_length = size_2d.output_pitch * block_height;
|
output_length = size_2d.output_pitch * block_height;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void TextureInfo::GetPackedTileOffset(const TextureInfo& texture_info,
|
||||||
|
uint32_t* out_offset_x,
|
||||||
|
uint32_t* out_offset_y) {
|
||||||
|
// Tile size is 32x32, and once textures go <=16 they are packed into a
|
||||||
|
// single tile together. The math here is insane. Most sourced
|
||||||
|
// from graph paper and looking at dds dumps.
|
||||||
|
// 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
|
||||||
|
// 0 +.4x4.+ +.....8x8.....+ +............16x16............+
|
||||||
|
// 1 +.4x4.+ +.....8x8.....+ +............16x16............+
|
||||||
|
// 2 +.4x4.+ +.....8x8.....+ +............16x16............+
|
||||||
|
// 3 +.4x4.+ +.....8x8.....+ +............16x16............+
|
||||||
|
// 4 x +.....8x8.....+ +............16x16............+
|
||||||
|
// 5 +.....8x8.....+ +............16x16............+
|
||||||
|
// 6 +.....8x8.....+ +............16x16............+
|
||||||
|
// 7 +.....8x8.....+ +............16x16............+
|
||||||
|
// 8 2x2 +............16x16............+
|
||||||
|
// 9 2x2 +............16x16............+
|
||||||
|
// 0 +............16x16............+
|
||||||
|
// ... .....
|
||||||
|
// This only works for square textures, or textures that are some non-pot
|
||||||
|
// <= square. As soon as the aspect ratio goes weird, the textures start to
|
||||||
|
// stretch across tiles.
|
||||||
|
// if (tile_aligned(w) > tile_aligned(h)) {
|
||||||
|
// // wider than tall, so packed horizontally
|
||||||
|
// } else if (tile_aligned(w) < tile_aligned(h)) {
|
||||||
|
// // taller than wide, so packed vertically
|
||||||
|
// } else {
|
||||||
|
// square
|
||||||
|
// }
|
||||||
|
// It's important to use logical sizes here, as the input sizes will be
|
||||||
|
// for the entire packed tile set, not the actual texture.
|
||||||
|
// The minimum dimension is what matters most: if either width or height
|
||||||
|
// is <= 16 this mode kicks in.
|
||||||
|
|
||||||
|
if (std::min(texture_info.size_2d.logical_width,
|
||||||
|
texture_info.size_2d.logical_height) > 16) {
|
||||||
|
// Too big, not packed.
|
||||||
|
*out_offset_x = 0;
|
||||||
|
*out_offset_y = 0;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (poly::log2_ceil(texture_info.size_2d.logical_width) >
|
||||||
|
poly::log2_ceil(texture_info.size_2d.logical_height)) {
|
||||||
|
// Wider than tall. Laid out vertically.
|
||||||
|
*out_offset_x = 0;
|
||||||
|
*out_offset_y = 16;
|
||||||
|
} else {
|
||||||
|
// Taller than wide. Laid out horizontally.
|
||||||
|
*out_offset_x = 16;
|
||||||
|
*out_offset_y = 0;
|
||||||
|
}
|
||||||
|
*out_offset_x /= texture_info.format_info->block_width;
|
||||||
|
*out_offset_y /= texture_info.format_info->block_height;
|
||||||
|
}
|
||||||
|
|
||||||
// https://code.google.com/p/crunch/source/browse/trunk/inc/crn_decomp.h#4104
|
// https://code.google.com/p/crunch/source/browse/trunk/inc/crn_decomp.h#4104
|
||||||
uint32_t TextureInfo::TiledOffset2DOuter(uint32_t y, uint32_t width,
|
uint32_t TextureInfo::TiledOffset2DOuter(uint32_t y, uint32_t width,
|
||||||
uint32_t log_bpp) {
|
uint32_t log_bpp) {
|
||||||
|
|
|
@ -95,6 +95,8 @@ struct FormatInfo {
|
||||||
uint32_t block_width;
|
uint32_t block_width;
|
||||||
uint32_t block_height;
|
uint32_t block_height;
|
||||||
uint32_t bits_per_pixel;
|
uint32_t bits_per_pixel;
|
||||||
|
|
||||||
|
static const FormatInfo* Get(uint32_t gpu_format);
|
||||||
};
|
};
|
||||||
|
|
||||||
struct TextureInfo {
|
struct TextureInfo {
|
||||||
|
@ -139,6 +141,9 @@ struct TextureInfo {
|
||||||
static bool Prepare(const xenos::xe_gpu_texture_fetch_t& fetch,
|
static bool Prepare(const xenos::xe_gpu_texture_fetch_t& fetch,
|
||||||
TextureInfo* out_info);
|
TextureInfo* out_info);
|
||||||
|
|
||||||
|
static void GetPackedTileOffset(const TextureInfo& texture_info,
|
||||||
|
uint32_t* out_offset_x,
|
||||||
|
uint32_t* out_offset_y);
|
||||||
static uint32_t TiledOffset2DOuter(uint32_t y, uint32_t width,
|
static uint32_t TiledOffset2DOuter(uint32_t y, uint32_t width,
|
||||||
uint32_t log_bpp);
|
uint32_t log_bpp);
|
||||||
static uint32_t TiledOffset2DInner(uint32_t x, uint32_t y, uint32_t bpp,
|
static uint32_t TiledOffset2DInner(uint32_t x, uint32_t y, uint32_t bpp,
|
||||||
|
|
|
@ -318,7 +318,10 @@ XEPACKEDUNION(xe_gpu_texture_fetch_t, {
|
||||||
uint32_t mip_filter : 2;
|
uint32_t mip_filter : 2;
|
||||||
uint32_t unk3_2 : 6;
|
uint32_t unk3_2 : 6;
|
||||||
uint32_t border : 1;
|
uint32_t border : 1;
|
||||||
uint32_t unk4; // dword_4
|
uint32_t unk4_0 : 2; // dword_4
|
||||||
|
uint32_t mip_min_level : 4;
|
||||||
|
uint32_t mip_max_level : 4;
|
||||||
|
uint32_t unk4_1 : 22;
|
||||||
uint32_t unk5 : 9; // dword_5
|
uint32_t unk5 : 9; // dword_5
|
||||||
uint32_t dimension : 2;
|
uint32_t dimension : 2;
|
||||||
uint32_t unk5b : 21;
|
uint32_t unk5b : 21;
|
||||||
|
|
Loading…
Reference in New Issue