Vulkan: Refactor CTX1 conversion
This commit is contained in:
parent
1eac03a11c
commit
7e819a4ccb
|
@ -363,6 +363,12 @@ TextureCache::Texture* TextureCache::DemandResolveTexture(
|
||||||
|
|
||||||
// No texture at this location. Make a new one.
|
// No texture at this location. Make a new one.
|
||||||
auto texture = AllocateTexture(texture_info, required_flags);
|
auto texture = AllocateTexture(texture_info, required_flags);
|
||||||
|
if (!texture) {
|
||||||
|
// Failed to allocate texture (out of memory?)
|
||||||
|
assert_always();
|
||||||
|
XELOGE("Vulkan Texture Cache: Failed to allocate texture!");
|
||||||
|
return nullptr;
|
||||||
|
}
|
||||||
|
|
||||||
// Setup a debug name for the texture.
|
// Setup a debug name for the texture.
|
||||||
device_->DbgSetObjectName(
|
device_->DbgSetObjectName(
|
||||||
|
@ -427,6 +433,7 @@ TextureCache::Texture* TextureCache::Demand(const TextureInfo& texture_info,
|
||||||
if (!texture) {
|
if (!texture) {
|
||||||
// Failed to allocate texture (out of memory?)
|
// Failed to allocate texture (out of memory?)
|
||||||
assert_always();
|
assert_always();
|
||||||
|
XELOGE("Vulkan Texture Cache: Failed to allocate texture!");
|
||||||
return nullptr;
|
return nullptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -843,45 +850,8 @@ void TextureCache::FlushPendingCommands(VkCommandBuffer command_buffer,
|
||||||
vkBeginCommandBuffer(command_buffer, &begin_info);
|
vkBeginCommandBuffer(command_buffer, &begin_info);
|
||||||
}
|
}
|
||||||
|
|
||||||
bool TextureCache::ConvertTexture2D(uint8_t* dest,
|
void TextureCache::ConvertTexelCTX1(uint8_t* dest, size_t dest_pitch,
|
||||||
VkBufferImageCopy* copy_region,
|
const uint8_t* src, Endian src_endianness) {
|
||||||
const TextureInfo& src) {
|
|
||||||
void* host_address = memory_->TranslatePhysical(src.guest_address);
|
|
||||||
if (src.texture_format == TextureFormat::k_CTX1) {
|
|
||||||
if (!src.is_tiled) {
|
|
||||||
assert_always();
|
|
||||||
} else {
|
|
||||||
// Untile image.
|
|
||||||
// We could do this in a shader to speed things up, as this is pretty
|
|
||||||
// slow.
|
|
||||||
|
|
||||||
// TODO(benvanik): optimize this inner loop (or work by tiles).
|
|
||||||
const uint8_t* src_mem = reinterpret_cast<const uint8_t*>(host_address);
|
|
||||||
const uint32_t bytes_per_block = 8;
|
|
||||||
|
|
||||||
// Tiled textures can be packed; get the offset into the packed texture.
|
|
||||||
uint32_t offset_x;
|
|
||||||
uint32_t offset_y;
|
|
||||||
TextureInfo::GetPackedTileOffset(src, &offset_x, &offset_y);
|
|
||||||
auto log2_bpp = (bytes_per_block >> 2) +
|
|
||||||
((bytes_per_block >> 1) >> (bytes_per_block >> 2));
|
|
||||||
|
|
||||||
uint32_t output_pitch = src.size_2d.input_width * 2;
|
|
||||||
// Offset to the current row, in bytes.
|
|
||||||
uint32_t output_row_offset = 0;
|
|
||||||
for (uint32_t y = 0; y < src.size_2d.block_height; y++) {
|
|
||||||
auto input_row_offset = TextureInfo::TiledOffset2DOuter(
|
|
||||||
offset_y + y, src.size_2d.block_width, log2_bpp);
|
|
||||||
|
|
||||||
// Go block-by-block on this row.
|
|
||||||
uint32_t output_offset = output_row_offset;
|
|
||||||
for (uint32_t x = 0; x < src.size_2d.block_width;
|
|
||||||
x++, output_offset += 8) {
|
|
||||||
auto input_offset =
|
|
||||||
TextureInfo::TiledOffset2DInner(offset_x + x, offset_y + y,
|
|
||||||
log2_bpp, input_row_offset) >>
|
|
||||||
log2_bpp;
|
|
||||||
|
|
||||||
// http://fileadmin.cs.lth.se/cs/Personal/Michael_Doggett/talks/unc-xenos-doggett.pdf
|
// http://fileadmin.cs.lth.se/cs/Personal/Michael_Doggett/talks/unc-xenos-doggett.pdf
|
||||||
union {
|
union {
|
||||||
uint8_t data[8];
|
uint8_t data[8];
|
||||||
|
@ -892,70 +862,31 @@ bool TextureCache::ConvertTexture2D(uint8_t* dest,
|
||||||
} block;
|
} block;
|
||||||
static_assert(sizeof(block) == 8, "CTX1 block mismatch");
|
static_assert(sizeof(block) == 8, "CTX1 block mismatch");
|
||||||
|
|
||||||
TextureSwap(src.endianness, block.data,
|
const uint32_t bytes_per_block = 8;
|
||||||
src_mem + input_offset * bytes_per_block,
|
TextureSwap(src_endianness, block.data, src, bytes_per_block);
|
||||||
bytes_per_block);
|
|
||||||
|
|
||||||
uint8_t cr[4] = {
|
uint8_t cr[4] = {
|
||||||
block.r0, block.r1,
|
block.r0, block.r1,
|
||||||
static_cast<uint8_t>(2.f / 3.f * block.r0 + 1.f / 3.f * block.r1),
|
static_cast<uint8_t>(2.f / 3.f * block.r0 + 1.f / 3.f * block.r1),
|
||||||
static_cast<uint8_t>(1.f / 3.f * block.r0 +
|
static_cast<uint8_t>(1.f / 3.f * block.r0 + 2.f / 3.f * block.r1)};
|
||||||
2.f / 3.f * block.r1)};
|
|
||||||
uint8_t cg[4] = {
|
uint8_t cg[4] = {
|
||||||
block.g0, block.g1,
|
block.g0, block.g1,
|
||||||
static_cast<uint8_t>(2.f / 3.f * block.g0 + 1.f / 3.f * block.g1),
|
static_cast<uint8_t>(2.f / 3.f * block.g0 + 1.f / 3.f * block.g1),
|
||||||
static_cast<uint8_t>(1.f / 3.f * block.g0 +
|
static_cast<uint8_t>(1.f / 3.f * block.g0 + 2.f / 3.f * block.g1)};
|
||||||
2.f / 3.f * block.g1)};
|
|
||||||
|
|
||||||
for (uint32_t oy = 0; oy < 4; ++oy) {
|
for (uint32_t oy = 0; oy < 4; ++oy) {
|
||||||
for (uint32_t ox = 0; ox < 4; ++ox) {
|
for (uint32_t ox = 0; ox < 4; ++ox) {
|
||||||
uint8_t xx = (block.xx >> (((ox + (oy * 4)) * 2))) & 3;
|
uint8_t xx = (block.xx >> (((ox + (oy * 4)) * 2))) & 3;
|
||||||
dest[output_offset + (oy * output_pitch) + (ox * 2) + 0] = cr[xx];
|
dest[(oy * dest_pitch) + (ox * 2) + 0] = cr[xx];
|
||||||
dest[output_offset + (oy * output_pitch) + (ox * 2) + 1] = cg[xx];
|
dest[(oy * dest_pitch) + (ox * 2) + 1] = cg[xx];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
output_row_offset += output_pitch * 4;
|
|
||||||
}
|
|
||||||
|
|
||||||
#if 0
|
bool TextureCache::ConvertTexture2D(uint8_t* dest,
|
||||||
static int dds_counter = 0;
|
VkBufferImageCopy* copy_region,
|
||||||
uint8_t dds_header[] = {
|
const TextureInfo& src) {
|
||||||
0x44, 0x44, 0x53, 0x20, 0x7C, 0x00, 0x00, 0x00, 0x07, 0x10, 0x00,
|
void* host_address = memory_->TranslatePhysical(src.guest_address);
|
||||||
0x00, 0x58, 0x02, 0x00, 0x00, 0x20, 0x03, 0x00, 0x00, 0x00, 0x00,
|
|
||||||
0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
|
||||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
|
||||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
|
||||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
|
||||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x20,
|
|
||||||
0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
|
||||||
0x20, 0x00, 0x00, 0x00, 0xFF, 0x00, 0x00, 0x00, 0x00, 0xFF, 0x00,
|
|
||||||
0x00, 0x00, 0x00, 0xFF, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10,
|
|
||||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
|
||||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00};
|
|
||||||
*((uint32_t*)(&dds_header[12])) = src.size_2d.input_height;
|
|
||||||
*((uint32_t*)(&dds_header[16])) = src.size_2d.input_width;
|
|
||||||
|
|
||||||
char dds_name[512];
|
|
||||||
sprintf(dds_name, "TEST_CTX1_%u.dds", ++dds_counter);
|
|
||||||
auto handle = fopen(dds_name, "wb");
|
|
||||||
fwrite(dds_header, sizeof(dds_header), 1, handle);
|
|
||||||
uint8_t dummy[2] = {0, 0};
|
|
||||||
for (uint32_t i = 0;
|
|
||||||
i < src.size_2d.input_width * src.size_2d.input_height * 2; i += 2) {
|
|
||||||
fwrite(&dest[i], 2, 1, handle);
|
|
||||||
fwrite(dummy, 2, 1, handle);
|
|
||||||
}
|
|
||||||
fclose(handle);
|
|
||||||
#endif
|
|
||||||
|
|
||||||
copy_region->bufferRowLength = src.size_2d.input_width;
|
|
||||||
copy_region->bufferImageHeight = src.size_2d.input_height;
|
|
||||||
copy_region->imageExtent = {src.size_2d.logical_width,
|
|
||||||
src.size_2d.logical_height, 1};
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
if (!src.is_tiled) {
|
if (!src.is_tiled) {
|
||||||
uint32_t offset_x, offset_y;
|
uint32_t offset_x, offset_y;
|
||||||
if (src.has_packed_mips &&
|
if (src.has_packed_mips &&
|
||||||
|
@ -999,6 +930,16 @@ bool TextureCache::ConvertTexture2D(uint8_t* dest,
|
||||||
src.format_info()->block_height *
|
src.format_info()->block_height *
|
||||||
src.format_info()->bits_per_pixel / 8;
|
src.format_info()->bits_per_pixel / 8;
|
||||||
|
|
||||||
|
uint32_t output_pitch = src.size_2d.input_width *
|
||||||
|
src.format_info()->block_width *
|
||||||
|
src.format_info()->bits_per_pixel / 8;
|
||||||
|
|
||||||
|
uint32_t output_row_height = 1;
|
||||||
|
if (src.texture_format == TextureFormat::k_CTX1) {
|
||||||
|
// TODO: Can we calculate this?
|
||||||
|
output_row_height = 4;
|
||||||
|
}
|
||||||
|
|
||||||
// Tiled textures can be packed; get the offset into the packed texture.
|
// Tiled textures can be packed; get the offset into the packed texture.
|
||||||
uint32_t offset_x;
|
uint32_t offset_x;
|
||||||
uint32_t offset_y;
|
uint32_t offset_y;
|
||||||
|
@ -1015,19 +956,25 @@ bool TextureCache::ConvertTexture2D(uint8_t* dest,
|
||||||
// Go block-by-block on this row.
|
// Go block-by-block on this row.
|
||||||
uint32_t output_offset = output_row_offset;
|
uint32_t output_offset = output_row_offset;
|
||||||
for (uint32_t x = 0; x < src.size_2d.block_width; x++) {
|
for (uint32_t x = 0; x < src.size_2d.block_width; x++) {
|
||||||
auto input_offset =
|
auto input_offset = TextureInfo::TiledOffset2DInner(
|
||||||
TextureInfo::TiledOffset2DInner(offset_x + x, offset_y + y,
|
offset_x + x, offset_y + y, log2_bpp, input_row_offset);
|
||||||
log2_bpp, input_row_offset) >>
|
input_offset >>= log2_bpp;
|
||||||
log2_bpp;
|
|
||||||
|
|
||||||
|
if (src.texture_format == TextureFormat::k_CTX1) {
|
||||||
|
// Convert to R8G8.
|
||||||
|
ConvertTexelCTX1(&dest[output_offset], output_pitch, src_mem,
|
||||||
|
src.endianness);
|
||||||
|
} else {
|
||||||
|
// Generic swap to destination.
|
||||||
TextureSwap(src.endianness, dest + output_offset,
|
TextureSwap(src.endianness, dest + output_offset,
|
||||||
src_mem + input_offset * bytes_per_block,
|
src_mem + input_offset * bytes_per_block,
|
||||||
bytes_per_block);
|
bytes_per_block);
|
||||||
|
}
|
||||||
|
|
||||||
output_offset += bytes_per_block;
|
output_offset += bytes_per_block;
|
||||||
}
|
}
|
||||||
|
|
||||||
output_row_offset += src.size_2d.input_pitch;
|
output_row_offset += output_pitch * output_row_height;
|
||||||
}
|
}
|
||||||
|
|
||||||
copy_region->bufferRowLength = src.size_2d.input_width;
|
copy_region->bufferRowLength = src.size_2d.input_width;
|
||||||
|
@ -1036,7 +983,7 @@ bool TextureCache::ConvertTexture2D(uint8_t* dest,
|
||||||
src.size_2d.logical_height, 1};
|
src.size_2d.logical_height, 1};
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
}
|
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1044,9 +991,6 @@ bool TextureCache::ConvertTextureCube(uint8_t* dest,
|
||||||
VkBufferImageCopy* copy_region,
|
VkBufferImageCopy* copy_region,
|
||||||
const TextureInfo& src) {
|
const TextureInfo& src) {
|
||||||
void* host_address = memory_->TranslatePhysical(src.guest_address);
|
void* host_address = memory_->TranslatePhysical(src.guest_address);
|
||||||
if (src.texture_format == TextureFormat::k_CTX1) {
|
|
||||||
assert_always();
|
|
||||||
} else {
|
|
||||||
if (!src.is_tiled) {
|
if (!src.is_tiled) {
|
||||||
// Fast path copy entire image.
|
// Fast path copy entire image.
|
||||||
TextureSwap(src.endianness, dest, host_address, src.input_length);
|
TextureSwap(src.endianness, dest, host_address, src.input_length);
|
||||||
|
@ -1073,8 +1017,7 @@ bool TextureCache::ConvertTextureCube(uint8_t* dest,
|
||||||
y++, output_base_offset += src.size_cube.input_pitch) {
|
y++, output_base_offset += src.size_cube.input_pitch) {
|
||||||
auto input_base_offset = TextureInfo::TiledOffset2DOuter(
|
auto input_base_offset = TextureInfo::TiledOffset2DOuter(
|
||||||
offset_y + y,
|
offset_y + y,
|
||||||
(src.size_cube.input_width / src.format_info()->block_width),
|
(src.size_cube.input_width / src.format_info()->block_width), bpp);
|
||||||
bpp);
|
|
||||||
for (uint32_t x = 0, output_offset = output_base_offset;
|
for (uint32_t x = 0, output_offset = output_base_offset;
|
||||||
x < src.size_cube.block_width;
|
x < src.size_cube.block_width;
|
||||||
x++, output_offset += bytes_per_block) {
|
x++, output_offset += bytes_per_block) {
|
||||||
|
@ -1097,7 +1040,7 @@ bool TextureCache::ConvertTextureCube(uint8_t* dest,
|
||||||
src.size_cube.logical_height, 6};
|
src.size_cube.logical_height, 6};
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
}
|
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -140,6 +140,9 @@ class TextureCache {
|
||||||
void FlushPendingCommands(VkCommandBuffer command_buffer,
|
void FlushPendingCommands(VkCommandBuffer command_buffer,
|
||||||
VkFence completion_fence);
|
VkFence completion_fence);
|
||||||
|
|
||||||
|
static void ConvertTexelCTX1(uint8_t* dest, size_t dest_pitch,
|
||||||
|
const uint8_t* src, Endian src_endianness);
|
||||||
|
|
||||||
bool ConvertTexture2D(uint8_t* dest, VkBufferImageCopy* copy_region,
|
bool ConvertTexture2D(uint8_t* dest, VkBufferImageCopy* copy_region,
|
||||||
const TextureInfo& src);
|
const TextureInfo& src);
|
||||||
bool ConvertTextureCube(uint8_t* dest, VkBufferImageCopy* copy_region,
|
bool ConvertTextureCube(uint8_t* dest, VkBufferImageCopy* copy_region,
|
||||||
|
|
Loading…
Reference in New Issue