[GPU] Track unaligned (visible) block width in texture info.

[Vulkan] Don't use power of two for mip width/height calculation for host textures.
[Vulkan] Add more Vulkan error logging.
[Vulkan/GPU] Rudimentary texture dumping.
This commit is contained in:
gibbed 2018-06-04 06:08:29 -05:00
parent b35fe935f9
commit b5a88d1a7d
6 changed files with 167 additions and 31 deletions

View File

@ -113,8 +113,6 @@ void Untile(uint8_t* output_buffer, const uint8_t* input_buffer,
assert_not_null(untile_info);
assert_not_null(untile_info->input_format_info);
assert_not_null(untile_info->output_format_info);
assert_true(untile_info->width <= untile_info->input_pitch);
assert_true(untile_info->width <= untile_info->output_pitch);
uint32_t input_bytes_per_block =
untile_info->input_format_info->bytes_per_block();

View File

@ -0,0 +1,110 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2016 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#include <cstdint>
#include <cstdio>
#include <cstring>

#include <gflags/gflags.h>

#include "xenia/gpu/texture_info.h"
DEFINE_bool(texture_dump, false, "Dump textures to DDS");
namespace xe {
namespace gpu {
// Dumps an already-unpacked texture to disk as a .dds file so it can be
// inspected with standard image tools.
//
// src:    texture metadata (dimensions/format are read from the fetch-style
//         fields, which store extents biased by -1).
// buffer: converted texel data for all mips, laid out contiguously.
// length: total byte length of `buffer`.
//
// Unsupported formats are skipped entirely (no file is written).
void TextureDump(const TextureInfo& src, void* buffer, size_t length) {
  // Mirrors the on-disk DDS_HEADER layout (the leading 'DDS ' magic is not
  // part of the header struct and is written separately below).
  struct {
    uint32_t size;
    uint32_t flags;
    uint32_t height;
    uint32_t width;
    uint32_t pitch_or_linear_size;
    uint32_t depth;
    uint32_t mip_levels;
    uint32_t reserved1[11];
    struct {
      uint32_t size;
      uint32_t flags;
      uint32_t fourcc;
      uint32_t rgb_bit_count;
      uint32_t r_bit_mask;
      uint32_t g_bit_mask;
      uint32_t b_bit_mask;
      uint32_t a_bit_mask;
    } pixel_format;
    uint32_t caps[4];
    uint32_t reserved2;
  } dds_header;
  std::memset(&dds_header, 0, sizeof(dds_header));
  dds_header.size = sizeof(dds_header);
  // DDSD_CAPS | DDSD_HEIGHT | DDSD_WIDTH | DDSD_PIXELFORMAT | DDSD_MIPMAPCOUNT
  dds_header.flags = 1u | 2u | 4u | 0x1000u | 0x20000u;
  if (src.is_compressed()) {
    dds_header.flags |= 0x80000u;  // DDSD_LINEARSIZE
  } else {
    dds_header.flags |= 0x8u;  // DDSD_PITCH
  }
  // Stored extents are biased by -1 in the texture info.
  dds_header.height = src.height + 1;
  dds_header.width = src.width + 1;
  dds_header.pitch_or_linear_size = src.GetMipExtent(0, false).block_pitch_h *
                                    src.format_info()->bytes_per_block();
  dds_header.mip_levels = src.mip_levels;
  dds_header.pixel_format.size = sizeof(dds_header.pixel_format);
  // Explicit little-endian fourcc constants instead of multi-character char
  // literals ('1TXD', etc.), whose values are implementation-defined.
  switch (src.format) {
    case TextureFormat::k_DXT1: {
      dds_header.pixel_format.flags = 0x4u;  // DDPF_FOURCC
      dds_header.pixel_format.fourcc = 0x31545844u;  // "DXT1"
      break;
    }
    case TextureFormat::k_DXT2_3: {
      dds_header.pixel_format.flags = 0x4u;  // DDPF_FOURCC
      dds_header.pixel_format.fourcc = 0x33545844u;  // "DXT3"
      break;
    }
    case TextureFormat::k_DXT4_5: {
      dds_header.pixel_format.flags = 0x4u;  // DDPF_FOURCC
      dds_header.pixel_format.fourcc = 0x35545844u;  // "DXT5"
      break;
    }
    case TextureFormat::k_8_8_8_8: {
      dds_header.pixel_format.flags = 0x1u | 0x40u;  // DDPF_ALPHAPIXELS | DDPF_RGB
      dds_header.pixel_format.rgb_bit_count = 32;
      dds_header.pixel_format.r_bit_mask = 0x00FF0000u;
      dds_header.pixel_format.g_bit_mask = 0x0000FF00u;
      dds_header.pixel_format.b_bit_mask = 0x000000FFu;
      dds_header.pixel_format.a_bit_mask = 0xFF000000u;
      break;
    }
    default: {
      // Unsupported format: bail out without writing a file rather than emit
      // one with a bogus pixel format.
      assert_unhandled_case(src.format);
      return;
    }
  }
  dds_header.caps[0] = 8u | 0x1000u;  // DDSCAPS_COMPLEX | DDSCAPS_TEXTURE
  // Monotonic counter keeps successive dumps from overwriting each other.
  static int dump_counter = 0;
  char path[256];
  // snprintf: the format name length is unbounded, so guard against
  // overflowing the fixed-size path buffer.
  std::snprintf(path, sizeof(path), "texture_dumps\\%05d_%.8X_%.8X_%s.dds",
                dump_counter++, src.memory.base_address, src.memory.mip_address,
                src.format_info()->name);
  FILE* handle = fopen(path, "wb");
  if (handle) {
    const uint32_t signature = 0x20534444u;  // "DDS " magic, little-endian
    fwrite(&signature, sizeof(signature), 1, handle);
    fwrite(&dds_header, sizeof(dds_header), 1, handle);
    fwrite(buffer, 1, length, handle);
    fclose(handle);
  }
}
} // namespace gpu
} // namespace xe

View File

@ -27,10 +27,11 @@ static TextureExtent CalculateExtent(const FormatInfo* format_info,
extent.pitch = pitch;
extent.height = height;
extent.block_pitch_h = xe::round_up(extent.pitch, format_info->block_width) /
format_info->block_width;
extent.block_width = xe::round_up(extent.pitch, format_info->block_width) /
format_info->block_width;
extent.block_height = xe::round_up(extent.height, format_info->block_height) /
format_info->block_height;
extent.block_pitch_h = extent.block_width;
extent.block_pitch_v = extent.block_height;
extent.depth = depth;

View File

@ -293,7 +293,8 @@ struct TextureInfo;
struct TextureExtent {
uint32_t pitch; // texel pitch
uint32_t height; // texel height
uint32_t block_height; // # of vertical blocks
uint32_t block_width; // # of horizontal visible blocks
uint32_t block_height; // # of vertical visible blocks
uint32_t block_pitch_h; // # of horizontal pitch blocks
uint32_t block_pitch_v; // # of vertical pitch blocks
uint32_t depth;

View File

@ -21,14 +21,19 @@
#include "xenia/gpu/vulkan/vulkan_gpu_flags.h"
#include "xenia/ui/vulkan/vulkan_mem_alloc.h"
DEFINE_bool(enable_mip_watches, false, "Enable mipmap watches");
DECLARE_bool(texture_dump);
namespace xe {
namespace gpu {
void TextureDump(const TextureInfo& src, void* buffer, size_t length);
namespace vulkan {
using xe::ui::vulkan::CheckResult;
DEFINE_bool(enable_mip_watches, false, "Enable mipmap watches");
constexpr uint32_t kMaxTextureSamplers = 32;
constexpr VkDeviceSize kStagingBufferSize = 64 * 1024 * 1024;
@ -935,8 +940,8 @@ bool TextureCache::ConvertTexture(uint8_t* dest, VkBufferImageCopy* copy_region,
std::memset(&untile_info, 0, sizeof(untile_info));
untile_info.offset_x = offset_x;
untile_info.offset_y = offset_y;
untile_info.width = dst_extent.block_pitch_h;
untile_info.height = dst_extent.block_height;
untile_info.width = src_extent.block_width;
untile_info.height = src_extent.block_height;
untile_info.input_pitch = src_extent.block_pitch_h;
untile_info.output_pitch = dst_extent.block_pitch_h;
untile_info.input_format_info = src.format_info();
@ -972,12 +977,18 @@ bool TextureCache::UploadTexture(VkCommandBuffer command_buffer,
size_t unpack_length = ComputeTextureStorage(src);
XELOGGPU(
"Uploading texture @ 0x%.8X/0x%.8X (%dx%dx%d, format: "
"%s, dim: %s, levels: %d, tiled: %s, unpack length: 0x%.8X)",
"Uploading texture @ 0x%.8X/0x%.8X (%ux%ux%u, format: %s, dim: %s, "
"levels: %u, pitch: %u, tiled: %s, packed mips: %s, unpack length: "
"0x%.8X)",
src.memory.base_address, src.memory.mip_address, src.width + 1,
src.height + 1, src.depth + 1, src.format_info()->name,
get_dimension_name(src.dimension), src.mip_levels,
src.is_tiled ? "yes" : "no", unpack_length);
get_dimension_name(src.dimension), src.mip_levels, src.pitch,
src.is_tiled ? "yes" : "no", src.has_packed_mips ? "yes" : "no",
unpack_length);
XELOGGPU("Extent: %ux%ux%u %u,%u,%u", src.extent.pitch, src.extent.height,
src.extent.depth, src.extent.block_pitch_h, src.extent.block_height,
src.extent.block_pitch_v);
if (!unpack_length) {
XELOGW("Failed to compute texture storage!");
@ -1027,29 +1038,35 @@ bool TextureCache::UploadTexture(VkCommandBuffer command_buffer,
// TODO: If the GPU supports it, we can submit a compute batch to convert the
// texture and copy it to its destination. Otherwise, fallback to conversion
// on the CPU.
std::vector<VkBufferImageCopy> copy_regions(src.mip_levels);
uint32_t copy_region_count = src.mip_levels;
std::vector<VkBufferImageCopy> copy_regions(copy_region_count);
// Upload all mips.
auto unpack_buffer = reinterpret_cast<uint8_t*>(alloc->host_ptr);
VkDeviceSize unpack_offset = 0;
for (uint32_t mip = 0; mip < src.mip_levels; mip++) {
if (!ConvertTexture(&unpack_buffer[unpack_offset], &copy_regions[mip], mip,
src)) {
for (uint32_t mip = 0, region = 0; mip < src.mip_levels; mip++, region++) {
if (!ConvertTexture(&unpack_buffer[unpack_offset], &copy_regions[region],
mip, src)) {
XELOGW("Failed to convert texture mip %u!", mip);
return false;
}
copy_regions[mip].bufferOffset = alloc->offset + unpack_offset;
copy_regions[mip].imageOffset = {0, 0, 0};
copy_regions[region].bufferOffset = alloc->offset + unpack_offset;
copy_regions[region].imageOffset = {0, 0, 0};
/*
XELOGGPU("Mip %u %ux%ux%u @ 0x%X", mip, copy_regions[mip].imageExtent.width,
copy_regions[mip].imageExtent.height,
copy_regions[mip].imageExtent.depth, buffer_offset);
XELOGGPU("Mip %u %ux%ux%u @ 0x%X", mip,
copy_regions[region].imageExtent.width,
copy_regions[region].imageExtent.height,
copy_regions[region].imageExtent.depth, unpack_offset);
*/
unpack_offset += ComputeMipStorage(src, mip);
}
if (FLAGS_texture_dump) {
TextureDump(src, unpack_buffer, unpack_length);
}
// Transition the texture into a transfer destination layout.
VkImageMemoryBarrier barrier;
barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
@ -1092,7 +1109,7 @@ bool TextureCache::UploadTexture(VkCommandBuffer command_buffer,
vkCmdCopyBufferToImage(command_buffer, staging_buffer_.gpu_buffer(),
dest->image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
src.mip_levels, copy_regions.data());
copy_region_count, copy_regions.data());
// Now transition the texture into a shader readonly source.
barrier.srcAccessMask = barrier.dstAccessMask;
@ -1141,8 +1158,8 @@ TextureExtent TextureCache::GetMipExtent(const TextureInfo& src, uint32_t mip) {
extent = TextureExtent::Calculate(format_info, width, height, depth, width,
false);
} else {
uint32_t mip_width = xe::next_pow2(width) >> mip;
uint32_t mip_height = xe::next_pow2(height) >> mip;
uint32_t mip_width = std::max(1u, width >> mip);
uint32_t mip_height = std::max(1u, height >> mip);
extent = TextureExtent::Calculate(format_info, mip_width, mip_height, depth,
mip_width, false);
}
@ -1158,8 +1175,8 @@ uint32_t TextureCache::ComputeMipStorage(const FormatInfo* format_info,
extent = TextureExtent::Calculate(format_info, width, height, depth, false,
false);
} else {
uint32_t mip_width = xe::next_pow2(width) >> mip;
uint32_t mip_height = xe::next_pow2(height) >> mip;
uint32_t mip_width = std::max(1u, width >> mip);
uint32_t mip_height = std::max(1u, height >> mip);
extent = TextureExtent::Calculate(format_info, mip_width, mip_height, depth,
false, false);
}
@ -1180,11 +1197,13 @@ uint32_t TextureCache::ComputeTextureStorage(const TextureInfo& src) {
uint32_t height = src.height + 1;
uint32_t depth = src.depth + 1;
uint32_t length = 0;
length += ComputeMipStorage(format_info, width, height, depth, 0);
if (src.memory.mip_address) {
for (uint32_t mip = 1; mip < src.mip_levels; mip++) {
length += ComputeMipStorage(format_info, width, height, depth, mip);
for (uint32_t mip = 0; mip < src.mip_levels; ++mip) {
if (mip == 0 && !src.memory.base_address) {
continue;
} else if (mip > 0 && !src.memory.mip_address) {
continue;
}
length += ComputeMipStorage(format_info, width, height, depth, mip);
}
return length;
}
@ -1389,6 +1408,7 @@ bool TextureCache::SetupTextureBinding(VkCommandBuffer command_buffer,
// Disabled?
// TODO(benvanik): reset sampler.
if (fetch.type != 0x2) {
XELOGGPU("Fetch type is not 2!");
return false;
}
@ -1409,6 +1429,7 @@ bool TextureCache::SetupTextureBinding(VkCommandBuffer command_buffer,
auto texture = Demand(texture_info, command_buffer, completion_fence);
auto sampler = Demand(sampler_info);
if (texture == nullptr || sampler == nullptr) {
XELOGE("Texture or sampler is NULL!");
return false;
}

View File

@ -10,6 +10,7 @@
#include "xenia/ui/vulkan/vulkan_immediate_drawer.h"
#include "xenia/base/assert.h"
#include "xenia/base/logging.h"
#include "xenia/base/math.h"
#include "xenia/ui/graphics_context.h"
#include "xenia/ui/vulkan/vulkan_context.h"
@ -835,6 +836,10 @@ void VulkanImmediateDrawer::Draw(const ImmediateDraw& draw) {
}
auto texture_set = texture->descriptor_set();
if (!texture_set) {
XELOGW("Failed to acquire texture descriptor set for immediate drawer!");
}
vkCmdBindDescriptorSets(current_cmd_buffer_,
VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline_layout_,
0, 1, &texture_set, 0, nullptr);