From a95de67b8ce9e80fc6d1082393fbeccb69fcfb85 Mon Sep 17 00:00:00 2001
From: gibbed
Date: Tue, 10 Jan 2017 00:04:41 -0600
Subject: [PATCH] 1D textures. Maybe.

---
 src/xenia/gpu/gl4/texture_cache.cc    | 60 +++++++++++++++-
 src/xenia/gpu/gl4/texture_cache.h     |  1 +
 src/xenia/gpu/texture_info.cc         | 28 +++++++-
 src/xenia/gpu/texture_info.h          |  7 +-
 src/xenia/gpu/vulkan/texture_cache.cc | 98 +++++++++++++++++++++++++++
 src/xenia/gpu/vulkan/texture_cache.h  |  4 ++
 6 files changed, 195 insertions(+), 3 deletions(-)

diff --git a/src/xenia/gpu/gl4/texture_cache.cc b/src/xenia/gpu/gl4/texture_cache.cc
index 72e1c9639..7e9e68840 100644
--- a/src/xenia/gpu/gl4/texture_cache.cc
+++ b/src/xenia/gpu/gl4/texture_cache.cc
@@ -477,13 +477,15 @@ TextureCache::TextureEntry* TextureCache::LookupOrInsertTexture(
   // Upload/convert.
   bool uploaded = false;
   switch (texture_info.dimension) {
+    case Dimension::k1D:
+      uploaded = UploadTexture1D(entry->handle, texture_info);
+      break;
     case Dimension::k2D:
       uploaded = UploadTexture2D(entry->handle, texture_info);
       break;
     case Dimension::kCube:
       uploaded = UploadTextureCube(entry->handle, texture_info);
       break;
-    case Dimension::k1D:
     case Dimension::k3D:
       assert_unhandled_case(texture_info.dimension);
       return nullptr;
@@ -706,6 +708,62 @@ void TextureSwap(Endian endianness, void* dest, const void* src,
   }
 }
 
+bool TextureCache::UploadTexture1D(GLuint texture,
+                                   const TextureInfo& texture_info) {
+  SCOPE_profile_cpu_f("gpu");
+  const auto host_address =
+      memory_->TranslatePhysical(texture_info.guest_address);
+
+  const auto& config =
+      texture_configs[uint32_t(texture_info.format_info->format)];
+  if (config.format == GL_INVALID_ENUM) {
+    assert_always("Unhandled texture format");
+    return false;
+  }
+
+  size_t unpack_length = texture_info.output_length;
+  glTextureStorage1D(texture, 1, config.internal_format,
+                     texture_info.size_1d.output_width);
+
+  auto allocation = scratch_buffer_->Acquire(unpack_length);
+
+  if (!texture_info.is_tiled) {
+    if (texture_info.size_1d.input_pitch == texture_info.size_1d.output_pitch) {
+      TextureSwap(texture_info.endianness, allocation.host_ptr, host_address,
+                  unpack_length);
+    } else {
+      assert_always();
+    }
+  } else {
+    assert_always();
+  }
+  size_t unpack_offset = allocation.offset;
+  scratch_buffer_->Commit(std::move(allocation));
+  // TODO(benvanik): avoid flush on entire buffer by using another texture
+  // buffer.
+  scratch_buffer_->Flush();
+
+  glBindBuffer(GL_PIXEL_UNPACK_BUFFER, scratch_buffer_->handle());
+  if (texture_info.is_compressed()) {
+    glCompressedTextureSubImage1D(
+        texture, 0, 0, texture_info.size_1d.output_width, config.format,
+        static_cast<GLsizei>(unpack_length),
+        reinterpret_cast<void*>(unpack_offset));
+  } else {
+    // Most of these don't seem to have an effect on compressed images.
+    // glPixelStorei(GL_UNPACK_SWAP_BYTES, GL_TRUE);
+    // glPixelStorei(GL_UNPACK_ALIGNMENT, texture_info.texel_pitch);
+    // glPixelStorei(GL_UNPACK_ROW_LENGTH, texture_info.size_2d.input_width);
+    glPixelStorei(GL_UNPACK_ALIGNMENT, 1);
+
+    glTextureSubImage1D(texture, 0, 0, texture_info.size_1d.output_width,
+                        config.format, config.type,
+                        reinterpret_cast<void*>(unpack_offset));
+  }
+  glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
+  return true;
+}
+
 bool TextureCache::UploadTexture2D(GLuint texture,
                                    const TextureInfo& texture_info) {
   SCOPE_profile_cpu_f("gpu");
diff --git a/src/xenia/gpu/gl4/texture_cache.h b/src/xenia/gpu/gl4/texture_cache.h
index d55aa37a1..4f018c329 100644
--- a/src/xenia/gpu/gl4/texture_cache.h
+++ b/src/xenia/gpu/gl4/texture_cache.h
@@ -96,6 +96,7 @@ class TextureCache {
                                       uint64_t opt_hash = 0);
   void EvictTexture(TextureEntry* entry);
 
+  bool UploadTexture1D(GLuint texture, const TextureInfo& texture_info);
   bool UploadTexture2D(GLuint texture, const TextureInfo& texture_info);
   bool UploadTextureCube(GLuint texture, const TextureInfo& texture_info);
 
diff --git a/src/xenia/gpu/texture_info.cc b/src/xenia/gpu/texture_info.cc
index 0d9cb91fd..8c226f538 100644
--- a/src/xenia/gpu/texture_info.cc
+++ b/src/xenia/gpu/texture_info.cc
@@ -165,7 +165,33 @@ bool TextureInfo::Prepare(const xe_gpu_texture_fetch_t& fetch,
 
 void TextureInfo::CalculateTextureSizes1D(const xe_gpu_texture_fetch_t& fetch) {
   // ?
-  size_1d.width = fetch.size_1d.width;
+  size_1d.logical_width = 1 + fetch.size_1d.width;
+
+  uint32_t block_width =
+      xe::round_up(size_1d.logical_width, format_info->block_width) /
+      format_info->block_width;
+
+  uint32_t tile_width = uint32_t(std::ceil(block_width / 32.0f));
+  size_1d.block_width = tile_width * 32;
+
+  uint32_t bytes_per_block =
+      format_info->block_width * format_info->bits_per_pixel / 8;
+
+  uint32_t byte_pitch = tile_width * 32 * bytes_per_block;
+  if (!is_tiled) {
+    // Each row must be a multiple of 256 in linear textures.
+    byte_pitch = xe::round_up(byte_pitch, 256);
+  }
+
+  size_1d.input_width = tile_width * 32 * format_info->block_width;
+
+  size_1d.output_width = block_width * format_info->block_width;
+
+  size_1d.input_pitch = byte_pitch;
+  size_1d.output_pitch = block_width * bytes_per_block;
+
+  input_length = size_1d.input_pitch;
+  output_length = size_1d.output_pitch;
 }
 
 void TextureInfo::CalculateTextureSizes2D(const xe_gpu_texture_fetch_t& fetch) {
diff --git a/src/xenia/gpu/texture_info.h b/src/xenia/gpu/texture_info.h
index 0cb2ed2ba..1881a6e4b 100644
--- a/src/xenia/gpu/texture_info.h
+++ b/src/xenia/gpu/texture_info.h
@@ -231,7 +231,12 @@ struct TextureInfo {
 
   union {
     struct {
-      uint32_t width;
+      uint32_t logical_width;
+      uint32_t block_width;
+      uint32_t input_width;
+      uint32_t input_pitch;
+      uint32_t output_width;
+      uint32_t output_pitch;
     } size_1d;
     struct {
       uint32_t logical_width;
diff --git a/src/xenia/gpu/vulkan/texture_cache.cc b/src/xenia/gpu/vulkan/texture_cache.cc
index 8091a295d..bc82b416d 100644
--- a/src/xenia/gpu/vulkan/texture_cache.cc
+++ b/src/xenia/gpu/vulkan/texture_cache.cc
@@ -420,6 +420,11 @@ TextureCache::Texture* TextureCache::Demand(const TextureInfo& texture_info,
 
   bool uploaded = false;
   switch (texture_info.dimension) {
+    case Dimension::k1D: {
+      uploaded = UploadTexture1D(command_buffer, completion_fence, texture,
+                                 texture_info);
+    } break;
+
     case Dimension::k2D: {
       uploaded = UploadTexture2D(command_buffer, completion_fence, texture,
                                  texture_info);
@@ -822,6 +827,19 @@ void TextureCache::FlushPendingCommands(VkCommandBuffer command_buffer,
   vkBeginCommandBuffer(command_buffer, &begin_info);
 }
 
+void TextureCache::ConvertTexture1D(uint8_t* dest, const TextureInfo& src) {
+  void* host_address = memory_->TranslatePhysical(src.guest_address);
+  if (!src.is_tiled) {
+    if (src.size_1d.input_pitch == src.size_1d.output_pitch) {
+      TextureSwap(src.endianness, dest, host_address, src.output_length);
+    } else {
+      assert_always();
+    }
+  } else {
+    assert_always();
+  }
+}
+
 void TextureCache::ConvertTexture2D(uint8_t* dest, const TextureInfo& src) {
   void* host_address = memory_->TranslatePhysical(src.guest_address);
   if (!src.is_tiled) {
@@ -936,6 +954,86 @@ void TextureCache::ConvertTextureCube(uint8_t* dest, const TextureInfo& src) {
   }
 }
 
+bool TextureCache::UploadTexture1D(VkCommandBuffer command_buffer,
+                                   VkFence completion_fence, Texture* dest,
+                                   const TextureInfo& src) {
+#if FINE_GRAINED_DRAW_SCOPES
+  SCOPE_profile_cpu_f("gpu");
+#endif  // FINE_GRAINED_DRAW_SCOPES
+
+  assert_true(src.dimension == Dimension::k1D);
+
+  size_t unpack_length = src.output_length;
+  if (!staging_buffer_.CanAcquire(unpack_length)) {
+    // Need to have unique memory for every upload for at least one frame. If we
+    // run out of memory, we need to flush all queued upload commands to the
+    // GPU.
+    FlushPendingCommands(command_buffer, completion_fence);
+
+    // Uploads have been flushed. Continue.
+    if (!staging_buffer_.CanAcquire(unpack_length)) {
+      // The staging buffer isn't big enough to hold this texture.
+      XELOGE(
+          "TextureCache staging buffer is too small! (uploading 0x%.8X bytes)",
+          unpack_length);
+      assert_always();
+      return false;
+    }
+  }
+
+  // Grab some temporary memory for staging.
+  auto alloc = staging_buffer_.Acquire(unpack_length, completion_fence);
+  assert_not_null(alloc);
+
+  // Upload texture into GPU memory.
+  // TODO: If the GPU supports it, we can submit a compute batch to convert the
+  // texture and copy it to its destination. Otherwise, fallback to conversion
+  // on the CPU.
+  ConvertTexture1D(reinterpret_cast<uint8_t*>(alloc->host_ptr), src);
+  staging_buffer_.Flush(alloc);
+
+  // Transition the texture into a transfer destination layout.
+  VkImageMemoryBarrier barrier;
+  barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
+  barrier.pNext = nullptr;
+  barrier.srcAccessMask = 0;
+  barrier.dstAccessMask =
+      VK_ACCESS_TRANSFER_WRITE_BIT | VK_ACCESS_HOST_WRITE_BIT;
+  barrier.oldLayout = dest->image_layout;
+  barrier.newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
+  barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
+  barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
+  barrier.image = dest->image;
+  barrier.subresourceRange = {VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1};
+  vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
+                       VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, 0, 0, nullptr, 0,
+                       nullptr, 1, &barrier);
+
+  // Now move the converted texture into the destination.
+  VkBufferImageCopy copy_region;
+  copy_region.bufferOffset = alloc->offset;
+  copy_region.bufferRowLength = src.size_1d.output_width;
+  copy_region.bufferImageHeight = 1;
+  copy_region.imageSubresource = {VK_IMAGE_ASPECT_COLOR_BIT, 0, 0, 1};
+  copy_region.imageOffset = {0, 0, 0};
+  copy_region.imageExtent = {src.size_1d.output_width, 1, 1};
+  vkCmdCopyBufferToImage(command_buffer, staging_buffer_.gpu_buffer(),
+                         dest->image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1,
+                         &copy_region);
+
+  // Now transition the texture into a shader readonly source.
+  barrier.srcAccessMask = barrier.dstAccessMask;
+  barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT;
+  barrier.oldLayout = barrier.newLayout;
+  barrier.newLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
+  vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
+                       VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, 0, 0, nullptr, 0,
+                       nullptr, 1, &barrier);
+
+  dest->image_layout = barrier.newLayout;
+  return true;
+}
+
 bool TextureCache::UploadTexture2D(VkCommandBuffer command_buffer,
                                    VkFence completion_fence, Texture* dest,
                                    const TextureInfo& src) {
diff --git a/src/xenia/gpu/vulkan/texture_cache.h b/src/xenia/gpu/vulkan/texture_cache.h
index a24ef7346..89ebc7ff0 100644
--- a/src/xenia/gpu/vulkan/texture_cache.h
+++ b/src/xenia/gpu/vulkan/texture_cache.h
@@ -148,12 +148,16 @@ class TextureCache {
   void FlushPendingCommands(VkCommandBuffer command_buffer,
                             VkFence completion_fence);
 
+  void ConvertTexture1D(uint8_t* dest, const TextureInfo& src);
   void ConvertTexture2D(uint8_t* dest, const TextureInfo& src);
   void ConvertTextureCube(uint8_t* dest, const TextureInfo& src);
 
   // Queues commands to upload a texture from system memory, applying any
   // conversions necessary. This may flush the command buffer to the GPU if we
   // run out of staging memory.
+  bool UploadTexture1D(VkCommandBuffer command_buffer, VkFence completion_fence,
+                       Texture* dest, const TextureInfo& src);
+
   bool UploadTexture2D(VkCommandBuffer command_buffer, VkFence completion_fence,
                        Texture* dest, const TextureInfo& src);
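
Note (editor's addition, not part of the patch): the standalone sketch below mirrors the arithmetic that TextureInfo::CalculateTextureSizes1D performs in the hunk above, applied to a hypothetical untiled texture with 32 bits per pixel, a format block width of 1 texel, and a logical width of 70 texels (fetch.size_1d.width == 69). The round_up helper and the constant format values are stand-ins for xe::round_up and the format_info fields, chosen purely for illustration.

#include <cmath>
#include <cstdint>
#include <cstdio>

namespace {
// Stand-in for xe::round_up: rounds value up to the next multiple.
uint32_t round_up(uint32_t value, uint32_t multiple) {
  return (value + multiple - 1) / multiple * multiple;
}
}  // namespace

int main() {
  const uint32_t logical_width = 70;   // 1 + fetch.size_1d.width (width 69)
  const uint32_t fmt_block_width = 1;  // format_info->block_width (assumed)
  const uint32_t bits_per_pixel = 32;  // format_info->bits_per_pixel (assumed)
  const bool is_tiled = false;

  // Width in blocks, then whole 32-block tiles, as in the patch.
  uint32_t block_width =
      round_up(logical_width, fmt_block_width) / fmt_block_width;   // 70
  uint32_t tile_width = uint32_t(std::ceil(block_width / 32.0f));   // 3

  uint32_t bytes_per_block = fmt_block_width * bits_per_pixel / 8;  // 4
  uint32_t byte_pitch = tile_width * 32 * bytes_per_block;          // 384
  if (!is_tiled) {
    // Each row must be a multiple of 256 bytes in linear textures.
    byte_pitch = round_up(byte_pitch, 256);  // 512
  }

  uint32_t input_width = tile_width * 32 * fmt_block_width;   // 96 texels
  uint32_t output_width = block_width * fmt_block_width;      // 70 texels
  uint32_t input_pitch = byte_pitch;                          // 512 bytes
  uint32_t output_pitch = block_width * bytes_per_block;      // 280 bytes

  // input_pitch != output_pitch here, so the patch's linear fast path
  // (the matching-pitch TextureSwap branch) would not cover this texture.
  std::printf("in: %u px / %u B, out: %u px / %u B\n", input_width, input_pitch,
              output_width, output_pitch);
  return 0;
}

For comparison, a width that fills whole 256-byte rows (for example 64 texels at 32 bits per pixel) gives input_pitch == output_pitch, which is the only case the new UploadTexture1D/ConvertTexture1D paths handle so far; the other branches still hit assert_always().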