1D textures. Maybe.
commit a95de67b8c
parent cec8932605
@@ -477,13 +477,15 @@ TextureCache::TextureEntry* TextureCache::LookupOrInsertTexture(
   // Upload/convert.
   bool uploaded = false;
   switch (texture_info.dimension) {
+    case Dimension::k1D:
+      uploaded = UploadTexture1D(entry->handle, texture_info);
+      break;
     case Dimension::k2D:
       uploaded = UploadTexture2D(entry->handle, texture_info);
       break;
     case Dimension::kCube:
       uploaded = UploadTextureCube(entry->handle, texture_info);
       break;
-    case Dimension::k1D:
     case Dimension::k3D:
       assert_unhandled_case(texture_info.dimension);
       return nullptr;
@@ -706,6 +708,62 @@ void TextureSwap(Endian endianness, void* dest, const void* src,
   }
 }
 
+bool TextureCache::UploadTexture1D(GLuint texture,
+                                   const TextureInfo& texture_info) {
+  SCOPE_profile_cpu_f("gpu");
+  const auto host_address =
+      memory_->TranslatePhysical(texture_info.guest_address);
+
+  const auto& config =
+      texture_configs[uint32_t(texture_info.format_info->format)];
+  if (config.format == GL_INVALID_ENUM) {
+    assert_always("Unhandled texture format");
+    return false;
+  }
+
+  size_t unpack_length = texture_info.output_length;
+  glTextureStorage1D(texture, 1, config.internal_format,
+                     texture_info.size_1d.output_width);
+
+  auto allocation = scratch_buffer_->Acquire(unpack_length);
+
+  if (!texture_info.is_tiled) {
+    if (texture_info.size_1d.input_pitch == texture_info.size_1d.output_pitch) {
+      TextureSwap(texture_info.endianness, allocation.host_ptr, host_address,
+                  unpack_length);
+    } else {
+      assert_always();
+    }
+  } else {
+    assert_always();
+  }
+  size_t unpack_offset = allocation.offset;
+  scratch_buffer_->Commit(std::move(allocation));
+  // TODO(benvanik): avoid flush on entire buffer by using another texture
+  // buffer.
+  scratch_buffer_->Flush();
+
+  glBindBuffer(GL_PIXEL_UNPACK_BUFFER, scratch_buffer_->handle());
+  if (texture_info.is_compressed()) {
+    glCompressedTextureSubImage1D(
+        texture, 0, 0, texture_info.size_1d.output_width, config.format,
+        static_cast<GLsizei>(unpack_length),
+        reinterpret_cast<void*>(unpack_offset));
+  } else {
+    // Most of these don't seem to have an effect on compressed images.
+    // glPixelStorei(GL_UNPACK_SWAP_BYTES, GL_TRUE);
+    // glPixelStorei(GL_UNPACK_ALIGNMENT, texture_info.texel_pitch);
+    // glPixelStorei(GL_UNPACK_ROW_LENGTH, texture_info.size_2d.input_width);
+    glPixelStorei(GL_UNPACK_ALIGNMENT, 1);
+
+    glTextureSubImage1D(texture, 0, 0, texture_info.size_1d.output_width,
+                        config.format, config.type,
+                        reinterpret_cast<void*>(unpack_offset));
+  }
+  glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
+  return true;
+}
+
 bool TextureCache::UploadTexture2D(GLuint texture,
                                    const TextureInfo& texture_info) {
   SCOPE_profile_cpu_f("gpu");
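A note on the GL path above: because a pixel unpack buffer is bound when glTextureSubImage1D / glCompressedTextureSubImage1D are called, their final "pixels" argument is interpreted as a byte offset into that buffer rather than a client pointer, which is why unpack_offset is cast to void*. A minimal standalone sketch of the same pattern follows; the 256-texel width, RGBA8 format, and buffer names are illustrative assumptions, not values from the diff, and a current GL 4.5+ context is assumed.

// Sketch (not from the commit): 1D texture upload through a pixel unpack
// buffer (PBO) with GL 4.5 direct state access.
void UploadExample1D() {
  GLuint texture = 0;
  glCreateTextures(GL_TEXTURE_1D, 1, &texture);
  glTextureStorage1D(texture, 1, GL_RGBA8, 256);  // immutable storage, 1 mip

  GLuint pbo = 0;
  glCreateBuffers(1, &pbo);
  glNamedBufferData(pbo, 256 * 4, nullptr, GL_STREAM_DRAW);  // scratch space
  // ... write the converted/byte-swapped texels into the PBO here ...

  glBindBuffer(GL_PIXEL_UNPACK_BUFFER, pbo);
  GLintptr offset_in_pbo = 0;  // where the converted texels start in the PBO
  // With a PBO bound, the last argument is a buffer offset, not a pointer.
  glTextureSubImage1D(texture, 0, 0, 256, GL_RGBA, GL_UNSIGNED_BYTE,
                      reinterpret_cast<void*>(offset_in_pbo));
  glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
}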
@@ -96,6 +96,7 @@ class TextureCache {
                                    uint64_t opt_hash = 0);
   void EvictTexture(TextureEntry* entry);
 
+  bool UploadTexture1D(GLuint texture, const TextureInfo& texture_info);
   bool UploadTexture2D(GLuint texture, const TextureInfo& texture_info);
   bool UploadTextureCube(GLuint texture, const TextureInfo& texture_info);
 
@@ -165,7 +165,33 @@ bool TextureInfo::Prepare(const xe_gpu_texture_fetch_t& fetch,
 
 void TextureInfo::CalculateTextureSizes1D(const xe_gpu_texture_fetch_t& fetch) {
   // ?
-  size_1d.width = fetch.size_1d.width;
+  size_1d.logical_width = 1 + fetch.size_1d.width;
+
+  uint32_t block_width =
+      xe::round_up(size_1d.logical_width, format_info->block_width) /
+      format_info->block_width;
+
+  uint32_t tile_width = uint32_t(std::ceil(block_width / 32.0f));
+  size_1d.block_width = tile_width * 32;
+
+  uint32_t bytes_per_block =
+      format_info->block_width * format_info->bits_per_pixel / 8;
+
+  uint32_t byte_pitch = tile_width * 32 * bytes_per_block;
+  if (!is_tiled) {
+    // Each row must be a multiple of 256 in linear textures.
+    byte_pitch = xe::round_up(byte_pitch, 256);
+  }
+
+  size_1d.input_width = tile_width * 32 * format_info->block_width;
+
+  size_1d.output_width = block_width * format_info->block_width;
+
+  size_1d.input_pitch = byte_pitch;
+  size_1d.output_pitch = block_width * bytes_per_block;
+
+  input_length = size_1d.input_pitch;
+  output_length = size_1d.output_pitch;
 }
 
 void TextureInfo::CalculateTextureSizes2D(const xe_gpu_texture_fetch_t& fetch) {
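For intuition, here is the math above traced for one hypothetical case: a linear (untiled) 8-bits-per-pixel format with a 1-texel block size and a fetched width field of 99, i.e. a logical width of 100 texels. The format and width are made up for illustration; only the arithmetic mirrors the function.

// Hypothetical walkthrough: block_width (format) = 1, bits_per_pixel = 8,
// fetch.size_1d.width = 99, is_tiled = false.
constexpr uint32_t logical_width = 1 + 99;           // 100 texels
constexpr uint32_t block_width = 100;                // round_up(100, 1) / 1
constexpr uint32_t tile_width = (100 + 31) / 32;     // ceil(100 / 32) = 4
constexpr uint32_t padded_blocks = tile_width * 32;  // 128 -> size_1d.block_width
constexpr uint32_t bytes_per_block = 1 * 8 / 8;      // 1 byte
constexpr uint32_t byte_pitch_raw = padded_blocks;   // 128 bytes
constexpr uint32_t byte_pitch = 256;                 // round_up(128, 256), linear
// Resulting sizes: input_width = 128 texels, output_width = 100 texels,
// input_pitch = 256 bytes, output_pitch = 100 bytes. Since input_pitch !=
// output_pitch, a texture like this would currently fall into the
// assert_always() branch of the UploadTexture1D paths above.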
@@ -231,7 +231,12 @@ struct TextureInfo {
 
   union {
     struct {
-      uint32_t width;
+      uint32_t logical_width;
+      uint32_t block_width;
+      uint32_t input_width;
+      uint32_t input_pitch;
+      uint32_t output_width;
+      uint32_t output_pitch;
     } size_1d;
     struct {
       uint32_t logical_width;
@@ -420,6 +420,11 @@ TextureCache::Texture* TextureCache::Demand(const TextureInfo& texture_info,
 
   bool uploaded = false;
   switch (texture_info.dimension) {
+    case Dimension::k1D: {
+      uploaded = UploadTexture1D(command_buffer, completion_fence, texture,
+                                 texture_info);
+    } break;
+
     case Dimension::k2D: {
       uploaded = UploadTexture2D(command_buffer, completion_fence, texture,
                                  texture_info);
@@ -822,6 +827,19 @@ void TextureCache::FlushPendingCommands(VkCommandBuffer command_buffer,
     vkBeginCommandBuffer(command_buffer, &begin_info);
   }
 
+void TextureCache::ConvertTexture1D(uint8_t* dest, const TextureInfo& src) {
+  void* host_address = memory_->TranslatePhysical(src.guest_address);
+  if (!src.is_tiled) {
+    if (src.size_1d.input_pitch == src.size_1d.output_pitch) {
+      TextureSwap(src.endianness, dest, host_address, src.output_length);
+    } else {
+      assert_always();
+    }
+  } else {
+    assert_always();
+  }
+}
+
 void TextureCache::ConvertTexture2D(uint8_t* dest, const TextureInfo& src) {
   void* host_address = memory_->TranslatePhysical(src.guest_address);
   if (!src.is_tiled) {
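ConvertTexture1D leans entirely on TextureSwap (defined elsewhere in the file) because an untiled 1D texture whose input and output pitches match only needs its bytes put back into host order. As a rough illustration of what such an endian fixup does for one common mode, here is a standalone sketch of a 32-bit byte swap over a buffer; it is not the emulator's TextureSwap, which dispatches on the guest Endian mode and covers more cases.

#include <cstddef>
#include <cstdint>

// Illustrative only: copy `length` bytes while swapping every 32-bit word
// from big-endian guest order to little-endian host order. Assumes length
// is a multiple of 4.
void SwapCopy32(void* dest, const void* src, size_t length) {
  auto* out = reinterpret_cast<uint8_t*>(dest);
  const auto* in = reinterpret_cast<const uint8_t*>(src);
  for (size_t i = 0; i + 4 <= length; i += 4) {
    out[i + 0] = in[i + 3];
    out[i + 1] = in[i + 2];
    out[i + 2] = in[i + 1];
    out[i + 3] = in[i + 0];
  }
}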
@@ -936,6 +954,86 @@ void TextureCache::ConvertTextureCube(uint8_t* dest, const TextureInfo& src) {
   }
 }
 
+bool TextureCache::UploadTexture1D(VkCommandBuffer command_buffer,
+                                   VkFence completion_fence, Texture* dest,
+                                   const TextureInfo& src) {
+#if FINE_GRAINED_DRAW_SCOPES
+  SCOPE_profile_cpu_f("gpu");
+#endif  // FINE_GRAINED_DRAW_SCOPES
+
+  assert_true(src.dimension == Dimension::k1D);
+
+  size_t unpack_length = src.output_length;
+  if (!staging_buffer_.CanAcquire(unpack_length)) {
+    // Need to have unique memory for every upload for at least one frame. If we
+    // run out of memory, we need to flush all queued upload commands to the
+    // GPU.
+    FlushPendingCommands(command_buffer, completion_fence);
+
+    // Uploads have been flushed. Continue.
+    if (!staging_buffer_.CanAcquire(unpack_length)) {
+      // The staging buffer isn't big enough to hold this texture.
+      XELOGE(
+          "TextureCache staging buffer is too small! (uploading 0x%.8X bytes)",
+          unpack_length);
+      assert_always();
+      return false;
+    }
+  }
+
+  // Grab some temporary memory for staging.
+  auto alloc = staging_buffer_.Acquire(unpack_length, completion_fence);
+  assert_not_null(alloc);
+
+  // Upload texture into GPU memory.
+  // TODO: If the GPU supports it, we can submit a compute batch to convert the
+  // texture and copy it to its destination. Otherwise, fallback to conversion
+  // on the CPU.
+  ConvertTexture1D(reinterpret_cast<uint8_t*>(alloc->host_ptr), src);
+  staging_buffer_.Flush(alloc);
+
+  // Transition the texture into a transfer destination layout.
+  VkImageMemoryBarrier barrier;
+  barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
+  barrier.pNext = nullptr;
+  barrier.srcAccessMask = 0;
+  barrier.dstAccessMask =
+      VK_ACCESS_TRANSFER_WRITE_BIT | VK_ACCESS_HOST_WRITE_BIT;
+  barrier.oldLayout = dest->image_layout;
+  barrier.newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
+  barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
+  barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
+  barrier.image = dest->image;
+  barrier.subresourceRange = {VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1};
+  vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
+                       VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, 0, 0, nullptr, 0,
+                       nullptr, 1, &barrier);
+
+  // Now move the converted texture into the destination.
+  VkBufferImageCopy copy_region;
+  copy_region.bufferOffset = alloc->offset;
+  copy_region.bufferRowLength = src.size_1d.output_width;
+  copy_region.bufferImageHeight = 1;
+  copy_region.imageSubresource = {VK_IMAGE_ASPECT_COLOR_BIT, 0, 0, 1};
+  copy_region.imageOffset = {0, 0, 0};
+  copy_region.imageExtent = {src.size_1d.output_width, 1, 1};
+  vkCmdCopyBufferToImage(command_buffer, staging_buffer_.gpu_buffer(),
+                         dest->image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1,
+                         &copy_region);
+
+  // Now transition the texture into a shader readonly source.
+  barrier.srcAccessMask = barrier.dstAccessMask;
+  barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT;
+  barrier.oldLayout = barrier.newLayout;
+  barrier.newLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
+  vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
+                       VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, 0, 0, nullptr, 0,
+                       nullptr, 1, &barrier);
+
+  dest->image_layout = barrier.newLayout;
+  return true;
+}
+
 bool TextureCache::UploadTexture2D(VkCommandBuffer command_buffer,
                                    VkFence completion_fence, Texture* dest,
                                    const TextureInfo& src) {
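One unit worth calling out in the Vulkan upload above: VkBufferImageCopy::bufferRowLength is specified in texels, not bytes, so passing size_1d.output_width there matches the tightly packed row that ConvertTexture1D wrote into staging memory. A small illustrative sketch with hypothetical numbers (a 100-texel 1D image staged at byte offset 4096):

#include <vulkan/vulkan.h>

// Sketch (not from the commit): the copy region for a hypothetical 100-texel
// 1D texture staged at byte offset 4096 of the upload buffer.
VkBufferImageCopy Example1DCopyRegion() {
  VkBufferImageCopy region = {};
  region.bufferOffset = 4096;    // where the converted texels start
  region.bufferRowLength = 100;  // row length in texels, not bytes
  region.bufferImageHeight = 1;  // a 1D image is a single row
  region.imageSubresource = {VK_IMAGE_ASPECT_COLOR_BIT, 0, 0, 1};  // mip 0, one layer
  region.imageOffset = {0, 0, 0};
  region.imageExtent = {100, 1, 1};  // width x height x depth
  return region;
}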
@@ -148,12 +148,16 @@ class TextureCache {
   void FlushPendingCommands(VkCommandBuffer command_buffer,
                             VkFence completion_fence);
 
+  void ConvertTexture1D(uint8_t* dest, const TextureInfo& src);
   void ConvertTexture2D(uint8_t* dest, const TextureInfo& src);
   void ConvertTextureCube(uint8_t* dest, const TextureInfo& src);
 
   // Queues commands to upload a texture from system memory, applying any
   // conversions necessary. This may flush the command buffer to the GPU if we
   // run out of staging memory.
+  bool UploadTexture1D(VkCommandBuffer command_buffer, VkFence completion_fence,
+                       Texture* dest, const TextureInfo& src);
+
   bool UploadTexture2D(VkCommandBuffer command_buffer, VkFence completion_fence,
                        Texture* dest, const TextureInfo& src);
 