diff --git a/src/xenia/gpu/gl4/texture_cache.cc b/src/xenia/gpu/gl4/texture_cache.cc
index 72e1c9639..7e9e68840 100644
--- a/src/xenia/gpu/gl4/texture_cache.cc
+++ b/src/xenia/gpu/gl4/texture_cache.cc
@@ -477,13 +477,15 @@ TextureCache::TextureEntry* TextureCache::LookupOrInsertTexture(
   // Upload/convert.
   bool uploaded = false;
   switch (texture_info.dimension) {
+    case Dimension::k1D:
+      uploaded = UploadTexture1D(entry->handle, texture_info);
+      break;
     case Dimension::k2D:
       uploaded = UploadTexture2D(entry->handle, texture_info);
       break;
     case Dimension::kCube:
       uploaded = UploadTextureCube(entry->handle, texture_info);
       break;
-    case Dimension::k1D:
     case Dimension::k3D:
       assert_unhandled_case(texture_info.dimension);
       return nullptr;
@@ -706,6 +708,62 @@ void TextureSwap(Endian endianness, void* dest, const void* src,
   }
 }
 
+bool TextureCache::UploadTexture1D(GLuint texture,
+                                   const TextureInfo& texture_info) {
+  SCOPE_profile_cpu_f("gpu");
+  const auto host_address =
+      memory_->TranslatePhysical(texture_info.guest_address);
+
+  const auto& config =
+      texture_configs[uint32_t(texture_info.format_info->format)];
+  if (config.format == GL_INVALID_ENUM) {
+    assert_always("Unhandled texture format");
+    return false;
+  }
+
+  size_t unpack_length = texture_info.output_length;
+  glTextureStorage1D(texture, 1, config.internal_format,
+                     texture_info.size_1d.output_width);
+
+  auto allocation = scratch_buffer_->Acquire(unpack_length);
+
+  if (!texture_info.is_tiled) {
+    if (texture_info.size_1d.input_pitch == texture_info.size_1d.output_pitch) {
+      TextureSwap(texture_info.endianness, allocation.host_ptr, host_address,
+                  unpack_length);
+    } else {
+      assert_always();
+    }
+  } else {
+    assert_always();
+  }
+  size_t unpack_offset = allocation.offset;
+  scratch_buffer_->Commit(std::move(allocation));
+  // TODO(benvanik): avoid flush on entire buffer by using another texture
+  // buffer.
+  scratch_buffer_->Flush();
+
+  glBindBuffer(GL_PIXEL_UNPACK_BUFFER, scratch_buffer_->handle());
+  if (texture_info.is_compressed()) {
+    glCompressedTextureSubImage1D(
+        texture, 0, 0, texture_info.size_1d.output_width, config.format,
+        static_cast<GLsizei>(unpack_length),
+        reinterpret_cast<void*>(unpack_offset));
+  } else {
+    // Most of these don't seem to have an effect on compressed images.
+    // glPixelStorei(GL_UNPACK_SWAP_BYTES, GL_TRUE);
+    // glPixelStorei(GL_UNPACK_ALIGNMENT, texture_info.texel_pitch);
+    // glPixelStorei(GL_UNPACK_ROW_LENGTH, texture_info.size_2d.input_width);
+    glPixelStorei(GL_UNPACK_ALIGNMENT, 1);
+
+    glTextureSubImage1D(texture, 0, 0, texture_info.size_1d.output_width,
+                        config.format, config.type,
+                        reinterpret_cast<void*>(unpack_offset));
+  }
+  glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
+  return true;
+}
+
 bool TextureCache::UploadTexture2D(GLuint texture,
                                    const TextureInfo& texture_info) {
   SCOPE_profile_cpu_f("gpu");
diff --git a/src/xenia/gpu/gl4/texture_cache.h b/src/xenia/gpu/gl4/texture_cache.h
index d55aa37a1..4f018c329 100644
--- a/src/xenia/gpu/gl4/texture_cache.h
+++ b/src/xenia/gpu/gl4/texture_cache.h
@@ -96,6 +96,7 @@ class TextureCache {
                                       uint64_t opt_hash = 0);
   void EvictTexture(TextureEntry* entry);
 
+  bool UploadTexture1D(GLuint texture, const TextureInfo& texture_info);
   bool UploadTexture2D(GLuint texture, const TextureInfo& texture_info);
   bool UploadTextureCube(GLuint texture, const TextureInfo& texture_info);
 
diff --git a/src/xenia/gpu/texture_info.cc b/src/xenia/gpu/texture_info.cc
index 0d9cb91fd..8c226f538 100644
--- a/src/xenia/gpu/texture_info.cc
+++ b/src/xenia/gpu/texture_info.cc
@@ -165,7 +165,33 @@ bool TextureInfo::Prepare(const xe_gpu_texture_fetch_t& fetch,
 
 void TextureInfo::CalculateTextureSizes1D(const xe_gpu_texture_fetch_t& fetch) {
   // ?
-  size_1d.width = fetch.size_1d.width;
+  size_1d.logical_width = 1 + fetch.size_1d.width;
+
+  uint32_t block_width =
+      xe::round_up(size_1d.logical_width, format_info->block_width) /
+      format_info->block_width;
+
+  uint32_t tile_width = uint32_t(std::ceil(block_width / 32.0f));
+  size_1d.block_width = tile_width * 32;
+
+  uint32_t bytes_per_block =
+      format_info->block_width * format_info->bits_per_pixel / 8;
+
+  uint32_t byte_pitch = tile_width * 32 * bytes_per_block;
+  if (!is_tiled) {
+    // Each row must be a multiple of 256 bytes in linear textures.
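+    // Illustrative example (not from the original change): an untiled 32bpp
+    // format at 64 texels wide gives block_width = 64, tile_width = 2,
+    // bytes_per_block = 4, so byte_pitch = 2 * 32 * 4 = 256, which is already
+    // 256-aligned and equal to output_pitch (64 * 4), the only case the new
+    // linear upload path copies without asserting.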
+    byte_pitch = xe::round_up(byte_pitch, 256);
+  }
+
+  size_1d.input_width = tile_width * 32 * format_info->block_width;
+
+  size_1d.output_width = block_width * format_info->block_width;
+
+  size_1d.input_pitch = byte_pitch;
+  size_1d.output_pitch = block_width * bytes_per_block;
+
+  input_length = size_1d.input_pitch;
+  output_length = size_1d.output_pitch;
 }
 
 void TextureInfo::CalculateTextureSizes2D(const xe_gpu_texture_fetch_t& fetch) {
diff --git a/src/xenia/gpu/texture_info.h b/src/xenia/gpu/texture_info.h
index 0cb2ed2ba..1881a6e4b 100644
--- a/src/xenia/gpu/texture_info.h
+++ b/src/xenia/gpu/texture_info.h
@@ -231,7 +231,12 @@ struct TextureInfo {
 
   union {
     struct {
-      uint32_t width;
+      uint32_t logical_width;
+      uint32_t block_width;
+      uint32_t input_width;
+      uint32_t input_pitch;
+      uint32_t output_width;
+      uint32_t output_pitch;
     } size_1d;
     struct {
       uint32_t logical_width;
diff --git a/src/xenia/gpu/vulkan/texture_cache.cc b/src/xenia/gpu/vulkan/texture_cache.cc
index 8091a295d..bc82b416d 100644
--- a/src/xenia/gpu/vulkan/texture_cache.cc
+++ b/src/xenia/gpu/vulkan/texture_cache.cc
@@ -420,6 +420,11 @@ TextureCache::Texture* TextureCache::Demand(const TextureInfo& texture_info,
 
   bool uploaded = false;
   switch (texture_info.dimension) {
+    case Dimension::k1D: {
+      uploaded = UploadTexture1D(command_buffer, completion_fence, texture,
+                                 texture_info);
+    } break;
+
     case Dimension::k2D: {
       uploaded = UploadTexture2D(command_buffer, completion_fence, texture,
                                  texture_info);
@@ -822,6 +827,19 @@ void TextureCache::FlushPendingCommands(VkCommandBuffer command_buffer,
   vkBeginCommandBuffer(command_buffer, &begin_info);
 }
 
+void TextureCache::ConvertTexture1D(uint8_t* dest, const TextureInfo& src) {
+  void* host_address = memory_->TranslatePhysical(src.guest_address);
+  if (!src.is_tiled) {
+    if (src.size_1d.input_pitch == src.size_1d.output_pitch) {
+      TextureSwap(src.endianness, dest, host_address, src.output_length);
+    } else {
+      assert_always();
+    }
+  } else {
+    assert_always();
+  }
+}
+
 void TextureCache::ConvertTexture2D(uint8_t* dest, const TextureInfo& src) {
   void* host_address = memory_->TranslatePhysical(src.guest_address);
   if (!src.is_tiled) {
@@ -936,6 +954,86 @@ void TextureCache::ConvertTextureCube(uint8_t* dest, const TextureInfo& src) {
   }
 }
 
+bool TextureCache::UploadTexture1D(VkCommandBuffer command_buffer,
+                                   VkFence completion_fence, Texture* dest,
+                                   const TextureInfo& src) {
+#if FINE_GRAINED_DRAW_SCOPES
+  SCOPE_profile_cpu_f("gpu");
+#endif  // FINE_GRAINED_DRAW_SCOPES
+
+  assert_true(src.dimension == Dimension::k1D);
+
+  size_t unpack_length = src.output_length;
+  if (!staging_buffer_.CanAcquire(unpack_length)) {
+    // Need to have unique memory for every upload for at least one frame. If we
+    // run out of memory, we need to flush all queued upload commands to the
+    // GPU.
+    FlushPendingCommands(command_buffer, completion_fence);
+
+    // Uploads have been flushed. Continue.
+    if (!staging_buffer_.CanAcquire(unpack_length)) {
+      // The staging buffer isn't big enough to hold this texture.
+      XELOGE(
+          "TextureCache staging buffer is too small! (uploading 0x%.8X bytes)",
+          unpack_length);
+      assert_always();
+      return false;
+    }
+  }
+
+  // Grab some temporary memory for staging.
+  auto alloc = staging_buffer_.Acquire(unpack_length, completion_fence);
+  assert_not_null(alloc);
+
+  // Upload texture into GPU memory.
+  // TODO: If the GPU supports it, we can submit a compute batch to convert the
+  // texture and copy it to its destination. Otherwise, fallback to conversion
+  // on the CPU.
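+  // For now the CPU path below only handles linear (untiled) 1D textures whose
+  // input and output pitches already match; anything else trips the
+  // assert_always() checks in ConvertTexture1D.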
+  ConvertTexture1D(reinterpret_cast<uint8_t*>(alloc->host_ptr), src);
+  staging_buffer_.Flush(alloc);
+
+  // Transition the texture into a transfer destination layout.
+  VkImageMemoryBarrier barrier;
+  barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
+  barrier.pNext = nullptr;
+  barrier.srcAccessMask = 0;
+  barrier.dstAccessMask =
+      VK_ACCESS_TRANSFER_WRITE_BIT | VK_ACCESS_HOST_WRITE_BIT;
+  barrier.oldLayout = dest->image_layout;
+  barrier.newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
+  barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
+  barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
+  barrier.image = dest->image;
+  barrier.subresourceRange = {VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1};
+  vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
+                       VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, 0, 0, nullptr, 0,
+                       nullptr, 1, &barrier);
+
+  // Now move the converted texture into the destination.
+  VkBufferImageCopy copy_region;
+  copy_region.bufferOffset = alloc->offset;
+  copy_region.bufferRowLength = src.size_1d.output_width;
+  copy_region.bufferImageHeight = 1;
+  copy_region.imageSubresource = {VK_IMAGE_ASPECT_COLOR_BIT, 0, 0, 1};
+  copy_region.imageOffset = {0, 0, 0};
+  copy_region.imageExtent = {src.size_1d.output_width, 1, 1};
+  vkCmdCopyBufferToImage(command_buffer, staging_buffer_.gpu_buffer(),
+                         dest->image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1,
+                         &copy_region);
+
+  // Now transition the texture into a shader readonly source.
+  barrier.srcAccessMask = barrier.dstAccessMask;
+  barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT;
+  barrier.oldLayout = barrier.newLayout;
+  barrier.newLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
+  vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
+                       VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, 0, 0, nullptr, 0,
+                       nullptr, 1, &barrier);
+
+  dest->image_layout = barrier.newLayout;
+  return true;
+}
+
 bool TextureCache::UploadTexture2D(VkCommandBuffer command_buffer,
                                    VkFence completion_fence, Texture* dest,
                                    const TextureInfo& src) {
diff --git a/src/xenia/gpu/vulkan/texture_cache.h b/src/xenia/gpu/vulkan/texture_cache.h
index a24ef7346..89ebc7ff0 100644
--- a/src/xenia/gpu/vulkan/texture_cache.h
+++ b/src/xenia/gpu/vulkan/texture_cache.h
@@ -148,12 +148,16 @@ class TextureCache {
   void FlushPendingCommands(VkCommandBuffer command_buffer,
                             VkFence completion_fence);
 
+  void ConvertTexture1D(uint8_t* dest, const TextureInfo& src);
   void ConvertTexture2D(uint8_t* dest, const TextureInfo& src);
   void ConvertTextureCube(uint8_t* dest, const TextureInfo& src);
 
   // Queues commands to upload a texture from system memory, applying any
   // conversions necessary. This may flush the command buffer to the GPU if we
   // run out of staging memory.
+  bool UploadTexture1D(VkCommandBuffer command_buffer, VkFence completion_fence,
+                       Texture* dest, const TextureInfo& src);
+
   bool UploadTexture2D(VkCommandBuffer command_buffer, VkFence completion_fence,
                        Texture* dest, const TextureInfo& src);
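
Note: the new size_1d pitch math can be sanity-checked outside the emulator by replaying it by hand. The sketch below is illustrative only; round_up and the hard-coded format constants are stand-ins for xe::round_up and xenia's format tables, not part of this patch. It reports whether the untiled fast path (input_pitch == output_pitch) relied on by ConvertTexture1D/UploadTexture1D would apply.

// sanity_check_1d_pitch.cc -- illustrative sketch, not xenia code.
#include <cmath>
#include <cstdint>
#include <cstdio>

static uint32_t round_up(uint32_t value, uint32_t multiple) {
  return (value + multiple - 1) / multiple * multiple;
}

int main() {
  const uint32_t logical_width = 64;      // fetch.size_1d.width + 1
  const uint32_t format_block_width = 1;  // uncompressed format
  const uint32_t bits_per_pixel = 32;     // e.g. a 32bpp 8_8_8_8 texture

  // Mirrors CalculateTextureSizes1D for the untiled case.
  uint32_t block_width =
      round_up(logical_width, format_block_width) / format_block_width;
  uint32_t tile_width = uint32_t(std::ceil(block_width / 32.0f));
  uint32_t bytes_per_block = format_block_width * bits_per_pixel / 8;
  uint32_t input_pitch = round_up(tile_width * 32 * bytes_per_block, 256);
  uint32_t output_pitch = block_width * bytes_per_block;

  // The new 1D upload paths only copy when these two pitches match.
  std::printf("input_pitch=%u output_pitch=%u fast_path=%d\n", input_pitch,
              output_pitch, input_pitch == output_pitch ? 1 : 0);
  return 0;
}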