diff --git a/Source/Core/VideoBackends/Vulkan/Constants.h b/Source/Core/VideoBackends/Vulkan/Constants.h index 7b6b6d3583..9d1ab3fecd 100644 --- a/Source/Core/VideoBackends/Vulkan/Constants.h +++ b/Source/Core/VideoBackends/Vulkan/Constants.h @@ -109,7 +109,7 @@ constexpr size_t MAXIMUM_TEXTURE_UPLOAD_BUFFER_SIZE = 64 * 1024 * 1024; // streaming buffer and be blocking frequently. Games are unlikely to have textures this // large anyway, so it's only really an issue for HD texture packs, and memory is not // a limiting factor in these scenarios anyway. -constexpr size_t STAGING_TEXTURE_UPLOAD_THRESHOLD = 1024 * 1024 * 4; +constexpr size_t STAGING_TEXTURE_UPLOAD_THRESHOLD = 1024 * 1024 * 8; // Streaming uniform buffer size constexpr size_t INITIAL_UNIFORM_STREAM_BUFFER_SIZE = 16 * 1024 * 1024; diff --git a/Source/Core/VideoBackends/Vulkan/StagingBuffer.cpp b/Source/Core/VideoBackends/Vulkan/StagingBuffer.cpp index 6bd8170fc9..6924f41eca 100644 --- a/Source/Core/VideoBackends/Vulkan/StagingBuffer.cpp +++ b/Source/Core/VideoBackends/Vulkan/StagingBuffer.cpp @@ -127,7 +127,7 @@ void StagingBuffer::InvalidateCPUCache(VkDeviceSize offset, VkDeviceSize size) void StagingBuffer::Read(VkDeviceSize offset, void* data, size_t size, bool invalidate_caches) { _assert_((offset + size) <= m_size); - _assert_(offset >= m_map_offset && size < (m_map_size + (offset - m_map_offset))); + _assert_(offset >= m_map_offset && size <= (m_map_size + (offset - m_map_offset))); if (invalidate_caches) InvalidateCPUCache(offset, size); @@ -138,7 +138,7 @@ void StagingBuffer::Write(VkDeviceSize offset, const void* data, size_t size, bool invalidate_caches) { _assert_((offset + size) <= m_size); - _assert_(offset >= m_map_offset && size < (m_map_size + (offset - m_map_offset))); + _assert_(offset >= m_map_offset && size <= (m_map_size + (offset - m_map_offset))); memcpy(m_map_pointer + (offset - m_map_offset), data, size); if (invalidate_caches) diff --git a/Source/Core/VideoBackends/Vulkan/TextureCache.cpp b/Source/Core/VideoBackends/Vulkan/TextureCache.cpp index 62c0228010..fad7e789cd 100644 --- a/Source/Core/VideoBackends/Vulkan/TextureCache.cpp +++ b/Source/Core/VideoBackends/Vulkan/TextureCache.cpp @@ -9,6 +9,7 @@ #include #include +#include "Common/Align.h" #include "Common/Assert.h" #include "Common/CommonFuncs.h" #include "Common/Logging/Log.h" @@ -238,9 +239,10 @@ TextureCacheBase::TCacheEntryBase* TextureCache::CreateTexture(const TCacheEntry usage |= VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT; // Allocate texture object + VkFormat vk_format = Util::GetVkFormatForHostTextureFormat(config.format); std::unique_ptr texture = Texture2D::Create( - config.width, config.height, config.levels, config.layers, TEXTURECACHE_TEXTURE_FORMAT, - VK_SAMPLE_COUNT_1_BIT, VK_IMAGE_VIEW_TYPE_2D_ARRAY, VK_IMAGE_TILING_OPTIMAL, usage); + config.width, config.height, config.levels, config.layers, vk_format, VK_SAMPLE_COUNT_1_BIT, + VK_IMAGE_VIEW_TYPE_2D_ARRAY, VK_IMAGE_TILING_OPTIMAL, usage); if (!texture) return nullptr; @@ -366,87 +368,68 @@ void TextureCache::TCacheEntry::Load(u32 level, u32 width, u32 height, u32 row_l m_texture->TransitionToLayout(g_command_buffer_mgr->GetCurrentInitCommandBuffer(), VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); - // Does this texture data fit within the streaming buffer? - u32 upload_width = width; - u32 upload_pitch = upload_width * sizeof(u32); - u32 upload_size = upload_pitch * height; + // For unaligned textures, we can save some memory in the transfer buffer by skipping the rows + // that lie outside of the texture's dimensions. u32 upload_alignment = static_cast(g_vulkan_context->GetBufferImageGranularity()); - u32 source_pitch = row_length * 4; - if ((upload_size + upload_alignment) <= STAGING_TEXTURE_UPLOAD_THRESHOLD && - (upload_size + upload_alignment) <= MAXIMUM_TEXTURE_UPLOAD_BUFFER_SIZE) - { - // Assume tightly packed rows, with no padding as the buffer source. - StreamBuffer* upload_buffer = TextureCache::GetInstance()->m_texture_upload_buffer.get(); + u32 block_size = Util::GetBlockSize(m_texture->GetFormat()); + u32 num_rows = Common::AlignUp(height, block_size) / block_size; + size_t source_pitch = Util::GetPitchForTexture(m_texture->GetFormat(), row_length); + size_t upload_size = source_pitch * num_rows; + std::unique_ptr temp_buffer; + VkBuffer upload_buffer; + VkDeviceSize upload_buffer_offset; - // Allocate memory from the streaming buffer for the texture data. - if (!upload_buffer->ReserveMemory(upload_size, g_vulkan_context->GetBufferImageGranularity())) + // Does this texture data fit within the streaming buffer? + if (upload_size <= STAGING_TEXTURE_UPLOAD_THRESHOLD && + upload_size <= MAXIMUM_TEXTURE_UPLOAD_BUFFER_SIZE) + { + StreamBuffer* stream_buffer = TextureCache::GetInstance()->m_texture_upload_buffer.get(); + if (!stream_buffer->ReserveMemory(upload_size, upload_alignment)) { // Execute the command buffer first. WARN_LOG(VIDEO, "Executing command list while waiting for space in texture upload buffer"); Util::ExecuteCurrentCommandsAndRestoreState(false); // Try allocating again. This may cause a fence wait. - if (!upload_buffer->ReserveMemory(upload_size, g_vulkan_context->GetBufferImageGranularity())) + if (!stream_buffer->ReserveMemory(upload_size, upload_alignment)) PanicAlert("Failed to allocate space in texture upload buffer"); } - // Grab buffer pointers - VkBuffer image_upload_buffer = upload_buffer->GetBuffer(); - VkDeviceSize image_upload_buffer_offset = upload_buffer->GetCurrentOffset(); - u8* image_upload_buffer_pointer = upload_buffer->GetCurrentHostPointer(); - - // Copy to the buffer using the stride from the subresource layout - const u8* source_ptr = buffer; - if (upload_pitch != source_pitch) - { - VkDeviceSize copy_pitch = std::min(source_pitch, upload_pitch); - for (unsigned int row = 0; row < height; row++) - { - memcpy(image_upload_buffer_pointer + row * upload_pitch, source_ptr + row * source_pitch, - copy_pitch); - } - } - else - { - // Can copy the whole thing in one block, the pitch matches - memcpy(image_upload_buffer_pointer, source_ptr, upload_size); - } - - // Flush buffer memory if necessary - upload_buffer->CommitMemory(upload_size); - - // Copy from the streaming buffer to the actual image. - VkBufferImageCopy image_copy = { - image_upload_buffer_offset, // VkDeviceSize bufferOffset - 0, // uint32_t bufferRowLength - 0, // uint32_t bufferImageHeight - {VK_IMAGE_ASPECT_COLOR_BIT, level, 0, 1}, // VkImageSubresourceLayers imageSubresource - {0, 0, 0}, // VkOffset3D imageOffset - {width, height, 1} // VkExtent3D imageExtent - }; - vkCmdCopyBufferToImage(g_command_buffer_mgr->GetCurrentInitCommandBuffer(), image_upload_buffer, - m_texture->GetImage(), VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, - &image_copy); + // Copy to the streaming buffer. + upload_buffer = stream_buffer->GetBuffer(); + upload_buffer_offset = stream_buffer->GetCurrentOffset(); + std::memcpy(stream_buffer->GetCurrentHostPointer(), buffer, upload_size); + stream_buffer->CommitMemory(upload_size); } else { - // Slow path. The data for the image is too large to fit in the streaming buffer, so we need - // to allocate a temporary texture to store the data in, then copy to the real texture. - std::unique_ptr staging_texture = StagingTexture2D::Create( - STAGING_BUFFER_TYPE_UPLOAD, width, height, TEXTURECACHE_TEXTURE_FORMAT); - - if (!staging_texture || !staging_texture->Map()) + // Create a temporary staging buffer that is destroyed after the image is copied. + temp_buffer = StagingBuffer::Create(STAGING_BUFFER_TYPE_UPLOAD, upload_size, + VK_BUFFER_USAGE_TRANSFER_SRC_BIT); + if (!temp_buffer || !temp_buffer->Map()) { PanicAlert("Failed to allocate staging texture for large texture upload."); return; } - // Copy data to staging texture first, then to the "real" texture. - staging_texture->WriteTexels(0, 0, width, height, buffer, source_pitch); - staging_texture->CopyToImage(g_command_buffer_mgr->GetCurrentInitCommandBuffer(), - m_texture->GetImage(), VK_IMAGE_ASPECT_COLOR_BIT, 0, 0, width, - height, level, 0); + upload_buffer = temp_buffer->GetBuffer(); + upload_buffer_offset = 0; + temp_buffer->Write(0, buffer, upload_size, true); + temp_buffer->Unmap(); } + + // Copy from the streaming buffer to the actual image. + VkBufferImageCopy image_copy = { + upload_buffer_offset, // VkDeviceSize bufferOffset + row_length, // uint32_t bufferRowLength + 0, // uint32_t bufferImageHeight + {VK_IMAGE_ASPECT_COLOR_BIT, level, 0, 1}, // VkImageSubresourceLayers imageSubresource + {0, 0, 0}, // VkOffset3D imageOffset + {width, height, 1} // VkExtent3D imageExtent + }; + vkCmdCopyBufferToImage(g_command_buffer_mgr->GetCurrentInitCommandBuffer(), upload_buffer, + m_texture->GetImage(), VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, + &image_copy); } void TextureCache::TCacheEntry::FromRenderTarget(bool is_depth_copy, const EFBRectangle& src_rect, @@ -544,6 +527,11 @@ bool TextureCache::TCacheEntry::Save(const std::string& filename, unsigned int l { _assert_(level < config.levels); + // We can't dump compressed textures currently (it would mean drawing them to a RGBA8 + // framebuffer, and saving that). TextureCache does not call Save for custom textures + // anyway, so this is fine for now. + _assert_(config.format == HostTextureFormat::RGBA8); + // Determine dimensions of image we want to save. u32 level_width = std::max(1u, config.width >> level); u32 level_height = std::max(1u, config.height >> level); @@ -582,7 +570,8 @@ bool TextureCache::TCacheEntry::Save(const std::string& filename, unsigned int l // It's okay to throw this texture away immediately, since we're done with it, and // we blocked until the copy completed on the GPU anyway. bool result = TextureToPng(reinterpret_cast(staging_texture->GetMapPointer()), - staging_texture->GetRowStride(), filename, level_width, level_height); + static_cast(staging_texture->GetRowStride()), filename, + level_width, level_height); staging_texture->Unmap(); return result; diff --git a/Source/Core/VideoBackends/Vulkan/Util.cpp b/Source/Core/VideoBackends/Vulkan/Util.cpp index 5de96e23cb..a060cf3485 100644 --- a/Source/Core/VideoBackends/Vulkan/Util.cpp +++ b/Source/Core/VideoBackends/Vulkan/Util.cpp @@ -53,6 +53,20 @@ bool IsDepthFormat(VkFormat format) } } +bool IsCompressedFormat(VkFormat format) +{ + switch (format) + { + case VK_FORMAT_BC1_RGBA_UNORM_BLOCK: + case VK_FORMAT_BC2_UNORM_BLOCK: + case VK_FORMAT_BC3_UNORM_BLOCK: + return true; + + default: + return false; + } +} + VkFormat GetLinearFormat(VkFormat format) { switch (format) @@ -74,6 +88,25 @@ VkFormat GetLinearFormat(VkFormat format) } } +VkFormat GetVkFormatForHostTextureFormat(HostTextureFormat format) +{ + switch (format) + { + case HostTextureFormat::DXT1: + return VK_FORMAT_BC1_RGBA_UNORM_BLOCK; + + case HostTextureFormat::DXT3: + return VK_FORMAT_BC2_UNORM_BLOCK; + + case HostTextureFormat::DXT5: + return VK_FORMAT_BC3_UNORM_BLOCK; + + case HostTextureFormat::RGBA8: + default: + return VK_FORMAT_R8G8B8A8_UNORM; + } +} + u32 GetTexelSize(VkFormat format) { // Only contains pixel formats we use. @@ -91,12 +124,59 @@ u32 GetTexelSize(VkFormat format) case VK_FORMAT_B8G8R8A8_UNORM: return 4; + case VK_FORMAT_BC1_RGBA_UNORM_BLOCK: + return 8; + + case VK_FORMAT_BC2_UNORM_BLOCK: + case VK_FORMAT_BC3_UNORM_BLOCK: + return 16; + default: PanicAlert("Unhandled pixel format"); return 1; } } +u32 GetBlockSize(VkFormat format) +{ + switch (format) + { + case VK_FORMAT_BC1_RGBA_UNORM_BLOCK: + case VK_FORMAT_BC2_UNORM_BLOCK: + case VK_FORMAT_BC3_UNORM_BLOCK: + return 4; + + default: + return 1; + } +} + + +size_t GetPitchForTexture(VkFormat format, u32 row_length) +{ + switch (format) + { + case VK_FORMAT_BC1_RGBA_UNORM_BLOCK: + return static_cast(std::max(1u, row_length / 4)) * 8; + + case VK_FORMAT_BC2_UNORM_BLOCK: + return static_cast(std::max(1u, row_length / 4)) * 16; + + case VK_FORMAT_BC3_UNORM_BLOCK: + return static_cast(std::max(1u, row_length / 4)) * 16; + + case VK_FORMAT_R8G8B8A8_UNORM: + case VK_FORMAT_B8G8R8A8_UNORM: + case VK_FORMAT_R32_SFLOAT: + case VK_FORMAT_D32_SFLOAT: + return static_cast(row_length) * 4; + + default: + PanicAlert("Unhandled pixel format"); + return row_length; + } +} + VkRect2D ClampRect2D(const VkRect2D& rect, u32 width, u32 height) { VkRect2D out; diff --git a/Source/Core/VideoBackends/Vulkan/Util.h b/Source/Core/VideoBackends/Vulkan/Util.h index d61c699bfe..f4596b89f7 100644 --- a/Source/Core/VideoBackends/Vulkan/Util.h +++ b/Source/Core/VideoBackends/Vulkan/Util.h @@ -25,8 +25,13 @@ size_t AlignBufferOffset(size_t offset, size_t alignment); u32 MakeRGBA8Color(float r, float g, float b, float a); bool IsDepthFormat(VkFormat format); +bool IsCompressedFormat(VkFormat format); VkFormat GetLinearFormat(VkFormat format); +VkFormat GetVkFormatForHostTextureFormat(HostTextureFormat format); u32 GetTexelSize(VkFormat format); +u32 GetBlockSize(VkFormat format); + +size_t GetPitchForTexture(VkFormat format, u32 row_length); // Clamps a VkRect2D to the specified dimensions. VkRect2D ClampRect2D(const VkRect2D& rect, u32 width, u32 height); diff --git a/Source/Core/VideoBackends/Vulkan/VulkanContext.cpp b/Source/Core/VideoBackends/Vulkan/VulkanContext.cpp index e51dad8091..8978cd2df6 100644 --- a/Source/Core/VideoBackends/Vulkan/VulkanContext.cpp +++ b/Source/Core/VideoBackends/Vulkan/VulkanContext.cpp @@ -284,6 +284,9 @@ void VulkanContext::PopulateBackendInfoFeatures(VideoConfig* config, VkPhysicalD config->backend_info.bSupportsDepthClamp = (features.depthClamp == VK_TRUE && features.shaderClipDistance == VK_TRUE); + // textureCompressionBC implies BC1 through BC7, which is a superset of DXT1/3/5, which we need. + config->backend_info.bSupportsST3CTextures = features.textureCompressionBC == VK_TRUE; + // Our usage of primitive restart appears to be broken on AMD's binary drivers. // Seems to be fine on GCN Gen 1-2, unconfirmed on GCN Gen 3, causes driver resets on GCN Gen 4. if (DriverDetails::HasBug(DriverDetails::BUG_PRIMITIVE_RESTART)) @@ -460,6 +463,7 @@ bool VulkanContext::SelectDeviceFeatures() m_device_features.occlusionQueryPrecise = available_features.occlusionQueryPrecise; m_device_features.shaderClipDistance = available_features.shaderClipDistance; m_device_features.depthClamp = available_features.depthClamp; + m_device_features.textureCompressionBC = available_features.textureCompressionBC; return true; }