Vulkan: Faster path for decoding XFB data
Using a texel buffer as the copy destination removes the need to copy to an intermediate texture first.
This commit is contained in:
parent 804cd0ff03
commit 58978c1440
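For context, a minimal sketch of the Vulkan mechanism the new path relies on: a uniform texel buffer view lets the fragment shader fetch the staged XFB bytes directly with texelFetch(), so no intermediate VkImage copy is needed. This is not code from the commit; `device` and `staging_buffer` are hypothetical placeholder handles, and the buffer is assumed to have been created with VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT.

// Hypothetical sketch: expose a host-visible staging buffer to the fragment
// shader as a uniform texel buffer, instead of copying it into an image first.
VkBufferViewCreateInfo view_info = {};
view_info.sType = VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO;
view_info.buffer = staging_buffer;                // assumed VkBuffer with UNIFORM_TEXEL_BUFFER usage
view_info.format = VK_FORMAT_R8G8B8A8_UNORM;      // one RGBA8 texel holds two packed YUYV pixels
view_info.offset = 0;
view_info.range = VK_WHOLE_SIZE;

VkBufferView view = VK_NULL_HANDLE;
VkResult res = vkCreateBufferView(device, &view_info, nullptr, &view);
// res != VK_SUCCESS means the view could not be created (e.g. unsupported format).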
@@ -48,7 +48,6 @@ public:
   static TextureCache* GetInstance();
 
-  StreamBuffer* GetUploadBuffer() const { return m_texture_upload_buffer.get(); }
   TextureConverter* GetTextureConverter() const { return m_texture_converter.get(); }
 
   bool Initialize();
@@ -44,6 +44,8 @@ TextureConverter::~TextureConverter()
   if (m_texel_buffer_view_r16_uint != VK_NULL_HANDLE)
     vkDestroyBufferView(g_vulkan_context->GetDevice(), m_texel_buffer_view_r16_uint, nullptr);
+  if (m_texel_buffer_view_rgba8_unorm != VK_NULL_HANDLE)
+    vkDestroyBufferView(g_vulkan_context->GetDevice(), m_texel_buffer_view_rgba8_unorm, nullptr);
 
   if (m_encoding_render_pass != VK_NULL_HANDLE)
     vkDestroyRenderPass(g_vulkan_context->GetDevice(), m_encoding_render_pass, nullptr);
@@ -110,6 +112,48 @@ bool TextureConverter::Initialize()
   return true;
 }
 
+bool TextureConverter::ReserveTexelBufferStorage(size_t size, size_t alignment)
+{
+  // Enforce the minimum alignment for texture buffers on the device.
+  size_t actual_alignment =
+      std::max(static_cast<size_t>(g_vulkan_context->GetTexelBufferAlignment()), alignment);
+  if (m_texel_buffer->ReserveMemory(size, actual_alignment))
+    return true;
+
+  WARN_LOG(VIDEO, "Executing command list while waiting for space in palette buffer");
+  Util::ExecuteCurrentCommandsAndRestoreState(false);
+
+  // This next call should never fail, since a command buffer is now in-flight and we can
+  // wait on the fence for the GPU to finish. If this returns false, it's probably because
+  // the device has been lost, which is fatal anyway.
+  if (!m_texel_buffer->ReserveMemory(size, actual_alignment))
+  {
+    PanicAlert("Failed to allocate space for texture conversion");
+    return false;
+  }
+
+  return true;
+}
+
+VkCommandBuffer
+TextureConverter::GetCommandBufferForTextureConversion(const TextureCache::TCacheEntry* src_entry)
+{
+  // EFB copies can be used as paletted textures as well. For these, we can't assume them to be
+  // contain the correct data before the frame begins (when the init command buffer is executed),
+  // so we must convert them at the appropriate time, during the drawing command buffer.
+  if (src_entry->IsEfbCopy())
+  {
+    StateTracker::GetInstance()->EndRenderPass();
+    StateTracker::GetInstance()->SetPendingRebind();
+    return g_command_buffer_mgr->GetCurrentCommandBuffer();
+  }
+  else
+  {
+    // Use initialization command buffer and perform conversion before the drawing commands.
+    return g_command_buffer_mgr->GetCurrentInitCommandBuffer();
+  }
+}
+
 void TextureConverter::ConvertTexture(TextureCache::TCacheEntry* dst_entry,
                                       TextureCache::TCacheEntry* src_entry,
                                       VkRenderPass render_pass, const void* palette,
@@ -126,44 +170,16 @@ void TextureConverter::ConvertTexture(TextureCache::TCacheEntry* dst_entry,
   _assert_(dst_entry->config.rendertarget);
 
   // We want to align to 2 bytes (R16) or the device's texel buffer alignment, whichever is greater.
-  VkDeviceSize texel_buffer_alignment =
-      std::min(g_vulkan_context->GetTexelBufferAlignment(), sizeof(u16));
   size_t palette_size = (src_entry->format & 0xF) == GX_TF_I4 ? 32 : 512;
-
-  // Allocate memory for the palette, and descriptor sets for the buffer.
-  // If any of these fail, execute a command buffer, and try again.
-  if (!m_texel_buffer->ReserveMemory(palette_size, texel_buffer_alignment))
-  {
-    WARN_LOG(VIDEO, "Executing command list while waiting for space in palette buffer");
-    Util::ExecuteCurrentCommandsAndRestoreState(false);
-
-    if (!m_texel_buffer->ReserveMemory(palette_size, texel_buffer_alignment))
-    {
-      PanicAlert("Failed to allocate space for texture conversion");
-      return;
-    }
-  }
+  if (!ReserveTexelBufferStorage(palette_size, sizeof(u16)))
+    return;
 
   // Copy in palette to texel buffer.
   u32 palette_offset = static_cast<u32>(m_texel_buffer->GetCurrentOffset());
   memcpy(m_texel_buffer->GetCurrentHostPointer(), palette, palette_size);
   m_texel_buffer->CommitMemory(palette_size);
 
-  // EFB copies can be used as paletted textures as well. For these, we can't assume them to be
-  // contain the correct data before the frame begins (when the init command buffer is executed),
-  // so we must convert them at the appropriate time, during the drawing command buffer.
-  VkCommandBuffer command_buffer;
-  if (src_entry->IsEfbCopy())
-  {
-    command_buffer = g_command_buffer_mgr->GetCurrentCommandBuffer();
-    StateTracker::GetInstance()->EndRenderPass();
-    StateTracker::GetInstance()->SetPendingRebind();
-  }
-  else
-  {
-    // Use initialization command buffer and perform conversion before the drawing commands.
-    command_buffer = g_command_buffer_mgr->GetCurrentInitCommandBuffer();
-  }
+  VkCommandBuffer command_buffer = GetCommandBufferForTextureConversion(src_entry);
 
   // Bind and draw to the destination.
   UtilityShaderDraw draw(command_buffer,
@@ -290,57 +306,58 @@ void TextureConverter::DecodeYUYVTextureFromMemory(TextureCache::TCacheEntry* ds
   StateTracker::GetInstance()->EndRenderPass();
   StateTracker::GetInstance()->SetPendingRebind();
 
-  // We share the upload buffer with normal textures here, since the XFB buffers aren't very large.
-  u32 upload_size = src_stride * src_height;
-  StreamBuffer* texture_upload_buffer = TextureCache::GetInstance()->GetUploadBuffer();
-  if (!texture_upload_buffer->ReserveMemory(upload_size,
-                                            g_vulkan_context->GetBufferImageGranularity()))
+  // Pack each row without any padding in the texel buffer.
+  size_t upload_stride = src_width * sizeof(u16);
+  size_t upload_size = upload_stride * src_height;
+
+  // Reserve space in the texel buffer for storing the raw image.
+  if (!ReserveTexelBufferStorage(upload_size, sizeof(u16)))
+    return;
+
+  // Handle pitch differences here.
+  if (src_stride != upload_stride)
   {
-    // Execute the command buffer first.
-    WARN_LOG(VIDEO, "Executing command list while waiting for space in texture upload buffer");
-    Util::ExecuteCurrentCommandsAndRestoreState(false);
-    if (!texture_upload_buffer->ReserveMemory(upload_size,
-                                              g_vulkan_context->GetBufferImageGranularity()))
-      PanicAlert("Failed to allocate space in texture upload buffer");
+    const u8* src_row_ptr = reinterpret_cast<const u8*>(src_ptr);
+    u8* dst_row_ptr = m_texel_buffer->GetCurrentHostPointer();
+    size_t copy_size = std::min(upload_stride, static_cast<size_t>(src_stride));
+    for (u32 row = 0; row < src_height; row++)
+    {
+      std::memcpy(dst_row_ptr, src_row_ptr, copy_size);
+      src_row_ptr += src_stride;
+      dst_row_ptr += upload_stride;
+    }
+  }
+  else
+  {
+    std::memcpy(m_texel_buffer->GetCurrentHostPointer(), src_ptr, upload_size);
   }
 
-  // Assume that each source row is not padded.
-  _assert_(src_stride == (src_width * sizeof(u16)));
-  VkDeviceSize image_upload_buffer_offset = texture_upload_buffer->GetCurrentOffset();
-  std::memcpy(texture_upload_buffer->GetCurrentHostPointer(), src_ptr, upload_size);
-  texture_upload_buffer->CommitMemory(upload_size);
+  VkDeviceSize texel_buffer_offset = m_texel_buffer->GetCurrentOffset();
+  m_texel_buffer->CommitMemory(upload_size);
 
-  // Copy from the upload buffer to the intermediate texture. We borrow this from the encoder.
-  // The width is specified as half here because we have two pixels packed in each RGBA texel.
-  // In the future this could be skipped by reading the upload buffer as a uniform texel buffer.
-  VkBufferImageCopy image_copy = {
-      image_upload_buffer_offset,            // VkDeviceSize             bufferOffset
-      0,                                     // uint32_t                 bufferRowLength
-      0,                                     // uint32_t                 bufferImageHeight
-      {VK_IMAGE_ASPECT_COLOR_BIT, 0, 0, 1},  // VkImageSubresourceLayers imageSubresource
-      {0, 0, 0},                             // VkOffset3D               imageOffset
-      {src_width / 2, src_height, 1}         // VkExtent3D               imageExtent
-  };
-  VkCommandBuffer command_buffer = g_command_buffer_mgr->GetCurrentCommandBuffer();
-  m_encoding_render_texture->TransitionToLayout(command_buffer,
-                                                VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL);
-  vkCmdCopyBufferToImage(command_buffer, texture_upload_buffer->GetBuffer(),
-                         m_encoding_render_texture->GetImage(),
-                         VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, &image_copy);
-  m_encoding_render_texture->TransitionToLayout(command_buffer,
-                                                VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);
-  dst_texture->GetTexture()->TransitionToLayout(command_buffer,
+  dst_texture->GetTexture()->TransitionToLayout(g_command_buffer_mgr->GetCurrentCommandBuffer(),
                                                 VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL);
 
+  // We divide the offset by 4 here because we're fetching RGBA8 elements.
+  // The stride is in RGBA8 elements, so we divide by two because our data is two bytes per pixel.
+  struct PSUniformBlock
+  {
+    int buffer_offset;
+    int src_stride;
+  };
+  PSUniformBlock push_constants = {static_cast<int>(texel_buffer_offset / sizeof(u32)),
+                                   static_cast<int>(src_width / 2)};
+
   // Convert from the YUYV data now in the intermediate texture to RGBA in the destination.
-  UtilityShaderDraw draw(command_buffer,
-                         g_object_cache->GetPipelineLayout(PIPELINE_LAYOUT_STANDARD),
+  UtilityShaderDraw draw(g_command_buffer_mgr->GetCurrentCommandBuffer(),
+                         g_object_cache->GetPipelineLayout(PIPELINE_LAYOUT_TEXTURE_CONVERSION),
                          m_encoding_render_pass, g_object_cache->GetScreenQuadVertexShader(),
                         VK_NULL_HANDLE, m_yuyv_to_rgb_shader);
   VkRect2D region = {{0, 0}, {src_width, src_height}};
   draw.BeginRenderPass(dst_texture->GetFramebuffer(), region);
   draw.SetViewportAndScissor(0, 0, static_cast<int>(src_width), static_cast<int>(src_height));
-  draw.SetPSSampler(0, m_encoding_render_texture->GetView(), g_object_cache->GetPointSampler());
+  draw.SetPSTexelBuffer(m_texel_buffer_view_rgba8_unorm);
+  draw.SetPushConstants(&push_constants, sizeof(push_constants));
   draw.DrawWithoutVertexBuffer(VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP, 4);
   draw.EndRenderPass();
 }
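To make the new indexing concrete, a worked example under assumed numbers (not a case taken from the commit): for a hypothetical 640x480 XFB copy, each RGBA8 texel packs two YUYV pixels, so the shader receives src_stride = 640 / 2 = 320 texels per row, and buffer_offset is the byte offset of the packed data divided by sizeof(u32) = 4. The fragment at pixel (100, 7) therefore fetches texel buffer_offset + 7 * 320 + 100 / 2 = buffer_offset + 2290, which holds the Y0/U/Y1/V quad covering output pixels 100 and 101 of row 7.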
@@ -361,7 +378,9 @@ bool TextureConverter::CreateTexelBuffer()
 
   // Create views of the formats that we will be using.
   m_texel_buffer_view_r16_uint = CreateTexelBufferView(VK_FORMAT_R16_UINT);
-  return m_texel_buffer_view_r16_uint != VK_NULL_HANDLE;
+  m_texel_buffer_view_rgba8_unorm = CreateTexelBufferView(VK_FORMAT_R8G8B8A8_UNORM);
+  return m_texel_buffer_view_r16_uint != VK_NULL_HANDLE &&
+         m_texel_buffer_view_rgba8_unorm != VK_NULL_HANDLE;
 }
 
 VkBufferView TextureConverter::CreateTexelBufferView(VkFormat format) const
@@ -614,17 +633,21 @@ bool TextureConverter::CompileYUYVConversionShaders()
 )";
 
   static const char YUYV_TO_RGB_SHADER_SOURCE[] = R"(
-SAMPLER_BINDING(0) uniform sampler2D source;
+layout(std140, push_constant) uniform PCBlock
+{
+  int buffer_offset;
+  int src_stride;
+} PC;
+
+TEXEL_BUFFER_BINDING(0) uniform samplerBuffer source;
 layout(location = 0) in vec3 uv0;
 layout(location = 0) out vec4 ocol0;
 
 void main()
 {
   ivec2 uv = ivec2(gl_FragCoord.xy);
-  vec4 c0 = texelFetch(source, ivec2(uv.x / 2, uv.y), 0);
-  // The texture used to stage the upload is in BGRA order.
-  c0 = c0.zyxw;
+  int buffer_pos = PC.buffer_offset + uv.y * PC.src_stride + (uv.x / 2);
+  vec4 c0 = texelFetch(source, buffer_pos);
 
   float y = mix(c0.r, c0.b, (uv.x & 1) == 1);
   float yComp = 1.164 * (y - 0.0625);
@@ -65,9 +65,21 @@ private:
 
   bool CompileYUYVConversionShaders();
 
+  // Allocates storage in the texel command buffer of the specified size.
+  // If the buffer does not have enough space, executes the current command buffer and tries again.
+  // If this is done, g_command_buffer_mgr->GetCurrentCommandBuffer() will return a different value,
+  // so it always should be re-obtained after calling this method.
+  // Once the data copy is done, call m_texel_buffer->CommitMemory(size).
+  bool ReserveTexelBufferStorage(size_t size, size_t alignment);
+
+  // Returns the command buffer that the texture conversion should occur in for the given texture.
+  // This can be the initialization/copy command buffer, or the drawing command buffer.
+  VkCommandBuffer GetCommandBufferForTextureConversion(const TextureCache::TCacheEntry* src_entry);
+
   // Shared between conversion types
   std::unique_ptr<StreamBuffer> m_texel_buffer;
   VkBufferView m_texel_buffer_view_r16_uint = VK_NULL_HANDLE;
+  VkBufferView m_texel_buffer_view_rgba8_unorm = VK_NULL_HANDLE;
   size_t m_texel_buffer_size = 0;
 
   // Palette conversion - taking an indexed texture and applying palette