GPU/HW: Use SSBO+compute shader for VRAM readbacks

This commit is contained in:
Connor McLaughlin 2021-03-04 14:12:16 +10:00
parent 3405041bda
commit 9fc4face66
11 changed files with 318 additions and 78 deletions

View File

@ -402,6 +402,46 @@ void GraphicsPipelineBuilder::SetRenderPass(VkRenderPass render_pass, u32 subpas
m_ci.subpass = subpass;
}
// Constructs a builder whose create-info starts out in the state produced by Clear().
ComputePipelineBuilder::ComputePipelineBuilder()
{
Clear();
}
void ComputePipelineBuilder::Clear()
{
m_ci = {};
m_ci.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO;
}
// Creates a compute pipeline from the currently configured create-info.
//
// device:         device the pipeline is created on.
// pipeline_cache: cache handle forwarded to vkCreateComputePipelines().
// clear:          when true, the builder state is reset via Clear() after a successful creation.
//
// Returns the new pipeline, or VK_NULL_HANDLE if creation failed. On failure the error is logged
// and the builder state is left untouched, regardless of `clear`.
VkPipeline ComputePipelineBuilder::Create(VkDevice device, VkPipelineCache pipeline_cache, bool clear /* = true */)
{
  VkPipeline created_pipeline = VK_NULL_HANDLE;
  const VkResult res = vkCreateComputePipelines(device, pipeline_cache, 1, &m_ci, nullptr, &created_pipeline);
  if (res == VK_SUCCESS)
  {
    if (clear)
      Clear();

    return created_pipeline;
  }

  LOG_VULKAN_ERROR(res, "vkCreateComputePipelines() failed: ");
  return VK_NULL_HANDLE;
}
// Configures the single compute shader stage of the pending pipeline.
//
// module:      shader module providing the compute shader.
// entry_point: name of the entry point within the module. Only the pointer is stored, so the
//              string must remain valid until Create() has been called.
void ComputePipelineBuilder::SetShader(VkShaderModule module, const char* entry_point)
{
  VkPipelineShaderStageCreateInfo& stage_ci = m_ci.stage;
  stage_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO;
  stage_ci.stage = VK_SHADER_STAGE_COMPUTE_BIT;
  stage_ci.module = module;
  stage_ci.pName = entry_point;
}
// Stores the pipeline layout handle in the pending create-info, which Create() later passes to
// vkCreateComputePipelines().
void ComputePipelineBuilder::SetPipelineLayout(VkPipelineLayout layout)
{
m_ci.layout = layout;
}
SamplerBuilder::SamplerBuilder()
{
Clear();
@ -542,7 +582,7 @@ void DescriptorSetUpdateBuilder::AddCombinedImageSamplerDescriptorWrite(
}
void DescriptorSetUpdateBuilder::AddBufferDescriptorWrite(VkDescriptorSet set, u32 binding, VkDescriptorType dtype,
VkBuffer buffer, u32 offset, u32 size)
VkBuffer buffer, VkDeviceSize offset, VkDeviceSize size)
{
Assert(m_num_writes < MAX_WRITES && m_num_infos < MAX_INFOS);

View File

@ -138,6 +138,23 @@ private:
VkPipelineMultisampleStateCreateInfo m_multisample_state;
};
// Helper for assembling a VkComputePipelineCreateInfo step by step and creating a compute
// pipeline from it.
class ComputePipelineBuilder
{
public:
// Starts with a cleared create-info.
ComputePipelineBuilder();
// Resets the create-info, discarding any shader/layout set so far.
void Clear();
// Creates the pipeline; returns VK_NULL_HANDLE on failure. If `clear` is true the builder is
// reset after a successful creation.
VkPipeline Create(VkDevice device, VkPipelineCache pipeline_cache = VK_NULL_HANDLE, bool clear = true);
// Sets the compute shader module and its entry point name (the pointer is stored, not copied).
void SetShader(VkShaderModule module, const char* entry_point);
// Sets the pipeline layout used by the pipeline.
void SetPipelineLayout(VkPipelineLayout layout);
private:
// Create-info accumulated by the setters and consumed by Create().
VkComputePipelineCreateInfo m_ci;
};
class SamplerBuilder
{
public:
@ -177,8 +194,8 @@ public:
void AddSamplerDescriptorWrite(VkDescriptorSet set, u32 binding, VkSampler sampler);
void AddCombinedImageSamplerDescriptorWrite(VkDescriptorSet set, u32 binding, VkImageView view, VkSampler sampler,
VkImageLayout layout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);
void AddBufferDescriptorWrite(VkDescriptorSet set, u32 binding, VkDescriptorType dtype, VkBuffer buffer, u32 offset,
u32 size);
void AddBufferDescriptorWrite(VkDescriptorSet set, u32 binding, VkDescriptorType dtype, VkBuffer buffer,
VkDeviceSize offset, VkDeviceSize size);
void AddBufferViewDescriptorWrite(VkDescriptorSet set, u32 binding, VkDescriptorType dtype, VkBufferView view);
private:

View File

@ -1,4 +1,5 @@
#include "gpu_hw_d3d11.h"
#include "common/align.h"
#include "common/assert.h"
#include "common/d3d11/shader_compiler.h"
#include "common/log.h"
@ -363,14 +364,37 @@ bool GPU_HW_D3D11::CreateTextureBuffer()
const CD3D11_SHADER_RESOURCE_VIEW_DESC srv_desc(D3D11_SRV_DIMENSION_BUFFER, DXGI_FORMAT_R16_UINT, 0,
VRAM_UPDATE_TEXTURE_BUFFER_SIZE / sizeof(u16));
const HRESULT hr = m_device->CreateShaderResourceView(m_texture_stream_buffer.GetD3DBuffer(), &srv_desc,
m_texture_stream_buffer_srv_r16ui.ReleaseAndGetAddressOf());
HRESULT hr = m_device->CreateShaderResourceView(m_texture_stream_buffer.GetD3DBuffer(), &srv_desc,
m_texture_stream_buffer_srv_r16ui.ReleaseAndGetAddressOf());
if (FAILED(hr))
{
Log_ErrorPrintf("Creation of texture buffer SRV failed: 0x%08X", hr);
return false;
}
const u32 buffer_elements = (VRAM_WIDTH / 2) * VRAM_HEIGHT;
const CD3D11_BUFFER_DESC read_buffer_desc(buffer_elements * sizeof(u32), D3D11_BIND_UNORDERED_ACCESS,
D3D11_USAGE_DEFAULT, 0, 0, sizeof(u32));
const CD3D11_BUFFER_DESC staging_buffer_desc(buffer_elements * sizeof(u32), 0, D3D11_USAGE_STAGING,
D3D11_CPU_ACCESS_READ, 0, 0);
const CD3D11_UNORDERED_ACCESS_VIEW_DESC uav_desc(D3D11_UAV_DIMENSION_BUFFER, DXGI_FORMAT_R32_UINT, 0, buffer_elements,
0);
hr = m_device->CreateBuffer(&read_buffer_desc, nullptr, m_vram_read_buffer.ReleaseAndGetAddressOf());
if (SUCCEEDED(hr))
{
hr = m_device->CreateBuffer(&staging_buffer_desc, nullptr, m_vram_read_staging_buffer.ReleaseAndGetAddressOf());
if (SUCCEEDED(hr))
{
hr = m_device->CreateUnorderedAccessView(m_vram_read_buffer.Get(), &uav_desc,
m_vram_read_buffer_view.ReleaseAndGetAddressOf());
}
}
if (FAILED(hr))
{
Log_ErrorPrintf("Creation of buffer/UAV failed: 0x%08X", hr);
return false;
}
return true;
}
@ -612,6 +636,10 @@ bool GPU_HW_D3D11::CompileShaders()
if (!m_vram_read_pixel_shader)
return false;
m_vram_read_compute_shader = shader_cache.GetComputeShader(m_device.Get(), shadergen.GenerateVRAMReadComputeShader());
if (!m_vram_read_compute_shader)
return false;
UPDATE_PROGRESS();
m_vram_write_pixel_shader =
@ -946,6 +974,7 @@ void GPU_HW_D3D11::ReadVRAM(u32 x, u32 y, u32 width, u32 height)
const u32 encoded_width = (copy_rect.GetWidth() + 1) / 2;
const u32 encoded_height = copy_rect.GetHeight();
#if 0
// Encode the 24-bit texture as 16-bit.
const u32 uniforms[4] = {copy_rect.left, copy_rect.top, copy_rect.GetWidth(), copy_rect.GetHeight()};
m_context->RSSetState(m_cull_none_rasterizer_state_no_msaa.Get());
@ -971,6 +1000,50 @@ void GPU_HW_D3D11::ReadVRAM(u32 x, u32 y, u32 width, u32 height)
}
RestoreGraphicsAPIState();
#else
// Encode the 24-bit texture as 16-bit.
const u32 uniforms[5] = {copy_rect.left, copy_rect.top, copy_rect.GetWidth(), copy_rect.GetHeight(), encoded_width};
const auto res = m_uniform_stream_buffer.Map(m_context.Get(), MAX_UNIFORM_BUFFER_SIZE, sizeof(uniforms));
std::memcpy(res.pointer, uniforms, sizeof(uniforms));
m_uniform_stream_buffer.Unmap(m_context.Get(), sizeof(uniforms));
m_context->CSSetConstantBuffers(0, 1, m_uniform_stream_buffer.GetD3DBufferArray());
m_context->OMSetRenderTargets(0, nullptr, nullptr);
m_context->CSSetUnorderedAccessViews(0, 1, m_vram_read_buffer_view.GetAddressOf(), nullptr);
m_context->CSSetShaderResources(0, 1, m_vram_texture.GetD3DSRVArray());
m_context->CSSetShader(m_vram_read_compute_shader.Get(), nullptr, 0);
const u32 groups_x = (encoded_width + 7) / 8;
const u32 groups_y = (encoded_height + 7) / 8;
m_context->Dispatch(groups_x, groups_y, 1);
ID3D11ShaderResourceView* null_view[1] = {nullptr};
m_context->CSSetShaderResources(0, 1, null_view);
m_context->OMSetRenderTargets(1, m_vram_texture.GetD3DRTVArray(), m_vram_depth_view.Get());
const CD3D11_BOX copy_box(0, 0, 0, static_cast<LONG>(encoded_width * encoded_height * sizeof(u32)), 1, 1);
m_context->CopySubresourceRegion(m_vram_read_staging_buffer.Get(), 0, 0, 0, 0, m_vram_read_buffer.Get(), 0,
&copy_box);
D3D11_MAPPED_SUBRESOURCE msr;
HRESULT hr = m_context->Map(m_vram_read_staging_buffer.Get(), 0, D3D11_MAP_READ, 0, &msr);
if (FAILED(hr))
{
Log_ErrorPrintf("Failed to map VRAM readback buffer");
return;
}
u16* dst_ptr = &m_vram_shadow[copy_rect.top * VRAM_WIDTH + copy_rect.left];
const u8* src_ptr = static_cast<const u8*>(msr.pData);
for (u32 row = 0; row < encoded_height; row++)
{
std::memcpy(dst_ptr, src_ptr, sizeof(u32) * encoded_width);
src_ptr += sizeof(u32) * encoded_width;
dst_ptr += VRAM_WIDTH;
}
m_context->Unmap(m_vram_read_staging_buffer.Get(), 0);
#endif
}
void GPU_HW_D3D11::FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color)

View File

@ -84,7 +84,6 @@ private:
D3D11::Texture m_vram_depth_texture;
ComPtr<ID3D11DepthStencilView> m_vram_depth_view;
D3D11::Texture m_vram_read_texture;
D3D11::Texture m_vram_encoding_texture;
D3D11::Texture m_display_texture;
D3D11::StreamBuffer m_vertex_stream_buffer;
@ -93,7 +92,9 @@ private:
D3D11::StreamBuffer m_texture_stream_buffer;
D3D11::StagingTexture m_vram_readback_texture;
ComPtr<ID3D11Buffer> m_vram_read_buffer;
ComPtr<ID3D11Buffer> m_vram_read_staging_buffer;
ComPtr<ID3D11UnorderedAccessView> m_vram_read_buffer_view;
ComPtr<ID3D11ShaderResourceView> m_texture_stream_buffer_srv_r16ui;
@ -123,7 +124,7 @@ private:
ComPtr<ID3D11PixelShader> m_copy_pixel_shader;
ComPtr<ID3D11PixelShader> m_vram_fill_pixel_shader;
ComPtr<ID3D11PixelShader> m_vram_interlaced_fill_pixel_shader;
ComPtr<ID3D11PixelShader> m_vram_read_pixel_shader;
ComPtr<ID3D11ComputeShader> m_vram_read_compute_shader;
ComPtr<ID3D11PixelShader> m_vram_write_pixel_shader;
ComPtr<ID3D11PixelShader> m_vram_copy_pixel_shader;
ComPtr<ID3D11PixelShader> m_vram_update_depth_pixel_shader;
@ -139,4 +140,9 @@ private:
D3D11::Texture m_downsample_texture;
D3D11::Texture m_downsample_weight_texture;
std::vector<std::pair<ComPtr<ID3D11ShaderResourceView>, ComPtr<ID3D11RenderTargetView>>> m_downsample_mip_views;
// fallback vram read
D3D11::Texture m_vram_encoding_texture;
D3D11::StagingTexture m_vram_readback_texture;
ComPtr<ID3D11PixelShader> m_vram_read_pixel_shader;
};

View File

@ -1192,6 +1192,88 @@ uint SampleVRAM(uint2 coords)
return ss.str();
}
// Generates the source of the compute shader used for VRAM readbacks.
//
// The shader reads the VRAM texture bound as samp0, converts each texel with RGBA8ToRGBA5551()
// (box-filtering RESOLUTION_SCALE x RESOLUTION_SCALE texels when upscaling is active) and writes
// two horizontally adjacent 16-bit pixels packed into one 32-bit word of s_output_buffer.
//
// Uniforms expected by the shader:
//   u_base_coords   - top-left corner of the rectangle being read.
//   u_size          - size of the rectangle; u_size.y is the number of output rows.
//   u_buffer_stride - number of 32-bit words per output row.
//
// The entry point is declared with an 8x8x1 local size. Callers round the dispatch up to whole
// groups, so the shader itself discards invocations that fall outside the encoded rectangle.
//
// Returns the complete shader source as a string.
std::string GPU_HW_ShaderGen::GenerateVRAMReadComputeShader()
{
  std::stringstream ss;
  WriteHeader(ss);
  WriteCommonFunctions(ss);
  DeclareUniformBuffer(ss, {"uint2 u_base_coords", "uint2 u_size", "uint u_buffer_stride"}, true);
  DeclareTexture(ss, "samp0", 0, UsingMSAA());

  // Output buffer declaration: an SSBO for GLSL, a typed UAV buffer for HLSL.
  if (m_glsl)
  {
    ss << "layout(std430";
    if (IsVulkan())
      ss << ", set = 0, binding = 2";
    else if (m_use_glsl_binding_layout)
      ss << ", binding = 1";

    ss << ") restrict writeonly buffer SSBO {\n";
    ss << " uint s_output_buffer[];\n";
    ss << "};\n";
  }
  else
  {
    ss << "RWBuffer<uint> s_output_buffer : register(u0);\n";
  }

  ss << R"(
float4 LoadVRAM(int2 coords)
{
#if MULTISAMPLING
  float4 value = LOAD_TEXTURE_MS(samp0, coords, 0u);
  for (uint sample_index = 1u; sample_index < MULTISAMPLES; sample_index++)
    value += LOAD_TEXTURE_MS(samp0, coords, sample_index);
  value /= float(MULTISAMPLES);
  return value;
#else
  return LOAD_TEXTURE(samp0, coords, 0);
#endif
}

uint SampleVRAM(uint2 coords)
{
  if (RESOLUTION_SCALE == 1u)
    return RGBA8ToRGBA5551(LoadVRAM(int2(coords)));

  // Box filter for downsampling.
  float4 value = float4(0.0, 0.0, 0.0, 0.0);
  uint2 base_coords = coords * uint2(RESOLUTION_SCALE, RESOLUTION_SCALE);
  for (uint offset_x = 0u; offset_x < RESOLUTION_SCALE; offset_x++)
  {
    for (uint offset_y = 0u; offset_y < RESOLUTION_SCALE; offset_y++)
      value += LoadVRAM(int2(base_coords + uint2(offset_x, offset_y)));
  }
  value /= float(RESOLUTION_SCALE * RESOLUTION_SCALE);
  return RGBA8ToRGBA5551(value);
}
)";

  DeclareComputeEntryPoint(ss, 8, 8, 1);
  ss << R"(
{
  // The dispatch is rounded up to whole 8x8 groups, so some invocations lie outside the encoded
  // rectangle. Skip them: an invocation with x >= u_buffer_stride would otherwise land on a slot
  // belonging to the next row and race with the invocation that legitimately owns it, and one
  // with y >= u_size.y would underflow the OpenGL flip below.
  if (uint(c_global_id.x) >= u_buffer_stride || uint(c_global_id.y) >= u_size.y)
    return;

  uint2 sample_coords = uint2(uint(c_global_id.x) * 2u, uint(c_global_id.y));

#if API_OPENGL || API_OPENGL_ES
  // Lower-left origin flip for OpenGL.
  // We want to write the image out upside-down so we can read it top-to-bottom.
  sample_coords.y = u_size.y - sample_coords.y - 1u;
#endif

  sample_coords += u_base_coords;

  // We're encoding as 32-bit, so the output width is halved and we pack two 16-bit pixels in one 32-bit pixel.
  uint left = SampleVRAM(sample_coords);
  uint right = SampleVRAM(uint2(sample_coords.x + 1u, sample_coords.y));
  uint buffer_offset = c_global_id.y * u_buffer_stride + c_global_id.x;
  s_output_buffer[buffer_offset] = left | (right << 16);
})";

  return ss.str();
}
std::string GPU_HW_ShaderGen::GenerateVRAMWriteFragmentShader(bool use_ssbo)
{
std::stringstream ss;

View File

@ -17,6 +17,7 @@ public:
std::string GenerateDisplayFragmentShader(bool depth_24bit, GPU_HW::InterlacedRenderMode interlace_mode,
bool smooth_chroma);
std::string GenerateVRAMReadFragmentShader();
std::string GenerateVRAMReadComputeShader();
std::string GenerateVRAMWriteFragmentShader(bool use_ssbo);
std::string GenerateVRAMCopyFragmentShader();
std::string GenerateVRAMUpdateDepthFragmentShader();

View File

@ -360,10 +360,12 @@ void GPU_HW_Vulkan::DestroyResources()
m_texture_stream_buffer.Destroy(false);
Vulkan::Util::SafeDestroyPipelineLayout(m_vram_write_pipeline_layout);
Vulkan::Util::SafeDestroyPipelineLayout(m_vram_read_pipeline_layout);
Vulkan::Util::SafeDestroyPipelineLayout(m_single_sampler_pipeline_layout);
Vulkan::Util::SafeDestroyPipelineLayout(m_no_samplers_pipeline_layout);
Vulkan::Util::SafeDestroyPipelineLayout(m_batch_pipeline_layout);
Vulkan::Util::SafeDestroyDescriptorSetLayout(m_vram_write_descriptor_set_layout);
Vulkan::Util::SafeDestroyDescriptorSetLayout(m_vram_read_descriptor_set_layout);
Vulkan::Util::SafeDestroyDescriptorSetLayout(m_single_sampler_descriptor_set_layout);
Vulkan::Util::SafeDestroyDescriptorSetLayout(m_batch_descriptor_set_layout);
Vulkan::Util::SafeDestroySampler(m_point_sampler);
@ -431,6 +433,12 @@ bool GPU_HW_Vulkan::CreatePipelineLayouts()
if (m_vram_write_descriptor_set_layout == VK_NULL_HANDLE)
return false;
dslbuilder.AddBinding(1, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, 1, VK_SHADER_STAGE_COMPUTE_BIT);
dslbuilder.AddBinding(2, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1, VK_SHADER_STAGE_COMPUTE_BIT);
m_vram_read_descriptor_set_layout = dslbuilder.Create(device);
if (m_vram_read_descriptor_set_layout == VK_NULL_HANDLE)
return false;
Vulkan::PipelineLayoutBuilder plbuilder;
plbuilder.AddDescriptorSet(m_batch_descriptor_set_layout);
m_batch_pipeline_layout = plbuilder.Create(device);
@ -448,6 +456,12 @@ bool GPU_HW_Vulkan::CreatePipelineLayouts()
if (m_no_samplers_pipeline_layout == VK_NULL_HANDLE)
return false;
plbuilder.AddDescriptorSet(m_vram_read_descriptor_set_layout);
plbuilder.AddPushConstants(VK_SHADER_STAGE_COMPUTE_BIT, 0, MAX_PUSH_CONSTANTS_SIZE);
m_vram_read_pipeline_layout = plbuilder.Create(device);
if (m_vram_read_pipeline_layout == VK_NULL_HANDLE)
return false;
plbuilder.AddDescriptorSet(m_vram_write_descriptor_set_layout);
plbuilder.AddPushConstants(VK_SHADER_STAGE_FRAGMENT_BIT, 0, MAX_PUSH_CONSTANTS_SIZE);
m_vram_write_pipeline_layout = plbuilder.Create(device);
@ -512,6 +526,7 @@ bool GPU_HW_Vulkan::CreateFramebuffer()
const VkFormat texture_format = VK_FORMAT_R8G8B8A8_UNORM;
const VkFormat depth_format = VK_FORMAT_D16_UNORM;
const VkSampleCountFlagBits samples = static_cast<VkSampleCountFlagBits>(m_multisamples);
const u32 read_staging_buffer_size = (VRAM_WIDTH / 2) * VRAM_HEIGHT * sizeof(u32);
if (!m_vram_texture.Create(texture_width, texture_height, 1, 1, texture_format, samples, VK_IMAGE_VIEW_TYPE_2D,
VK_IMAGE_TILING_OPTIMAL,
@ -529,11 +544,9 @@ bool GPU_HW_Vulkan::CreateFramebuffer()
VK_IMAGE_TILING_OPTIMAL,
VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT |
VK_IMAGE_USAGE_TRANSFER_DST_BIT) ||
!m_vram_readback_texture.Create(VRAM_WIDTH, VRAM_HEIGHT, 1, 1, texture_format, VK_SAMPLE_COUNT_1_BIT,
VK_IMAGE_VIEW_TYPE_2D, VK_IMAGE_TILING_OPTIMAL,
VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT) ||
!m_vram_readback_staging_texture.Create(Vulkan::StagingBuffer::Type::Readback, texture_format, VRAM_WIDTH / 2,
VRAM_HEIGHT))
!m_vram_read_staging_buffer.Create(Vulkan::StagingBuffer::Type::Readback, read_staging_buffer_size,
VK_BUFFER_USAGE_STORAGE_BUFFER_BIT) ||
!m_vram_read_staging_buffer.Map())
{
return false;
}
@ -544,12 +557,9 @@ bool GPU_HW_Vulkan::CreateFramebuffer()
g_vulkan_context->GetRenderPass(VK_FORMAT_UNDEFINED, depth_format, samples, VK_ATTACHMENT_LOAD_OP_DONT_CARE);
m_display_render_pass = g_vulkan_context->GetRenderPass(m_display_texture.GetFormat(), VK_FORMAT_UNDEFINED,
m_display_texture.GetSamples(), VK_ATTACHMENT_LOAD_OP_LOAD);
m_vram_readback_render_pass =
g_vulkan_context->GetRenderPass(m_vram_readback_texture.GetFormat(), VK_FORMAT_UNDEFINED,
m_vram_readback_texture.GetSamples(), VK_ATTACHMENT_LOAD_OP_DONT_CARE);
if (m_vram_render_pass == VK_NULL_HANDLE || m_vram_update_depth_render_pass == VK_NULL_HANDLE ||
m_display_render_pass == VK_NULL_HANDLE || m_vram_readback_render_pass == VK_NULL_HANDLE)
m_display_render_pass == VK_NULL_HANDLE)
{
return false;
}
@ -565,13 +575,9 @@ bool GPU_HW_Vulkan::CreateFramebuffer()
return false;
m_vram_update_depth_framebuffer = m_vram_depth_texture.CreateFramebuffer(m_vram_update_depth_render_pass);
m_vram_readback_framebuffer = m_vram_readback_texture.CreateFramebuffer(m_vram_readback_render_pass);
m_display_framebuffer = m_display_texture.CreateFramebuffer(m_display_render_pass);
if (m_vram_update_depth_framebuffer == VK_NULL_HANDLE || m_vram_readback_framebuffer == VK_NULL_HANDLE ||
m_display_framebuffer == VK_NULL_HANDLE)
{
if (m_vram_update_depth_framebuffer == VK_NULL_HANDLE || m_display_framebuffer == VK_NULL_HANDLE)
return false;
}
VkCommandBuffer cmdbuf = g_vulkan_context->GetCurrentCommandBuffer();
m_vram_texture.TransitionToLayout(cmdbuf, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL);
@ -582,10 +588,13 @@ bool GPU_HW_Vulkan::CreateFramebuffer()
m_batch_descriptor_set = g_vulkan_context->AllocateGlobalDescriptorSet(m_batch_descriptor_set_layout);
m_vram_copy_descriptor_set = g_vulkan_context->AllocateGlobalDescriptorSet(m_single_sampler_descriptor_set_layout);
m_vram_read_descriptor_set = g_vulkan_context->AllocateGlobalDescriptorSet(m_single_sampler_descriptor_set_layout);
m_vram_read_descriptor_set = g_vulkan_context->AllocateGlobalDescriptorSet(m_vram_read_descriptor_set_layout);
m_vram_update_depth_descriptor_set =
g_vulkan_context->AllocateGlobalDescriptorSet(m_single_sampler_descriptor_set_layout);
m_display_descriptor_set = g_vulkan_context->AllocateGlobalDescriptorSet(m_single_sampler_descriptor_set_layout);
if (m_batch_descriptor_set == VK_NULL_HANDLE || m_vram_copy_descriptor_set == VK_NULL_HANDLE ||
m_vram_read_descriptor_set == VK_NULL_HANDLE || m_display_descriptor_set == VK_NULL_HANDLE)
m_vram_read_descriptor_set == VK_NULL_HANDLE || m_vram_update_depth_descriptor_set == VK_NULL_HANDLE ||
m_display_descriptor_set == VK_NULL_HANDLE)
{
return false;
}
@ -598,6 +607,10 @@ bool GPU_HW_Vulkan::CreateFramebuffer()
m_point_sampler, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);
dsubuilder.AddCombinedImageSamplerDescriptorWrite(m_vram_read_descriptor_set, 1, m_vram_texture.GetView(),
m_point_sampler, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);
dsubuilder.AddBufferDescriptorWrite(m_vram_read_descriptor_set, 2, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
m_vram_read_staging_buffer.GetBuffer(), 0, m_vram_read_staging_buffer.GetSize());
dsubuilder.AddCombinedImageSamplerDescriptorWrite(m_vram_update_depth_descriptor_set, 1, m_vram_texture.GetView(),
m_point_sampler, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);
dsubuilder.AddCombinedImageSamplerDescriptorWrite(m_display_descriptor_set, 1, m_display_texture.GetView(),
m_point_sampler, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);
dsubuilder.Update(g_vulkan_context->GetDevice());
@ -743,21 +756,20 @@ void GPU_HW_Vulkan::DestroyFramebuffer()
m_downsample_weight_texture.Destroy(false);
Vulkan::Util::SafeFreeGlobalDescriptorSet(m_batch_descriptor_set);
Vulkan::Util::SafeFreeGlobalDescriptorSet(m_vram_update_depth_descriptor_set);
Vulkan::Util::SafeFreeGlobalDescriptorSet(m_vram_copy_descriptor_set);
Vulkan::Util::SafeFreeGlobalDescriptorSet(m_vram_read_descriptor_set);
Vulkan::Util::SafeFreeGlobalDescriptorSet(m_display_descriptor_set);
Vulkan::Util::SafeDestroyFramebuffer(m_vram_framebuffer);
Vulkan::Util::SafeDestroyFramebuffer(m_vram_update_depth_framebuffer);
Vulkan::Util::SafeDestroyFramebuffer(m_vram_readback_framebuffer);
Vulkan::Util::SafeDestroyFramebuffer(m_display_framebuffer);
m_vram_read_texture.Destroy(false);
m_vram_depth_texture.Destroy(false);
m_vram_texture.Destroy(false);
m_vram_readback_texture.Destroy(false);
m_display_texture.Destroy(false);
m_vram_readback_staging_texture.Destroy(false);
m_vram_read_staging_buffer.Destroy(false);
}
bool GPU_HW_Vulkan::CreateVertexBuffer()
@ -883,6 +895,7 @@ bool GPU_HW_Vulkan::CompilePipelines()
}
Vulkan::GraphicsPipelineBuilder gpbuilder;
Vulkan::ComputePipelineBuilder csbuilder;
// [depth_test][render_mode][texture_mode][transparency_mode][dithering][interlacing]
for (u8 depth_test = 0; depth_test < 3; depth_test++)
@ -1104,22 +1117,16 @@ bool GPU_HW_Vulkan::CompilePipelines()
// VRAM read
{
VkShaderModule fs = g_vulkan_shader_cache->GetFragmentShader(shadergen.GenerateVRAMReadFragmentShader());
if (fs == VK_NULL_HANDLE)
VkShaderModule cs = g_vulkan_shader_cache->GetComputeShader(shadergen.GenerateVRAMReadComputeShader());
if (cs == VK_NULL_HANDLE)
return false;
gpbuilder.SetRenderPass(m_vram_readback_render_pass, 0);
gpbuilder.SetPipelineLayout(m_single_sampler_pipeline_layout);
gpbuilder.SetVertexShader(fullscreen_quad_vertex_shader);
gpbuilder.SetFragmentShader(fs);
gpbuilder.SetNoCullRasterizationState();
gpbuilder.SetNoDepthTestState();
gpbuilder.SetNoBlendingState();
gpbuilder.SetDynamicViewportAndScissorState();
csbuilder.SetPipelineLayout(m_vram_read_pipeline_layout);
csbuilder.SetShader(cs, "main");
m_vram_readback_pipeline = gpbuilder.Create(device, pipeline_cache, false);
vkDestroyShaderModule(device, fs, nullptr);
if (m_vram_readback_pipeline == VK_NULL_HANDLE)
m_vram_read_pipeline = csbuilder.Create(device, pipeline_cache, false);
vkDestroyShaderModule(device, cs, nullptr);
if (m_vram_read_pipeline == VK_NULL_HANDLE)
return false;
UPDATE_PROGRESS();
@ -1257,7 +1264,7 @@ void GPU_HW_Vulkan::DestroyPipelines()
for (VkPipeline& p : m_vram_copy_pipelines)
Vulkan::Util::SafeDestroyPipeline(p);
Vulkan::Util::SafeDestroyPipeline(m_vram_readback_pipeline);
Vulkan::Util::SafeDestroyPipeline(m_vram_read_pipeline);
Vulkan::Util::SafeDestroyPipeline(m_vram_update_depth_pipeline);
Vulkan::Util::SafeDestroyPipeline(m_downsample_first_pass_pipeline);
@ -1427,41 +1434,37 @@ void GPU_HW_Vulkan::ReadVRAM(u32 x, u32 y, u32 width, u32 height)
const Common::Rectangle<u32> copy_rect = GetVRAMTransferBounds(x, y, width, height);
const u32 encoded_width = (copy_rect.GetWidth() + 1) / 2;
const u32 encoded_height = copy_rect.GetHeight();
const u32 encoded_size = encoded_width * encoded_height * sizeof(u32);
EndRenderPass();
VkCommandBuffer cmdbuf = g_vulkan_context->GetCurrentCommandBuffer();
m_vram_texture.TransitionToLayout(cmdbuf, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);
m_vram_readback_texture.TransitionToLayout(cmdbuf, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL);
// Work around Mali driver bug: set full framebuffer size for render area. The GPU crashes with a page fault if we use
// the actual size we're rendering to...
BeginRenderPass(m_vram_readback_render_pass, m_vram_readback_framebuffer, 0, 0, m_vram_readback_texture.GetWidth(),
m_vram_readback_texture.GetHeight());
// Encode the 24-bit texture as 16-bit.
const u32 uniforms[4] = {copy_rect.left, copy_rect.top, copy_rect.GetWidth(), copy_rect.GetHeight()};
vkCmdBindPipeline(cmdbuf, VK_PIPELINE_BIND_POINT_GRAPHICS, m_vram_readback_pipeline);
vkCmdPushConstants(cmdbuf, m_single_sampler_pipeline_layout, VK_SHADER_STAGE_FRAGMENT_BIT, 0, sizeof(uniforms),
uniforms);
vkCmdBindDescriptorSets(cmdbuf, VK_PIPELINE_BIND_POINT_GRAPHICS, m_single_sampler_pipeline_layout, 0, 1,
const u32 uniforms[5] = {copy_rect.left, copy_rect.top, copy_rect.GetWidth(), copy_rect.GetHeight(), encoded_width};
vkCmdBindPipeline(cmdbuf, VK_PIPELINE_BIND_POINT_COMPUTE, m_vram_read_pipeline);
vkCmdPushConstants(cmdbuf, m_vram_read_pipeline_layout, VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(uniforms), uniforms);
vkCmdBindDescriptorSets(cmdbuf, VK_PIPELINE_BIND_POINT_COMPUTE, m_vram_read_pipeline_layout, 0, 1,
&m_vram_read_descriptor_set, 0, nullptr);
Vulkan::Util::SetViewportAndScissor(cmdbuf, 0, 0, encoded_width, encoded_height);
vkCmdDraw(cmdbuf, 3, 1, 0, 0);
EndRenderPass();
const u32 groups_x = (encoded_width + 7) / 8;
const u32 groups_y = (encoded_height + 7) / 8;
vkCmdDispatch(cmdbuf, groups_x, groups_y, 1);
m_vram_readback_texture.TransitionToLayout(cmdbuf, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL);
m_vram_texture.TransitionToLayout(cmdbuf, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL);
m_vram_read_staging_buffer.FlushGPUCache(cmdbuf, VK_ACCESS_SHADER_WRITE_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0,
encoded_size);
g_vulkan_context->ExecuteCommandBuffer(true);
m_vram_read_staging_buffer.InvalidateCPUCache(0, encoded_size);
// Stage the readback.
m_vram_readback_staging_texture.CopyFromTexture(m_vram_readback_texture, 0, 0, 0, 0, 0, 0, encoded_width,
encoded_height);
// And copy it into our shadow buffer (will execute command buffer and stall).
m_vram_readback_staging_texture.ReadTexels(0, 0, encoded_width, encoded_height,
&m_vram_shadow[copy_rect.top * VRAM_WIDTH + copy_rect.left],
VRAM_WIDTH * sizeof(u16));
u16* dst_ptr = &m_vram_shadow[copy_rect.top * VRAM_WIDTH + copy_rect.left];
const char* src_ptr = static_cast<const char*>(m_vram_read_staging_buffer.GetMapPointer());
for (u32 row = 0; row < encoded_height; row++)
{
std::memcpy(dst_ptr, src_ptr, sizeof(u32) * encoded_width);
src_ptr += sizeof(u32) * encoded_width;
dst_ptr += VRAM_WIDTH;
}
RestoreGraphicsAPIState();
}
@ -1667,7 +1670,7 @@ void GPU_HW_Vulkan::UpdateDepthBufferFromMaskBit()
vkCmdBindPipeline(cmdbuf, VK_PIPELINE_BIND_POINT_GRAPHICS, m_vram_update_depth_pipeline);
vkCmdBindDescriptorSets(cmdbuf, VK_PIPELINE_BIND_POINT_GRAPHICS, m_single_sampler_pipeline_layout, 0, 1,
&m_vram_read_descriptor_set, 0, nullptr);
&m_vram_update_depth_descriptor_set, 0, nullptr);
Vulkan::Util::SetViewportAndScissor(cmdbuf, 0, 0, m_vram_texture.GetWidth(), m_vram_texture.GetHeight());
vkCmdDraw(cmdbuf, 3, 1, 0, 0);

View File

@ -81,28 +81,27 @@ private:
VkRenderPass m_vram_render_pass = VK_NULL_HANDLE;
VkRenderPass m_vram_update_depth_render_pass = VK_NULL_HANDLE;
VkRenderPass m_display_render_pass = VK_NULL_HANDLE;
VkRenderPass m_vram_readback_render_pass = VK_NULL_HANDLE;
VkDescriptorSetLayout m_batch_descriptor_set_layout = VK_NULL_HANDLE;
VkDescriptorSetLayout m_single_sampler_descriptor_set_layout = VK_NULL_HANDLE;
VkDescriptorSetLayout m_vram_read_descriptor_set_layout = VK_NULL_HANDLE;
VkDescriptorSetLayout m_vram_write_descriptor_set_layout = VK_NULL_HANDLE;
VkPipelineLayout m_batch_pipeline_layout = VK_NULL_HANDLE;
VkPipelineLayout m_no_samplers_pipeline_layout = VK_NULL_HANDLE;
VkPipelineLayout m_single_sampler_pipeline_layout = VK_NULL_HANDLE;
VkPipelineLayout m_vram_read_pipeline_layout = VK_NULL_HANDLE;
VkPipelineLayout m_vram_write_pipeline_layout = VK_NULL_HANDLE;
Vulkan::Texture m_vram_texture;
Vulkan::Texture m_vram_depth_texture;
Vulkan::Texture m_vram_read_texture;
Vulkan::Texture m_vram_readback_texture;
Vulkan::StagingTexture m_vram_readback_staging_texture;
Vulkan::StagingBuffer m_vram_read_staging_buffer;
Vulkan::Texture m_display_texture;
bool m_use_ssbos_for_vram_writes = false;
VkFramebuffer m_vram_framebuffer = VK_NULL_HANDLE;
VkFramebuffer m_vram_update_depth_framebuffer = VK_NULL_HANDLE;
VkFramebuffer m_vram_readback_framebuffer = VK_NULL_HANDLE;
VkFramebuffer m_display_framebuffer = VK_NULL_HANDLE;
VkSampler m_point_sampler = VK_NULL_HANDLE;
@ -113,6 +112,7 @@ private:
VkDescriptorSet m_vram_copy_descriptor_set = VK_NULL_HANDLE;
VkDescriptorSet m_vram_read_descriptor_set = VK_NULL_HANDLE;
VkDescriptorSet m_vram_write_descriptor_set = VK_NULL_HANDLE;
VkDescriptorSet m_vram_update_depth_descriptor_set = VK_NULL_HANDLE;
VkDescriptorSet m_display_descriptor_set = VK_NULL_HANDLE;
Vulkan::StreamBuffer m_vertex_stream_buffer;
@ -132,7 +132,7 @@ private:
std::array<VkPipeline, 2> m_vram_write_pipelines{};
std::array<VkPipeline, 2> m_vram_copy_pipelines{};
VkPipeline m_vram_readback_pipeline = VK_NULL_HANDLE;
VkPipeline m_vram_read_pipeline = VK_NULL_HANDLE;
VkPipeline m_vram_update_depth_pipeline = VK_NULL_HANDLE;
// [depth_24][interlace_mode]

View File

@ -340,7 +340,7 @@ void ShaderGen::DeclareVertexEntryPoint(
for (u32 i = 0; i < num_texcoord_outputs; i++)
ss << " " << qualifier << "float2 v_tex" << i << ";\n";
for (const auto &[qualifiers, name] : additional_outputs)
for (const auto& [qualifiers, name] : additional_outputs)
{
const char* qualifier_to_use = (std::strlen(qualifiers) > 0) ? qualifiers : qualifier;
ss << " " << qualifier_to_use << " " << name << ";\n";
@ -357,7 +357,7 @@ void ShaderGen::DeclareVertexEntryPoint(
for (u32 i = 0; i < num_texcoord_outputs; i++)
ss << qualifier << "out float2 v_tex" << i << ";\n";
for (const auto &[qualifiers, name] : additional_outputs)
for (const auto& [qualifiers, name] : additional_outputs)
{
const char* qualifier_to_use = (std::strlen(qualifiers) > 0) ? qualifiers : qualifier;
ss << qualifier_to_use << " out " << name << ";\n";
@ -399,7 +399,7 @@ void ShaderGen::DeclareVertexEntryPoint(
ss << " " << qualifier << "out float2 v_tex" << i << " : TEXCOORD" << i << ",\n";
u32 additional_counter = num_texcoord_outputs;
for (const auto &[qualifiers, name] : additional_outputs)
for (const auto& [qualifiers, name] : additional_outputs)
{
const char* qualifier_to_use = (std::strlen(qualifiers) > 0) ? qualifiers : qualifier;
ss << " " << qualifier_to_use << " out " << name << " : TEXCOORD" << additional_counter << ",\n";
@ -433,7 +433,7 @@ void ShaderGen::DeclareFragmentEntryPoint(
for (u32 i = 0; i < num_texcoord_inputs; i++)
ss << " " << qualifier << "float2 v_tex" << i << ";\n";
for (const auto &[qualifiers, name] : additional_inputs)
for (const auto& [qualifiers, name] : additional_inputs)
{
const char* qualifier_to_use = (std::strlen(qualifiers) > 0) ? qualifiers : qualifier;
ss << " " << qualifier_to_use << " " << name << ";\n";
@ -450,7 +450,7 @@ void ShaderGen::DeclareFragmentEntryPoint(
for (u32 i = 0; i < num_texcoord_inputs; i++)
ss << qualifier << "in float2 v_tex" << i << ";\n";
for (const auto &[qualifiers, name] : additional_inputs)
for (const auto& [qualifiers, name] : additional_inputs)
{
const char* qualifier_to_use = (std::strlen(qualifiers) > 0) ? qualifiers : qualifier;
ss << qualifier_to_use << " in " << name << ";\n";
@ -503,7 +503,7 @@ void ShaderGen::DeclareFragmentEntryPoint(
ss << " " << qualifier << "in float2 v_tex" << i << " : TEXCOORD" << i << ",\n";
u32 additional_counter = num_texcoord_inputs;
for (const auto &[qualifiers, name] : additional_inputs)
for (const auto& [qualifiers, name] : additional_inputs)
{
const char* qualifier_to_use = (std::strlen(qualifiers) > 0) ? qualifiers : qualifier;
ss << " " << qualifier_to_use << " in " << name << " : TEXCOORD" << additional_counter << ",\n";
@ -536,6 +536,23 @@ void ShaderGen::DeclareFragmentEntryPoint(
}
}
// Writes the declaration of a compute shader entry point to `ss`; the caller appends the body
// (starting with its opening brace) afterwards.
//
// ss:                          stream receiving the generated shader source.
// local_size_x/_y/_z:          thread-group dimensions (GLSL local_size_*, HLSL numthreads).
//
// Inside the body two identifiers are available in both languages:
//   c_local_id  - index of the invocation within its thread group.
//   c_global_id - index of the invocation within the whole dispatch.
void ShaderGen::DeclareComputeEntryPoint(std::stringstream& ss, u32 local_size_x, u32 local_size_y, u32 local_size_z)
{
  if (m_glsl)
  {
    ss << "#define c_local_id gl_LocalInvocationID\n";
    ss << "#define c_global_id gl_GlobalInvocationID\n";
    ss << "layout(local_size_x = " << local_size_x << ", local_size_y = " << local_size_y
       << ", local_size_z = " << local_size_z << ") in;\n";
    ss << "void main()\n";
  }
  else
  {
    ss << "[numthreads(" << local_size_x << ", " << local_size_y << ", " << local_size_z << ")]\n";
    // SV_GroupThreadID is the HLSL counterpart of gl_LocalInvocationID. SV_GroupID would be the
    // index of the thread group itself (gl_WorkGroupID), giving c_local_id a different meaning
    // than on the GLSL path.
    ss << "void main(uint3 c_local_id : SV_GroupThreadID, uint3 c_global_id : SV_DispatchThreadID)\n";
  }
}
std::string ShaderGen::GenerateScreenQuadVertexShader()
{
std::stringstream ss;

View File

@ -40,6 +40,7 @@ protected:
const std::initializer_list<std::pair<const char*, const char*>>& additional_inputs,
bool declare_fragcoord = false, u32 num_color_outputs = 1, bool depth_output = false,
bool msaa = false, bool ssaa = false, bool declare_sample_id = false);
void DeclareComputeEntryPoint(std::stringstream& ss, u32 local_size_x, u32 local_size_y, u32 local_size_z);
HostDisplay::RenderAPI m_render_api;
bool m_glsl;

View File

@ -1970,7 +1970,7 @@ void DrawSettingsWindow()
"to the hardware renderers.",
&s_settings_copy.gpu_24bit_chroma_smoothing);
MenuHeading("PGXP (Precision Geometry Transform Pipeline");
MenuHeading("PGXP (Precision Geometry Transform Pipeline)");
settings_changed |=
ToggleButton("PGXP Geometry Correction",