GPU/HW: Use SSBO+compute shader for VRAM readbacks
This commit is contained in:
parent
3405041bda
commit
9fc4face66
|
@ -402,6 +402,46 @@ void GraphicsPipelineBuilder::SetRenderPass(VkRenderPass render_pass, u32 subpas
|
|||
m_ci.subpass = subpass;
|
||||
}
|
||||
|
||||
// Constructs a builder whose create-info starts out in the state produced by Clear().
ComputePipelineBuilder::ComputePipelineBuilder()
{
  this->Clear();
}
|
||||
|
||||
void ComputePipelineBuilder::Clear()
|
||||
{
|
||||
m_ci = {};
|
||||
m_ci.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO;
|
||||
}
|
||||
|
||||
// Creates a compute pipeline from the state accumulated in this builder.
//   device         - device the pipeline is created on.
//   pipeline_cache - cache handle passed straight through to vkCreateComputePipelines().
//   clear          - when true, the builder is reset via Clear() after a successful creation.
// Returns the new pipeline, or VK_NULL_HANDLE (after logging the error) when creation fails.
// The builder state is left untouched on failure, regardless of `clear`.
VkPipeline ComputePipelineBuilder::Create(VkDevice device, VkPipelineCache pipeline_cache, bool clear /* = true */)
{
  VkPipeline created;
  const VkResult status = vkCreateComputePipelines(device, pipeline_cache, 1, &m_ci, nullptr, &created);
  if (status == VK_SUCCESS)
  {
    if (clear)
      Clear();

    return created;
  }

  LOG_VULKAN_ERROR(status, "vkCreateComputePipelines() failed: ");
  return VK_NULL_HANDLE;
}
|
||||
|
||||
// Fills in the single shader stage of the compute pipeline.
//   module      - shader module to use for the compute stage.
//   entry_point - entry point name; the pointer is stored as-is (not copied), so it has to
//                 remain valid until Create() has been called.
void ComputePipelineBuilder::SetShader(VkShaderModule module, const char* entry_point)
{
  VkPipelineShaderStageCreateInfo& stage_info = m_ci.stage;
  stage_info.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO;
  stage_info.stage = VK_SHADER_STAGE_COMPUTE_BIT;
  stage_info.module = module;
  stage_info.pName = entry_point;
}
|
||||
|
||||
// Records the pipeline layout that the pipeline will be created with.
void ComputePipelineBuilder::SetPipelineLayout(VkPipelineLayout layout)
{
  this->m_ci.layout = layout;
}
|
||||
|
||||
SamplerBuilder::SamplerBuilder()
|
||||
{
|
||||
Clear();
|
||||
|
@ -542,7 +582,7 @@ void DescriptorSetUpdateBuilder::AddCombinedImageSamplerDescriptorWrite(
|
|||
}
|
||||
|
||||
void DescriptorSetUpdateBuilder::AddBufferDescriptorWrite(VkDescriptorSet set, u32 binding, VkDescriptorType dtype,
|
||||
VkBuffer buffer, u32 offset, u32 size)
|
||||
VkBuffer buffer, VkDeviceSize offset, VkDeviceSize size)
|
||||
{
|
||||
Assert(m_num_writes < MAX_WRITES && m_num_infos < MAX_INFOS);
|
||||
|
||||
|
|
|
@ -138,6 +138,23 @@ private:
|
|||
VkPipelineMultisampleStateCreateInfo m_multisample_state;
|
||||
};
|
||||
|
||||
class ComputePipelineBuilder
|
||||
{
|
||||
public:
|
||||
ComputePipelineBuilder();
|
||||
|
||||
void Clear();
|
||||
|
||||
VkPipeline Create(VkDevice device, VkPipelineCache pipeline_cache = VK_NULL_HANDLE, bool clear = true);
|
||||
|
||||
void SetShader(VkShaderModule module, const char* entry_point);
|
||||
|
||||
void SetPipelineLayout(VkPipelineLayout layout);
|
||||
|
||||
private:
|
||||
VkComputePipelineCreateInfo m_ci;
|
||||
};
|
||||
|
||||
class SamplerBuilder
|
||||
{
|
||||
public:
|
||||
|
@ -177,8 +194,8 @@ public:
|
|||
void AddSamplerDescriptorWrite(VkDescriptorSet set, u32 binding, VkSampler sampler);
|
||||
void AddCombinedImageSamplerDescriptorWrite(VkDescriptorSet set, u32 binding, VkImageView view, VkSampler sampler,
|
||||
VkImageLayout layout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);
|
||||
void AddBufferDescriptorWrite(VkDescriptorSet set, u32 binding, VkDescriptorType dtype, VkBuffer buffer, u32 offset,
|
||||
u32 size);
|
||||
void AddBufferDescriptorWrite(VkDescriptorSet set, u32 binding, VkDescriptorType dtype, VkBuffer buffer,
|
||||
VkDeviceSize offset, VkDeviceSize size);
|
||||
void AddBufferViewDescriptorWrite(VkDescriptorSet set, u32 binding, VkDescriptorType dtype, VkBufferView view);
|
||||
|
||||
private:
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
#include "gpu_hw_d3d11.h"
|
||||
#include "common/align.h"
|
||||
#include "common/assert.h"
|
||||
#include "common/d3d11/shader_compiler.h"
|
||||
#include "common/log.h"
|
||||
|
@ -363,7 +364,7 @@ bool GPU_HW_D3D11::CreateTextureBuffer()
|
|||
|
||||
const CD3D11_SHADER_RESOURCE_VIEW_DESC srv_desc(D3D11_SRV_DIMENSION_BUFFER, DXGI_FORMAT_R16_UINT, 0,
|
||||
VRAM_UPDATE_TEXTURE_BUFFER_SIZE / sizeof(u16));
|
||||
const HRESULT hr = m_device->CreateShaderResourceView(m_texture_stream_buffer.GetD3DBuffer(), &srv_desc,
|
||||
HRESULT hr = m_device->CreateShaderResourceView(m_texture_stream_buffer.GetD3DBuffer(), &srv_desc,
|
||||
m_texture_stream_buffer_srv_r16ui.ReleaseAndGetAddressOf());
|
||||
if (FAILED(hr))
|
||||
{
|
||||
|
@ -371,6 +372,29 @@ bool GPU_HW_D3D11::CreateTextureBuffer()
|
|||
return false;
|
||||
}
|
||||
|
||||
const u32 buffer_elements = (VRAM_WIDTH / 2) * VRAM_HEIGHT;
|
||||
const CD3D11_BUFFER_DESC read_buffer_desc(buffer_elements * sizeof(u32), D3D11_BIND_UNORDERED_ACCESS,
|
||||
D3D11_USAGE_DEFAULT, 0, 0, sizeof(u32));
|
||||
const CD3D11_BUFFER_DESC staging_buffer_desc(buffer_elements * sizeof(u32), 0, D3D11_USAGE_STAGING,
|
||||
D3D11_CPU_ACCESS_READ, 0, 0);
|
||||
const CD3D11_UNORDERED_ACCESS_VIEW_DESC uav_desc(D3D11_UAV_DIMENSION_BUFFER, DXGI_FORMAT_R32_UINT, 0, buffer_elements,
|
||||
0);
|
||||
hr = m_device->CreateBuffer(&read_buffer_desc, nullptr, m_vram_read_buffer.ReleaseAndGetAddressOf());
|
||||
if (SUCCEEDED(hr))
|
||||
{
|
||||
hr = m_device->CreateBuffer(&staging_buffer_desc, nullptr, m_vram_read_staging_buffer.ReleaseAndGetAddressOf());
|
||||
if (SUCCEEDED(hr))
|
||||
{
|
||||
hr = m_device->CreateUnorderedAccessView(m_vram_read_buffer.Get(), &uav_desc,
|
||||
m_vram_read_buffer_view.ReleaseAndGetAddressOf());
|
||||
}
|
||||
}
|
||||
if (FAILED(hr))
|
||||
{
|
||||
Log_ErrorPrintf("Creation of buffer/UAV failed: 0x%08X", hr);
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -612,6 +636,10 @@ bool GPU_HW_D3D11::CompileShaders()
|
|||
if (!m_vram_read_pixel_shader)
|
||||
return false;
|
||||
|
||||
m_vram_read_compute_shader = shader_cache.GetComputeShader(m_device.Get(), shadergen.GenerateVRAMReadComputeShader());
|
||||
if (!m_vram_read_compute_shader)
|
||||
return false;
|
||||
|
||||
UPDATE_PROGRESS();
|
||||
|
||||
m_vram_write_pixel_shader =
|
||||
|
@ -946,6 +974,7 @@ void GPU_HW_D3D11::ReadVRAM(u32 x, u32 y, u32 width, u32 height)
|
|||
const u32 encoded_width = (copy_rect.GetWidth() + 1) / 2;
|
||||
const u32 encoded_height = copy_rect.GetHeight();
|
||||
|
||||
#if 0
|
||||
// Encode the 24-bit texture as 16-bit.
|
||||
const u32 uniforms[4] = {copy_rect.left, copy_rect.top, copy_rect.GetWidth(), copy_rect.GetHeight()};
|
||||
m_context->RSSetState(m_cull_none_rasterizer_state_no_msaa.Get());
|
||||
|
@ -971,6 +1000,50 @@ void GPU_HW_D3D11::ReadVRAM(u32 x, u32 y, u32 width, u32 height)
|
|||
}
|
||||
|
||||
RestoreGraphicsAPIState();
|
||||
#else
|
||||
// Encode the 24-bit texture as 16-bit.
|
||||
const u32 uniforms[5] = {copy_rect.left, copy_rect.top, copy_rect.GetWidth(), copy_rect.GetHeight(), encoded_width};
|
||||
const auto res = m_uniform_stream_buffer.Map(m_context.Get(), MAX_UNIFORM_BUFFER_SIZE, sizeof(uniforms));
|
||||
std::memcpy(res.pointer, uniforms, sizeof(uniforms));
|
||||
m_uniform_stream_buffer.Unmap(m_context.Get(), sizeof(uniforms));
|
||||
m_context->CSSetConstantBuffers(0, 1, m_uniform_stream_buffer.GetD3DBufferArray());
|
||||
|
||||
m_context->OMSetRenderTargets(0, nullptr, nullptr);
|
||||
m_context->CSSetUnorderedAccessViews(0, 1, m_vram_read_buffer_view.GetAddressOf(), nullptr);
|
||||
m_context->CSSetShaderResources(0, 1, m_vram_texture.GetD3DSRVArray());
|
||||
m_context->CSSetShader(m_vram_read_compute_shader.Get(), nullptr, 0);
|
||||
|
||||
const u32 groups_x = (encoded_width + 7) / 8;
|
||||
const u32 groups_y = (encoded_height + 7) / 8;
|
||||
m_context->Dispatch(groups_x, groups_y, 1);
|
||||
|
||||
ID3D11ShaderResourceView* null_view[1] = {nullptr};
|
||||
m_context->CSSetShaderResources(0, 1, null_view);
|
||||
m_context->OMSetRenderTargets(1, m_vram_texture.GetD3DRTVArray(), m_vram_depth_view.Get());
|
||||
|
||||
const CD3D11_BOX copy_box(0, 0, 0, static_cast<LONG>(encoded_width * encoded_height * sizeof(u32)), 1, 1);
|
||||
m_context->CopySubresourceRegion(m_vram_read_staging_buffer.Get(), 0, 0, 0, 0, m_vram_read_buffer.Get(), 0,
|
||||
&copy_box);
|
||||
|
||||
D3D11_MAPPED_SUBRESOURCE msr;
|
||||
HRESULT hr = m_context->Map(m_vram_read_staging_buffer.Get(), 0, D3D11_MAP_READ, 0, &msr);
|
||||
if (FAILED(hr))
|
||||
{
|
||||
Log_ErrorPrintf("Failed to map VRAM readback buffer");
|
||||
return;
|
||||
}
|
||||
|
||||
u16* dst_ptr = &m_vram_shadow[copy_rect.top * VRAM_WIDTH + copy_rect.left];
|
||||
const u8* src_ptr = static_cast<const u8*>(msr.pData);
|
||||
for (u32 row = 0; row < encoded_height; row++)
|
||||
{
|
||||
std::memcpy(dst_ptr, src_ptr, sizeof(u32) * encoded_width);
|
||||
src_ptr += sizeof(u32) * encoded_width;
|
||||
dst_ptr += VRAM_WIDTH;
|
||||
}
|
||||
|
||||
m_context->Unmap(m_vram_read_staging_buffer.Get(), 0);
|
||||
#endif
|
||||
}
|
||||
|
||||
void GPU_HW_D3D11::FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color)
|
||||
|
|
|
@ -84,7 +84,6 @@ private:
|
|||
D3D11::Texture m_vram_depth_texture;
|
||||
ComPtr<ID3D11DepthStencilView> m_vram_depth_view;
|
||||
D3D11::Texture m_vram_read_texture;
|
||||
D3D11::Texture m_vram_encoding_texture;
|
||||
D3D11::Texture m_display_texture;
|
||||
|
||||
D3D11::StreamBuffer m_vertex_stream_buffer;
|
||||
|
@ -93,7 +92,9 @@ private:
|
|||
|
||||
D3D11::StreamBuffer m_texture_stream_buffer;
|
||||
|
||||
D3D11::StagingTexture m_vram_readback_texture;
|
||||
ComPtr<ID3D11Buffer> m_vram_read_buffer;
|
||||
ComPtr<ID3D11Buffer> m_vram_read_staging_buffer;
|
||||
ComPtr<ID3D11UnorderedAccessView> m_vram_read_buffer_view;
|
||||
|
||||
ComPtr<ID3D11ShaderResourceView> m_texture_stream_buffer_srv_r16ui;
|
||||
|
||||
|
@ -123,7 +124,7 @@ private:
|
|||
ComPtr<ID3D11PixelShader> m_copy_pixel_shader;
|
||||
ComPtr<ID3D11PixelShader> m_vram_fill_pixel_shader;
|
||||
ComPtr<ID3D11PixelShader> m_vram_interlaced_fill_pixel_shader;
|
||||
ComPtr<ID3D11PixelShader> m_vram_read_pixel_shader;
|
||||
ComPtr<ID3D11ComputeShader> m_vram_read_compute_shader;
|
||||
ComPtr<ID3D11PixelShader> m_vram_write_pixel_shader;
|
||||
ComPtr<ID3D11PixelShader> m_vram_copy_pixel_shader;
|
||||
ComPtr<ID3D11PixelShader> m_vram_update_depth_pixel_shader;
|
||||
|
@ -139,4 +140,9 @@ private:
|
|||
D3D11::Texture m_downsample_texture;
|
||||
D3D11::Texture m_downsample_weight_texture;
|
||||
std::vector<std::pair<ComPtr<ID3D11ShaderResourceView>, ComPtr<ID3D11RenderTargetView>>> m_downsample_mip_views;
|
||||
|
||||
// fallback vram read
|
||||
D3D11::Texture m_vram_encoding_texture;
|
||||
D3D11::StagingTexture m_vram_readback_texture;
|
||||
ComPtr<ID3D11PixelShader> m_vram_read_pixel_shader;
|
||||
};
|
||||
|
|
|
@ -1192,6 +1192,88 @@ uint SampleVRAM(uint2 coords)
|
|||
return ss.str();
|
||||
}
|
||||
|
||||
std::string GPU_HW_ShaderGen::GenerateVRAMReadComputeShader()
|
||||
{
|
||||
std::stringstream ss;
|
||||
WriteHeader(ss);
|
||||
WriteCommonFunctions(ss);
|
||||
DeclareUniformBuffer(ss, {"uint2 u_base_coords", "uint2 u_size", "uint u_buffer_stride"}, true);
|
||||
|
||||
DeclareTexture(ss, "samp0", 0, UsingMSAA());
|
||||
|
||||
if (m_glsl)
|
||||
{
|
||||
ss << "layout(std430";
|
||||
if (IsVulkan())
|
||||
ss << ", set = 0, binding = 2";
|
||||
else if (m_use_glsl_binding_layout)
|
||||
ss << ", binding = 1";
|
||||
|
||||
ss << ") restrict writeonly buffer SSBO {\n";
|
||||
ss << " uint s_output_buffer[];\n";
|
||||
ss << "};\n";
|
||||
}
|
||||
else
|
||||
{
|
||||
ss << "RWBuffer<uint> s_output_buffer : register(u0);\n";
|
||||
}
|
||||
|
||||
ss << R"(
|
||||
float4 LoadVRAM(int2 coords)
|
||||
{
|
||||
#if MULTISAMPLING
|
||||
float4 value = LOAD_TEXTURE_MS(samp0, coords, 0u);
|
||||
for (uint sample_index = 1u; sample_index < MULTISAMPLES; sample_index++)
|
||||
value += LOAD_TEXTURE_MS(samp0, coords, sample_index);
|
||||
value /= float(MULTISAMPLES);
|
||||
return value;
|
||||
#else
|
||||
return LOAD_TEXTURE(samp0, coords, 0);
|
||||
#endif
|
||||
}
|
||||
|
||||
uint SampleVRAM(uint2 coords)
|
||||
{
|
||||
if (RESOLUTION_SCALE == 1u)
|
||||
return RGBA8ToRGBA5551(LoadVRAM(int2(coords)));
|
||||
|
||||
// Box filter for downsampling.
|
||||
float4 value = float4(0.0, 0.0, 0.0, 0.0);
|
||||
uint2 base_coords = coords * uint2(RESOLUTION_SCALE, RESOLUTION_SCALE);
|
||||
for (uint offset_x = 0u; offset_x < RESOLUTION_SCALE; offset_x++)
|
||||
{
|
||||
for (uint offset_y = 0u; offset_y < RESOLUTION_SCALE; offset_y++)
|
||||
value += LoadVRAM(int2(base_coords + uint2(offset_x, offset_y)));
|
||||
}
|
||||
value /= float(RESOLUTION_SCALE * RESOLUTION_SCALE);
|
||||
return RGBA8ToRGBA5551(value);
|
||||
}
|
||||
)";
|
||||
|
||||
DeclareComputeEntryPoint(ss, 8, 8, 1);
|
||||
ss << R"(
|
||||
{
|
||||
uint2 sample_coords = uint2(uint(c_global_id.x) * 2u, uint(c_global_id.y));
|
||||
|
||||
#if API_OPENGL || API_OPENGL_ES
|
||||
// Lower-left origin flip for OpenGL.
|
||||
// We want to write the image out upside-down so we can read it top-to-bottom.
|
||||
sample_coords.y = u_size.y - sample_coords.y - 1u;
|
||||
#endif
|
||||
|
||||
sample_coords += u_base_coords;
|
||||
|
||||
// We're encoding as 32-bit, so the output width is halved and we pack two 16-bit pixels in one 32-bit pixel.
|
||||
uint left = SampleVRAM(sample_coords);
|
||||
uint right = SampleVRAM(uint2(sample_coords.x + 1u, sample_coords.y));
|
||||
|
||||
uint buffer_offset = c_global_id.y * u_buffer_stride + c_global_id.x;
|
||||
s_output_buffer[buffer_offset] = left | (right << 16);
|
||||
})";
|
||||
|
||||
return ss.str();
|
||||
}
|
||||
|
||||
std::string GPU_HW_ShaderGen::GenerateVRAMWriteFragmentShader(bool use_ssbo)
|
||||
{
|
||||
std::stringstream ss;
|
||||
|
|
|
@ -17,6 +17,7 @@ public:
|
|||
std::string GenerateDisplayFragmentShader(bool depth_24bit, GPU_HW::InterlacedRenderMode interlace_mode,
|
||||
bool smooth_chroma);
|
||||
std::string GenerateVRAMReadFragmentShader();
|
||||
std::string GenerateVRAMReadComputeShader();
|
||||
std::string GenerateVRAMWriteFragmentShader(bool use_ssbo);
|
||||
std::string GenerateVRAMCopyFragmentShader();
|
||||
std::string GenerateVRAMUpdateDepthFragmentShader();
|
||||
|
|
|
@ -360,10 +360,12 @@ void GPU_HW_Vulkan::DestroyResources()
|
|||
m_texture_stream_buffer.Destroy(false);
|
||||
|
||||
Vulkan::Util::SafeDestroyPipelineLayout(m_vram_write_pipeline_layout);
|
||||
Vulkan::Util::SafeDestroyPipelineLayout(m_vram_read_pipeline_layout);
|
||||
Vulkan::Util::SafeDestroyPipelineLayout(m_single_sampler_pipeline_layout);
|
||||
Vulkan::Util::SafeDestroyPipelineLayout(m_no_samplers_pipeline_layout);
|
||||
Vulkan::Util::SafeDestroyPipelineLayout(m_batch_pipeline_layout);
|
||||
Vulkan::Util::SafeDestroyDescriptorSetLayout(m_vram_write_descriptor_set_layout);
|
||||
Vulkan::Util::SafeDestroyDescriptorSetLayout(m_vram_read_descriptor_set_layout);
|
||||
Vulkan::Util::SafeDestroyDescriptorSetLayout(m_single_sampler_descriptor_set_layout);
|
||||
Vulkan::Util::SafeDestroyDescriptorSetLayout(m_batch_descriptor_set_layout);
|
||||
Vulkan::Util::SafeDestroySampler(m_point_sampler);
|
||||
|
@ -431,6 +433,12 @@ bool GPU_HW_Vulkan::CreatePipelineLayouts()
|
|||
if (m_vram_write_descriptor_set_layout == VK_NULL_HANDLE)
|
||||
return false;
|
||||
|
||||
dslbuilder.AddBinding(1, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, 1, VK_SHADER_STAGE_COMPUTE_BIT);
|
||||
dslbuilder.AddBinding(2, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1, VK_SHADER_STAGE_COMPUTE_BIT);
|
||||
m_vram_read_descriptor_set_layout = dslbuilder.Create(device);
|
||||
if (m_vram_read_descriptor_set_layout == VK_NULL_HANDLE)
|
||||
return false;
|
||||
|
||||
Vulkan::PipelineLayoutBuilder plbuilder;
|
||||
plbuilder.AddDescriptorSet(m_batch_descriptor_set_layout);
|
||||
m_batch_pipeline_layout = plbuilder.Create(device);
|
||||
|
@ -448,6 +456,12 @@ bool GPU_HW_Vulkan::CreatePipelineLayouts()
|
|||
if (m_no_samplers_pipeline_layout == VK_NULL_HANDLE)
|
||||
return false;
|
||||
|
||||
plbuilder.AddDescriptorSet(m_vram_read_descriptor_set_layout);
|
||||
plbuilder.AddPushConstants(VK_SHADER_STAGE_COMPUTE_BIT, 0, MAX_PUSH_CONSTANTS_SIZE);
|
||||
m_vram_read_pipeline_layout = plbuilder.Create(device);
|
||||
if (m_vram_read_pipeline_layout == VK_NULL_HANDLE)
|
||||
return false;
|
||||
|
||||
plbuilder.AddDescriptorSet(m_vram_write_descriptor_set_layout);
|
||||
plbuilder.AddPushConstants(VK_SHADER_STAGE_FRAGMENT_BIT, 0, MAX_PUSH_CONSTANTS_SIZE);
|
||||
m_vram_write_pipeline_layout = plbuilder.Create(device);
|
||||
|
@ -512,6 +526,7 @@ bool GPU_HW_Vulkan::CreateFramebuffer()
|
|||
const VkFormat texture_format = VK_FORMAT_R8G8B8A8_UNORM;
|
||||
const VkFormat depth_format = VK_FORMAT_D16_UNORM;
|
||||
const VkSampleCountFlagBits samples = static_cast<VkSampleCountFlagBits>(m_multisamples);
|
||||
const u32 read_staging_buffer_size = (VRAM_WIDTH / 2) * VRAM_HEIGHT * sizeof(u32);
|
||||
|
||||
if (!m_vram_texture.Create(texture_width, texture_height, 1, 1, texture_format, samples, VK_IMAGE_VIEW_TYPE_2D,
|
||||
VK_IMAGE_TILING_OPTIMAL,
|
||||
|
@ -529,11 +544,9 @@ bool GPU_HW_Vulkan::CreateFramebuffer()
|
|||
VK_IMAGE_TILING_OPTIMAL,
|
||||
VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT |
|
||||
VK_IMAGE_USAGE_TRANSFER_DST_BIT) ||
|
||||
!m_vram_readback_texture.Create(VRAM_WIDTH, VRAM_HEIGHT, 1, 1, texture_format, VK_SAMPLE_COUNT_1_BIT,
|
||||
VK_IMAGE_VIEW_TYPE_2D, VK_IMAGE_TILING_OPTIMAL,
|
||||
VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT) ||
|
||||
!m_vram_readback_staging_texture.Create(Vulkan::StagingBuffer::Type::Readback, texture_format, VRAM_WIDTH / 2,
|
||||
VRAM_HEIGHT))
|
||||
!m_vram_read_staging_buffer.Create(Vulkan::StagingBuffer::Type::Readback, read_staging_buffer_size,
|
||||
VK_BUFFER_USAGE_STORAGE_BUFFER_BIT) ||
|
||||
!m_vram_read_staging_buffer.Map())
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
@ -544,12 +557,9 @@ bool GPU_HW_Vulkan::CreateFramebuffer()
|
|||
g_vulkan_context->GetRenderPass(VK_FORMAT_UNDEFINED, depth_format, samples, VK_ATTACHMENT_LOAD_OP_DONT_CARE);
|
||||
m_display_render_pass = g_vulkan_context->GetRenderPass(m_display_texture.GetFormat(), VK_FORMAT_UNDEFINED,
|
||||
m_display_texture.GetSamples(), VK_ATTACHMENT_LOAD_OP_LOAD);
|
||||
m_vram_readback_render_pass =
|
||||
g_vulkan_context->GetRenderPass(m_vram_readback_texture.GetFormat(), VK_FORMAT_UNDEFINED,
|
||||
m_vram_readback_texture.GetSamples(), VK_ATTACHMENT_LOAD_OP_DONT_CARE);
|
||||
|
||||
if (m_vram_render_pass == VK_NULL_HANDLE || m_vram_update_depth_render_pass == VK_NULL_HANDLE ||
|
||||
m_display_render_pass == VK_NULL_HANDLE || m_vram_readback_render_pass == VK_NULL_HANDLE)
|
||||
m_display_render_pass == VK_NULL_HANDLE)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
@ -565,13 +575,9 @@ bool GPU_HW_Vulkan::CreateFramebuffer()
|
|||
return false;
|
||||
|
||||
m_vram_update_depth_framebuffer = m_vram_depth_texture.CreateFramebuffer(m_vram_update_depth_render_pass);
|
||||
m_vram_readback_framebuffer = m_vram_readback_texture.CreateFramebuffer(m_vram_readback_render_pass);
|
||||
m_display_framebuffer = m_display_texture.CreateFramebuffer(m_display_render_pass);
|
||||
if (m_vram_update_depth_framebuffer == VK_NULL_HANDLE || m_vram_readback_framebuffer == VK_NULL_HANDLE ||
|
||||
m_display_framebuffer == VK_NULL_HANDLE)
|
||||
{
|
||||
if (m_vram_update_depth_framebuffer == VK_NULL_HANDLE || m_display_framebuffer == VK_NULL_HANDLE)
|
||||
return false;
|
||||
}
|
||||
|
||||
VkCommandBuffer cmdbuf = g_vulkan_context->GetCurrentCommandBuffer();
|
||||
m_vram_texture.TransitionToLayout(cmdbuf, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL);
|
||||
|
@ -582,10 +588,13 @@ bool GPU_HW_Vulkan::CreateFramebuffer()
|
|||
|
||||
m_batch_descriptor_set = g_vulkan_context->AllocateGlobalDescriptorSet(m_batch_descriptor_set_layout);
|
||||
m_vram_copy_descriptor_set = g_vulkan_context->AllocateGlobalDescriptorSet(m_single_sampler_descriptor_set_layout);
|
||||
m_vram_read_descriptor_set = g_vulkan_context->AllocateGlobalDescriptorSet(m_single_sampler_descriptor_set_layout);
|
||||
m_vram_read_descriptor_set = g_vulkan_context->AllocateGlobalDescriptorSet(m_vram_read_descriptor_set_layout);
|
||||
m_vram_update_depth_descriptor_set =
|
||||
g_vulkan_context->AllocateGlobalDescriptorSet(m_single_sampler_descriptor_set_layout);
|
||||
m_display_descriptor_set = g_vulkan_context->AllocateGlobalDescriptorSet(m_single_sampler_descriptor_set_layout);
|
||||
if (m_batch_descriptor_set == VK_NULL_HANDLE || m_vram_copy_descriptor_set == VK_NULL_HANDLE ||
|
||||
m_vram_read_descriptor_set == VK_NULL_HANDLE || m_display_descriptor_set == VK_NULL_HANDLE)
|
||||
m_vram_read_descriptor_set == VK_NULL_HANDLE || m_vram_update_depth_descriptor_set == VK_NULL_HANDLE ||
|
||||
m_display_descriptor_set == VK_NULL_HANDLE)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
@ -598,6 +607,10 @@ bool GPU_HW_Vulkan::CreateFramebuffer()
|
|||
m_point_sampler, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);
|
||||
dsubuilder.AddCombinedImageSamplerDescriptorWrite(m_vram_read_descriptor_set, 1, m_vram_texture.GetView(),
|
||||
m_point_sampler, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);
|
||||
dsubuilder.AddBufferDescriptorWrite(m_vram_read_descriptor_set, 2, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
|
||||
m_vram_read_staging_buffer.GetBuffer(), 0, m_vram_read_staging_buffer.GetSize());
|
||||
dsubuilder.AddCombinedImageSamplerDescriptorWrite(m_vram_update_depth_descriptor_set, 1, m_vram_texture.GetView(),
|
||||
m_point_sampler, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);
|
||||
dsubuilder.AddCombinedImageSamplerDescriptorWrite(m_display_descriptor_set, 1, m_display_texture.GetView(),
|
||||
m_point_sampler, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);
|
||||
dsubuilder.Update(g_vulkan_context->GetDevice());
|
||||
|
@ -743,21 +756,20 @@ void GPU_HW_Vulkan::DestroyFramebuffer()
|
|||
m_downsample_weight_texture.Destroy(false);
|
||||
|
||||
Vulkan::Util::SafeFreeGlobalDescriptorSet(m_batch_descriptor_set);
|
||||
Vulkan::Util::SafeFreeGlobalDescriptorSet(m_vram_update_depth_descriptor_set);
|
||||
Vulkan::Util::SafeFreeGlobalDescriptorSet(m_vram_copy_descriptor_set);
|
||||
Vulkan::Util::SafeFreeGlobalDescriptorSet(m_vram_read_descriptor_set);
|
||||
Vulkan::Util::SafeFreeGlobalDescriptorSet(m_display_descriptor_set);
|
||||
|
||||
Vulkan::Util::SafeDestroyFramebuffer(m_vram_framebuffer);
|
||||
Vulkan::Util::SafeDestroyFramebuffer(m_vram_update_depth_framebuffer);
|
||||
Vulkan::Util::SafeDestroyFramebuffer(m_vram_readback_framebuffer);
|
||||
Vulkan::Util::SafeDestroyFramebuffer(m_display_framebuffer);
|
||||
|
||||
m_vram_read_texture.Destroy(false);
|
||||
m_vram_depth_texture.Destroy(false);
|
||||
m_vram_texture.Destroy(false);
|
||||
m_vram_readback_texture.Destroy(false);
|
||||
m_display_texture.Destroy(false);
|
||||
m_vram_readback_staging_texture.Destroy(false);
|
||||
m_vram_read_staging_buffer.Destroy(false);
|
||||
}
|
||||
|
||||
bool GPU_HW_Vulkan::CreateVertexBuffer()
|
||||
|
@ -883,6 +895,7 @@ bool GPU_HW_Vulkan::CompilePipelines()
|
|||
}
|
||||
|
||||
Vulkan::GraphicsPipelineBuilder gpbuilder;
|
||||
Vulkan::ComputePipelineBuilder csbuilder;
|
||||
|
||||
// [depth_test][render_mode][texture_mode][transparency_mode][dithering][interlacing]
|
||||
for (u8 depth_test = 0; depth_test < 3; depth_test++)
|
||||
|
@ -1104,22 +1117,16 @@ bool GPU_HW_Vulkan::CompilePipelines()
|
|||
|
||||
// VRAM read
|
||||
{
|
||||
VkShaderModule fs = g_vulkan_shader_cache->GetFragmentShader(shadergen.GenerateVRAMReadFragmentShader());
|
||||
if (fs == VK_NULL_HANDLE)
|
||||
VkShaderModule cs = g_vulkan_shader_cache->GetComputeShader(shadergen.GenerateVRAMReadComputeShader());
|
||||
if (cs == VK_NULL_HANDLE)
|
||||
return false;
|
||||
|
||||
gpbuilder.SetRenderPass(m_vram_readback_render_pass, 0);
|
||||
gpbuilder.SetPipelineLayout(m_single_sampler_pipeline_layout);
|
||||
gpbuilder.SetVertexShader(fullscreen_quad_vertex_shader);
|
||||
gpbuilder.SetFragmentShader(fs);
|
||||
gpbuilder.SetNoCullRasterizationState();
|
||||
gpbuilder.SetNoDepthTestState();
|
||||
gpbuilder.SetNoBlendingState();
|
||||
gpbuilder.SetDynamicViewportAndScissorState();
|
||||
csbuilder.SetPipelineLayout(m_vram_read_pipeline_layout);
|
||||
csbuilder.SetShader(cs, "main");
|
||||
|
||||
m_vram_readback_pipeline = gpbuilder.Create(device, pipeline_cache, false);
|
||||
vkDestroyShaderModule(device, fs, nullptr);
|
||||
if (m_vram_readback_pipeline == VK_NULL_HANDLE)
|
||||
m_vram_read_pipeline = csbuilder.Create(device, pipeline_cache, false);
|
||||
vkDestroyShaderModule(device, cs, nullptr);
|
||||
if (m_vram_read_pipeline == VK_NULL_HANDLE)
|
||||
return false;
|
||||
|
||||
UPDATE_PROGRESS();
|
||||
|
@ -1257,7 +1264,7 @@ void GPU_HW_Vulkan::DestroyPipelines()
|
|||
for (VkPipeline& p : m_vram_copy_pipelines)
|
||||
Vulkan::Util::SafeDestroyPipeline(p);
|
||||
|
||||
Vulkan::Util::SafeDestroyPipeline(m_vram_readback_pipeline);
|
||||
Vulkan::Util::SafeDestroyPipeline(m_vram_read_pipeline);
|
||||
Vulkan::Util::SafeDestroyPipeline(m_vram_update_depth_pipeline);
|
||||
|
||||
Vulkan::Util::SafeDestroyPipeline(m_downsample_first_pass_pipeline);
|
||||
|
@ -1427,41 +1434,37 @@ void GPU_HW_Vulkan::ReadVRAM(u32 x, u32 y, u32 width, u32 height)
|
|||
const Common::Rectangle<u32> copy_rect = GetVRAMTransferBounds(x, y, width, height);
|
||||
const u32 encoded_width = (copy_rect.GetWidth() + 1) / 2;
|
||||
const u32 encoded_height = copy_rect.GetHeight();
|
||||
const u32 encoded_size = encoded_width * encoded_height * sizeof(u32);
|
||||
|
||||
EndRenderPass();
|
||||
|
||||
VkCommandBuffer cmdbuf = g_vulkan_context->GetCurrentCommandBuffer();
|
||||
m_vram_texture.TransitionToLayout(cmdbuf, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);
|
||||
m_vram_readback_texture.TransitionToLayout(cmdbuf, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL);
|
||||
|
||||
// Work around Mali driver bug: set full framebuffer size for render area. The GPU crashes with a page fault if we use
|
||||
// the actual size we're rendering to...
|
||||
BeginRenderPass(m_vram_readback_render_pass, m_vram_readback_framebuffer, 0, 0, m_vram_readback_texture.GetWidth(),
|
||||
m_vram_readback_texture.GetHeight());
|
||||
|
||||
// Encode the 24-bit texture as 16-bit.
|
||||
const u32 uniforms[4] = {copy_rect.left, copy_rect.top, copy_rect.GetWidth(), copy_rect.GetHeight()};
|
||||
vkCmdBindPipeline(cmdbuf, VK_PIPELINE_BIND_POINT_GRAPHICS, m_vram_readback_pipeline);
|
||||
vkCmdPushConstants(cmdbuf, m_single_sampler_pipeline_layout, VK_SHADER_STAGE_FRAGMENT_BIT, 0, sizeof(uniforms),
|
||||
uniforms);
|
||||
vkCmdBindDescriptorSets(cmdbuf, VK_PIPELINE_BIND_POINT_GRAPHICS, m_single_sampler_pipeline_layout, 0, 1,
|
||||
const u32 uniforms[5] = {copy_rect.left, copy_rect.top, copy_rect.GetWidth(), copy_rect.GetHeight(), encoded_width};
|
||||
vkCmdBindPipeline(cmdbuf, VK_PIPELINE_BIND_POINT_COMPUTE, m_vram_read_pipeline);
|
||||
vkCmdPushConstants(cmdbuf, m_vram_read_pipeline_layout, VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(uniforms), uniforms);
|
||||
vkCmdBindDescriptorSets(cmdbuf, VK_PIPELINE_BIND_POINT_COMPUTE, m_vram_read_pipeline_layout, 0, 1,
|
||||
&m_vram_read_descriptor_set, 0, nullptr);
|
||||
Vulkan::Util::SetViewportAndScissor(cmdbuf, 0, 0, encoded_width, encoded_height);
|
||||
vkCmdDraw(cmdbuf, 3, 1, 0, 0);
|
||||
|
||||
EndRenderPass();
|
||||
const u32 groups_x = (encoded_width + 7) / 8;
|
||||
const u32 groups_y = (encoded_height + 7) / 8;
|
||||
vkCmdDispatch(cmdbuf, groups_x, groups_y, 1);
|
||||
|
||||
m_vram_readback_texture.TransitionToLayout(cmdbuf, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL);
|
||||
m_vram_texture.TransitionToLayout(cmdbuf, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL);
|
||||
m_vram_read_staging_buffer.FlushGPUCache(cmdbuf, VK_ACCESS_SHADER_WRITE_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0,
|
||||
encoded_size);
|
||||
g_vulkan_context->ExecuteCommandBuffer(true);
|
||||
m_vram_read_staging_buffer.InvalidateCPUCache(0, encoded_size);
|
||||
|
||||
// Stage the readback.
|
||||
m_vram_readback_staging_texture.CopyFromTexture(m_vram_readback_texture, 0, 0, 0, 0, 0, 0, encoded_width,
|
||||
encoded_height);
|
||||
|
||||
// And copy it into our shadow buffer (will execute command buffer and stall).
|
||||
m_vram_readback_staging_texture.ReadTexels(0, 0, encoded_width, encoded_height,
|
||||
&m_vram_shadow[copy_rect.top * VRAM_WIDTH + copy_rect.left],
|
||||
VRAM_WIDTH * sizeof(u16));
|
||||
u16* dst_ptr = &m_vram_shadow[copy_rect.top * VRAM_WIDTH + copy_rect.left];
|
||||
const char* src_ptr = static_cast<const char*>(m_vram_read_staging_buffer.GetMapPointer());
|
||||
for (u32 row = 0; row < encoded_height; row++)
|
||||
{
|
||||
std::memcpy(dst_ptr, src_ptr, sizeof(u32) * encoded_width);
|
||||
src_ptr += sizeof(u32) * encoded_width;
|
||||
dst_ptr += VRAM_WIDTH;
|
||||
}
|
||||
|
||||
RestoreGraphicsAPIState();
|
||||
}
|
||||
|
@ -1667,7 +1670,7 @@ void GPU_HW_Vulkan::UpdateDepthBufferFromMaskBit()
|
|||
|
||||
vkCmdBindPipeline(cmdbuf, VK_PIPELINE_BIND_POINT_GRAPHICS, m_vram_update_depth_pipeline);
|
||||
vkCmdBindDescriptorSets(cmdbuf, VK_PIPELINE_BIND_POINT_GRAPHICS, m_single_sampler_pipeline_layout, 0, 1,
|
||||
&m_vram_read_descriptor_set, 0, nullptr);
|
||||
&m_vram_update_depth_descriptor_set, 0, nullptr);
|
||||
Vulkan::Util::SetViewportAndScissor(cmdbuf, 0, 0, m_vram_texture.GetWidth(), m_vram_texture.GetHeight());
|
||||
vkCmdDraw(cmdbuf, 3, 1, 0, 0);
|
||||
|
||||
|
|
|
@ -81,28 +81,27 @@ private:
|
|||
VkRenderPass m_vram_render_pass = VK_NULL_HANDLE;
|
||||
VkRenderPass m_vram_update_depth_render_pass = VK_NULL_HANDLE;
|
||||
VkRenderPass m_display_render_pass = VK_NULL_HANDLE;
|
||||
VkRenderPass m_vram_readback_render_pass = VK_NULL_HANDLE;
|
||||
|
||||
VkDescriptorSetLayout m_batch_descriptor_set_layout = VK_NULL_HANDLE;
|
||||
VkDescriptorSetLayout m_single_sampler_descriptor_set_layout = VK_NULL_HANDLE;
|
||||
VkDescriptorSetLayout m_vram_read_descriptor_set_layout = VK_NULL_HANDLE;
|
||||
VkDescriptorSetLayout m_vram_write_descriptor_set_layout = VK_NULL_HANDLE;
|
||||
|
||||
VkPipelineLayout m_batch_pipeline_layout = VK_NULL_HANDLE;
|
||||
VkPipelineLayout m_no_samplers_pipeline_layout = VK_NULL_HANDLE;
|
||||
VkPipelineLayout m_single_sampler_pipeline_layout = VK_NULL_HANDLE;
|
||||
VkPipelineLayout m_vram_read_pipeline_layout = VK_NULL_HANDLE;
|
||||
VkPipelineLayout m_vram_write_pipeline_layout = VK_NULL_HANDLE;
|
||||
|
||||
Vulkan::Texture m_vram_texture;
|
||||
Vulkan::Texture m_vram_depth_texture;
|
||||
Vulkan::Texture m_vram_read_texture;
|
||||
Vulkan::Texture m_vram_readback_texture;
|
||||
Vulkan::StagingTexture m_vram_readback_staging_texture;
|
||||
Vulkan::StagingBuffer m_vram_read_staging_buffer;
|
||||
Vulkan::Texture m_display_texture;
|
||||
bool m_use_ssbos_for_vram_writes = false;
|
||||
|
||||
VkFramebuffer m_vram_framebuffer = VK_NULL_HANDLE;
|
||||
VkFramebuffer m_vram_update_depth_framebuffer = VK_NULL_HANDLE;
|
||||
VkFramebuffer m_vram_readback_framebuffer = VK_NULL_HANDLE;
|
||||
VkFramebuffer m_display_framebuffer = VK_NULL_HANDLE;
|
||||
|
||||
VkSampler m_point_sampler = VK_NULL_HANDLE;
|
||||
|
@ -113,6 +112,7 @@ private:
|
|||
VkDescriptorSet m_vram_copy_descriptor_set = VK_NULL_HANDLE;
|
||||
VkDescriptorSet m_vram_read_descriptor_set = VK_NULL_HANDLE;
|
||||
VkDescriptorSet m_vram_write_descriptor_set = VK_NULL_HANDLE;
|
||||
VkDescriptorSet m_vram_update_depth_descriptor_set = VK_NULL_HANDLE;
|
||||
VkDescriptorSet m_display_descriptor_set = VK_NULL_HANDLE;
|
||||
|
||||
Vulkan::StreamBuffer m_vertex_stream_buffer;
|
||||
|
@ -132,7 +132,7 @@ private:
|
|||
std::array<VkPipeline, 2> m_vram_write_pipelines{};
|
||||
std::array<VkPipeline, 2> m_vram_copy_pipelines{};
|
||||
|
||||
VkPipeline m_vram_readback_pipeline = VK_NULL_HANDLE;
|
||||
VkPipeline m_vram_read_pipeline = VK_NULL_HANDLE;
|
||||
VkPipeline m_vram_update_depth_pipeline = VK_NULL_HANDLE;
|
||||
|
||||
// [depth_24][interlace_mode]
|
||||
|
|
|
@ -536,6 +536,23 @@ void ShaderGen::DeclareFragmentEntryPoint(
|
|||
}
|
||||
}
|
||||
|
||||
void ShaderGen::DeclareComputeEntryPoint(std::stringstream& ss, u32 local_size_x, u32 local_size_y, u32 local_size_z)
|
||||
{
|
||||
if (m_glsl)
|
||||
{
|
||||
ss << "#define c_local_id gl_LocalInvocationID\n";
|
||||
ss << "#define c_global_id gl_GlobalInvocationID\n";
|
||||
ss << "layout(local_size_x = " << local_size_x << ", local_size_y = " << local_size_y
|
||||
<< ", local_size_z = " << local_size_z << ") in;\n";
|
||||
ss << "void main()\n";
|
||||
}
|
||||
else
|
||||
{
|
||||
ss << "[numthreads(" << local_size_x << ", " << local_size_y << ", " << local_size_z << ")]\n";
|
||||
ss << "void main(uint3 c_local_id : SV_GroupID, uint3 c_global_id : SV_DispatchThreadID)\n";
|
||||
}
|
||||
}
|
||||
|
||||
std::string ShaderGen::GenerateScreenQuadVertexShader()
|
||||
{
|
||||
std::stringstream ss;
|
||||
|
|
|
@ -40,6 +40,7 @@ protected:
|
|||
const std::initializer_list<std::pair<const char*, const char*>>& additional_inputs,
|
||||
bool declare_fragcoord = false, u32 num_color_outputs = 1, bool depth_output = false,
|
||||
bool msaa = false, bool ssaa = false, bool declare_sample_id = false);
|
||||
void DeclareComputeEntryPoint(std::stringstream& ss, u32 local_size_x, u32 local_size_y, u32 local_size_z);
|
||||
|
||||
HostDisplay::RenderAPI m_render_api;
|
||||
bool m_glsl;
|
||||
|
|
|
@ -1970,7 +1970,7 @@ void DrawSettingsWindow()
|
|||
"to the hardware renderers.",
|
||||
&s_settings_copy.gpu_24bit_chroma_smoothing);
|
||||
|
||||
MenuHeading("PGXP (Precision Geometry Transform Pipeline");
|
||||
MenuHeading("PGXP (Precision Geometry Transform Pipeline)");
|
||||
|
||||
settings_changed |=
|
||||
ToggleButton("PGXP Geometry Correction",
|
||||
|
|
Loading…
Reference in New Issue