GPU/HW: Use SSBO+compute shader for VRAM readbacks
This commit is contained in:
parent
3405041bda
commit
9fc4face66
|
@ -402,6 +402,46 @@ void GraphicsPipelineBuilder::SetRenderPass(VkRenderPass render_pass, u32 subpas
|
||||||
m_ci.subpass = subpass;
|
m_ci.subpass = subpass;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// A freshly constructed builder simply delegates to Clear() to set up its create-info.
ComputePipelineBuilder::ComputePipelineBuilder()
{
  this->Clear();
}
|
||||||
|
|
||||||
|
void ComputePipelineBuilder::Clear()
|
||||||
|
{
|
||||||
|
m_ci = {};
|
||||||
|
m_ci.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Creates a single compute pipeline from the accumulated create-info.
//   device         - device the pipeline is created on.
//   pipeline_cache - cache handle forwarded to vkCreateComputePipelines().
//   clear          - when true, the builder is reset after a successful creation.
// Returns the new pipeline, or VK_NULL_HANDLE (after logging the error) if creation failed.
// The builder state is left untouched on failure.
VkPipeline ComputePipelineBuilder::Create(VkDevice device, VkPipelineCache pipeline_cache, bool clear /* = true */)
{
  VkPipeline handle;
  const VkResult status = vkCreateComputePipelines(device, pipeline_cache, 1, &m_ci, nullptr, &handle);
  if (status == VK_SUCCESS)
  {
    if (clear)
      Clear();

    return handle;
  }

  LOG_VULKAN_ERROR(status, "vkCreateComputePipelines() failed: ");
  return VK_NULL_HANDLE;
}
|
||||||
|
|
||||||
|
// Selects the compute shader module and entry point used by the pipeline.
// Only the entry_point pointer is stored (the string is not copied), so it has to stay valid
// until the pipeline has been created.
void ComputePipelineBuilder::SetShader(VkShaderModule module, const char* entry_point)
{
  VkPipelineShaderStageCreateInfo& stage_info = m_ci.stage;
  stage_info.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO;
  stage_info.stage = VK_SHADER_STAGE_COMPUTE_BIT;
  stage_info.module = module;
  stage_info.pName = entry_point;
}
|
||||||
|
|
||||||
|
// Records the pipeline layout that the pipeline will be created with.
void ComputePipelineBuilder::SetPipelineLayout(VkPipelineLayout layout)
{
  this->m_ci.layout = layout;
}
|
||||||
|
|
||||||
SamplerBuilder::SamplerBuilder()
|
SamplerBuilder::SamplerBuilder()
|
||||||
{
|
{
|
||||||
Clear();
|
Clear();
|
||||||
|
@ -542,7 +582,7 @@ void DescriptorSetUpdateBuilder::AddCombinedImageSamplerDescriptorWrite(
|
||||||
}
|
}
|
||||||
|
|
||||||
void DescriptorSetUpdateBuilder::AddBufferDescriptorWrite(VkDescriptorSet set, u32 binding, VkDescriptorType dtype,
|
void DescriptorSetUpdateBuilder::AddBufferDescriptorWrite(VkDescriptorSet set, u32 binding, VkDescriptorType dtype,
|
||||||
VkBuffer buffer, u32 offset, u32 size)
|
VkBuffer buffer, VkDeviceSize offset, VkDeviceSize size)
|
||||||
{
|
{
|
||||||
Assert(m_num_writes < MAX_WRITES && m_num_infos < MAX_INFOS);
|
Assert(m_num_writes < MAX_WRITES && m_num_infos < MAX_INFOS);
|
||||||
|
|
||||||
|
|
|
@ -138,6 +138,23 @@ private:
|
||||||
VkPipelineMultisampleStateCreateInfo m_multisample_state;
|
VkPipelineMultisampleStateCreateInfo m_multisample_state;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
class ComputePipelineBuilder
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
ComputePipelineBuilder();
|
||||||
|
|
||||||
|
void Clear();
|
||||||
|
|
||||||
|
VkPipeline Create(VkDevice device, VkPipelineCache pipeline_cache = VK_NULL_HANDLE, bool clear = true);
|
||||||
|
|
||||||
|
void SetShader(VkShaderModule module, const char* entry_point);
|
||||||
|
|
||||||
|
void SetPipelineLayout(VkPipelineLayout layout);
|
||||||
|
|
||||||
|
private:
|
||||||
|
VkComputePipelineCreateInfo m_ci;
|
||||||
|
};
|
||||||
|
|
||||||
class SamplerBuilder
|
class SamplerBuilder
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
|
@ -177,8 +194,8 @@ public:
|
||||||
void AddSamplerDescriptorWrite(VkDescriptorSet set, u32 binding, VkSampler sampler);
|
void AddSamplerDescriptorWrite(VkDescriptorSet set, u32 binding, VkSampler sampler);
|
||||||
void AddCombinedImageSamplerDescriptorWrite(VkDescriptorSet set, u32 binding, VkImageView view, VkSampler sampler,
|
void AddCombinedImageSamplerDescriptorWrite(VkDescriptorSet set, u32 binding, VkImageView view, VkSampler sampler,
|
||||||
VkImageLayout layout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);
|
VkImageLayout layout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);
|
||||||
void AddBufferDescriptorWrite(VkDescriptorSet set, u32 binding, VkDescriptorType dtype, VkBuffer buffer, u32 offset,
|
void AddBufferDescriptorWrite(VkDescriptorSet set, u32 binding, VkDescriptorType dtype, VkBuffer buffer,
|
||||||
u32 size);
|
VkDeviceSize offset, VkDeviceSize size);
|
||||||
void AddBufferViewDescriptorWrite(VkDescriptorSet set, u32 binding, VkDescriptorType dtype, VkBufferView view);
|
void AddBufferViewDescriptorWrite(VkDescriptorSet set, u32 binding, VkDescriptorType dtype, VkBufferView view);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
|
|
@ -1,4 +1,5 @@
|
||||||
#include "gpu_hw_d3d11.h"
|
#include "gpu_hw_d3d11.h"
|
||||||
|
#include "common/align.h"
|
||||||
#include "common/assert.h"
|
#include "common/assert.h"
|
||||||
#include "common/d3d11/shader_compiler.h"
|
#include "common/d3d11/shader_compiler.h"
|
||||||
#include "common/log.h"
|
#include "common/log.h"
|
||||||
|
@ -363,7 +364,7 @@ bool GPU_HW_D3D11::CreateTextureBuffer()
|
||||||
|
|
||||||
const CD3D11_SHADER_RESOURCE_VIEW_DESC srv_desc(D3D11_SRV_DIMENSION_BUFFER, DXGI_FORMAT_R16_UINT, 0,
|
const CD3D11_SHADER_RESOURCE_VIEW_DESC srv_desc(D3D11_SRV_DIMENSION_BUFFER, DXGI_FORMAT_R16_UINT, 0,
|
||||||
VRAM_UPDATE_TEXTURE_BUFFER_SIZE / sizeof(u16));
|
VRAM_UPDATE_TEXTURE_BUFFER_SIZE / sizeof(u16));
|
||||||
const HRESULT hr = m_device->CreateShaderResourceView(m_texture_stream_buffer.GetD3DBuffer(), &srv_desc,
|
HRESULT hr = m_device->CreateShaderResourceView(m_texture_stream_buffer.GetD3DBuffer(), &srv_desc,
|
||||||
m_texture_stream_buffer_srv_r16ui.ReleaseAndGetAddressOf());
|
m_texture_stream_buffer_srv_r16ui.ReleaseAndGetAddressOf());
|
||||||
if (FAILED(hr))
|
if (FAILED(hr))
|
||||||
{
|
{
|
||||||
|
@ -371,6 +372,29 @@ bool GPU_HW_D3D11::CreateTextureBuffer()
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const u32 buffer_elements = (VRAM_WIDTH / 2) * VRAM_HEIGHT;
|
||||||
|
const CD3D11_BUFFER_DESC read_buffer_desc(buffer_elements * sizeof(u32), D3D11_BIND_UNORDERED_ACCESS,
|
||||||
|
D3D11_USAGE_DEFAULT, 0, 0, sizeof(u32));
|
||||||
|
const CD3D11_BUFFER_DESC staging_buffer_desc(buffer_elements * sizeof(u32), 0, D3D11_USAGE_STAGING,
|
||||||
|
D3D11_CPU_ACCESS_READ, 0, 0);
|
||||||
|
const CD3D11_UNORDERED_ACCESS_VIEW_DESC uav_desc(D3D11_UAV_DIMENSION_BUFFER, DXGI_FORMAT_R32_UINT, 0, buffer_elements,
|
||||||
|
0);
|
||||||
|
hr = m_device->CreateBuffer(&read_buffer_desc, nullptr, m_vram_read_buffer.ReleaseAndGetAddressOf());
|
||||||
|
if (SUCCEEDED(hr))
|
||||||
|
{
|
||||||
|
hr = m_device->CreateBuffer(&staging_buffer_desc, nullptr, m_vram_read_staging_buffer.ReleaseAndGetAddressOf());
|
||||||
|
if (SUCCEEDED(hr))
|
||||||
|
{
|
||||||
|
hr = m_device->CreateUnorderedAccessView(m_vram_read_buffer.Get(), &uav_desc,
|
||||||
|
m_vram_read_buffer_view.ReleaseAndGetAddressOf());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (FAILED(hr))
|
||||||
|
{
|
||||||
|
Log_ErrorPrintf("Creation of buffer/UAV failed: 0x%08X", hr);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -612,6 +636,10 @@ bool GPU_HW_D3D11::CompileShaders()
|
||||||
if (!m_vram_read_pixel_shader)
|
if (!m_vram_read_pixel_shader)
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
|
m_vram_read_compute_shader = shader_cache.GetComputeShader(m_device.Get(), shadergen.GenerateVRAMReadComputeShader());
|
||||||
|
if (!m_vram_read_compute_shader)
|
||||||
|
return false;
|
||||||
|
|
||||||
UPDATE_PROGRESS();
|
UPDATE_PROGRESS();
|
||||||
|
|
||||||
m_vram_write_pixel_shader =
|
m_vram_write_pixel_shader =
|
||||||
|
@ -946,6 +974,7 @@ void GPU_HW_D3D11::ReadVRAM(u32 x, u32 y, u32 width, u32 height)
|
||||||
const u32 encoded_width = (copy_rect.GetWidth() + 1) / 2;
|
const u32 encoded_width = (copy_rect.GetWidth() + 1) / 2;
|
||||||
const u32 encoded_height = copy_rect.GetHeight();
|
const u32 encoded_height = copy_rect.GetHeight();
|
||||||
|
|
||||||
|
#if 0
|
||||||
// Encode the 24-bit texture as 16-bit.
|
// Encode the 24-bit texture as 16-bit.
|
||||||
const u32 uniforms[4] = {copy_rect.left, copy_rect.top, copy_rect.GetWidth(), copy_rect.GetHeight()};
|
const u32 uniforms[4] = {copy_rect.left, copy_rect.top, copy_rect.GetWidth(), copy_rect.GetHeight()};
|
||||||
m_context->RSSetState(m_cull_none_rasterizer_state_no_msaa.Get());
|
m_context->RSSetState(m_cull_none_rasterizer_state_no_msaa.Get());
|
||||||
|
@ -971,6 +1000,50 @@ void GPU_HW_D3D11::ReadVRAM(u32 x, u32 y, u32 width, u32 height)
|
||||||
}
|
}
|
||||||
|
|
||||||
RestoreGraphicsAPIState();
|
RestoreGraphicsAPIState();
|
||||||
|
#else
|
||||||
|
// Encode the 24-bit texture as 16-bit.
|
||||||
|
const u32 uniforms[5] = {copy_rect.left, copy_rect.top, copy_rect.GetWidth(), copy_rect.GetHeight(), encoded_width};
|
||||||
|
const auto res = m_uniform_stream_buffer.Map(m_context.Get(), MAX_UNIFORM_BUFFER_SIZE, sizeof(uniforms));
|
||||||
|
std::memcpy(res.pointer, uniforms, sizeof(uniforms));
|
||||||
|
m_uniform_stream_buffer.Unmap(m_context.Get(), sizeof(uniforms));
|
||||||
|
m_context->CSSetConstantBuffers(0, 1, m_uniform_stream_buffer.GetD3DBufferArray());
|
||||||
|
|
||||||
|
m_context->OMSetRenderTargets(0, nullptr, nullptr);
|
||||||
|
m_context->CSSetUnorderedAccessViews(0, 1, m_vram_read_buffer_view.GetAddressOf(), nullptr);
|
||||||
|
m_context->CSSetShaderResources(0, 1, m_vram_texture.GetD3DSRVArray());
|
||||||
|
m_context->CSSetShader(m_vram_read_compute_shader.Get(), nullptr, 0);
|
||||||
|
|
||||||
|
const u32 groups_x = (encoded_width + 7) / 8;
|
||||||
|
const u32 groups_y = (encoded_height + 7) / 8;
|
||||||
|
m_context->Dispatch(groups_x, groups_y, 1);
|
||||||
|
|
||||||
|
ID3D11ShaderResourceView* null_view[1] = {nullptr};
|
||||||
|
m_context->CSSetShaderResources(0, 1, null_view);
|
||||||
|
m_context->OMSetRenderTargets(1, m_vram_texture.GetD3DRTVArray(), m_vram_depth_view.Get());
|
||||||
|
|
||||||
|
const CD3D11_BOX copy_box(0, 0, 0, static_cast<LONG>(encoded_width * encoded_height * sizeof(u32)), 1, 1);
|
||||||
|
m_context->CopySubresourceRegion(m_vram_read_staging_buffer.Get(), 0, 0, 0, 0, m_vram_read_buffer.Get(), 0,
|
||||||
|
&copy_box);
|
||||||
|
|
||||||
|
D3D11_MAPPED_SUBRESOURCE msr;
|
||||||
|
HRESULT hr = m_context->Map(m_vram_read_staging_buffer.Get(), 0, D3D11_MAP_READ, 0, &msr);
|
||||||
|
if (FAILED(hr))
|
||||||
|
{
|
||||||
|
Log_ErrorPrintf("Failed to map VRAM readback buffer");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
u16* dst_ptr = &m_vram_shadow[copy_rect.top * VRAM_WIDTH + copy_rect.left];
|
||||||
|
const u8* src_ptr = static_cast<const u8*>(msr.pData);
|
||||||
|
for (u32 row = 0; row < encoded_height; row++)
|
||||||
|
{
|
||||||
|
std::memcpy(dst_ptr, src_ptr, sizeof(u32) * encoded_width);
|
||||||
|
src_ptr += sizeof(u32) * encoded_width;
|
||||||
|
dst_ptr += VRAM_WIDTH;
|
||||||
|
}
|
||||||
|
|
||||||
|
m_context->Unmap(m_vram_read_staging_buffer.Get(), 0);
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
void GPU_HW_D3D11::FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color)
|
void GPU_HW_D3D11::FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color)
|
||||||
|
|
|
@ -84,7 +84,6 @@ private:
|
||||||
D3D11::Texture m_vram_depth_texture;
|
D3D11::Texture m_vram_depth_texture;
|
||||||
ComPtr<ID3D11DepthStencilView> m_vram_depth_view;
|
ComPtr<ID3D11DepthStencilView> m_vram_depth_view;
|
||||||
D3D11::Texture m_vram_read_texture;
|
D3D11::Texture m_vram_read_texture;
|
||||||
D3D11::Texture m_vram_encoding_texture;
|
|
||||||
D3D11::Texture m_display_texture;
|
D3D11::Texture m_display_texture;
|
||||||
|
|
||||||
D3D11::StreamBuffer m_vertex_stream_buffer;
|
D3D11::StreamBuffer m_vertex_stream_buffer;
|
||||||
|
@ -93,7 +92,9 @@ private:
|
||||||
|
|
||||||
D3D11::StreamBuffer m_texture_stream_buffer;
|
D3D11::StreamBuffer m_texture_stream_buffer;
|
||||||
|
|
||||||
D3D11::StagingTexture m_vram_readback_texture;
|
ComPtr<ID3D11Buffer> m_vram_read_buffer;
|
||||||
|
ComPtr<ID3D11Buffer> m_vram_read_staging_buffer;
|
||||||
|
ComPtr<ID3D11UnorderedAccessView> m_vram_read_buffer_view;
|
||||||
|
|
||||||
ComPtr<ID3D11ShaderResourceView> m_texture_stream_buffer_srv_r16ui;
|
ComPtr<ID3D11ShaderResourceView> m_texture_stream_buffer_srv_r16ui;
|
||||||
|
|
||||||
|
@ -123,7 +124,7 @@ private:
|
||||||
ComPtr<ID3D11PixelShader> m_copy_pixel_shader;
|
ComPtr<ID3D11PixelShader> m_copy_pixel_shader;
|
||||||
ComPtr<ID3D11PixelShader> m_vram_fill_pixel_shader;
|
ComPtr<ID3D11PixelShader> m_vram_fill_pixel_shader;
|
||||||
ComPtr<ID3D11PixelShader> m_vram_interlaced_fill_pixel_shader;
|
ComPtr<ID3D11PixelShader> m_vram_interlaced_fill_pixel_shader;
|
||||||
ComPtr<ID3D11PixelShader> m_vram_read_pixel_shader;
|
ComPtr<ID3D11ComputeShader> m_vram_read_compute_shader;
|
||||||
ComPtr<ID3D11PixelShader> m_vram_write_pixel_shader;
|
ComPtr<ID3D11PixelShader> m_vram_write_pixel_shader;
|
||||||
ComPtr<ID3D11PixelShader> m_vram_copy_pixel_shader;
|
ComPtr<ID3D11PixelShader> m_vram_copy_pixel_shader;
|
||||||
ComPtr<ID3D11PixelShader> m_vram_update_depth_pixel_shader;
|
ComPtr<ID3D11PixelShader> m_vram_update_depth_pixel_shader;
|
||||||
|
@ -139,4 +140,9 @@ private:
|
||||||
D3D11::Texture m_downsample_texture;
|
D3D11::Texture m_downsample_texture;
|
||||||
D3D11::Texture m_downsample_weight_texture;
|
D3D11::Texture m_downsample_weight_texture;
|
||||||
std::vector<std::pair<ComPtr<ID3D11ShaderResourceView>, ComPtr<ID3D11RenderTargetView>>> m_downsample_mip_views;
|
std::vector<std::pair<ComPtr<ID3D11ShaderResourceView>, ComPtr<ID3D11RenderTargetView>>> m_downsample_mip_views;
|
||||||
|
|
||||||
|
// fallback vram read
|
||||||
|
D3D11::Texture m_vram_encoding_texture;
|
||||||
|
D3D11::StagingTexture m_vram_readback_texture;
|
||||||
|
ComPtr<ID3D11PixelShader> m_vram_read_pixel_shader;
|
||||||
};
|
};
|
||||||
|
|
|
@ -1192,6 +1192,88 @@ uint SampleVRAM(uint2 coords)
|
||||||
return ss.str();
|
return ss.str();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
std::string GPU_HW_ShaderGen::GenerateVRAMReadComputeShader()
|
||||||
|
{
|
||||||
|
std::stringstream ss;
|
||||||
|
WriteHeader(ss);
|
||||||
|
WriteCommonFunctions(ss);
|
||||||
|
DeclareUniformBuffer(ss, {"uint2 u_base_coords", "uint2 u_size", "uint u_buffer_stride"}, true);
|
||||||
|
|
||||||
|
DeclareTexture(ss, "samp0", 0, UsingMSAA());
|
||||||
|
|
||||||
|
if (m_glsl)
|
||||||
|
{
|
||||||
|
ss << "layout(std430";
|
||||||
|
if (IsVulkan())
|
||||||
|
ss << ", set = 0, binding = 2";
|
||||||
|
else if (m_use_glsl_binding_layout)
|
||||||
|
ss << ", binding = 1";
|
||||||
|
|
||||||
|
ss << ") restrict writeonly buffer SSBO {\n";
|
||||||
|
ss << " uint s_output_buffer[];\n";
|
||||||
|
ss << "};\n";
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
ss << "RWBuffer<uint> s_output_buffer : register(u0);\n";
|
||||||
|
}
|
||||||
|
|
||||||
|
ss << R"(
|
||||||
|
float4 LoadVRAM(int2 coords)
|
||||||
|
{
|
||||||
|
#if MULTISAMPLING
|
||||||
|
float4 value = LOAD_TEXTURE_MS(samp0, coords, 0u);
|
||||||
|
for (uint sample_index = 1u; sample_index < MULTISAMPLES; sample_index++)
|
||||||
|
value += LOAD_TEXTURE_MS(samp0, coords, sample_index);
|
||||||
|
value /= float(MULTISAMPLES);
|
||||||
|
return value;
|
||||||
|
#else
|
||||||
|
return LOAD_TEXTURE(samp0, coords, 0);
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
uint SampleVRAM(uint2 coords)
|
||||||
|
{
|
||||||
|
if (RESOLUTION_SCALE == 1u)
|
||||||
|
return RGBA8ToRGBA5551(LoadVRAM(int2(coords)));
|
||||||
|
|
||||||
|
// Box filter for downsampling.
|
||||||
|
float4 value = float4(0.0, 0.0, 0.0, 0.0);
|
||||||
|
uint2 base_coords = coords * uint2(RESOLUTION_SCALE, RESOLUTION_SCALE);
|
||||||
|
for (uint offset_x = 0u; offset_x < RESOLUTION_SCALE; offset_x++)
|
||||||
|
{
|
||||||
|
for (uint offset_y = 0u; offset_y < RESOLUTION_SCALE; offset_y++)
|
||||||
|
value += LoadVRAM(int2(base_coords + uint2(offset_x, offset_y)));
|
||||||
|
}
|
||||||
|
value /= float(RESOLUTION_SCALE * RESOLUTION_SCALE);
|
||||||
|
return RGBA8ToRGBA5551(value);
|
||||||
|
}
|
||||||
|
)";
|
||||||
|
|
||||||
|
DeclareComputeEntryPoint(ss, 8, 8, 1);
|
||||||
|
ss << R"(
|
||||||
|
{
|
||||||
|
uint2 sample_coords = uint2(uint(c_global_id.x) * 2u, uint(c_global_id.y));
|
||||||
|
|
||||||
|
#if API_OPENGL || API_OPENGL_ES
|
||||||
|
// Lower-left origin flip for OpenGL.
|
||||||
|
// We want to write the image out upside-down so we can read it top-to-bottom.
|
||||||
|
sample_coords.y = u_size.y - sample_coords.y - 1u;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
sample_coords += u_base_coords;
|
||||||
|
|
||||||
|
// We're encoding as 32-bit, so the output width is halved and we pack two 16-bit pixels in one 32-bit pixel.
|
||||||
|
uint left = SampleVRAM(sample_coords);
|
||||||
|
uint right = SampleVRAM(uint2(sample_coords.x + 1u, sample_coords.y));
|
||||||
|
|
||||||
|
uint buffer_offset = c_global_id.y * u_buffer_stride + c_global_id.x;
|
||||||
|
s_output_buffer[buffer_offset] = left | (right << 16);
|
||||||
|
})";
|
||||||
|
|
||||||
|
return ss.str();
|
||||||
|
}
|
||||||
|
|
||||||
std::string GPU_HW_ShaderGen::GenerateVRAMWriteFragmentShader(bool use_ssbo)
|
std::string GPU_HW_ShaderGen::GenerateVRAMWriteFragmentShader(bool use_ssbo)
|
||||||
{
|
{
|
||||||
std::stringstream ss;
|
std::stringstream ss;
|
||||||
|
|
|
@ -17,6 +17,7 @@ public:
|
||||||
std::string GenerateDisplayFragmentShader(bool depth_24bit, GPU_HW::InterlacedRenderMode interlace_mode,
|
std::string GenerateDisplayFragmentShader(bool depth_24bit, GPU_HW::InterlacedRenderMode interlace_mode,
|
||||||
bool smooth_chroma);
|
bool smooth_chroma);
|
||||||
std::string GenerateVRAMReadFragmentShader();
|
std::string GenerateVRAMReadFragmentShader();
|
||||||
|
std::string GenerateVRAMReadComputeShader();
|
||||||
std::string GenerateVRAMWriteFragmentShader(bool use_ssbo);
|
std::string GenerateVRAMWriteFragmentShader(bool use_ssbo);
|
||||||
std::string GenerateVRAMCopyFragmentShader();
|
std::string GenerateVRAMCopyFragmentShader();
|
||||||
std::string GenerateVRAMUpdateDepthFragmentShader();
|
std::string GenerateVRAMUpdateDepthFragmentShader();
|
||||||
|
|
|
@ -360,10 +360,12 @@ void GPU_HW_Vulkan::DestroyResources()
|
||||||
m_texture_stream_buffer.Destroy(false);
|
m_texture_stream_buffer.Destroy(false);
|
||||||
|
|
||||||
Vulkan::Util::SafeDestroyPipelineLayout(m_vram_write_pipeline_layout);
|
Vulkan::Util::SafeDestroyPipelineLayout(m_vram_write_pipeline_layout);
|
||||||
|
Vulkan::Util::SafeDestroyPipelineLayout(m_vram_read_pipeline_layout);
|
||||||
Vulkan::Util::SafeDestroyPipelineLayout(m_single_sampler_pipeline_layout);
|
Vulkan::Util::SafeDestroyPipelineLayout(m_single_sampler_pipeline_layout);
|
||||||
Vulkan::Util::SafeDestroyPipelineLayout(m_no_samplers_pipeline_layout);
|
Vulkan::Util::SafeDestroyPipelineLayout(m_no_samplers_pipeline_layout);
|
||||||
Vulkan::Util::SafeDestroyPipelineLayout(m_batch_pipeline_layout);
|
Vulkan::Util::SafeDestroyPipelineLayout(m_batch_pipeline_layout);
|
||||||
Vulkan::Util::SafeDestroyDescriptorSetLayout(m_vram_write_descriptor_set_layout);
|
Vulkan::Util::SafeDestroyDescriptorSetLayout(m_vram_write_descriptor_set_layout);
|
||||||
|
Vulkan::Util::SafeDestroyDescriptorSetLayout(m_vram_read_descriptor_set_layout);
|
||||||
Vulkan::Util::SafeDestroyDescriptorSetLayout(m_single_sampler_descriptor_set_layout);
|
Vulkan::Util::SafeDestroyDescriptorSetLayout(m_single_sampler_descriptor_set_layout);
|
||||||
Vulkan::Util::SafeDestroyDescriptorSetLayout(m_batch_descriptor_set_layout);
|
Vulkan::Util::SafeDestroyDescriptorSetLayout(m_batch_descriptor_set_layout);
|
||||||
Vulkan::Util::SafeDestroySampler(m_point_sampler);
|
Vulkan::Util::SafeDestroySampler(m_point_sampler);
|
||||||
|
@ -431,6 +433,12 @@ bool GPU_HW_Vulkan::CreatePipelineLayouts()
|
||||||
if (m_vram_write_descriptor_set_layout == VK_NULL_HANDLE)
|
if (m_vram_write_descriptor_set_layout == VK_NULL_HANDLE)
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
|
dslbuilder.AddBinding(1, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, 1, VK_SHADER_STAGE_COMPUTE_BIT);
|
||||||
|
dslbuilder.AddBinding(2, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1, VK_SHADER_STAGE_COMPUTE_BIT);
|
||||||
|
m_vram_read_descriptor_set_layout = dslbuilder.Create(device);
|
||||||
|
if (m_vram_read_descriptor_set_layout == VK_NULL_HANDLE)
|
||||||
|
return false;
|
||||||
|
|
||||||
Vulkan::PipelineLayoutBuilder plbuilder;
|
Vulkan::PipelineLayoutBuilder plbuilder;
|
||||||
plbuilder.AddDescriptorSet(m_batch_descriptor_set_layout);
|
plbuilder.AddDescriptorSet(m_batch_descriptor_set_layout);
|
||||||
m_batch_pipeline_layout = plbuilder.Create(device);
|
m_batch_pipeline_layout = plbuilder.Create(device);
|
||||||
|
@ -448,6 +456,12 @@ bool GPU_HW_Vulkan::CreatePipelineLayouts()
|
||||||
if (m_no_samplers_pipeline_layout == VK_NULL_HANDLE)
|
if (m_no_samplers_pipeline_layout == VK_NULL_HANDLE)
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
|
plbuilder.AddDescriptorSet(m_vram_read_descriptor_set_layout);
|
||||||
|
plbuilder.AddPushConstants(VK_SHADER_STAGE_COMPUTE_BIT, 0, MAX_PUSH_CONSTANTS_SIZE);
|
||||||
|
m_vram_read_pipeline_layout = plbuilder.Create(device);
|
||||||
|
if (m_vram_read_pipeline_layout == VK_NULL_HANDLE)
|
||||||
|
return false;
|
||||||
|
|
||||||
plbuilder.AddDescriptorSet(m_vram_write_descriptor_set_layout);
|
plbuilder.AddDescriptorSet(m_vram_write_descriptor_set_layout);
|
||||||
plbuilder.AddPushConstants(VK_SHADER_STAGE_FRAGMENT_BIT, 0, MAX_PUSH_CONSTANTS_SIZE);
|
plbuilder.AddPushConstants(VK_SHADER_STAGE_FRAGMENT_BIT, 0, MAX_PUSH_CONSTANTS_SIZE);
|
||||||
m_vram_write_pipeline_layout = plbuilder.Create(device);
|
m_vram_write_pipeline_layout = plbuilder.Create(device);
|
||||||
|
@ -512,6 +526,7 @@ bool GPU_HW_Vulkan::CreateFramebuffer()
|
||||||
const VkFormat texture_format = VK_FORMAT_R8G8B8A8_UNORM;
|
const VkFormat texture_format = VK_FORMAT_R8G8B8A8_UNORM;
|
||||||
const VkFormat depth_format = VK_FORMAT_D16_UNORM;
|
const VkFormat depth_format = VK_FORMAT_D16_UNORM;
|
||||||
const VkSampleCountFlagBits samples = static_cast<VkSampleCountFlagBits>(m_multisamples);
|
const VkSampleCountFlagBits samples = static_cast<VkSampleCountFlagBits>(m_multisamples);
|
||||||
|
const u32 read_staging_buffer_size = (VRAM_WIDTH / 2) * VRAM_HEIGHT * sizeof(u32);
|
||||||
|
|
||||||
if (!m_vram_texture.Create(texture_width, texture_height, 1, 1, texture_format, samples, VK_IMAGE_VIEW_TYPE_2D,
|
if (!m_vram_texture.Create(texture_width, texture_height, 1, 1, texture_format, samples, VK_IMAGE_VIEW_TYPE_2D,
|
||||||
VK_IMAGE_TILING_OPTIMAL,
|
VK_IMAGE_TILING_OPTIMAL,
|
||||||
|
@ -529,11 +544,9 @@ bool GPU_HW_Vulkan::CreateFramebuffer()
|
||||||
VK_IMAGE_TILING_OPTIMAL,
|
VK_IMAGE_TILING_OPTIMAL,
|
||||||
VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT |
|
VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT |
|
||||||
VK_IMAGE_USAGE_TRANSFER_DST_BIT) ||
|
VK_IMAGE_USAGE_TRANSFER_DST_BIT) ||
|
||||||
!m_vram_readback_texture.Create(VRAM_WIDTH, VRAM_HEIGHT, 1, 1, texture_format, VK_SAMPLE_COUNT_1_BIT,
|
!m_vram_read_staging_buffer.Create(Vulkan::StagingBuffer::Type::Readback, read_staging_buffer_size,
|
||||||
VK_IMAGE_VIEW_TYPE_2D, VK_IMAGE_TILING_OPTIMAL,
|
VK_BUFFER_USAGE_STORAGE_BUFFER_BIT) ||
|
||||||
VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT) ||
|
!m_vram_read_staging_buffer.Map())
|
||||||
!m_vram_readback_staging_texture.Create(Vulkan::StagingBuffer::Type::Readback, texture_format, VRAM_WIDTH / 2,
|
|
||||||
VRAM_HEIGHT))
|
|
||||||
{
|
{
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
@ -544,12 +557,9 @@ bool GPU_HW_Vulkan::CreateFramebuffer()
|
||||||
g_vulkan_context->GetRenderPass(VK_FORMAT_UNDEFINED, depth_format, samples, VK_ATTACHMENT_LOAD_OP_DONT_CARE);
|
g_vulkan_context->GetRenderPass(VK_FORMAT_UNDEFINED, depth_format, samples, VK_ATTACHMENT_LOAD_OP_DONT_CARE);
|
||||||
m_display_render_pass = g_vulkan_context->GetRenderPass(m_display_texture.GetFormat(), VK_FORMAT_UNDEFINED,
|
m_display_render_pass = g_vulkan_context->GetRenderPass(m_display_texture.GetFormat(), VK_FORMAT_UNDEFINED,
|
||||||
m_display_texture.GetSamples(), VK_ATTACHMENT_LOAD_OP_LOAD);
|
m_display_texture.GetSamples(), VK_ATTACHMENT_LOAD_OP_LOAD);
|
||||||
m_vram_readback_render_pass =
|
|
||||||
g_vulkan_context->GetRenderPass(m_vram_readback_texture.GetFormat(), VK_FORMAT_UNDEFINED,
|
|
||||||
m_vram_readback_texture.GetSamples(), VK_ATTACHMENT_LOAD_OP_DONT_CARE);
|
|
||||||
|
|
||||||
if (m_vram_render_pass == VK_NULL_HANDLE || m_vram_update_depth_render_pass == VK_NULL_HANDLE ||
|
if (m_vram_render_pass == VK_NULL_HANDLE || m_vram_update_depth_render_pass == VK_NULL_HANDLE ||
|
||||||
m_display_render_pass == VK_NULL_HANDLE || m_vram_readback_render_pass == VK_NULL_HANDLE)
|
m_display_render_pass == VK_NULL_HANDLE)
|
||||||
{
|
{
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
@ -565,13 +575,9 @@ bool GPU_HW_Vulkan::CreateFramebuffer()
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
m_vram_update_depth_framebuffer = m_vram_depth_texture.CreateFramebuffer(m_vram_update_depth_render_pass);
|
m_vram_update_depth_framebuffer = m_vram_depth_texture.CreateFramebuffer(m_vram_update_depth_render_pass);
|
||||||
m_vram_readback_framebuffer = m_vram_readback_texture.CreateFramebuffer(m_vram_readback_render_pass);
|
|
||||||
m_display_framebuffer = m_display_texture.CreateFramebuffer(m_display_render_pass);
|
m_display_framebuffer = m_display_texture.CreateFramebuffer(m_display_render_pass);
|
||||||
if (m_vram_update_depth_framebuffer == VK_NULL_HANDLE || m_vram_readback_framebuffer == VK_NULL_HANDLE ||
|
if (m_vram_update_depth_framebuffer == VK_NULL_HANDLE || m_display_framebuffer == VK_NULL_HANDLE)
|
||||||
m_display_framebuffer == VK_NULL_HANDLE)
|
|
||||||
{
|
|
||||||
return false;
|
return false;
|
||||||
}
|
|
||||||
|
|
||||||
VkCommandBuffer cmdbuf = g_vulkan_context->GetCurrentCommandBuffer();
|
VkCommandBuffer cmdbuf = g_vulkan_context->GetCurrentCommandBuffer();
|
||||||
m_vram_texture.TransitionToLayout(cmdbuf, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL);
|
m_vram_texture.TransitionToLayout(cmdbuf, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL);
|
||||||
|
@ -582,10 +588,13 @@ bool GPU_HW_Vulkan::CreateFramebuffer()
|
||||||
|
|
||||||
m_batch_descriptor_set = g_vulkan_context->AllocateGlobalDescriptorSet(m_batch_descriptor_set_layout);
|
m_batch_descriptor_set = g_vulkan_context->AllocateGlobalDescriptorSet(m_batch_descriptor_set_layout);
|
||||||
m_vram_copy_descriptor_set = g_vulkan_context->AllocateGlobalDescriptorSet(m_single_sampler_descriptor_set_layout);
|
m_vram_copy_descriptor_set = g_vulkan_context->AllocateGlobalDescriptorSet(m_single_sampler_descriptor_set_layout);
|
||||||
m_vram_read_descriptor_set = g_vulkan_context->AllocateGlobalDescriptorSet(m_single_sampler_descriptor_set_layout);
|
m_vram_read_descriptor_set = g_vulkan_context->AllocateGlobalDescriptorSet(m_vram_read_descriptor_set_layout);
|
||||||
|
m_vram_update_depth_descriptor_set =
|
||||||
|
g_vulkan_context->AllocateGlobalDescriptorSet(m_single_sampler_descriptor_set_layout);
|
||||||
m_display_descriptor_set = g_vulkan_context->AllocateGlobalDescriptorSet(m_single_sampler_descriptor_set_layout);
|
m_display_descriptor_set = g_vulkan_context->AllocateGlobalDescriptorSet(m_single_sampler_descriptor_set_layout);
|
||||||
if (m_batch_descriptor_set == VK_NULL_HANDLE || m_vram_copy_descriptor_set == VK_NULL_HANDLE ||
|
if (m_batch_descriptor_set == VK_NULL_HANDLE || m_vram_copy_descriptor_set == VK_NULL_HANDLE ||
|
||||||
m_vram_read_descriptor_set == VK_NULL_HANDLE || m_display_descriptor_set == VK_NULL_HANDLE)
|
m_vram_read_descriptor_set == VK_NULL_HANDLE || m_vram_update_depth_descriptor_set == VK_NULL_HANDLE ||
|
||||||
|
m_display_descriptor_set == VK_NULL_HANDLE)
|
||||||
{
|
{
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
@ -598,6 +607,10 @@ bool GPU_HW_Vulkan::CreateFramebuffer()
|
||||||
m_point_sampler, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);
|
m_point_sampler, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);
|
||||||
dsubuilder.AddCombinedImageSamplerDescriptorWrite(m_vram_read_descriptor_set, 1, m_vram_texture.GetView(),
|
dsubuilder.AddCombinedImageSamplerDescriptorWrite(m_vram_read_descriptor_set, 1, m_vram_texture.GetView(),
|
||||||
m_point_sampler, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);
|
m_point_sampler, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);
|
||||||
|
dsubuilder.AddBufferDescriptorWrite(m_vram_read_descriptor_set, 2, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
|
||||||
|
m_vram_read_staging_buffer.GetBuffer(), 0, m_vram_read_staging_buffer.GetSize());
|
||||||
|
dsubuilder.AddCombinedImageSamplerDescriptorWrite(m_vram_update_depth_descriptor_set, 1, m_vram_texture.GetView(),
|
||||||
|
m_point_sampler, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);
|
||||||
dsubuilder.AddCombinedImageSamplerDescriptorWrite(m_display_descriptor_set, 1, m_display_texture.GetView(),
|
dsubuilder.AddCombinedImageSamplerDescriptorWrite(m_display_descriptor_set, 1, m_display_texture.GetView(),
|
||||||
m_point_sampler, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);
|
m_point_sampler, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);
|
||||||
dsubuilder.Update(g_vulkan_context->GetDevice());
|
dsubuilder.Update(g_vulkan_context->GetDevice());
|
||||||
|
@ -743,21 +756,20 @@ void GPU_HW_Vulkan::DestroyFramebuffer()
|
||||||
m_downsample_weight_texture.Destroy(false);
|
m_downsample_weight_texture.Destroy(false);
|
||||||
|
|
||||||
Vulkan::Util::SafeFreeGlobalDescriptorSet(m_batch_descriptor_set);
|
Vulkan::Util::SafeFreeGlobalDescriptorSet(m_batch_descriptor_set);
|
||||||
|
Vulkan::Util::SafeFreeGlobalDescriptorSet(m_vram_update_depth_descriptor_set);
|
||||||
Vulkan::Util::SafeFreeGlobalDescriptorSet(m_vram_copy_descriptor_set);
|
Vulkan::Util::SafeFreeGlobalDescriptorSet(m_vram_copy_descriptor_set);
|
||||||
Vulkan::Util::SafeFreeGlobalDescriptorSet(m_vram_read_descriptor_set);
|
Vulkan::Util::SafeFreeGlobalDescriptorSet(m_vram_read_descriptor_set);
|
||||||
Vulkan::Util::SafeFreeGlobalDescriptorSet(m_display_descriptor_set);
|
Vulkan::Util::SafeFreeGlobalDescriptorSet(m_display_descriptor_set);
|
||||||
|
|
||||||
Vulkan::Util::SafeDestroyFramebuffer(m_vram_framebuffer);
|
Vulkan::Util::SafeDestroyFramebuffer(m_vram_framebuffer);
|
||||||
Vulkan::Util::SafeDestroyFramebuffer(m_vram_update_depth_framebuffer);
|
Vulkan::Util::SafeDestroyFramebuffer(m_vram_update_depth_framebuffer);
|
||||||
Vulkan::Util::SafeDestroyFramebuffer(m_vram_readback_framebuffer);
|
|
||||||
Vulkan::Util::SafeDestroyFramebuffer(m_display_framebuffer);
|
Vulkan::Util::SafeDestroyFramebuffer(m_display_framebuffer);
|
||||||
|
|
||||||
m_vram_read_texture.Destroy(false);
|
m_vram_read_texture.Destroy(false);
|
||||||
m_vram_depth_texture.Destroy(false);
|
m_vram_depth_texture.Destroy(false);
|
||||||
m_vram_texture.Destroy(false);
|
m_vram_texture.Destroy(false);
|
||||||
m_vram_readback_texture.Destroy(false);
|
|
||||||
m_display_texture.Destroy(false);
|
m_display_texture.Destroy(false);
|
||||||
m_vram_readback_staging_texture.Destroy(false);
|
m_vram_read_staging_buffer.Destroy(false);
|
||||||
}
|
}
|
||||||
|
|
||||||
bool GPU_HW_Vulkan::CreateVertexBuffer()
|
bool GPU_HW_Vulkan::CreateVertexBuffer()
|
||||||
|
@ -883,6 +895,7 @@ bool GPU_HW_Vulkan::CompilePipelines()
|
||||||
}
|
}
|
||||||
|
|
||||||
Vulkan::GraphicsPipelineBuilder gpbuilder;
|
Vulkan::GraphicsPipelineBuilder gpbuilder;
|
||||||
|
Vulkan::ComputePipelineBuilder csbuilder;
|
||||||
|
|
||||||
// [depth_test][render_mode][texture_mode][transparency_mode][dithering][interlacing]
|
// [depth_test][render_mode][texture_mode][transparency_mode][dithering][interlacing]
|
||||||
for (u8 depth_test = 0; depth_test < 3; depth_test++)
|
for (u8 depth_test = 0; depth_test < 3; depth_test++)
|
||||||
|
@ -1104,22 +1117,16 @@ bool GPU_HW_Vulkan::CompilePipelines()
|
||||||
|
|
||||||
// VRAM read
|
// VRAM read
|
||||||
{
|
{
|
||||||
VkShaderModule fs = g_vulkan_shader_cache->GetFragmentShader(shadergen.GenerateVRAMReadFragmentShader());
|
VkShaderModule cs = g_vulkan_shader_cache->GetComputeShader(shadergen.GenerateVRAMReadComputeShader());
|
||||||
if (fs == VK_NULL_HANDLE)
|
if (cs == VK_NULL_HANDLE)
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
gpbuilder.SetRenderPass(m_vram_readback_render_pass, 0);
|
csbuilder.SetPipelineLayout(m_vram_read_pipeline_layout);
|
||||||
gpbuilder.SetPipelineLayout(m_single_sampler_pipeline_layout);
|
csbuilder.SetShader(cs, "main");
|
||||||
gpbuilder.SetVertexShader(fullscreen_quad_vertex_shader);
|
|
||||||
gpbuilder.SetFragmentShader(fs);
|
|
||||||
gpbuilder.SetNoCullRasterizationState();
|
|
||||||
gpbuilder.SetNoDepthTestState();
|
|
||||||
gpbuilder.SetNoBlendingState();
|
|
||||||
gpbuilder.SetDynamicViewportAndScissorState();
|
|
||||||
|
|
||||||
m_vram_readback_pipeline = gpbuilder.Create(device, pipeline_cache, false);
|
m_vram_read_pipeline = csbuilder.Create(device, pipeline_cache, false);
|
||||||
vkDestroyShaderModule(device, fs, nullptr);
|
vkDestroyShaderModule(device, cs, nullptr);
|
||||||
if (m_vram_readback_pipeline == VK_NULL_HANDLE)
|
if (m_vram_read_pipeline == VK_NULL_HANDLE)
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
UPDATE_PROGRESS();
|
UPDATE_PROGRESS();
|
||||||
|
@ -1257,7 +1264,7 @@ void GPU_HW_Vulkan::DestroyPipelines()
|
||||||
for (VkPipeline& p : m_vram_copy_pipelines)
|
for (VkPipeline& p : m_vram_copy_pipelines)
|
||||||
Vulkan::Util::SafeDestroyPipeline(p);
|
Vulkan::Util::SafeDestroyPipeline(p);
|
||||||
|
|
||||||
Vulkan::Util::SafeDestroyPipeline(m_vram_readback_pipeline);
|
Vulkan::Util::SafeDestroyPipeline(m_vram_read_pipeline);
|
||||||
Vulkan::Util::SafeDestroyPipeline(m_vram_update_depth_pipeline);
|
Vulkan::Util::SafeDestroyPipeline(m_vram_update_depth_pipeline);
|
||||||
|
|
||||||
Vulkan::Util::SafeDestroyPipeline(m_downsample_first_pass_pipeline);
|
Vulkan::Util::SafeDestroyPipeline(m_downsample_first_pass_pipeline);
|
||||||
|
@ -1427,41 +1434,37 @@ void GPU_HW_Vulkan::ReadVRAM(u32 x, u32 y, u32 width, u32 height)
|
||||||
const Common::Rectangle<u32> copy_rect = GetVRAMTransferBounds(x, y, width, height);
|
const Common::Rectangle<u32> copy_rect = GetVRAMTransferBounds(x, y, width, height);
|
||||||
const u32 encoded_width = (copy_rect.GetWidth() + 1) / 2;
|
const u32 encoded_width = (copy_rect.GetWidth() + 1) / 2;
|
||||||
const u32 encoded_height = copy_rect.GetHeight();
|
const u32 encoded_height = copy_rect.GetHeight();
|
||||||
|
const u32 encoded_size = encoded_width * encoded_height * sizeof(u32);
|
||||||
|
|
||||||
EndRenderPass();
|
EndRenderPass();
|
||||||
|
|
||||||
VkCommandBuffer cmdbuf = g_vulkan_context->GetCurrentCommandBuffer();
|
VkCommandBuffer cmdbuf = g_vulkan_context->GetCurrentCommandBuffer();
|
||||||
m_vram_texture.TransitionToLayout(cmdbuf, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);
|
m_vram_texture.TransitionToLayout(cmdbuf, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);
|
||||||
m_vram_readback_texture.TransitionToLayout(cmdbuf, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL);
|
|
||||||
|
|
||||||
// Work around Mali driver bug: set full framebuffer size for render area. The GPU crashes with a page fault if we use
|
const u32 uniforms[5] = {copy_rect.left, copy_rect.top, copy_rect.GetWidth(), copy_rect.GetHeight(), encoded_width};
|
||||||
// the actual size we're rendering to...
|
vkCmdBindPipeline(cmdbuf, VK_PIPELINE_BIND_POINT_COMPUTE, m_vram_read_pipeline);
|
||||||
BeginRenderPass(m_vram_readback_render_pass, m_vram_readback_framebuffer, 0, 0, m_vram_readback_texture.GetWidth(),
|
vkCmdPushConstants(cmdbuf, m_vram_read_pipeline_layout, VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(uniforms), uniforms);
|
||||||
m_vram_readback_texture.GetHeight());
|
vkCmdBindDescriptorSets(cmdbuf, VK_PIPELINE_BIND_POINT_COMPUTE, m_vram_read_pipeline_layout, 0, 1,
|
||||||
|
|
||||||
// Encode the 24-bit texture as 16-bit.
|
|
||||||
const u32 uniforms[4] = {copy_rect.left, copy_rect.top, copy_rect.GetWidth(), copy_rect.GetHeight()};
|
|
||||||
vkCmdBindPipeline(cmdbuf, VK_PIPELINE_BIND_POINT_GRAPHICS, m_vram_readback_pipeline);
|
|
||||||
vkCmdPushConstants(cmdbuf, m_single_sampler_pipeline_layout, VK_SHADER_STAGE_FRAGMENT_BIT, 0, sizeof(uniforms),
|
|
||||||
uniforms);
|
|
||||||
vkCmdBindDescriptorSets(cmdbuf, VK_PIPELINE_BIND_POINT_GRAPHICS, m_single_sampler_pipeline_layout, 0, 1,
|
|
||||||
&m_vram_read_descriptor_set, 0, nullptr);
|
&m_vram_read_descriptor_set, 0, nullptr);
|
||||||
Vulkan::Util::SetViewportAndScissor(cmdbuf, 0, 0, encoded_width, encoded_height);
|
|
||||||
vkCmdDraw(cmdbuf, 3, 1, 0, 0);
|
|
||||||
|
|
||||||
EndRenderPass();
|
const u32 groups_x = (encoded_width + 7) / 8;
|
||||||
|
const u32 groups_y = (encoded_height + 7) / 8;
|
||||||
|
vkCmdDispatch(cmdbuf, groups_x, groups_y, 1);
|
||||||
|
|
||||||
m_vram_readback_texture.TransitionToLayout(cmdbuf, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL);
|
|
||||||
m_vram_texture.TransitionToLayout(cmdbuf, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL);
|
m_vram_texture.TransitionToLayout(cmdbuf, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL);
|
||||||
|
m_vram_read_staging_buffer.FlushGPUCache(cmdbuf, VK_ACCESS_SHADER_WRITE_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0,
|
||||||
|
encoded_size);
|
||||||
|
g_vulkan_context->ExecuteCommandBuffer(true);
|
||||||
|
m_vram_read_staging_buffer.InvalidateCPUCache(0, encoded_size);
|
||||||
|
|
||||||
// Stage the readback.
|
u16* dst_ptr = &m_vram_shadow[copy_rect.top * VRAM_WIDTH + copy_rect.left];
|
||||||
m_vram_readback_staging_texture.CopyFromTexture(m_vram_readback_texture, 0, 0, 0, 0, 0, 0, encoded_width,
|
const char* src_ptr = static_cast<const char*>(m_vram_read_staging_buffer.GetMapPointer());
|
||||||
encoded_height);
|
for (u32 row = 0; row < encoded_height; row++)
|
||||||
|
{
|
||||||
// And copy it into our shadow buffer (will execute command buffer and stall).
|
std::memcpy(dst_ptr, src_ptr, sizeof(u32) * encoded_width);
|
||||||
m_vram_readback_staging_texture.ReadTexels(0, 0, encoded_width, encoded_height,
|
src_ptr += sizeof(u32) * encoded_width;
|
||||||
&m_vram_shadow[copy_rect.top * VRAM_WIDTH + copy_rect.left],
|
dst_ptr += VRAM_WIDTH;
|
||||||
VRAM_WIDTH * sizeof(u16));
|
}
|
||||||
|
|
||||||
RestoreGraphicsAPIState();
|
RestoreGraphicsAPIState();
|
||||||
}
|
}
|
||||||
|
@ -1667,7 +1670,7 @@ void GPU_HW_Vulkan::UpdateDepthBufferFromMaskBit()
|
||||||
|
|
||||||
vkCmdBindPipeline(cmdbuf, VK_PIPELINE_BIND_POINT_GRAPHICS, m_vram_update_depth_pipeline);
|
vkCmdBindPipeline(cmdbuf, VK_PIPELINE_BIND_POINT_GRAPHICS, m_vram_update_depth_pipeline);
|
||||||
vkCmdBindDescriptorSets(cmdbuf, VK_PIPELINE_BIND_POINT_GRAPHICS, m_single_sampler_pipeline_layout, 0, 1,
|
vkCmdBindDescriptorSets(cmdbuf, VK_PIPELINE_BIND_POINT_GRAPHICS, m_single_sampler_pipeline_layout, 0, 1,
|
||||||
&m_vram_read_descriptor_set, 0, nullptr);
|
&m_vram_update_depth_descriptor_set, 0, nullptr);
|
||||||
Vulkan::Util::SetViewportAndScissor(cmdbuf, 0, 0, m_vram_texture.GetWidth(), m_vram_texture.GetHeight());
|
Vulkan::Util::SetViewportAndScissor(cmdbuf, 0, 0, m_vram_texture.GetWidth(), m_vram_texture.GetHeight());
|
||||||
vkCmdDraw(cmdbuf, 3, 1, 0, 0);
|
vkCmdDraw(cmdbuf, 3, 1, 0, 0);
|
||||||
|
|
||||||
|
|
|
@ -81,28 +81,27 @@ private:
|
||||||
VkRenderPass m_vram_render_pass = VK_NULL_HANDLE;
|
VkRenderPass m_vram_render_pass = VK_NULL_HANDLE;
|
||||||
VkRenderPass m_vram_update_depth_render_pass = VK_NULL_HANDLE;
|
VkRenderPass m_vram_update_depth_render_pass = VK_NULL_HANDLE;
|
||||||
VkRenderPass m_display_render_pass = VK_NULL_HANDLE;
|
VkRenderPass m_display_render_pass = VK_NULL_HANDLE;
|
||||||
VkRenderPass m_vram_readback_render_pass = VK_NULL_HANDLE;
|
|
||||||
|
|
||||||
VkDescriptorSetLayout m_batch_descriptor_set_layout = VK_NULL_HANDLE;
|
VkDescriptorSetLayout m_batch_descriptor_set_layout = VK_NULL_HANDLE;
|
||||||
VkDescriptorSetLayout m_single_sampler_descriptor_set_layout = VK_NULL_HANDLE;
|
VkDescriptorSetLayout m_single_sampler_descriptor_set_layout = VK_NULL_HANDLE;
|
||||||
|
VkDescriptorSetLayout m_vram_read_descriptor_set_layout = VK_NULL_HANDLE;
|
||||||
VkDescriptorSetLayout m_vram_write_descriptor_set_layout = VK_NULL_HANDLE;
|
VkDescriptorSetLayout m_vram_write_descriptor_set_layout = VK_NULL_HANDLE;
|
||||||
|
|
||||||
VkPipelineLayout m_batch_pipeline_layout = VK_NULL_HANDLE;
|
VkPipelineLayout m_batch_pipeline_layout = VK_NULL_HANDLE;
|
||||||
VkPipelineLayout m_no_samplers_pipeline_layout = VK_NULL_HANDLE;
|
VkPipelineLayout m_no_samplers_pipeline_layout = VK_NULL_HANDLE;
|
||||||
VkPipelineLayout m_single_sampler_pipeline_layout = VK_NULL_HANDLE;
|
VkPipelineLayout m_single_sampler_pipeline_layout = VK_NULL_HANDLE;
|
||||||
|
VkPipelineLayout m_vram_read_pipeline_layout = VK_NULL_HANDLE;
|
||||||
VkPipelineLayout m_vram_write_pipeline_layout = VK_NULL_HANDLE;
|
VkPipelineLayout m_vram_write_pipeline_layout = VK_NULL_HANDLE;
|
||||||
|
|
||||||
Vulkan::Texture m_vram_texture;
|
Vulkan::Texture m_vram_texture;
|
||||||
Vulkan::Texture m_vram_depth_texture;
|
Vulkan::Texture m_vram_depth_texture;
|
||||||
Vulkan::Texture m_vram_read_texture;
|
Vulkan::Texture m_vram_read_texture;
|
||||||
Vulkan::Texture m_vram_readback_texture;
|
Vulkan::StagingBuffer m_vram_read_staging_buffer;
|
||||||
Vulkan::StagingTexture m_vram_readback_staging_texture;
|
|
||||||
Vulkan::Texture m_display_texture;
|
Vulkan::Texture m_display_texture;
|
||||||
bool m_use_ssbos_for_vram_writes = false;
|
bool m_use_ssbos_for_vram_writes = false;
|
||||||
|
|
||||||
VkFramebuffer m_vram_framebuffer = VK_NULL_HANDLE;
|
VkFramebuffer m_vram_framebuffer = VK_NULL_HANDLE;
|
||||||
VkFramebuffer m_vram_update_depth_framebuffer = VK_NULL_HANDLE;
|
VkFramebuffer m_vram_update_depth_framebuffer = VK_NULL_HANDLE;
|
||||||
VkFramebuffer m_vram_readback_framebuffer = VK_NULL_HANDLE;
|
|
||||||
VkFramebuffer m_display_framebuffer = VK_NULL_HANDLE;
|
VkFramebuffer m_display_framebuffer = VK_NULL_HANDLE;
|
||||||
|
|
||||||
VkSampler m_point_sampler = VK_NULL_HANDLE;
|
VkSampler m_point_sampler = VK_NULL_HANDLE;
|
||||||
|
@ -113,6 +112,7 @@ private:
|
||||||
VkDescriptorSet m_vram_copy_descriptor_set = VK_NULL_HANDLE;
|
VkDescriptorSet m_vram_copy_descriptor_set = VK_NULL_HANDLE;
|
||||||
VkDescriptorSet m_vram_read_descriptor_set = VK_NULL_HANDLE;
|
VkDescriptorSet m_vram_read_descriptor_set = VK_NULL_HANDLE;
|
||||||
VkDescriptorSet m_vram_write_descriptor_set = VK_NULL_HANDLE;
|
VkDescriptorSet m_vram_write_descriptor_set = VK_NULL_HANDLE;
|
||||||
|
VkDescriptorSet m_vram_update_depth_descriptor_set = VK_NULL_HANDLE;
|
||||||
VkDescriptorSet m_display_descriptor_set = VK_NULL_HANDLE;
|
VkDescriptorSet m_display_descriptor_set = VK_NULL_HANDLE;
|
||||||
|
|
||||||
Vulkan::StreamBuffer m_vertex_stream_buffer;
|
Vulkan::StreamBuffer m_vertex_stream_buffer;
|
||||||
|
@ -132,7 +132,7 @@ private:
|
||||||
std::array<VkPipeline, 2> m_vram_write_pipelines{};
|
std::array<VkPipeline, 2> m_vram_write_pipelines{};
|
||||||
std::array<VkPipeline, 2> m_vram_copy_pipelines{};
|
std::array<VkPipeline, 2> m_vram_copy_pipelines{};
|
||||||
|
|
||||||
VkPipeline m_vram_readback_pipeline = VK_NULL_HANDLE;
|
VkPipeline m_vram_read_pipeline = VK_NULL_HANDLE;
|
||||||
VkPipeline m_vram_update_depth_pipeline = VK_NULL_HANDLE;
|
VkPipeline m_vram_update_depth_pipeline = VK_NULL_HANDLE;
|
||||||
|
|
||||||
// [depth_24][interlace_mode]
|
// [depth_24][interlace_mode]
|
||||||
|
|
|
@ -340,7 +340,7 @@ void ShaderGen::DeclareVertexEntryPoint(
|
||||||
for (u32 i = 0; i < num_texcoord_outputs; i++)
|
for (u32 i = 0; i < num_texcoord_outputs; i++)
|
||||||
ss << " " << qualifier << "float2 v_tex" << i << ";\n";
|
ss << " " << qualifier << "float2 v_tex" << i << ";\n";
|
||||||
|
|
||||||
for (const auto &[qualifiers, name] : additional_outputs)
|
for (const auto& [qualifiers, name] : additional_outputs)
|
||||||
{
|
{
|
||||||
const char* qualifier_to_use = (std::strlen(qualifiers) > 0) ? qualifiers : qualifier;
|
const char* qualifier_to_use = (std::strlen(qualifiers) > 0) ? qualifiers : qualifier;
|
||||||
ss << " " << qualifier_to_use << " " << name << ";\n";
|
ss << " " << qualifier_to_use << " " << name << ";\n";
|
||||||
|
@ -357,7 +357,7 @@ void ShaderGen::DeclareVertexEntryPoint(
|
||||||
for (u32 i = 0; i < num_texcoord_outputs; i++)
|
for (u32 i = 0; i < num_texcoord_outputs; i++)
|
||||||
ss << qualifier << "out float2 v_tex" << i << ";\n";
|
ss << qualifier << "out float2 v_tex" << i << ";\n";
|
||||||
|
|
||||||
for (const auto &[qualifiers, name] : additional_outputs)
|
for (const auto& [qualifiers, name] : additional_outputs)
|
||||||
{
|
{
|
||||||
const char* qualifier_to_use = (std::strlen(qualifiers) > 0) ? qualifiers : qualifier;
|
const char* qualifier_to_use = (std::strlen(qualifiers) > 0) ? qualifiers : qualifier;
|
||||||
ss << qualifier_to_use << " out " << name << ";\n";
|
ss << qualifier_to_use << " out " << name << ";\n";
|
||||||
|
@ -399,7 +399,7 @@ void ShaderGen::DeclareVertexEntryPoint(
|
||||||
ss << " " << qualifier << "out float2 v_tex" << i << " : TEXCOORD" << i << ",\n";
|
ss << " " << qualifier << "out float2 v_tex" << i << " : TEXCOORD" << i << ",\n";
|
||||||
|
|
||||||
u32 additional_counter = num_texcoord_outputs;
|
u32 additional_counter = num_texcoord_outputs;
|
||||||
for (const auto &[qualifiers, name] : additional_outputs)
|
for (const auto& [qualifiers, name] : additional_outputs)
|
||||||
{
|
{
|
||||||
const char* qualifier_to_use = (std::strlen(qualifiers) > 0) ? qualifiers : qualifier;
|
const char* qualifier_to_use = (std::strlen(qualifiers) > 0) ? qualifiers : qualifier;
|
||||||
ss << " " << qualifier_to_use << " out " << name << " : TEXCOORD" << additional_counter << ",\n";
|
ss << " " << qualifier_to_use << " out " << name << " : TEXCOORD" << additional_counter << ",\n";
|
||||||
|
@ -433,7 +433,7 @@ void ShaderGen::DeclareFragmentEntryPoint(
|
||||||
for (u32 i = 0; i < num_texcoord_inputs; i++)
|
for (u32 i = 0; i < num_texcoord_inputs; i++)
|
||||||
ss << " " << qualifier << "float2 v_tex" << i << ";\n";
|
ss << " " << qualifier << "float2 v_tex" << i << ";\n";
|
||||||
|
|
||||||
for (const auto &[qualifiers, name] : additional_inputs)
|
for (const auto& [qualifiers, name] : additional_inputs)
|
||||||
{
|
{
|
||||||
const char* qualifier_to_use = (std::strlen(qualifiers) > 0) ? qualifiers : qualifier;
|
const char* qualifier_to_use = (std::strlen(qualifiers) > 0) ? qualifiers : qualifier;
|
||||||
ss << " " << qualifier_to_use << " " << name << ";\n";
|
ss << " " << qualifier_to_use << " " << name << ";\n";
|
||||||
|
@ -450,7 +450,7 @@ void ShaderGen::DeclareFragmentEntryPoint(
|
||||||
for (u32 i = 0; i < num_texcoord_inputs; i++)
|
for (u32 i = 0; i < num_texcoord_inputs; i++)
|
||||||
ss << qualifier << "in float2 v_tex" << i << ";\n";
|
ss << qualifier << "in float2 v_tex" << i << ";\n";
|
||||||
|
|
||||||
for (const auto &[qualifiers, name] : additional_inputs)
|
for (const auto& [qualifiers, name] : additional_inputs)
|
||||||
{
|
{
|
||||||
const char* qualifier_to_use = (std::strlen(qualifiers) > 0) ? qualifiers : qualifier;
|
const char* qualifier_to_use = (std::strlen(qualifiers) > 0) ? qualifiers : qualifier;
|
||||||
ss << qualifier_to_use << " in " << name << ";\n";
|
ss << qualifier_to_use << " in " << name << ";\n";
|
||||||
|
@ -503,7 +503,7 @@ void ShaderGen::DeclareFragmentEntryPoint(
|
||||||
ss << " " << qualifier << "in float2 v_tex" << i << " : TEXCOORD" << i << ",\n";
|
ss << " " << qualifier << "in float2 v_tex" << i << " : TEXCOORD" << i << ",\n";
|
||||||
|
|
||||||
u32 additional_counter = num_texcoord_inputs;
|
u32 additional_counter = num_texcoord_inputs;
|
||||||
for (const auto &[qualifiers, name] : additional_inputs)
|
for (const auto& [qualifiers, name] : additional_inputs)
|
||||||
{
|
{
|
||||||
const char* qualifier_to_use = (std::strlen(qualifiers) > 0) ? qualifiers : qualifier;
|
const char* qualifier_to_use = (std::strlen(qualifiers) > 0) ? qualifiers : qualifier;
|
||||||
ss << " " << qualifier_to_use << " in " << name << " : TEXCOORD" << additional_counter << ",\n";
|
ss << " " << qualifier_to_use << " in " << name << " : TEXCOORD" << additional_counter << ",\n";
|
||||||
|
@ -536,6 +536,23 @@ void ShaderGen::DeclareFragmentEntryPoint(
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void ShaderGen::DeclareComputeEntryPoint(std::stringstream& ss, u32 local_size_x, u32 local_size_y, u32 local_size_z)
|
||||||
|
{
|
||||||
|
if (m_glsl)
|
||||||
|
{
|
||||||
|
ss << "#define c_local_id gl_LocalInvocationID\n";
|
||||||
|
ss << "#define c_global_id gl_GlobalInvocationID\n";
|
||||||
|
ss << "layout(local_size_x = " << local_size_x << ", local_size_y = " << local_size_y
|
||||||
|
<< ", local_size_z = " << local_size_z << ") in;\n";
|
||||||
|
ss << "void main()\n";
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
ss << "[numthreads(" << local_size_x << ", " << local_size_y << ", " << local_size_z << ")]\n";
|
||||||
|
ss << "void main(uint3 c_local_id : SV_GroupID, uint3 c_global_id : SV_DispatchThreadID)\n";
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
std::string ShaderGen::GenerateScreenQuadVertexShader()
|
std::string ShaderGen::GenerateScreenQuadVertexShader()
|
||||||
{
|
{
|
||||||
std::stringstream ss;
|
std::stringstream ss;
|
||||||
|
|
|
@ -40,6 +40,7 @@ protected:
|
||||||
const std::initializer_list<std::pair<const char*, const char*>>& additional_inputs,
|
const std::initializer_list<std::pair<const char*, const char*>>& additional_inputs,
|
||||||
bool declare_fragcoord = false, u32 num_color_outputs = 1, bool depth_output = false,
|
bool declare_fragcoord = false, u32 num_color_outputs = 1, bool depth_output = false,
|
||||||
bool msaa = false, bool ssaa = false, bool declare_sample_id = false);
|
bool msaa = false, bool ssaa = false, bool declare_sample_id = false);
|
||||||
|
void DeclareComputeEntryPoint(std::stringstream& ss, u32 local_size_x, u32 local_size_y, u32 local_size_z);
|
||||||
|
|
||||||
HostDisplay::RenderAPI m_render_api;
|
HostDisplay::RenderAPI m_render_api;
|
||||||
bool m_glsl;
|
bool m_glsl;
|
||||||
|
|
|
@ -1970,7 +1970,7 @@ void DrawSettingsWindow()
|
||||||
"to the hardware renderers.",
|
"to the hardware renderers.",
|
||||||
&s_settings_copy.gpu_24bit_chroma_smoothing);
|
&s_settings_copy.gpu_24bit_chroma_smoothing);
|
||||||
|
|
||||||
MenuHeading("PGXP (Precision Geometry Transform Pipeline");
|
MenuHeading("PGXP (Precision Geometry Transform Pipeline)");
|
||||||
|
|
||||||
settings_changed |=
|
settings_changed |=
|
||||||
ToggleButton("PGXP Geometry Correction",
|
ToggleButton("PGXP Geometry Correction",
|
||||||
|
|
Loading…
Reference in New Issue