GPU/HW: Support SSBOs instead of texture buffers for VRAM writes
This commit is contained in:
parent
eec37df1e0
commit
08ef8c1e8d
|
@ -433,7 +433,7 @@ bool GPU_HW_D3D11::CompileShaders()
|
|||
return false;
|
||||
|
||||
m_vram_write_pixel_shader =
|
||||
m_shader_cache.GetPixelShader(m_device.Get(), shadergen.GenerateVRAMWriteFragmentShader());
|
||||
m_shader_cache.GetPixelShader(m_device.Get(), shadergen.GenerateVRAMWriteFragmentShader(false));
|
||||
if (!m_vram_write_pixel_shader)
|
||||
return false;
|
||||
|
||||
|
|
|
@ -181,7 +181,16 @@ void GPU_HW_OpenGL::SetCapabilities(HostDisplay* host_display)
|
|||
}
|
||||
else
|
||||
{
|
||||
Log_WarningPrintf("Texture buffers are not supported, VRAM writes will be slower.");
|
||||
// Try SSBOs.
|
||||
GLint64 max_ssbo_size = 0;
|
||||
if (GLAD_GL_VERSION_4_3 || GLAD_GL_ES_VERSION_3_1 || GLAD_GL_ARB_shader_storage_buffer_object)
|
||||
glGetInteger64v(GL_MAX_SHADER_STORAGE_BLOCK_SIZE, &max_ssbo_size);
|
||||
|
||||
m_use_ssbo_for_vram_writes = (max_ssbo_size >= (VRAM_WIDTH * VRAM_HEIGHT * sizeof(u16)));
|
||||
if (m_use_ssbo_for_vram_writes)
|
||||
Log_InfoPrintf("Using shader storage buffers for VRAM writes.");
|
||||
else
|
||||
Log_WarningPrintf("Texture buffers are not supported, VRAM writes will be slower.");
|
||||
}
|
||||
|
||||
int max_dual_source_draw_buffers = 0;
|
||||
|
@ -484,10 +493,10 @@ bool GPU_HW_OpenGL::CompilePrograms()
|
|||
prog->Uniform1i("samp0", 0);
|
||||
m_vram_update_depth_program = std::move(*prog);
|
||||
|
||||
if (m_supports_texture_buffer)
|
||||
if (m_supports_texture_buffer || m_use_ssbo_for_vram_writes)
|
||||
{
|
||||
prog = m_shader_cache.GetProgram(shadergen.GenerateScreenQuadVertexShader(), {},
|
||||
shadergen.GenerateVRAMWriteFragmentShader(),
|
||||
shadergen.GenerateVRAMWriteFragmentShader(m_use_ssbo_for_vram_writes),
|
||||
[this, use_binding_layout](GL::Program& prog) {
|
||||
if (!IsGLES() && !use_binding_layout)
|
||||
prog.BindFragData(0, "o_col0");
|
||||
|
@ -751,7 +760,7 @@ void GPU_HW_OpenGL::UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void*
|
|||
GPU_HW::UpdateVRAM(x, y, width, height, data);
|
||||
|
||||
const u32 num_pixels = width * height;
|
||||
if (num_pixels < m_max_texture_buffer_size)
|
||||
if (num_pixels < m_max_texture_buffer_size || m_use_ssbo_for_vram_writes)
|
||||
{
|
||||
const auto map_result = m_texture_stream_buffer->Map(sizeof(u16), num_pixels * sizeof(u16));
|
||||
std::memcpy(map_result.pointer, data, num_pixels * sizeof(u16));
|
||||
|
@ -771,7 +780,10 @@ void GPU_HW_OpenGL::UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void*
|
|||
glDepthFunc(m_GPUSTAT.check_mask_before_draw ? GL_GEQUAL : GL_ALWAYS);
|
||||
|
||||
m_vram_write_program.Bind();
|
||||
glBindTexture(GL_TEXTURE_BUFFER, m_texture_buffer_r16ui_texture);
|
||||
if (m_use_ssbo_for_vram_writes)
|
||||
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, m_texture_stream_buffer->GetGLBufferId());
|
||||
else
|
||||
glBindTexture(GL_TEXTURE_BUFFER, m_texture_buffer_r16ui_texture);
|
||||
|
||||
const VRAMWriteUBOData uniforms = {x,
|
||||
flipped_y,
|
||||
|
|
|
@ -97,4 +97,5 @@ private:
|
|||
|
||||
bool m_supports_texture_buffer = false;
|
||||
bool m_supports_geometry_shaders = false;
|
||||
bool m_use_ssbo_for_vram_writes = false;
|
||||
};
|
||||
|
|
|
@ -705,8 +705,7 @@ float4 SampleFromVRAM(uint4 texpage, float2 coords)
|
|||
|
||||
if (textured)
|
||||
{
|
||||
DeclareFragmentEntryPoint(ss, 1, 1, {{"nointerpolation", "uint4 v_texpage"}},
|
||||
true, use_dual_source ? 2 : 1, true);
|
||||
DeclareFragmentEntryPoint(ss, 1, 1, {{"nointerpolation", "uint4 v_texpage"}}, true, use_dual_source ? 2 : 1, true);
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -1167,7 +1166,7 @@ uint SampleVRAM(uint2 coords)
|
|||
return ss.str();
|
||||
}
|
||||
|
||||
std::string GPU_HW_ShaderGen::GenerateVRAMWriteFragmentShader()
|
||||
std::string GPU_HW_ShaderGen::GenerateVRAMWriteFragmentShader(bool use_ssbo)
|
||||
{
|
||||
std::stringstream ss;
|
||||
WriteHeader(ss);
|
||||
|
@ -1177,7 +1176,26 @@ std::string GPU_HW_ShaderGen::GenerateVRAMWriteFragmentShader()
|
|||
{"uint2 u_base_coords", "uint2 u_size", "uint u_buffer_base_offset", "uint u_mask_or_bits", "float u_depth_value"},
|
||||
true);
|
||||
|
||||
DeclareTextureBuffer(ss, "samp0", 0, true, true);
|
||||
if (use_ssbo && m_glsl)
|
||||
{
|
||||
ss << "layout(std430";
|
||||
if (IsVulkan())
|
||||
ss << ", set = 0, binding = 0";
|
||||
else if (m_use_glsl_binding_layout)
|
||||
ss << ", binding = 0";
|
||||
|
||||
ss << ") buffer SSBO {\n";
|
||||
ss << " uint ssbo_data[];\n";
|
||||
ss << "};\n\n";
|
||||
|
||||
ss << "#define GET_VALUE(buffer_offset) (ssbo_data[(buffer_offset) / 2u] >> (((buffer_offset) % 2u) * 16u))\n\n";
|
||||
}
|
||||
else
|
||||
{
|
||||
DeclareTextureBuffer(ss, "samp0", 0, true, true);
|
||||
ss << "#define GET_VALUE(buffer_offset) (LOAD_TEXTURE_BUFFER(samp0, int(buffer_offset)).r)\n\n";
|
||||
}
|
||||
|
||||
DeclareFragmentEntryPoint(ss, 0, 1, {}, true, 1, true);
|
||||
ss << R"(
|
||||
{
|
||||
|
@ -1190,7 +1208,7 @@ std::string GPU_HW_ShaderGen::GenerateVRAMWriteFragmentShader()
|
|||
#endif
|
||||
|
||||
uint buffer_offset = u_buffer_base_offset + (offset.y * u_size.x) + offset.x;
|
||||
uint value = LOAD_TEXTURE_BUFFER(samp0, int(buffer_offset)).r | u_mask_or_bits;
|
||||
uint value = GET_VALUE(buffer_offset) | u_mask_or_bits;
|
||||
|
||||
o_col0 = RGBA5551ToRGBA8(value);
|
||||
o_depth = (o_col0.a == 1.0) ? u_depth_value : 0.0;
|
||||
|
|
|
@ -23,7 +23,7 @@ public:
|
|||
std::string GenerateCopyFragmentShader();
|
||||
std::string GenerateDisplayFragmentShader(bool depth_24bit, GPU_HW::InterlacedRenderMode interlace_mode);
|
||||
std::string GenerateVRAMReadFragmentShader();
|
||||
std::string GenerateVRAMWriteFragmentShader();
|
||||
std::string GenerateVRAMWriteFragmentShader(bool use_ssbo);
|
||||
std::string GenerateVRAMCopyFragmentShader();
|
||||
std::string GenerateVRAMUpdateDepthFragmentShader();
|
||||
|
||||
|
|
|
@ -747,7 +747,7 @@ bool GPU_HW_Vulkan::CompilePipelines()
|
|||
|
||||
// VRAM write
|
||||
{
|
||||
VkShaderModule fs = g_vulkan_shader_cache->GetFragmentShader(shadergen.GenerateVRAMWriteFragmentShader());
|
||||
VkShaderModule fs = g_vulkan_shader_cache->GetFragmentShader(shadergen.GenerateVRAMWriteFragmentShader(false));
|
||||
if (fs == VK_NULL_HANDLE)
|
||||
return false;
|
||||
|
||||
|
|
Loading…
Reference in New Issue