mirror of https://github.com/RPCS3/rpcs3.git
gl: Compute optimizations
- Keep buffers around longer to allow driver heuristics to work - Properly initialize the shaders to allow optimal workgroup dispatch size
This commit is contained in:
parent
60a2a39e88
commit
943752db30
|
@ -24,10 +24,17 @@ namespace gl
|
|||
optimal_kernel_size = 256 / optimal_group_size;
|
||||
|
||||
glGetIntegeri_v(GL_MAX_COMPUTE_WORK_GROUP_COUNT, 0, reinterpret_cast<GLint*>(&max_invocations_x));
|
||||
|
||||
initialized = true;
|
||||
}
|
||||
|
||||
void compute_task::create()
|
||||
{
|
||||
if (!initialized)
|
||||
{
|
||||
initialize();
|
||||
}
|
||||
|
||||
if (!compiled)
|
||||
{
|
||||
m_shader.create(::glsl::program_domain::glsl_compute_program, m_src);
|
||||
|
|
|
@ -14,6 +14,7 @@ namespace gl
|
|||
gl::glsl::shader m_shader;
|
||||
gl::glsl::program m_program;
|
||||
bool compiled = false;
|
||||
bool initialized = false;
|
||||
|
||||
// Device-specific options
|
||||
bool unroll_loops = true;
|
||||
|
@ -260,6 +261,8 @@ namespace gl
|
|||
{
|
||||
ensure((sizeof(_BlockType) & 3) == 0); // "Unsupported block type"
|
||||
|
||||
initialize();
|
||||
|
||||
m_src =
|
||||
#include "../Program/GLSLSnippets/GPUDeswizzle.glsl"
|
||||
;
|
||||
|
|
|
@ -18,19 +18,20 @@ namespace gl
|
|||
buffer g_typeless_transfer_buffer;
|
||||
buffer g_upload_transfer_buffer;
|
||||
buffer g_compute_decode_buffer;
|
||||
buffer g_deswizzle_scratch_buffer;
|
||||
|
||||
std::pair<buffer*, buffer*> prepare_compute_resources(usz staging_data_length)
|
||||
{
|
||||
if (g_upload_transfer_buffer.size() < static_cast<GLsizeiptr>(staging_data_length))
|
||||
{
|
||||
g_upload_transfer_buffer.remove();
|
||||
g_upload_transfer_buffer.create(staging_data_length, nullptr, buffer::memory_type::host_visible, GL_STREAM_DRAW);
|
||||
g_upload_transfer_buffer.create(gl::buffer::target::pixel_unpack, staging_data_length, nullptr, buffer::memory_type::host_visible, GL_STREAM_DRAW);
|
||||
}
|
||||
|
||||
if (g_compute_decode_buffer.size() < static_cast<GLsizeiptr>(staging_data_length) * 3)
|
||||
{
|
||||
g_compute_decode_buffer.remove();
|
||||
g_compute_decode_buffer.create(std::max<GLsizeiptr>(512, staging_data_length * 3), nullptr, buffer::memory_type::local, GL_STATIC_COPY);
|
||||
g_compute_decode_buffer.create(gl::buffer::target::pixel_pack, std::max<GLsizeiptr>(512, staging_data_length * 3), nullptr, buffer::memory_type::local, GL_STATIC_COPY);
|
||||
}
|
||||
|
||||
return { &g_upload_transfer_buffer, &g_compute_decode_buffer };
|
||||
|
@ -41,6 +42,7 @@ namespace gl
|
|||
g_typeless_transfer_buffer.remove();
|
||||
g_upload_transfer_buffer.remove();
|
||||
g_compute_decode_buffer.remove();
|
||||
g_deswizzle_scratch_buffer.remove();
|
||||
}
|
||||
|
||||
template <typename WordType, bool SwapBytes>
|
||||
|
@ -727,8 +729,6 @@ namespace gl
|
|||
u8 block_size_in_bytes = rsx::get_format_block_size_in_bytes(format);
|
||||
u64 image_linear_size;
|
||||
|
||||
gl::buffer deswizzle_buf;
|
||||
|
||||
switch (gl_type)
|
||||
{
|
||||
case GL_BYTE:
|
||||
|
@ -792,13 +792,14 @@ namespace gl
|
|||
else
|
||||
{
|
||||
// 2.1 Copy data to deswizzle buf
|
||||
if (deswizzle_buf.size() < image_linear_size)
|
||||
if (g_deswizzle_scratch_buffer.size() < image_linear_size)
|
||||
{
|
||||
deswizzle_buf.remove();
|
||||
deswizzle_buf.create(gl::buffer::target::ssbo, image_linear_size, nullptr, gl::buffer::memory_type::local);
|
||||
g_deswizzle_scratch_buffer.remove();
|
||||
g_deswizzle_scratch_buffer.create(gl::buffer::target::ssbo, image_linear_size, nullptr, gl::buffer::memory_type::local);
|
||||
rsx_log.error("DESWZ BUF @0x%x", g_deswizzle_scratch_buffer.id());
|
||||
}
|
||||
|
||||
upload_scratch_mem->copy_to(&deswizzle_buf, 0, 0, image_linear_size);
|
||||
upload_scratch_mem->copy_to(&g_deswizzle_scratch_buffer, 0, 0, image_linear_size);
|
||||
|
||||
// 2.2 Apply compute transform to deswizzle input and dump it in compute_scratch_mem
|
||||
ensure(op.element_size == 2 || op.element_size == 4);
|
||||
|
@ -810,22 +811,22 @@ namespace gl
|
|||
|
||||
if (op.element_size == 4) [[ likely ]]
|
||||
{
|
||||
do_deswizzle_transformation<u32, true>(cmd, block_size, compute_scratch_mem, &deswizzle_buf, image_linear_size, layout.width_in_texel, layout.height_in_texel, layout.depth);
|
||||
do_deswizzle_transformation<u32, true>(cmd, block_size, compute_scratch_mem, &g_deswizzle_scratch_buffer, image_linear_size, layout.width_in_texel, layout.height_in_texel, layout.depth);
|
||||
}
|
||||
else
|
||||
{
|
||||
do_deswizzle_transformation<u16, true>(cmd, block_size, compute_scratch_mem, &deswizzle_buf, image_linear_size, layout.width_in_texel, layout.height_in_texel, layout.depth);
|
||||
do_deswizzle_transformation<u16, true>(cmd, block_size, compute_scratch_mem, &g_deswizzle_scratch_buffer, image_linear_size, layout.width_in_texel, layout.height_in_texel, layout.depth);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if (op.element_size == 4) [[ likely ]]
|
||||
{
|
||||
do_deswizzle_transformation<u32, false>(cmd, block_size, compute_scratch_mem, &deswizzle_buf, image_linear_size, layout.width_in_texel, layout.height_in_texel, layout.depth);
|
||||
do_deswizzle_transformation<u32, false>(cmd, block_size, compute_scratch_mem, &g_deswizzle_scratch_buffer, image_linear_size, layout.width_in_texel, layout.height_in_texel, layout.depth);
|
||||
}
|
||||
else
|
||||
{
|
||||
do_deswizzle_transformation<u16, false>(cmd, block_size, compute_scratch_mem, &deswizzle_buf, image_linear_size, layout.width_in_texel, layout.height_in_texel, layout.depth);
|
||||
do_deswizzle_transformation<u16, false>(cmd, block_size, compute_scratch_mem, &g_deswizzle_scratch_buffer, image_linear_size, layout.width_in_texel, layout.height_in_texel, layout.depth);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -849,8 +850,6 @@ namespace gl
|
|||
dst->copy_from(out_pointer, static_cast<texture::format>(gl_format), static_cast<texture::type>(gl_type), layout.level, region, unpack_settings);
|
||||
}
|
||||
}
|
||||
|
||||
deswizzle_buf.remove();
|
||||
}
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue