mirror of https://github.com/RPCS3/rpcs3.git
gl: Compute optimizations
- Keep buffers around longer to allow driver heurestics to work - Properly initialize the shaders to allow optimal workgroup dispatch size
This commit is contained in:
parent
60a2a39e88
commit
943752db30
|
@ -24,10 +24,17 @@ namespace gl
|
||||||
optimal_kernel_size = 256 / optimal_group_size;
|
optimal_kernel_size = 256 / optimal_group_size;
|
||||||
|
|
||||||
glGetIntegeri_v(GL_MAX_COMPUTE_WORK_GROUP_COUNT, 0, reinterpret_cast<GLint*>(&max_invocations_x));
|
glGetIntegeri_v(GL_MAX_COMPUTE_WORK_GROUP_COUNT, 0, reinterpret_cast<GLint*>(&max_invocations_x));
|
||||||
|
|
||||||
|
initialized = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
void compute_task::create()
|
void compute_task::create()
|
||||||
{
|
{
|
||||||
|
if (!initialized)
|
||||||
|
{
|
||||||
|
initialize();
|
||||||
|
}
|
||||||
|
|
||||||
if (!compiled)
|
if (!compiled)
|
||||||
{
|
{
|
||||||
m_shader.create(::glsl::program_domain::glsl_compute_program, m_src);
|
m_shader.create(::glsl::program_domain::glsl_compute_program, m_src);
|
||||||
|
|
|
@ -14,6 +14,7 @@ namespace gl
|
||||||
gl::glsl::shader m_shader;
|
gl::glsl::shader m_shader;
|
||||||
gl::glsl::program m_program;
|
gl::glsl::program m_program;
|
||||||
bool compiled = false;
|
bool compiled = false;
|
||||||
|
bool initialized = false;
|
||||||
|
|
||||||
// Device-specific options
|
// Device-specific options
|
||||||
bool unroll_loops = true;
|
bool unroll_loops = true;
|
||||||
|
@ -260,6 +261,8 @@ namespace gl
|
||||||
{
|
{
|
||||||
ensure((sizeof(_BlockType) & 3) == 0); // "Unsupported block type"
|
ensure((sizeof(_BlockType) & 3) == 0); // "Unsupported block type"
|
||||||
|
|
||||||
|
initialize();
|
||||||
|
|
||||||
m_src =
|
m_src =
|
||||||
#include "../Program/GLSLSnippets/GPUDeswizzle.glsl"
|
#include "../Program/GLSLSnippets/GPUDeswizzle.glsl"
|
||||||
;
|
;
|
||||||
|
|
|
@ -18,19 +18,20 @@ namespace gl
|
||||||
buffer g_typeless_transfer_buffer;
|
buffer g_typeless_transfer_buffer;
|
||||||
buffer g_upload_transfer_buffer;
|
buffer g_upload_transfer_buffer;
|
||||||
buffer g_compute_decode_buffer;
|
buffer g_compute_decode_buffer;
|
||||||
|
buffer g_deswizzle_scratch_buffer;
|
||||||
|
|
||||||
std::pair<buffer*, buffer*> prepare_compute_resources(usz staging_data_length)
|
std::pair<buffer*, buffer*> prepare_compute_resources(usz staging_data_length)
|
||||||
{
|
{
|
||||||
if (g_upload_transfer_buffer.size() < static_cast<GLsizeiptr>(staging_data_length))
|
if (g_upload_transfer_buffer.size() < static_cast<GLsizeiptr>(staging_data_length))
|
||||||
{
|
{
|
||||||
g_upload_transfer_buffer.remove();
|
g_upload_transfer_buffer.remove();
|
||||||
g_upload_transfer_buffer.create(staging_data_length, nullptr, buffer::memory_type::host_visible, GL_STREAM_DRAW);
|
g_upload_transfer_buffer.create(gl::buffer::target::pixel_unpack, staging_data_length, nullptr, buffer::memory_type::host_visible, GL_STREAM_DRAW);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (g_compute_decode_buffer.size() < static_cast<GLsizeiptr>(staging_data_length) * 3)
|
if (g_compute_decode_buffer.size() < static_cast<GLsizeiptr>(staging_data_length) * 3)
|
||||||
{
|
{
|
||||||
g_compute_decode_buffer.remove();
|
g_compute_decode_buffer.remove();
|
||||||
g_compute_decode_buffer.create(std::max<GLsizeiptr>(512, staging_data_length * 3), nullptr, buffer::memory_type::local, GL_STATIC_COPY);
|
g_compute_decode_buffer.create(gl::buffer::target::pixel_pack, std::max<GLsizeiptr>(512, staging_data_length * 3), nullptr, buffer::memory_type::local, GL_STATIC_COPY);
|
||||||
}
|
}
|
||||||
|
|
||||||
return { &g_upload_transfer_buffer, &g_compute_decode_buffer };
|
return { &g_upload_transfer_buffer, &g_compute_decode_buffer };
|
||||||
|
@ -41,6 +42,7 @@ namespace gl
|
||||||
g_typeless_transfer_buffer.remove();
|
g_typeless_transfer_buffer.remove();
|
||||||
g_upload_transfer_buffer.remove();
|
g_upload_transfer_buffer.remove();
|
||||||
g_compute_decode_buffer.remove();
|
g_compute_decode_buffer.remove();
|
||||||
|
g_deswizzle_scratch_buffer.remove();
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename WordType, bool SwapBytes>
|
template <typename WordType, bool SwapBytes>
|
||||||
|
@ -727,8 +729,6 @@ namespace gl
|
||||||
u8 block_size_in_bytes = rsx::get_format_block_size_in_bytes(format);
|
u8 block_size_in_bytes = rsx::get_format_block_size_in_bytes(format);
|
||||||
u64 image_linear_size;
|
u64 image_linear_size;
|
||||||
|
|
||||||
gl::buffer deswizzle_buf;
|
|
||||||
|
|
||||||
switch (gl_type)
|
switch (gl_type)
|
||||||
{
|
{
|
||||||
case GL_BYTE:
|
case GL_BYTE:
|
||||||
|
@ -792,13 +792,14 @@ namespace gl
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
// 2.1 Copy data to deswizzle buf
|
// 2.1 Copy data to deswizzle buf
|
||||||
if (deswizzle_buf.size() < image_linear_size)
|
if (g_deswizzle_scratch_buffer.size() < image_linear_size)
|
||||||
{
|
{
|
||||||
deswizzle_buf.remove();
|
g_deswizzle_scratch_buffer.remove();
|
||||||
deswizzle_buf.create(gl::buffer::target::ssbo, image_linear_size, nullptr, gl::buffer::memory_type::local);
|
g_deswizzle_scratch_buffer.create(gl::buffer::target::ssbo, image_linear_size, nullptr, gl::buffer::memory_type::local);
|
||||||
|
rsx_log.error("DESWZ BUF @0x%x", g_deswizzle_scratch_buffer.id());
|
||||||
}
|
}
|
||||||
|
|
||||||
upload_scratch_mem->copy_to(&deswizzle_buf, 0, 0, image_linear_size);
|
upload_scratch_mem->copy_to(&g_deswizzle_scratch_buffer, 0, 0, image_linear_size);
|
||||||
|
|
||||||
// 2.2 Apply compute transform to deswizzle input and dump it in compute_scratch_mem
|
// 2.2 Apply compute transform to deswizzle input and dump it in compute_scratch_mem
|
||||||
ensure(op.element_size == 2 || op.element_size == 4);
|
ensure(op.element_size == 2 || op.element_size == 4);
|
||||||
|
@ -810,22 +811,22 @@ namespace gl
|
||||||
|
|
||||||
if (op.element_size == 4) [[ likely ]]
|
if (op.element_size == 4) [[ likely ]]
|
||||||
{
|
{
|
||||||
do_deswizzle_transformation<u32, true>(cmd, block_size, compute_scratch_mem, &deswizzle_buf, image_linear_size, layout.width_in_texel, layout.height_in_texel, layout.depth);
|
do_deswizzle_transformation<u32, true>(cmd, block_size, compute_scratch_mem, &g_deswizzle_scratch_buffer, image_linear_size, layout.width_in_texel, layout.height_in_texel, layout.depth);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
do_deswizzle_transformation<u16, true>(cmd, block_size, compute_scratch_mem, &deswizzle_buf, image_linear_size, layout.width_in_texel, layout.height_in_texel, layout.depth);
|
do_deswizzle_transformation<u16, true>(cmd, block_size, compute_scratch_mem, &g_deswizzle_scratch_buffer, image_linear_size, layout.width_in_texel, layout.height_in_texel, layout.depth);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
if (op.element_size == 4) [[ likely ]]
|
if (op.element_size == 4) [[ likely ]]
|
||||||
{
|
{
|
||||||
do_deswizzle_transformation<u32, false>(cmd, block_size, compute_scratch_mem, &deswizzle_buf, image_linear_size, layout.width_in_texel, layout.height_in_texel, layout.depth);
|
do_deswizzle_transformation<u32, false>(cmd, block_size, compute_scratch_mem, &g_deswizzle_scratch_buffer, image_linear_size, layout.width_in_texel, layout.height_in_texel, layout.depth);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
do_deswizzle_transformation<u16, false>(cmd, block_size, compute_scratch_mem, &deswizzle_buf, image_linear_size, layout.width_in_texel, layout.height_in_texel, layout.depth);
|
do_deswizzle_transformation<u16, false>(cmd, block_size, compute_scratch_mem, &g_deswizzle_scratch_buffer, image_linear_size, layout.width_in_texel, layout.height_in_texel, layout.depth);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -849,8 +850,6 @@ namespace gl
|
||||||
dst->copy_from(out_pointer, static_cast<texture::format>(gl_format), static_cast<texture::type>(gl_type), layout.level, region, unpack_settings);
|
dst->copy_from(out_pointer, static_cast<texture::format>(gl_format), static_cast<texture::type>(gl_type), layout.level, region, unpack_settings);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
deswizzle_buf.remove();
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue