diff --git a/rpcs3/Emu/RSX/GL/GLTexture.cpp b/rpcs3/Emu/RSX/GL/GLTexture.cpp index e7bddba5b8..2d4f99b251 100644 --- a/rpcs3/Emu/RSX/GL/GLTexture.cpp +++ b/rpcs3/Emu/RSX/GL/GLTexture.cpp @@ -3,6 +3,9 @@ #include "GLCompute.h" #include "GLRenderTargets.h" #include "GLOverlays.h" + +#include "glutils/ring_buffer.h" + #include "../GCM.h" #include "../RSXThread.h" #include "../RSXTexture.h" @@ -16,27 +19,10 @@ namespace gl extern void set_vis_texture(texture*); } - buffer g_typeless_transfer_buffer; - buffer g_upload_transfer_buffer; - buffer g_compute_decode_buffer; - buffer g_deswizzle_scratch_buffer; - - std::pair prepare_compute_resources(usz staging_data_length) - { - if (g_upload_transfer_buffer.size() < static_cast(staging_data_length)) - { - g_upload_transfer_buffer.remove(); - g_upload_transfer_buffer.create(gl::buffer::target::pixel_unpack, staging_data_length, nullptr, buffer::memory_type::host_visible, GL_STREAM_DRAW); - } - - if (g_compute_decode_buffer.size() < static_cast(staging_data_length) * 3) - { - g_compute_decode_buffer.remove(); - g_compute_decode_buffer.create(gl::buffer::target::ssbo, std::max(512, staging_data_length * 3), nullptr, buffer::memory_type::local, GL_STATIC_COPY); - } - - return { &g_upload_transfer_buffer, &g_compute_decode_buffer }; - } + scratch_ring_buffer g_typeless_transfer_buffer; + legacy_ring_buffer g_upload_transfer_buffer; + scratch_ring_buffer g_compute_decode_buffer; + scratch_ring_buffer g_deswizzle_scratch_buffer; void destroy_global_texture_resources() { @@ -47,23 +33,23 @@ namespace gl } template - void do_deswizzle_transformation(gl::command_context& cmd, u32 block_size, buffer* dst, buffer* src, u32 data_length, u16 width, u16 height, u16 depth) + void do_deswizzle_transformation(gl::command_context& cmd, u32 block_size, buffer* dst, u32 dst_offset, buffer* src, u32 src_offset, u32 data_length, u16 width, u16 height, u16 depth) { switch (block_size) { case 4: gl::get_compute_task>()->run( - cmd, dst, 0, src, 0, + cmd, dst, dst_offset, src, src_offset, data_length, width, height, depth, 1); break; case 8: gl::get_compute_task>()->run( - cmd, dst, 0, src, 0, + cmd, dst, dst_offset, src, src_offset, data_length, width, height, depth, 1); break; case 16: gl::get_compute_task>()->run( - cmd, dst, 0, src, 0, + cmd, dst, dst_offset, src, src_offset, data_length, width, height, depth, 1); break; default: @@ -497,7 +483,7 @@ namespace gl } void* copy_image_to_buffer(gl::command_context& cmd, const pixel_buffer_layout& pack_info, const gl::texture* src, gl::buffer* dst, - const int src_level, const coord3u& src_region, image_memory_requirements* mem_info) + u32 dst_offset, const int src_level, const coord3u& src_region, image_memory_requirements* mem_info) { auto initialize_scratch_mem = [&]() { @@ -533,10 +519,10 @@ namespace gl } dst->bind(buffer::target::pixel_pack); - src->copy_to(nullptr, static_cast(pack_info.format), static_cast(pack_info.type), src_level, src_region, {}); + src->copy_to(reinterpret_cast(static_cast(dst_offset)), static_cast(pack_info.format), static_cast(pack_info.type), src_level, src_region, {}); }; - void* result = nullptr; + void* result = reinterpret_cast(static_cast(dst_offset)); if (src->aspect() == image_aspect::color || pack_info.type == GL_UNSIGNED_SHORT || pack_info.type == GL_UNSIGNED_INT_24_8) @@ -544,7 +530,7 @@ namespace gl initialize_scratch_mem(); if (auto job = get_trivial_transform_job(pack_info)) { - job->run(cmd, dst, static_cast(mem_info->image_size_in_bytes)); + job->run(cmd, dst, static_cast(mem_info->image_size_in_bytes), dst_offset); } } else if (pack_info.type == GL_FLOAT) @@ -553,9 +539,9 @@ namespace gl mem_info->memory_required = (mem_info->image_size_in_texels * 6); initialize_scratch_mem(); - get_compute_task>()->run(cmd, dst, 0, + get_compute_task>()->run(cmd, dst, dst_offset, static_cast(mem_info->image_size_in_bytes), static_cast(mem_info->image_size_in_bytes)); - result = reinterpret_cast(mem_info->image_size_in_bytes); + result = reinterpret_cast(mem_info->image_size_in_bytes + dst_offset); } else if (pack_info.type == GL_FLOAT_32_UNSIGNED_INT_24_8_REV) { @@ -563,9 +549,9 @@ namespace gl mem_info->memory_required = (mem_info->image_size_in_texels * 12); initialize_scratch_mem(); - get_compute_task()->run(cmd, dst, 0, + get_compute_task()->run(cmd, dst, dst_offset, static_cast(mem_info->image_size_in_bytes), static_cast(mem_info->image_size_in_texels)); - result = reinterpret_cast(mem_info->image_size_in_bytes); + result = reinterpret_cast(mem_info->image_size_in_bytes + dst_offset); } else { @@ -770,14 +756,14 @@ namespace gl { bool apply_settings = true; bool use_compute_transform = is_swizzled; - buffer *upload_scratch_mem = nullptr, *compute_scratch_mem = nullptr; + std::pair upload_scratch_mem = {}, compute_scratch_mem = {}; image_memory_requirements mem_info; pixel_buffer_layout mem_layout; std::span dst_buffer = staging_buffer; void* out_pointer = staging_buffer.data(); u8 block_size_in_bytes = rsx::get_format_block_size_in_bytes(format); - u64 image_linear_size; + u64 image_linear_size = staging_buffer.size(); switch (gl_type) { @@ -798,9 +784,22 @@ namespace gl break; } + const auto min_required_buffer_size = std::max(utils::align(image_linear_size * 4, 0x100000), 16 * 0x100000); + if (use_compute_transform) { - std::tie(upload_scratch_mem, compute_scratch_mem) = prepare_compute_resources(staging_buffer.size()); + if (g_upload_transfer_buffer.size() < static_cast(min_required_buffer_size)) + { + g_upload_transfer_buffer.remove(); + g_upload_transfer_buffer.create(gl::buffer::target::pixel_unpack, min_required_buffer_size); + } + + if (g_compute_decode_buffer.size() < min_required_buffer_size) + { + g_compute_decode_buffer.remove(); + g_compute_decode_buffer.create(gl::buffer::target::ssbo, min_required_buffer_size); + } + out_pointer = nullptr; } @@ -810,9 +809,16 @@ namespace gl { const u64 row_pitch = rsx::align2(layout.width_in_block * block_size_in_bytes, caps.alignment); image_linear_size = row_pitch * layout.height_in_block * layout.depth; - dst_buffer = { reinterpret_cast(upload_scratch_mem->map(0, image_linear_size, gl::buffer::access::write)), image_linear_size }; + + compute_scratch_mem = { nullptr, g_compute_decode_buffer.alloc(static_cast(image_linear_size), 256) }; + compute_scratch_mem.first = reinterpret_cast(static_cast(compute_scratch_mem.second)); + + g_upload_transfer_buffer.reserve_storage_on_heap(image_linear_size); + upload_scratch_mem = g_upload_transfer_buffer.alloc_from_heap(static_cast(image_linear_size), 256); + dst_buffer = { reinterpret_cast(upload_scratch_mem.first), image_linear_size }; } + caps.supports_hw_deswizzle = (is_swizzled && use_compute_transform && image_linear_size > 4096); auto op = upload_texture_subresource(dst_buffer, layout, format, is_swizzled, caps); // Define upload region @@ -831,24 +837,24 @@ namespace gl mem_layout.format = gl_format; mem_layout.type = gl_type; - // 1. Unmap buffer - upload_scratch_mem->unmap(); - // 2. Upload memory to GPU if (!op.require_deswizzle) { - upload_scratch_mem->copy_to(compute_scratch_mem, 0, 0, image_linear_size); + g_upload_transfer_buffer.unmap(); + g_upload_transfer_buffer.copy_to(&g_compute_decode_buffer.get(), upload_scratch_mem.second, compute_scratch_mem.second, image_linear_size); } else { // 2.1 Copy data to deswizzle buf - if (g_deswizzle_scratch_buffer.size() < static_cast(image_linear_size)) + if (g_deswizzle_scratch_buffer.size() < min_required_buffer_size) { g_deswizzle_scratch_buffer.remove(); - g_deswizzle_scratch_buffer.create(gl::buffer::target::ssbo, image_linear_size, nullptr, gl::buffer::memory_type::local); + g_deswizzle_scratch_buffer.create(gl::buffer::target::ssbo, min_required_buffer_size); } - upload_scratch_mem->copy_to(&g_deswizzle_scratch_buffer, 0, 0, image_linear_size); + u32 deswizzle_data_offset = g_deswizzle_scratch_buffer.alloc(static_cast(image_linear_size), 256); + g_upload_transfer_buffer.unmap(); + g_upload_transfer_buffer.copy_to(&g_deswizzle_scratch_buffer.get(), upload_scratch_mem.second, deswizzle_data_offset, static_cast(image_linear_size)); // 2.2 Apply compute transform to deswizzle input and dump it in compute_scratch_mem ensure(op.element_size == 2 || op.element_size == 4); @@ -860,24 +866,35 @@ namespace gl if (op.element_size == 4) [[ likely ]] { - do_deswizzle_transformation(cmd, block_size, compute_scratch_mem, &g_deswizzle_scratch_buffer, static_cast(image_linear_size), layout.width_in_texel, layout.height_in_texel, layout.depth); + do_deswizzle_transformation(cmd, block_size, + &g_compute_decode_buffer.get(), compute_scratch_mem.second, &g_deswizzle_scratch_buffer.get(), deswizzle_data_offset, + static_cast(image_linear_size), layout.width_in_texel, layout.height_in_texel, layout.depth); } else { - do_deswizzle_transformation(cmd, block_size, compute_scratch_mem, &g_deswizzle_scratch_buffer, static_cast(image_linear_size), layout.width_in_texel, layout.height_in_texel, layout.depth); + do_deswizzle_transformation(cmd, block_size, + &g_compute_decode_buffer.get(), compute_scratch_mem.second, &g_deswizzle_scratch_buffer.get(), deswizzle_data_offset, + static_cast(image_linear_size), layout.width_in_texel, layout.height_in_texel, layout.depth); } } else { if (op.element_size == 4) [[ likely ]] { - do_deswizzle_transformation(cmd, block_size, compute_scratch_mem, &g_deswizzle_scratch_buffer, static_cast(image_linear_size), layout.width_in_texel, layout.height_in_texel, layout.depth); + do_deswizzle_transformation(cmd, block_size, + &g_compute_decode_buffer.get(), compute_scratch_mem.second, &g_deswizzle_scratch_buffer.get(), deswizzle_data_offset, + static_cast(image_linear_size), layout.width_in_texel, layout.height_in_texel, layout.depth); } else { - do_deswizzle_transformation(cmd, block_size, compute_scratch_mem, &g_deswizzle_scratch_buffer, static_cast(image_linear_size), layout.width_in_texel, layout.height_in_texel, layout.depth); + do_deswizzle_transformation(cmd, block_size, + &g_compute_decode_buffer.get(), compute_scratch_mem.second, &g_deswizzle_scratch_buffer.get(), deswizzle_data_offset, + static_cast(image_linear_size), layout.width_in_texel, layout.height_in_texel, layout.depth); } } + + // Barrier + g_deswizzle_scratch_buffer.push_barrier(deswizzle_data_offset, static_cast(image_linear_size)); } // 3. Update configuration @@ -886,7 +903,10 @@ namespace gl mem_info.memory_required = 0; // 4. Dispatch compute routines - copy_buffer_to_image(cmd, mem_layout, compute_scratch_mem, dst, nullptr, layout.level, region, & mem_info); + copy_buffer_to_image(cmd, mem_layout, &g_compute_decode_buffer.get(), dst, compute_scratch_mem.first, layout.level, region, &mem_info); + + // Barrier + g_compute_decode_buffer.push_barrier(compute_scratch_mem.second, static_cast(image_linear_size)); } else { @@ -1079,8 +1099,24 @@ namespace gl unpack_info.swap_bytes = false; } - void* data_ptr = copy_image_to_buffer(cmd, pack_info, src, &g_typeless_transfer_buffer, 0, src_region, &src_mem); - copy_buffer_to_image(cmd, unpack_info, &g_typeless_transfer_buffer, dst, data_ptr, 0, dst_region, &dst_mem); + u32 scratch_offset = 0; + const u64 min_storage_requirement = src_mem.image_size_in_bytes + dst_mem.image_size_in_bytes; + const u64 min_required_buffer_size = std::max(utils::align(min_storage_requirement, 0x100000) * 4, 16 * 0x100000); + + if (g_typeless_transfer_buffer.size() >= min_required_buffer_size) [[ likely ]] + { + scratch_offset = g_typeless_transfer_buffer.alloc(static_cast(min_storage_requirement), 256); + } + else + { + g_typeless_transfer_buffer.create(gl::buffer::target::ssbo, min_required_buffer_size); + } + + void* data_ptr = copy_image_to_buffer(cmd, pack_info, src, &g_typeless_transfer_buffer.get(), scratch_offset, 0, src_region, &src_mem); + copy_buffer_to_image(cmd, unpack_info, &g_typeless_transfer_buffer.get(), dst, data_ptr, 0, dst_region, &dst_mem); + + // Not truly range-accurate, but should cover most of what we care about + g_typeless_transfer_buffer.push_barrier(scratch_offset, static_cast(min_storage_requirement)); // Cleanup // NOTE: glBindBufferRange also binds the buffer to the old-school target. @@ -1092,10 +1128,10 @@ namespace gl else { const u64 max_mem = std::max(src_mem.image_size_in_bytes, dst_mem.image_size_in_bytes); - if (!g_typeless_transfer_buffer || max_mem > static_cast(g_typeless_transfer_buffer.size())) + if (max_mem > static_cast(g_typeless_transfer_buffer.size())) { - if (g_typeless_transfer_buffer) g_typeless_transfer_buffer.remove(); - g_typeless_transfer_buffer.create(buffer::target::pixel_pack, max_mem, nullptr, buffer::memory_type::local, GL_STATIC_COPY); + g_typeless_transfer_buffer.remove(); + g_typeless_transfer_buffer.create(buffer::target::pixel_pack, max_mem); } // Simplify pack/unpack information to something OpenGL can natively digest @@ -1152,7 +1188,7 @@ namespace gl pixel_pack_settings pack_settings{}; pack_settings.swap_bytes(pack_info.swap_bytes); - g_typeless_transfer_buffer.bind(buffer::target::pixel_pack); + g_typeless_transfer_buffer.get().bind(buffer::target::pixel_pack); src->copy_to(nullptr, static_cast(pack_info.format), static_cast(pack_info.type), 0, src_region, pack_settings); glBindBuffer(GL_PIXEL_PACK_BUFFER, GL_NONE); @@ -1161,7 +1197,7 @@ namespace gl pixel_unpack_settings unpack_settings{}; unpack_settings.swap_bytes(unpack_info.swap_bytes); - g_typeless_transfer_buffer.bind(buffer::target::pixel_unpack); + g_typeless_transfer_buffer.get().bind(buffer::target::pixel_unpack); dst->copy_from(nullptr, static_cast(unpack_info.format), static_cast(unpack_info.type), 0, dst_region, unpack_settings); glBindBuffer(GL_PIXEL_UNPACK_BUFFER, GL_NONE); } diff --git a/rpcs3/Emu/RSX/GL/GLTexture.h b/rpcs3/Emu/RSX/GL/GLTexture.h index 49bc0a613e..6d544c2771 100644 --- a/rpcs3/Emu/RSX/GL/GLTexture.h +++ b/rpcs3/Emu/RSX/GL/GLTexture.h @@ -45,7 +45,7 @@ namespace gl void copy_typeless(gl::command_context& cmd, texture* dst, const texture* src); void* copy_image_to_buffer(gl::command_context& cmd, const pixel_buffer_layout& pack_info, const gl::texture* src, gl::buffer* dst, - const int src_level, const coord3u& src_region, image_memory_requirements* mem_info); + u32 dst_offset, const int src_level, const coord3u& src_region, image_memory_requirements* mem_info); void copy_buffer_to_image(gl::command_context& cmd, const pixel_buffer_layout& unpack_info, gl::buffer* src, gl::texture* dst, const void* src_offset, const int dst_level, const coord3u& dst_region, image_memory_requirements* mem_info); diff --git a/rpcs3/Emu/RSX/GL/GLTextureCache.h b/rpcs3/Emu/RSX/GL/GLTextureCache.h index c5540dcd8f..1e1ecbf452 100644 --- a/rpcs3/Emu/RSX/GL/GLTextureCache.h +++ b/rpcs3/Emu/RSX/GL/GLTextureCache.h @@ -193,7 +193,7 @@ namespace gl mem_info.image_size_in_bytes *= 2; } - void* out_offset = copy_image_to_buffer(cmd, pack_info, src, &scratch_mem, 0, { {}, src->size3D() }, &mem_info); + void* out_offset = copy_image_to_buffer(cmd, pack_info, src, &scratch_mem, 0, 0, { {}, src->size3D() }, &mem_info); glBindBuffer(GL_SHADER_STORAGE_BUFFER, GL_NONE); glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT); diff --git a/rpcs3/Emu/RSX/GL/glutils/ring_buffer.cpp b/rpcs3/Emu/RSX/GL/glutils/ring_buffer.cpp index 7feb8be196..75bb6ed57e 100644 --- a/rpcs3/Emu/RSX/GL/glutils/ring_buffer.cpp +++ b/rpcs3/Emu/RSX/GL/glutils/ring_buffer.cpp @@ -273,14 +273,14 @@ namespace gl u64 start = utils::align(m_alloc_pointer, alignment); m_alloc_pointer = (start + size); - if (m_alloc_pointer > m_storage.size()) + if (static_cast(m_alloc_pointer) > m_storage.size()) { start = 0; m_alloc_pointer = size; } - pop_barrier(start, size); - return start; + pop_barrier(static_cast(start), size); + return static_cast(start); } void scratch_ring_buffer::pop_barrier(u32 start, u32 length) @@ -291,7 +291,10 @@ namespace gl if (barrier_.range.overlaps(range)) { barrier_.signal.wait_for_signal(); + return true; } + + return false; }), m_barriers.end()); } diff --git a/rpcs3/Emu/RSX/GL/glutils/ring_buffer.h b/rpcs3/Emu/RSX/GL/glutils/ring_buffer.h index 9ec47247a7..509884c725 100644 --- a/rpcs3/Emu/RSX/GL/glutils/ring_buffer.h +++ b/rpcs3/Emu/RSX/GL/glutils/ring_buffer.h @@ -106,5 +106,8 @@ namespace gl u32 alloc(u32 size, u32 alignment); void push_barrier(u32 start, u32 length); + + buffer& get() { return m_storage; } + u64 size() const { return m_storage.size(); } }; } \ No newline at end of file