From cc313bfba1c338a2ce77c55eddab4d1f45203ed2 Mon Sep 17 00:00:00 2001 From: kd-11 Date: Tue, 23 Apr 2024 05:02:30 +0300 Subject: [PATCH] gl: Implement transform constants patching --- rpcs3/Emu/RSX/GL/GLGSRender.cpp | 99 +++++++++++++++++++++++++++++---- rpcs3/Emu/RSX/GL/GLGSRender.h | 10 +++- 2 files changed, 96 insertions(+), 13 deletions(-) diff --git a/rpcs3/Emu/RSX/GL/GLGSRender.cpp b/rpcs3/Emu/RSX/GL/GLGSRender.cpp index 02d6feb860..ab6e86d18a 100644 --- a/rpcs3/Emu/RSX/GL/GLGSRender.cpp +++ b/rpcs3/Emu/RSX/GL/GLGSRender.cpp @@ -3,8 +3,10 @@ #include "../Overlays/Shaders/shader_loading_dialog_native.h" #include "GLGSRender.h" #include "GLCompute.h" + #include "Emu/Memory/vm_locking.h" #include "Emu/RSX/rsx_methods.h" +#include "Emu/RSX/NV47/HW/context_accessors.define.h" #include "../Program/program_state_cache2.hpp" @@ -263,6 +265,7 @@ void GLGSRender::on_init_thread() m_vertex_instructions_buffer = std::make_unique(); m_fragment_instructions_buffer = std::make_unique(); m_raster_env_ring_buffer = std::make_unique(); + m_scratch_ring_buffer = std::make_unique(); } else { @@ -277,6 +280,7 @@ void GLGSRender::on_init_thread() m_vertex_instructions_buffer = std::make_unique(); m_fragment_instructions_buffer = std::make_unique(); m_raster_env_ring_buffer = std::make_unique(); + m_scratch_ring_buffer = std::make_unique(); } m_attrib_ring_buffer->create(gl::buffer::target::texture, 256 * 0x100000); @@ -288,6 +292,7 @@ void GLGSRender::on_init_thread() m_texture_parameters_buffer->create(gl::buffer::target::uniform, 16 * 0x100000); m_vertex_layout_buffer->create(gl::buffer::target::uniform, 16 * 0x100000); m_raster_env_ring_buffer->create(gl::buffer::target::uniform, 16 * 0x100000); + m_scratch_ring_buffer->create(gl::buffer::target::uniform, 16 * 0x100000); if (shadermode == shader_mode::async_with_interpreter || shadermode == shader_mode::interpreter_only) { @@ -501,6 +506,11 @@ void GLGSRender::on_exit() m_raster_env_ring_buffer->remove(); } + if 
(m_scratch_ring_buffer) + { + m_scratch_ring_buffer->remove(); + } + m_null_textures.clear(); m_gl_texture_cache.destroy(); m_ui_renderer.destroy(); @@ -823,18 +833,20 @@ void GLGSRender::load_program_env() if (update_transform_constants) { // Vertex constants - const usz transform_constants_size = (!m_vertex_prog || m_vertex_prog->has_indexed_constants) ? 8192 : m_vertex_prog->constant_ids.size() * 16; - if (transform_constants_size) + u32 mem_offset = 0; + auto mem_alloc = [&](usz size) -> std::pair { - auto mapping = m_transform_constants_buffer->alloc_from_heap(static_cast(transform_constants_size), m_uniform_buffer_offset_align); - auto buf = static_cast(mapping.first); + const auto mapping = m_transform_constants_buffer->alloc_from_heap(static_cast(size), m_uniform_buffer_offset_align); + mem_offset = mapping.second; + return { mapping.first, size }; + }; - const auto constant_ids = (transform_constants_size == 8192) - ? std::span{} - : std::span(m_vertex_prog->constant_ids); - fill_vertex_program_constants_data(buf, constant_ids); + rsx::io_buffer io_buf(mem_alloc); + upload_transform_constants(io_buf); - m_transform_constants_buffer->bind_range(GL_VERTEX_CONSTANT_BUFFERS_BIND_SLOT, mapping.second, static_cast(transform_constants_size)); + if (!io_buf.empty()) + { + m_transform_constants_buffer->bind_range(GL_VERTEX_CONSTANT_BUFFERS_BIND_SLOT, mem_offset, ::size32(io_buf)); } } @@ -946,6 +958,20 @@ void GLGSRender::load_program_env() rsx::pipeline_state::fragment_texture_state_dirty); } +void GLGSRender::upload_transform_constants(const rsx::io_buffer& buffer) +{ + const usz transform_constants_size = (!m_vertex_prog || m_vertex_prog->has_indexed_constants) ? 8192 : m_vertex_prog->constant_ids.size() * 16; + if (transform_constants_size) + { + const auto constant_ids = (transform_constants_size == 8192) + ? 
std::span{} + : std::span(m_vertex_prog->constant_ids); + + buffer.reserve(transform_constants_size); + fill_vertex_program_constants_data(buffer.data(), constant_ids); + } +} + void GLGSRender::update_vertex_env(const gl::vertex_upload_info& upload_info) { if (manually_flush_ring_buffers) @@ -971,12 +997,61 @@ void GLGSRender::update_vertex_env(const gl::vertex_upload_info& upload_info) } } +void GLGSRender::patch_transform_constants(rsx::context* ctx, u32 index, u32 count) +{ + std::pair data_range {}; + void* data_source = nullptr; + const auto bound_range = m_transform_constants_buffer->bound_range(); + + if (!m_vertex_prog || m_vertex_prog->has_indexed_constants) + { + // We're working with a full range. We can do a direct patch in this case since no index translation is required. + const auto byte_count = count * 16; + const auto byte_offset = index * 16; + + data_range = { bound_range.first + byte_offset, byte_count}; + data_source = &REGS(ctx)->transform_constants[index]; + } + else + { + auto allocate_mem = [&](usz size) -> std::pair + { + m_scratch_buffer.resize(size); + return { m_scratch_buffer.data(), size }; + }; + + rsx::io_buffer iobuf(allocate_mem); + upload_transform_constants(iobuf); + + data_range = { bound_range.first, ::size32(iobuf) }; + data_source = iobuf.data(); + } + + // Move data to memory that the GPU can work with + if (manually_flush_ring_buffers) + { + m_scratch_ring_buffer->reserve_storage_on_heap(data_range.second); + } + + auto mapping = m_scratch_ring_buffer->alloc_from_heap(data_range.second, 16); + std::memcpy(mapping.first, data_source, data_range.second); + + if (manually_flush_ring_buffers) + { + m_scratch_ring_buffer->unmap(); + } + m_scratch_ring_buffer->notify(); + + // Do the transfer to patch the constants on the host device + m_scratch_ring_buffer->copy_to(m_transform_constants_buffer.get(), mapping.second, data_range.first, data_range.second); +} + bool GLGSRender::on_access_violation(u32 address, bool is_writing) { 
const bool can_flush = is_current_thread(); - const rsx::invalidation_cause cause = - is_writing ? (can_flush ? rsx::invalidation_cause::write : rsx::invalidation_cause::deferred_write) - : (can_flush ? rsx::invalidation_cause::read : rsx::invalidation_cause::deferred_read); + const rsx::invalidation_cause cause = is_writing + ? (can_flush ? rsx::invalidation_cause::write : rsx::invalidation_cause::deferred_write) + : (can_flush ? rsx::invalidation_cause::read : rsx::invalidation_cause::deferred_read); auto cmd = can_flush ? gl::command_context{ gl_state } : gl::command_context{}; auto result = m_gl_texture_cache.invalidate_address(cmd, address, cause); diff --git a/rpcs3/Emu/RSX/GL/GLGSRender.h b/rpcs3/Emu/RSX/GL/GLGSRender.h index baf9179aec..c339f7dc39 100644 --- a/rpcs3/Emu/RSX/GL/GLGSRender.h +++ b/rpcs3/Emu/RSX/GL/GLGSRender.h @@ -108,6 +108,9 @@ class GLGSRender : public GSRender, public ::rsx::reports::ZCULL_control // Identity buffer used to fix broken gl_VertexID on ATI stack std::unique_ptr m_identity_index_buffer; + // Used for hot-patching + std::unique_ptr m_scratch_ring_buffer; + std::unique_ptr m_vertex_cache; std::unique_ptr m_shaders_cache; @@ -142,7 +145,7 @@ class GLGSRender : public GSRender, public ::rsx::reports::ZCULL_control std::array, rsx::limits::fragment_textures_count> fs_sampler_state = {}; std::array, rsx::limits::vertex_textures_count> vs_sampler_state = {}; std::unordered_map> m_null_textures; - std::vector m_scratch_buffer; + rsx::simple_array m_scratch_buffer; // Occlusion query type, can be SAMPLES_PASSED or ANY_SAMPLES_PASSED GLenum m_occlusion_type = GL_ANY_SAMPLES_PASSED; @@ -166,6 +169,7 @@ private: bool load_program(); void load_program_env(); void update_vertex_env(const gl::vertex_upload_info& upload_info); + void upload_transform_constants(const rsx::io_buffer& buffer); void update_draw_state(); @@ -182,12 +186,16 @@ public: bool scaled_image_from_memory(const rsx::blit_src_info& src_info, const rsx::blit_dst_info& 
dst_info, bool interpolate) override; + // ZCULL void begin_occlusion_query(rsx::reports::occlusion_query_info* query) override; void end_occlusion_query(rsx::reports::occlusion_query_info* query) override; bool check_occlusion_query_status(rsx::reports::occlusion_query_info* query) override; void get_occlusion_query_result(rsx::reports::occlusion_query_info* query) override; void discard_occlusion_query(rsx::reports::occlusion_query_info* query) override; + // GRAPH backend + void patch_transform_constants(rsx::context* ctx, u32 index, u32 count) override; + protected: void clear_surface(u32 arg) override; void begin() override;