From a09111052a9542ded709b79a169f5958dbb779c3 Mon Sep 17 00:00:00 2001 From: kd-11 Date: Tue, 23 Apr 2024 02:21:19 +0300 Subject: [PATCH] rsx/vk: Implement batched transform constant updates --- rpcs3/Emu/RSX/Common/BufferUtils.h | 4 +- rpcs3/Emu/RSX/NV47/FW/GRAPH_backend.h | 4 +- rpcs3/Emu/RSX/NV47/FW/draw_call.cpp | 19 ++++++++-- rpcs3/Emu/RSX/NV47/HW/nv4097.cpp | 40 ++++++++++++++------ rpcs3/Emu/RSX/NV47/HW/nv4097.h | 4 ++ rpcs3/Emu/RSX/RSXFIFO.cpp | 5 +++ rpcs3/Emu/RSX/RSXFIFO.h | 2 + rpcs3/Emu/RSX/VK/VKDraw.cpp | 37 ------------------ rpcs3/Emu/RSX/VK/VKGSRender.cpp | 54 ++++++++++++++++++++++++++- rpcs3/Emu/RSX/VK/VKGSRender.h | 4 +- 10 files changed, 114 insertions(+), 59 deletions(-) diff --git a/rpcs3/Emu/RSX/Common/BufferUtils.h b/rpcs3/Emu/RSX/Common/BufferUtils.h index f2e87445db..183ee1f8da 100644 --- a/rpcs3/Emu/RSX/Common/BufferUtils.h +++ b/rpcs3/Emu/RSX/Common/BufferUtils.h @@ -39,7 +39,7 @@ std::tuple write_index_array_data_to_buffer(std::span void write_index_array_for_non_indexed_non_native_primitive_to_buffer(char* dst, rsx::primitive_type draw_mode, unsigned count); // Copy and swap data in 32-bit units -extern void(*const copy_data_swap_u32)(u32*, const u32*, u32); +extern void(*const copy_data_swap_u32)(u32* dst, const u32* src, u32 count); // Copy and swap data in 32-bit units, return true if changed -extern bool(*const copy_data_swap_u32_cmp)(u32*, const u32*, u32); +extern bool(*const copy_data_swap_u32_cmp)(u32* dst, const u32* src, u32 count); diff --git a/rpcs3/Emu/RSX/NV47/FW/GRAPH_backend.h b/rpcs3/Emu/RSX/NV47/FW/GRAPH_backend.h index aa50e3eb0e..ab8a02984f 100644 --- a/rpcs3/Emu/RSX/NV47/FW/GRAPH_backend.h +++ b/rpcs3/Emu/RSX/NV47/FW/GRAPH_backend.h @@ -15,7 +15,7 @@ namespace rsx // virtual void begin() = 0; // virtual void end() = 0; - // Patch transform constants - virtual void patch_transform_constants(context* ctx, u32 first_index, const std::span& data) {}; + // Patch transform constants. 
Units are in 32x4 units + virtual void patch_transform_constants(context* /*ctx*/, u32 /*index*/, u32 /*count*/) {}; }; } diff --git a/rpcs3/Emu/RSX/NV47/FW/draw_call.cpp b/rpcs3/Emu/RSX/NV47/FW/draw_call.cpp index 4c2b73b8b2..39c8aa01e7 100644 --- a/rpcs3/Emu/RSX/NV47/FW/draw_call.cpp +++ b/rpcs3/Emu/RSX/NV47/FW/draw_call.cpp @@ -3,6 +3,7 @@ #include "Emu/RSX/rsx_methods.h" // FIXME #include "Emu/RSX/rsx_utils.h" +#include "Emu/RSX/RSXThread.h" #include "Emu/RSX/Common/BufferUtils.h" #include "Emu/RSX/NV47/HW/context.h" #include "Emu/RSX/NV47/HW/nv4097.h" @@ -115,33 +116,45 @@ namespace rsx switch (barrier.type) { case primitive_restart_barrier: + { break; + } case index_base_modifier_barrier: + { // Change index base offset REGS(ctx)->decode(NV4097_SET_VERTEX_DATA_BASE_INDEX, barrier.arg0); result |= index_base_changed; break; + } case vertex_base_modifier_barrier: + { // Change vertex base offset REGS(ctx)->decode(NV4097_SET_VERTEX_DATA_BASE_OFFSET, barrier.arg0); result |= vertex_base_changed; break; + } case vertex_array_offset_modifier_barrier: + { // Change vertex array offset REGS(ctx)->decode(NV4097_SET_VERTEX_DATA_ARRAY_OFFSET + barrier.index, barrier.arg0); result |= vertex_arrays_changed; break; + } case transform_constant_load_modifier_barrier: + { // Change the transform load target. Does not change result mask. REGS(ctx)->decode(NV4097_SET_TRANSFORM_PROGRAM_LOAD, barrier.arg0); break; + } case transform_constant_update_barrier: + { // Update transform constants - // REGS(ctx)->decode(NV4097_SET_TRANSFORM_CONSTANT + barrier.index, barrier.arg); // This statement technically does the right thing but has no consequence other than wasting perf. 
- // FIXME: Batching - nv4097::set_transform_constant::decode_one(ctx, NV4097_SET_TRANSFORM_CONSTANT + barrier.index, barrier.arg0); + auto ptr = RSX(ctx)->fifo_ctrl->translate_address(barrier.arg0); + auto buffer = std::span(static_cast(vm::base(ptr)), barrier.arg1); + nv4097::set_transform_constant::batch_decode(ctx, NV4097_SET_TRANSFORM_CONSTANT + barrier.index, buffer); result |= transform_constants_changed; break; + } default: fmt::throw_exception("Unreachable"); } diff --git a/rpcs3/Emu/RSX/NV47/HW/nv4097.cpp b/rpcs3/Emu/RSX/NV47/HW/nv4097.cpp index e31450bec2..be929c6c33 100644 --- a/rpcs3/Emu/RSX/NV47/HW/nv4097.cpp +++ b/rpcs3/Emu/RSX/NV47/HW/nv4097.cpp @@ -30,20 +30,22 @@ namespace rsx REGS(ctx)->transform_constants[load + constant_id][subreg] = arg; } + void set_transform_constant::batch_decode(context* ctx, u32 reg, const std::span& args) + { + const u32 index = reg - NV4097_SET_TRANSFORM_CONSTANT; + const u32 constant_id = index / 4; + const u8 subreg = index % 4; + const u32 load = REGS(ctx)->transform_constant_load(); + + auto dst = &REGS(ctx)->transform_constants[load + constant_id][subreg]; + copy_data_swap_u32(dst, args.data(), ::size32(args)); + + const u32 last_constant_id = ((reg + ::size32(args) + 3) - NV4097_SET_TRANSFORM_CONSTANT) / 4; // Aligned div + RSX(ctx)->patch_transform_constants(ctx, load + constant_id, last_constant_id - constant_id); + } + void set_transform_constant::impl(context* ctx, u32 reg, u32 arg) { - if (RSX(ctx)->in_begin_end && !REGS(ctx)->current_draw_clause.empty()) - { - // Updating constants mid-draw is messy. Push attr barrier. 
- REGS(ctx)->current_draw_clause.insert_command_barrier( - rsx::transform_constant_update_barrier, - arg, - 0, - reg - NV4097_SET_TRANSFORM_CONSTANT - ); - return; - } - const u32 index = reg - NV4097_SET_TRANSFORM_CONSTANT; const u32 constant_id = index / 4; const u8 subreg = index % 4; @@ -73,6 +75,20 @@ namespace rsx rcount = 0; } + if (RSX(ctx)->in_begin_end && !REGS(ctx)->current_draw_clause.empty()) + { + // Updating constants mid-draw is messy. Defer the writes + REGS(ctx)->current_draw_clause.insert_command_barrier( + rsx::transform_constant_update_barrier, + RSX(ctx)->fifo_ctrl->get_pos(), + rcount, + reg - NV4097_SET_TRANSFORM_CONSTANT + ); + + RSX(ctx)->fifo_ctrl->skip_methods(rcount - 1); + return; + } + const auto values = &REGS(ctx)->transform_constants[load + constant_id][subreg]; const auto fifo_span = RSX(ctx)->fifo_ctrl->get_current_arg_ptr(); diff --git a/rpcs3/Emu/RSX/NV47/HW/nv4097.h b/rpcs3/Emu/RSX/NV47/HW/nv4097.h index 06df8a53f6..d6c1a33f2d 100644 --- a/rpcs3/Emu/RSX/NV47/HW/nv4097.h +++ b/rpcs3/Emu/RSX/NV47/HW/nv4097.h @@ -6,6 +6,8 @@ #include "Emu/RSX/gcm_enums.h" #include "Emu/RSX/NV47/FW/draw_call.inc.h" +#include + namespace rsx { enum command_barrier_type : u32; @@ -201,6 +203,8 @@ namespace rsx static void impl(context* ctx, u32 reg, u32 arg); static void decode_one(context* ctx, u32 reg, u32 arg); + + static void batch_decode(context* ctx, u32 reg, const std::span& args); }; struct set_transform_program diff --git a/rpcs3/Emu/RSX/RSXFIFO.cpp b/rpcs3/Emu/RSX/RSXFIFO.cpp index 6761124872..ae97509be4 100644 --- a/rpcs3/Emu/RSX/RSXFIFO.cpp +++ b/rpcs3/Emu/RSX/RSXFIFO.cpp @@ -29,6 +29,11 @@ namespace rsx m_iotable = &pctrl->iomap_table; } + u32 FIFO_control::translate_address(u32 address) const + { + return m_iotable->get_addr(address); + } + void FIFO_control::sync_get() const { m_ctrl->get.release(m_internal_get); diff --git a/rpcs3/Emu/RSX/RSXFIFO.h b/rpcs3/Emu/RSX/RSXFIFO.h index 7cb91239e1..8eb614b520 100644 --- 
a/rpcs3/Emu/RSX/RSXFIFO.h +++ b/rpcs3/Emu/RSX/RSXFIFO.h @@ -151,6 +151,8 @@ namespace rsx FIFO_control(rsx::thread* pctrl); ~FIFO_control() = default; + u32 translate_address(u32 addr) const; + std::pair fetch_u32(u32 addr); void invalidate_cache() { m_cache_size = 0; } diff --git a/rpcs3/Emu/RSX/VK/VKDraw.cpp b/rpcs3/Emu/RSX/VK/VKDraw.cpp index f74cf485ce..933436a041 100644 --- a/rpcs3/Emu/RSX/VK/VKDraw.cpp +++ b/rpcs3/Emu/RSX/VK/VKDraw.cpp @@ -732,43 +732,6 @@ void VKGSRender::emit_geometry(u32 sub_index) } } - if (state_flags & rsx::transform_constants_changed) - { - auto allocate_mem = [&](usz size) -> std::pair - { - vertex_scratchpad.resize(size); - return { vertex_scratchpad.data(), size }; - }; - - rsx::io_buffer iobuf(allocate_mem); - upload_transform_constants(iobuf); - - ensure(iobuf.size() >= m_vertex_constants_buffer_info.range); - - vk::insert_buffer_memory_barrier( - *m_current_command_buffer, - m_vertex_constants_buffer_info.buffer, - m_vertex_constants_buffer_info.offset, - m_vertex_constants_buffer_info.range, - VK_PIPELINE_STAGE_VERTEX_SHADER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, - VK_ACCESS_MEMORY_READ_BIT, VK_ACCESS_TRANSFER_WRITE_BIT); - - vkCmdUpdateBuffer( - *m_current_command_buffer, - m_vertex_constants_buffer_info.buffer, - m_vertex_constants_buffer_info.offset, - m_vertex_constants_buffer_info.range, - iobuf.data()); - - vk::insert_buffer_memory_barrier( - *m_current_command_buffer, - m_vertex_constants_buffer_info.buffer, - m_vertex_constants_buffer_info.offset, - m_vertex_constants_buffer_info.range, - VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_VERTEX_SHADER_BIT, - VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_MEMORY_READ_BIT); - } - if ((state_flags & vertex_state_mask) && !m_vertex_layout.validate()) { // No vertex inputs enabled diff --git a/rpcs3/Emu/RSX/VK/VKGSRender.cpp b/rpcs3/Emu/RSX/VK/VKGSRender.cpp index 4a7b970766..dfd57895e2 100644 --- a/rpcs3/Emu/RSX/VK/VKGSRender.cpp +++ b/rpcs3/Emu/RSX/VK/VKGSRender.cpp @@ -15,6 +15,7 
@@ #include "vkutils/scratch.h" #include "Emu/RSX/rsx_methods.h" +#include "Emu/RSX/NV47/HW/context_accessors.define.h" #include "Emu/Memory/vm_locking.h" #include "../Program/program_state_cache2.hpp" @@ -2354,9 +2355,60 @@ void VKGSRender::update_vertex_env(u32 id, const vk::vertex_upload_info& vertex_ m_vertex_layout_ring_info.unmap(); } -void VKGSRender::patch_transform_constants(rsx::context* ctx, u32 first_index, const std::span& data) +void VKGSRender::patch_transform_constants(rsx::context* ctx, u32 index, u32 count) { + // Hot-patching transform constants mid-draw (instanced draw) + utils::address_range data_range; + void* data_source = nullptr; + if (!m_vertex_prog || m_vertex_prog->has_indexed_constants) + { + // We're working with a full range. We can do a direct patch in this case since no index translation is required. + const auto byte_count = count * 16; + const auto byte_offset = index * 16; + + data_range = utils::address_range::start_length(m_vertex_constants_buffer_info.offset + byte_offset, byte_count); + data_source = &REGS(ctx)->transform_constants[index]; + } + else + { + // Indexed. This is a bit trickier. 
Use scratchpad to avoid UAF + auto allocate_mem = [&](usz size) -> std::pair + { + scratchpad.resize(size); + return { scratchpad.data(), size }; + }; + + rsx::io_buffer iobuf(allocate_mem); + upload_transform_constants(iobuf); + + ensure(iobuf.size() >= m_vertex_constants_buffer_info.range); + data_range = utils::address_range::start_length(m_vertex_constants_buffer_info.offset, m_vertex_constants_buffer_info.range); + data_source = iobuf.data(); + } + + vk::insert_buffer_memory_barrier( + *m_current_command_buffer, + m_vertex_constants_buffer_info.buffer, + data_range.start, + data_range.length(), + VK_PIPELINE_STAGE_VERTEX_SHADER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, + VK_ACCESS_MEMORY_READ_BIT, VK_ACCESS_TRANSFER_WRITE_BIT); + + vkCmdUpdateBuffer( + *m_current_command_buffer, + m_vertex_constants_buffer_info.buffer, + data_range.start, + data_range.length(), + data_source); + + vk::insert_buffer_memory_barrier( + *m_current_command_buffer, + m_vertex_constants_buffer_info.buffer, + data_range.start, + data_range.length(), + VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_VERTEX_SHADER_BIT, + VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_MEMORY_READ_BIT); } void VKGSRender::init_buffers(rsx::framebuffer_creation_context context, bool) diff --git a/rpcs3/Emu/RSX/VK/VKGSRender.h b/rpcs3/Emu/RSX/VK/VKGSRender.h index f1d222f227..58fab6656d 100644 --- a/rpcs3/Emu/RSX/VK/VKGSRender.h +++ b/rpcs3/Emu/RSX/VK/VKGSRender.h @@ -234,7 +234,7 @@ private: VkDescriptorSet allocate_descriptor_set(); vk::vertex_upload_info upload_vertex_data(); - rsx::simple_array vertex_scratchpad; + rsx::simple_array scratchpad; bool load_program(); void load_program_env(); @@ -277,7 +277,7 @@ public: inline std::pair map_host_object_data() { return { m_host_data_ptr, m_host_object_data->value }; } // GRAPH backend - void patch_transform_constants(rsx::context* ctx, u32 first_index, const std::span& data) override; + void patch_transform_constants(rsx::context* ctx, u32 index, u32 count) override; 
protected: void clear_surface(u32 mask) override;