From 2ca935a26b5a165b8396d2db60969a6eb824551a Mon Sep 17 00:00:00 2001 From: kd-11 Date: Sun, 1 Jul 2018 20:37:05 +0300 Subject: [PATCH] vp: Improve vertex program analyser - Adds dead code elimination - Fix absolute branch target addresses to take base address into account - Patch branch targets relative to base address to improve hash matching - Bumps shader cache version - Enables shader logging option to write out vertex program binary, helpful when debugging problems. --- rpcs3/Emu/RSX/Common/ProgramStateCache.cpp | 245 +++++++++++++++--- rpcs3/Emu/RSX/Common/ProgramStateCache.h | 5 +- .../RSX/Common/VertexProgramDecompiler.cpp | 148 +++++------ .../Emu/RSX/Common/VertexProgramDecompiler.h | 3 +- rpcs3/Emu/RSX/D3D12/D3D12PipelineState.cpp | 2 +- rpcs3/Emu/RSX/GL/GLGSRender.cpp | 2 +- rpcs3/Emu/RSX/RSXThread.cpp | 93 +++---- rpcs3/Emu/RSX/RSXThread.h | 2 +- rpcs3/Emu/RSX/RSXVertexProgram.h | 10 +- rpcs3/Emu/RSX/VK/VKGSRender.cpp | 2 +- rpcs3/Emu/RSX/rsx_cache.h | 49 ++++ rpcs3/Emu/RSX/rsx_utils.h | 38 +++ 12 files changed, 427 insertions(+), 172 deletions(-) diff --git a/rpcs3/Emu/RSX/Common/ProgramStateCache.cpp b/rpcs3/Emu/RSX/Common/ProgramStateCache.cpp index 9a1734a120..0fee68931a 100644 --- a/rpcs3/Emu/RSX/Common/ProgramStateCache.cpp +++ b/rpcs3/Emu/RSX/Common/ProgramStateCache.cpp @@ -1,5 +1,8 @@ #include "stdafx.h" #include "ProgramStateCache.h" +#include "Emu/System.h" + +#include using namespace program_hash_util; @@ -12,54 +15,222 @@ size_t vertex_program_utils::get_vertex_program_ucode_hash(const RSXVertexProgra bool end = false; for (unsigned i = 0; i < program.data.size() / 4; i++) { - const qword inst = instbuffer[instIndex]; - hash ^= inst.dword[0]; - hash += (hash << 1) + (hash << 4) + (hash << 5) + (hash << 7) + (hash << 8) + (hash << 40); - hash ^= inst.dword[1]; - hash += (hash << 1) + (hash << 4) + (hash << 5) + (hash << 7) + (hash << 8) + (hash << 40); + if (program.instruction_mask[i]) + { + const qword inst = instbuffer[instIndex]; + 
hash ^= inst.dword[0]; + hash += (hash << 1) + (hash << 4) + (hash << 5) + (hash << 7) + (hash << 8) + (hash << 40); + hash ^= inst.dword[1]; + hash += (hash << 1) + (hash << 4) + (hash << 5) + (hash << 7) + (hash << 8) + (hash << 40); + } + instIndex++; } return hash; } -vertex_program_utils::vertex_program_metadata vertex_program_utils::analyse_vertex_program(const std::vector& data) +vertex_program_utils::vertex_program_metadata vertex_program_utils::analyse_vertex_program(const u32* data, u32 entry, RSXVertexProgram& dst_prog) { - u32 ucode_size = 0; - u32 current_instrution = 0; + vertex_program_utils::vertex_program_metadata result; u32 last_instruction_address = 0; + u32 first_instruction_address = entry; + + std::stack call_stack; + std::pair instruction_range = { UINT32_MAX, 0 }; + std::bitset<512> instructions_to_patch; + bool has_branch_instruction = false; + D3 d3; D2 d2; D1 d1; + D0 d0; - for (; ucode_size < data.size(); ucode_size += 4) + std::function walk_function = [&](u32 start, bool fast_exit) { - d1.HEX = data[ucode_size + 1]; - d3.HEX = data[ucode_size + 3]; + u32 current_instrution = start; + std::set conditional_targets; - switch (d1.sca_opcode) + while (true) { - case RSX_SCA_OPCODE_BRI: - case RSX_SCA_OPCODE_BRB: - case RSX_SCA_OPCODE_CAL: - case RSX_SCA_OPCODE_CLI: - case RSX_SCA_OPCODE_CLB: - { - d2.HEX = data[ucode_size + 2]; + verify(HERE), current_instrution < 512; - u32 jump_address = ((d2.iaddrh << 3) | d3.iaddrl) * 4; - last_instruction_address = std::max(last_instruction_address, jump_address); - break; - } + if (result.instruction_mask[current_instrution]) + { + if (!fast_exit) + { + // This can be harmless if a dangling RET was encountered before + LOG_ERROR(RSX, "vp_analyser: Possible infinite loop detected"); + current_instrution++; + continue; + } + else + { + // Block walk, looking for earliest exit + break; + } + } + + const qword* instruction = (const qword*)&data[current_instrution * 4]; + d1.HEX = instruction->word[1]; + 
d3.HEX = instruction->word[3]; + + // Touch current instruction + result.instruction_mask[current_instrution] = 1; + instruction_range.first = std::min(current_instrution, instruction_range.first); + instruction_range.second = std::max(current_instrution, instruction_range.second); + + bool static_jump = false; + bool function_call = true; + + switch (d1.sca_opcode) + { + case RSX_SCA_OPCODE_BRI: + { + d0.HEX = instruction->word[0]; + static_jump = (d0.cond == 0x7); + // Fall through + } + case RSX_SCA_OPCODE_BRB: + { + function_call = false; + // Fall through + } + case RSX_SCA_OPCODE_CAL: + case RSX_SCA_OPCODE_CLI: + case RSX_SCA_OPCODE_CLB: + { + // Need to patch the jump address to be consistent wherever the program is located + instructions_to_patch[current_instrution] = true; + has_branch_instruction = true; + + d2.HEX = instruction->word[2]; + const u32 jump_address = ((d2.iaddrh << 3) | d3.iaddrl); + + if (function_call) + { + call_stack.push(current_instrution + 1); + current_instrution = jump_address; + continue; + } + else if (static_jump) + { + // NOTE: This will skip potential jump target blocks between current->target + current_instrution = jump_address; + continue; + } + else + { + // Set possible end address and proceed as usual + conditional_targets.emplace(jump_address); + instruction_range.second = std::max(jump_address, instruction_range.second); + } + + break; + } + case RSX_SCA_OPCODE_RET: + { + if (call_stack.empty()) + { + LOG_ERROR(RSX, "vp_analyser: RET found outside subroutine call"); + } + else + { + current_instrution = call_stack.top(); + call_stack.pop(); + continue; + } + + break; + } + } + + if (d3.end && (fast_exit || current_instrution >= instruction_range.second) || + (current_instrution + 1) == 512) + { + break; + } + + current_instrution++; } - if (d3.end && (ucode_size >= last_instruction_address)) + for (const u32 target : conditional_targets) { - //Jumping over an end label is legal (verified) - break; + if 
(!result.instruction_mask[target]) + { + walk_function(target, true); + } + } + }; + + if (g_cfg.video.log_programs) + { + fs::file dump(fs::get_config_dir() + "shaderlog/vp_analyser.bin", fs::rewrite); + dump.write(&entry, 4); + dump.write(data, 512 * 16); + dump.close(); + } + + walk_function(entry, false); + + const u32 instruction_count = (instruction_range.second - instruction_range.first + 1); + result.ucode_length = instruction_count * 16; + + dst_prog.base_address = instruction_range.first; + dst_prog.entry = entry; + dst_prog.data.resize(instruction_count * 4); + dst_prog.instruction_mask = (result.instruction_mask >> instruction_range.first); + + if (!has_branch_instruction) + { + verify(HERE), instruction_range.first == entry; + std::memcpy(dst_prog.data.data(), data + (instruction_range.first * 4), result.ucode_length); + } + else + { + for (u32 i = instruction_range.first, count = 0; i <= instruction_range.second; ++i, ++count) + { + const qword* instruction = (const qword*)&data[i * 4]; + qword* dst = (qword*)&dst_prog.data[count * 4]; + + if (result.instruction_mask[i]) + { + dst->dword[0] = instruction->dword[0]; + dst->dword[1] = instruction->dword[1]; + + if (instructions_to_patch[i]) + { + d2.HEX = dst->word[2]; + d3.HEX = dst->word[3]; + + u32 address = ((d2.iaddrh << 3) | d3.iaddrl); + address -= instruction_range.first; + + d2.iaddrh = (address >> 3); + d3.iaddrl = (address & 0x7); + dst->word[2] = d2.HEX; + dst->word[3] = d3.HEX; + + dst_prog.jump_table.emplace(address); + } + } + else + { + dst->dword[0] = 0ull; + dst->dword[1] = 0ull; + } + } + + // Verification: jump_table entries are relative to base_address, so test the rebased mask (dst_prog), not the absolute one (result) + for (const u32 target : dst_prog.jump_table) + { + if (!dst_prog.instruction_mask[target]) + { + LOG_ERROR(RSX, "vp_analyser: Failed, branch target 0x%x was not resolved", target); + } + } } - return{ ucode_size + 4 }; + return result; } size_t vertex_program_storage_hash::operator()(const RSXVertexProgram &program) const @@ -75,6 +246,8 @@ bool 
vertex_program_compare::operator()(const RSXVertexProgram &binary1, const R return false; if (binary1.data.size() != binary2.data.size()) return false; + if (binary1.jump_table != binary2.jump_table) + return false; if (!binary1.skip_vertex_input_check && !binary2.skip_vertex_input_check && binary1.rsx_vertex_inputs != binary2.rsx_vertex_inputs) return false; @@ -83,10 +256,22 @@ bool vertex_program_compare::operator()(const RSXVertexProgram &binary1, const R size_t instIndex = 0; for (unsigned i = 0; i < binary1.data.size() / 4; i++) { - const qword& inst1 = instBuffer1[instIndex]; - const qword& inst2 = instBuffer2[instIndex]; - if (inst1.dword[0] != inst2.dword[0] || inst1.dword[1] != inst2.dword[1]) + const auto active = binary1.instruction_mask[instIndex]; + if (active != binary2.instruction_mask[instIndex]) + { return false; + } + + if (active) + { + const qword& inst1 = instBuffer1[instIndex]; + const qword& inst2 = instBuffer2[instIndex]; + if (inst1.dword[0] != inst2.dword[0] || inst1.dword[1] != inst2.dword[1]) + { + return false; + } + } + instIndex++; } diff --git a/rpcs3/Emu/RSX/Common/ProgramStateCache.h b/rpcs3/Emu/RSX/Common/ProgramStateCache.h index 3ddedd9561..f971a38e38 100644 --- a/rpcs3/Emu/RSX/Common/ProgramStateCache.h +++ b/rpcs3/Emu/RSX/Common/ProgramStateCache.h @@ -29,12 +29,13 @@ namespace program_hash_util { struct vertex_program_metadata { - u32 ucode_size; + std::bitset<512> instruction_mask; + u32 ucode_length; }; static size_t get_vertex_program_ucode_hash(const RSXVertexProgram &program); - static vertex_program_metadata analyse_vertex_program(const std::vector& data); + static vertex_program_metadata analyse_vertex_program(const u32* data, u32 entry, RSXVertexProgram& dst_prog); }; struct vertex_program_storage_hash diff --git a/rpcs3/Emu/RSX/Common/VertexProgramDecompiler.cpp b/rpcs3/Emu/RSX/Common/VertexProgramDecompiler.cpp index b4dcf35e04..2737c6a108 100644 --- a/rpcs3/Emu/RSX/Common/VertexProgramDecompiler.cpp +++ 
b/rpcs3/Emu/RSX/Common/VertexProgramDecompiler.cpp @@ -409,106 +409,54 @@ std::string VertexProgramDecompiler::BuildCode() } VertexProgramDecompiler::VertexProgramDecompiler(const RSXVertexProgram& prog) : - m_data(prog.data) + m_prog(prog) { } std::string VertexProgramDecompiler::Decompile() { - for (unsigned i = 0; i < PF_PARAM_COUNT; i++) - m_parr.params[i].clear(); - - m_instr_count = m_data.size() / 4; - - for (int i = 0; i < m_max_instr_count; ++i) - { - m_instructions[i].reset(); - } + const auto& data = m_prog.data; + m_instr_count = data.size() / 4; bool is_has_BRA = false; bool program_end = false; u32 i = 1; u32 last_label_addr = 0; - while (i < m_data.size()) + for (unsigned i = 0; i < PF_PARAM_COUNT; i++) { - if (is_has_BRA) - { - d3.HEX = m_data[i]; - i += 4; - } - else - { - d1.HEX = m_data[i++]; - - switch (d1.sca_opcode) - { - case RSX_SCA_OPCODE_BRA: - { - LOG_ERROR(RSX, "Unimplemented VP opcode BRA"); - is_has_BRA = true; - m_jump_lvls.clear(); - d3.HEX = m_data[++i]; - i += 4; - break; - } - case RSX_SCA_OPCODE_BRB: - case RSX_SCA_OPCODE_BRI: - case RSX_SCA_OPCODE_CAL: - case RSX_SCA_OPCODE_CLI: - case RSX_SCA_OPCODE_CLB: - { - d2.HEX = m_data[i++]; - d3.HEX = m_data[i]; - i += 2; - - const u32 label_addr = GetAddr(); - last_label_addr = std::max(last_label_addr, label_addr); - m_jump_lvls.emplace(label_addr); - break; - } - default: - { - d3.HEX = m_data[++i]; - i += 2; - break; - } - } - } + m_parr.params[i].clear(); } - uint jump_position = 0; - if (is_has_BRA || !m_jump_lvls.empty()) + for (int i = 0; i < m_max_instr_count; ++i) { - m_cur_instr = &m_instructions[0]; - AddCode("int jump_position = 0;"); - AddCode("while (true)"); - AddCode("{"); - m_cur_instr->open_scopes++; + m_instructions[i].reset(); + } - AddCode(fmt::format("if (jump_position <= %u)", jump_position++)); - AddCode("{"); - m_cur_instr->open_scopes++; + if (m_prog.jump_table.size()) + { + last_label_addr = *m_prog.jump_table.rbegin(); } auto find_jump_lvl = [this](u32 
address) { u32 jump = 1; - for (auto pos : m_jump_lvls) + for (auto pos : m_prog.jump_table) { if (address == pos) - break; + return jump; ++jump; } - return jump; + return UINT32_MAX; }; auto do_function_call = [this, &i](const std::string& condition) { - //call function + // Call function + // NOTE: Addresses are assumed to have been patched m_call_stack.push(i+1); AddCode(condition); AddCode("{"); @@ -552,17 +500,41 @@ std::string VertexProgramDecompiler::Decompile() } }; - for (i = 0; i < m_instr_count; ++i) + if (is_has_BRA || !m_prog.jump_table.empty()) { - if (m_call_stack.empty()) + m_cur_instr = &m_instructions[0]; + + u32 jump_position = 0; + if (m_prog.entry != m_prog.base_address) { - m_cur_instr = &m_instructions[i]; + jump_position = find_jump_lvl(m_prog.entry - m_prog.base_address); + verify(HERE), jump_position != UINT32_MAX; } - d0.HEX = m_data[i * 4 + 0]; - d1.HEX = m_data[i * 4 + 1]; - d2.HEX = m_data[i * 4 + 2]; - d3.HEX = m_data[i * 4 + 3]; + AddCode(fmt::format("int jump_position = %u;", jump_position)); + AddCode("while (true)"); + AddCode("{"); + m_cur_instr->open_scopes++; + + AddCode("if (jump_position <= 0)"); + AddCode("{"); + m_cur_instr->open_scopes++; + } + + for (i = 0; i < m_instr_count; ++i) + { + if (!m_prog.instruction_mask[i]) + { + // Dead code, skip + continue; + } + + m_cur_instr = &m_instructions[i]; + + d0.HEX = data[i * 4 + 0]; + d1.HEX = data[i * 4 + 1]; + d2.HEX = data[i * 4 + 2]; + d3.HEX = data[i * 4 + 3]; src[0].src0l = d2.src0l; src[0].src0h = d1.src0h; @@ -570,27 +542,29 @@ std::string VertexProgramDecompiler::Decompile() src[2].src2l = d3.src2l; src[2].src2h = d2.src2h; - if (!src[0].reg_type || !src[1].reg_type || !src[2].reg_type) - { - AddCode("//Src check failed. Aborting"); - program_end = true; - } - - if (m_call_stack.empty()) + if (m_call_stack.empty() && i) { //TODO: Subroutines can also have arbitrary jumps! 
- if (i && (is_has_BRA || std::find(m_jump_lvls.begin(), m_jump_lvls.end(), i) != m_jump_lvls.end())) + u32 jump_position = find_jump_lvl(i); + if (is_has_BRA || jump_position != UINT32_MAX) { m_cur_instr->close_scopes++; AddCode("}"); AddCode(""); - AddCode(fmt::format("if (jump_position <= %u)", jump_position++)); + AddCode(fmt::format("if (jump_position <= %u)", jump_position)); AddCode("{"); m_cur_instr->open_scopes++; } } + if (!src[0].reg_type || !src[1].reg_type || !src[2].reg_type) + { + AddCode("//Src check failed. Aborting"); + program_end = true; + d1.vec_opcode = d1.sca_opcode = 0; + } + switch (d1.vec_opcode) { case RSX_VEC_OPCODE_NOP: break; @@ -754,7 +728,7 @@ std::string VertexProgramDecompiler::Decompile() if ((i + 1) < m_instr_count) { // In rare cases, this might be harmless (large coalesced program blocks controlled via branches aka ubershaders) - LOG_ERROR(RSX, "Vertex program aborted prematurely. Expect glitches"); + LOG_ERROR(RSX, "Vertex program block aborts prematurely. 
Expect glitches"); } break; @@ -762,7 +736,7 @@ std::string VertexProgramDecompiler::Decompile() } } - if (is_has_BRA || !m_jump_lvls.empty()) + if (is_has_BRA || !m_prog.jump_table.empty()) { m_cur_instr = &m_instructions[m_instr_count - 1]; m_cur_instr->close_scopes++; @@ -774,8 +748,6 @@ std::string VertexProgramDecompiler::Decompile() std::string result = BuildCode(); - m_jump_lvls.clear(); m_body.clear(); - return result; } diff --git a/rpcs3/Emu/RSX/Common/VertexProgramDecompiler.h b/rpcs3/Emu/RSX/Common/VertexProgramDecompiler.h index 4399948158..9d5600ed70 100644 --- a/rpcs3/Emu/RSX/Common/VertexProgramDecompiler.h +++ b/rpcs3/Emu/RSX/Common/VertexProgramDecompiler.h @@ -53,11 +53,10 @@ struct VertexProgramDecompiler Instruction* m_cur_instr; size_t m_instr_count; - std::set m_jump_lvls; std::vector m_body; std::stack m_call_stack; - const std::vector& m_data; + const RSXVertexProgram& m_prog; ParamArray m_parr; std::string NotZeroPositive(const std::string& code); diff --git a/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.cpp b/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.cpp index c1b1ef7ab0..4494a97dd0 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.cpp @@ -53,7 +53,7 @@ void D3D12GSRender::load_program() return std::make_tuple(true, native_pitch); }; - get_current_vertex_program(); + get_current_vertex_program(false); get_current_fragment_program_legacy(rtt_lookup_func); if (!current_fragment_program.valid) diff --git a/rpcs3/Emu/RSX/GL/GLGSRender.cpp b/rpcs3/Emu/RSX/GL/GLGSRender.cpp index b0b03282f0..1dc4ab7a74 100644 --- a/rpcs3/Emu/RSX/GL/GLGSRender.cpp +++ b/rpcs3/Emu/RSX/GL/GLGSRender.cpp @@ -24,7 +24,7 @@ namespace GLGSRender::GLGSRender() : GSRender() { - m_shaders_cache.reset(new gl::shader_cache(m_prog_buffer, "opengl", "v1.3")); + m_shaders_cache.reset(new gl::shader_cache(m_prog_buffer, "opengl", "v1.5")); if (g_cfg.video.disable_vertex_cache) m_vertex_cache.reset(new gl::null_vertex_cache()); diff 
--git a/rpcs3/Emu/RSX/RSXThread.cpp b/rpcs3/Emu/RSX/RSXThread.cpp index 33767ba2a5..8776c69453 100644 --- a/rpcs3/Emu/RSX/RSXThread.cpp +++ b/rpcs3/Emu/RSX/RSXThread.cpp @@ -1393,7 +1393,7 @@ namespace rsx return rsx::get_address(offset_zeta, m_context_dma_z); } - void thread::get_current_vertex_program() + void thread::get_current_vertex_program(bool skip_vertex_inputs) { if (!(m_graphics_state & rsx::pipeline_state::vertex_program_dirty)) return; @@ -1401,57 +1401,60 @@ namespace rsx m_graphics_state &= ~(rsx::pipeline_state::vertex_program_dirty); const u32 transform_program_start = rsx::method_registers.transform_program_start(); current_vertex_program.output_mask = rsx::method_registers.vertex_attrib_output_mask(); - current_vertex_program.skip_vertex_input_check = false; + current_vertex_program.skip_vertex_input_check = skip_vertex_inputs; current_vertex_program.rsx_vertex_inputs.resize(0); - current_vertex_program.data.resize((512 - transform_program_start) * 4); + current_vertex_program.data.reserve(512 * 4); + current_vertex_program.jump_table.clear(); - u32* ucode_src = rsx::method_registers.transform_program.data() + (transform_program_start * 4); - u32* ucode_dst = current_vertex_program.data.data(); + current_vp_metadata = program_hash_util::vertex_program_utils::analyse_vertex_program + ( + method_registers.transform_program.data(), // Input raw block + transform_program_start, // Address of entry point + current_vertex_program // [out] Program object + ); - memcpy(ucode_dst, ucode_src, current_vertex_program.data.size() * sizeof(u32)); - - current_vp_metadata = program_hash_util::vertex_program_utils::analyse_vertex_program(current_vertex_program.data); - current_vertex_program.data.resize(current_vp_metadata.ucode_size); - - const u32 input_mask = rsx::method_registers.vertex_attrib_input_mask(); - const u32 modulo_mask = rsx::method_registers.frequency_divider_operation_mask(); - - for (u8 index = 0; index < rsx::limits::vertex_count; ++index) + 
if (!skip_vertex_inputs) { - bool enabled = !!(input_mask & (1 << index)); - if (!enabled) - continue; + const u32 input_mask = rsx::method_registers.vertex_attrib_input_mask(); + const u32 modulo_mask = rsx::method_registers.frequency_divider_operation_mask(); - if (rsx::method_registers.vertex_arrays_info[index].size() > 0) + for (u8 index = 0; index < rsx::limits::vertex_count; ++index) { - current_vertex_program.rsx_vertex_inputs.push_back( - {index, - rsx::method_registers.vertex_arrays_info[index].size(), - rsx::method_registers.vertex_arrays_info[index].frequency(), - !!((modulo_mask >> index) & 0x1), - true, - is_int_type(rsx::method_registers.vertex_arrays_info[index].type()), 0}); - } - else if (vertex_push_buffers[index].vertex_count > 1) - { - current_vertex_program.rsx_vertex_inputs.push_back( - { index, - rsx::method_registers.register_vertex_info[index].size, - 1, - false, - true, - is_int_type(rsx::method_registers.vertex_arrays_info[index].type()), 0 }); - } - else if (rsx::method_registers.register_vertex_info[index].size > 0) - { - current_vertex_program.rsx_vertex_inputs.push_back( - {index, - rsx::method_registers.register_vertex_info[index].size, - rsx::method_registers.register_vertex_info[index].frequency, - !!((modulo_mask >> index) & 0x1), - false, - is_int_type(rsx::method_registers.vertex_arrays_info[index].type()), 0}); + bool enabled = !!(input_mask & (1 << index)); + if (!enabled) + continue; + + if (rsx::method_registers.vertex_arrays_info[index].size() > 0) + { + current_vertex_program.rsx_vertex_inputs.push_back( + { index, + rsx::method_registers.vertex_arrays_info[index].size(), + rsx::method_registers.vertex_arrays_info[index].frequency(), + !!((modulo_mask >> index) & 0x1), + true, + is_int_type(rsx::method_registers.vertex_arrays_info[index].type()), 0 }); + } + else if (vertex_push_buffers[index].vertex_count > 1) + { + current_vertex_program.rsx_vertex_inputs.push_back( + { index, + 
rsx::method_registers.register_vertex_info[index].size, + 1, + false, + true, + is_int_type(rsx::method_registers.vertex_arrays_info[index].type()), 0 }); + } + else if (rsx::method_registers.register_vertex_info[index].size > 0) + { + current_vertex_program.rsx_vertex_inputs.push_back( + { index, + rsx::method_registers.register_vertex_info[index].size, + rsx::method_registers.register_vertex_info[index].frequency, + !!((modulo_mask >> index) & 0x1), + false, + is_int_type(rsx::method_registers.vertex_arrays_info[index].type()), 0 }); + } } } } diff --git a/rpcs3/Emu/RSX/RSXThread.h b/rpcs3/Emu/RSX/RSXThread.h index c543081a68..26a4efc2ab 100644 --- a/rpcs3/Emu/RSX/RSXThread.h +++ b/rpcs3/Emu/RSX/RSXThread.h @@ -377,7 +377,7 @@ namespace rsx program_hash_util::fragment_program_utils::fragment_program_metadata current_fp_metadata = {}; program_hash_util::vertex_program_utils::vertex_program_metadata current_vp_metadata = {}; - void get_current_vertex_program(); + void get_current_vertex_program(bool skip_vertex_inputs = true); /** * Gets current fragment program and associated fragment state diff --git a/rpcs3/Emu/RSX/RSXVertexProgram.h b/rpcs3/Emu/RSX/RSXVertexProgram.h index c170ce1b28..04d33ee3b3 100644 --- a/rpcs3/Emu/RSX/RSXVertexProgram.h +++ b/rpcs3/Emu/RSX/RSXVertexProgram.h @@ -1,5 +1,8 @@ #pragma once +#include +#include + enum vp_reg_type { RSX_VP_REGISTER_TYPE_TEMP = 1, @@ -229,4 +232,9 @@ struct RSXVertexProgram std::vector rsx_vertex_inputs; u32 output_mask; bool skip_vertex_input_check; -}; + + u32 base_address; + u32 entry; + std::bitset<512> instruction_mask; + std::set jump_table; +}; \ No newline at end of file diff --git a/rpcs3/Emu/RSX/VK/VKGSRender.cpp b/rpcs3/Emu/RSX/VK/VKGSRender.cpp index c46d565883..1091af88fe 100644 --- a/rpcs3/Emu/RSX/VK/VKGSRender.cpp +++ b/rpcs3/Emu/RSX/VK/VKGSRender.cpp @@ -626,7 +626,7 @@ VKGSRender::VKGSRender() : GSRender() else m_vertex_cache.reset(new vk::weak_vertex_cache()); - m_shaders_cache.reset(new 
vk::shader_cache(*m_prog_buffer.get(), "vulkan", "v1.3")); + m_shaders_cache.reset(new vk::shader_cache(*m_prog_buffer.get(), "vulkan", "v1.5")); open_command_buffer(); diff --git a/rpcs3/Emu/RSX/rsx_cache.h b/rpcs3/Emu/RSX/rsx_cache.h index 299d2f93d6..96a2c7d9e2 100644 --- a/rpcs3/Emu/RSX/rsx_cache.h +++ b/rpcs3/Emu/RSX/rsx_cache.h @@ -378,6 +378,11 @@ namespace rsx u64 pipeline_storage_hash; u32 vp_ctrl; + u64 vp_instruction_mask[8]; + + u32 vp_base_address; + u32 vp_entry; + u16 vp_jump_table[32]; u32 fp_ctrl; u32 fp_texture_dimensions; @@ -653,6 +658,12 @@ namespace rsx return; } + if (vp.jump_table.size() > 32) + { + LOG_ERROR(RSX, "shaders_cache: vertex program has more than 32 jump addresses. Entry not saved to cache"); + return; + } + pipeline_data data = pack(pipeline, vp, fp); std::string fp_name = root_path + "/raw/" + fmt::format("%llX.fp", data.fragment_program_hash); std::string vp_name = root_path + "/raw/" + fmt::format("%llX.vp", data.vertex_program_hash); @@ -723,6 +734,22 @@ namespace rsx pipeline_storage_type pipeline = data.pipeline_properties; vp.output_mask = data.vp_ctrl; + vp.base_address = data.vp_base_address; + vp.entry = data.vp_entry; + + pack_bitset<512>(vp.instruction_mask, data.vp_instruction_mask); + + for (u8 index = 0; index < 32; ++index) + { + const auto address = data.vp_jump_table[index]; + if (address == UINT16_MAX) + { + // End of list marker + break; + } + + vp.jump_table.emplace(address); + } fp.ctrl = data.fp_ctrl; fp.texture_dimensions = data.fp_texture_dimensions; @@ -753,6 +780,28 @@ namespace rsx data_block.pipeline_storage_hash = m_storage.get_hash(pipeline); data_block.vp_ctrl = vp.output_mask; + data_block.vp_base_address = vp.base_address; + data_block.vp_entry = vp.entry; + + unpack_bitset<512>(vp.instruction_mask, data_block.vp_instruction_mask); + + u8 index = 0; + while (index < 32) + { + if (!index && !vp.jump_table.empty()) + { + for (auto &address : vp.jump_table) + { + data_block.vp_jump_table[index++] = 
(u16)address; + } + } + else + { + // End of list marker + data_block.vp_jump_table[index] = UINT16_MAX; + break; + } + } data_block.fp_ctrl = fp.ctrl; data_block.fp_texture_dimensions = fp.texture_dimensions; diff --git a/rpcs3/Emu/RSX/rsx_utils.h b/rpcs3/Emu/RSX/rsx_utils.h index 7ddbf279e3..498e4a1e0b 100644 --- a/rpcs3/Emu/RSX/rsx_utils.h +++ b/rpcs3/Emu/RSX/rsx_utils.h @@ -5,6 +5,7 @@ #include "gcm_enums.h" #include #include +#include <bitset> // TODO: replace the code below by #include when C++17 or newer will be used #include @@ -726,4 +727,41 @@ namespace rsx { return g_current_renderer; } + + template <int N> + void unpack_bitset(std::bitset<N>& block, u64* values) // Writes block out as N / 64 words: bit b of values[n] mirrors block[n * 64 + b] + { + constexpr int count = N / 64; + for (int n = 0; n < count; ++n) + { + int i = (n << 6); + values[n] = 0; + + for (int bit = 0; bit < 64; ++bit, ++i) + { + if (block[i]) + { + values[n] |= (1ull << bit); // 64-bit literal: shifting an int by 32..63 is undefined and dropped the upper half of every word + } + } + } + } + + template <int N> + void pack_bitset(std::bitset<N>& block, u64* values) // Rebuilds block from N / 64 words, highest word first. NOTE(review): block is not cleared here - presumably callers pass an empty bitset, confirm + { + constexpr int count = N / 64; + for (int n = (count - 1); n >= 0; --n) + { + if ((n + 1) < count) + { + block <<= 64; + } + + if (values[n]) + { + block |= values[n]; + } + } + } }