diff --git a/rpcs3/Emu/RSX/RSXFIFO.cpp b/rpcs3/Emu/RSX/RSXFIFO.cpp index 0704df86e3..9bcb7c2032 100644 --- a/rpcs3/Emu/RSX/RSXFIFO.cpp +++ b/rpcs3/Emu/RSX/RSXFIFO.cpp @@ -13,619 +13,10 @@ namespace rsx { namespace FIFO { - template - struct scoped_priority - { - scoped_priority() - { - thread_ctrl::set_native_priority(To); - } - - ~scoped_priority() - { - thread_ctrl::set_native_priority(From); - } - }; - - static inline void __prefetcher_sleep() { std::this_thread::sleep_for(100us); } - static inline void __prefetcher_yield() { std::this_thread::yield(); } - FIFO_control::FIFO_control(::rsx::thread* pctrl) { m_ctrl = pctrl->ctrl; - m_queue.reserve(16384); - m_prefetched_queue.reserve(16384); - thread_ctrl::spawn(m_prefetcher_thread, "FIFO Prefetch Thread", [this]() - { - // TODO: - return; - - if (g_cfg.core.thread_scheduler_enabled) - { - thread_ctrl::set_thread_affinity_mask(thread_ctrl::get_affinity_mask(thread_class::rsx)); - } - - u32 internal_get; - u32 target_addr; - - while (!Emu.IsStopped()) - { - target_addr = -1u; - - if (m_prefetched_queue.empty() && m_ctrl->put != m_ctrl->get) - { - // Get address to read ahead at/to - const u64 control_tag = m_ctrl_tag; - internal_get = m_internal_get; - - if (m_memwatch_addr) - { - // Spinning - __prefetcher_sleep(); - continue; - } - else - { - // This is normal - m_prefetch_get = m_ctrl->get; - m_prefetcher_speculating = false; - } - - // Check again - if (control_tag != m_ctrl_tag) - { - // Race condition - continue; - } - - if (m_prefetch_get != -1u) - { - // Check for special conditions in the existing queue - { - std::lock_guard lock(m_queue_mutex); - if (!m_queue.empty()) - { - const auto cmd = m_queue.back().reg; - - if ((cmd >> 2) == NV406E_SEMAPHORE_ACQUIRE) - { - // Blocking command, cannot read ahead - __prefetcher_sleep(); - continue; - } - - if ((cmd & RSX_METHOD_OLD_JUMP_CMD_MASK) == RSX_METHOD_OLD_JUMP_CMD) - { - m_prefetch_get = cmd & 0x1ffffffc; - } - else if ((cmd & RSX_METHOD_NEW_JUMP_CMD_MASK) == RSX_METHOD_NEW_JUMP_CMD) - { - m_prefetch_get = cmd & 0xfffffffc; - } - else if ((cmd & RSX_METHOD_CALL_CMD_MASK) == RSX_METHOD_CALL_CMD) - { - m_prefetch_get = cmd & 0xfffffffc; - } - else if ((cmd & RSX_METHOD_RETURN_MASK) == RSX_METHOD_RETURN_CMD) - { - // Cannot determine RET address safely, cannot read ahead - __prefetcher_sleep(); - continue; - } - } - } - - scoped_priority<0, 1> priority; - if (m_prefetch_mutex.try_lock()) - { - if (control_tag != m_ctrl_tag) - { - // Do not stall with the prefetch mutex held! - m_prefetch_mutex.unlock(); - continue; - } - - m_prefetcher_busy.store(true); - - read_ahead(m_prefetcher_info, m_prefetched_queue, m_prefetch_get); - optimize(m_prefetcher_info, m_prefetched_queue); - - m_prefetcher_busy.store(false); - m_prefetch_mutex.unlock(); - } - } - } - - __prefetcher_sleep(); - } - }); - } - - void FIFO_control::finalize() - { - if (m_prefetcher_thread) - { - m_prefetcher_thread->join(); - m_prefetcher_thread.reset(); - } - } - - bool FIFO_control::is_blocking_cmd(u32 cmd) - { - switch (cmd) - { - // Sync - case NV4097_BACK_END_WRITE_SEMAPHORE_RELEASE: - case NV4097_TEXTURE_READ_SEMAPHORE_RELEASE: - case NV406E_SEMAPHORE_ACQUIRE: - case NV406E_SEMAPHORE_RELEASE: - case NV406E_SET_REFERENCE: - - // Data xfer - case NV3089_IMAGE_IN: - case NV0039_BUFFER_NOTIFY: - case NV308A_COLOR: - return true; - default: - return false; - } - } - - bool FIFO_control::is_sync_cmd(u32 cmd) - { - switch (cmd) - { - case NV4097_BACK_END_WRITE_SEMAPHORE_RELEASE: - case NV4097_TEXTURE_READ_SEMAPHORE_RELEASE: - case NV406E_SEMAPHORE_ACQUIRE: - case NV406E_SEMAPHORE_RELEASE: - case NV406E_SET_REFERENCE: - return true; - default: - return false; - } - } - - void FIFO_control::register_optimization_pass(optimization_pass* pass) - { - m_optimization_passes.emplace_back(pass); - } - - void FIFO_control::clear_buffer() - { - std::lock_guard lock(m_queue_mutex); - - m_queue.clear(); - m_command_index = 0; - } - - void FIFO_control::read_ahead(fifo_buffer_info_t& info, simple_array &commands, u32& get_pointer) - { - const u32 put = m_ctrl->put; - - info.start_loc = get_pointer; - info.num_draw_calls = 0; - - u32 cmd; - u32 count; - - while (true) - { - if (get_pointer == put) - { - // Nothing to do - break; - } - - // Validate put and get registers before reading the command - // TODO: Who should handle graphics exceptions?? - if (u32 addr = RSXIOMem.RealAddr(get_pointer)) - { - cmd = vm::read32(addr); - } - else - { - // TODO: Optional recovery - break; - } - - if (UNLIKELY(cmd & 0xe0030003)) - { - if ((cmd & RSX_METHOD_OLD_JUMP_CMD_MASK) == RSX_METHOD_OLD_JUMP_CMD || - (cmd & RSX_METHOD_NEW_JUMP_CMD_MASK) == RSX_METHOD_NEW_JUMP_CMD || - (cmd & RSX_METHOD_CALL_CMD_MASK) == RSX_METHOD_CALL_CMD || - (cmd & RSX_METHOD_RETURN_MASK) == RSX_METHOD_RETURN_CMD) - { - // Flow control, stop read ahead - commands.push_back({ cmd, 0, get_pointer }); - break; - } - } - - if (UNLIKELY((cmd & RSX_METHOD_NOP_MASK) == RSX_METHOD_NOP_CMD)) - { - if (commands.empty() || commands.back().reg != RSX_METHOD_NOP_CMD) - { - // Insert one NOP only - commands.push_back({ RSX_METHOD_NOP_CMD, 0, get_pointer }); - } - - get_pointer += 4; - continue; - } - else if (UNLIKELY(cmd & 0x3)) - { - // Malformed command, optional recovery - break; - } - - //Validate the args ptr if the command attempts to read from it - auto args = vm::ptr::make(RSXIOMem.RealAddr(get_pointer + 4)); - if (UNLIKELY(!args)) - { - // Optional recovery - break; - } - - count = (cmd >> 18) & 0x7ff; - if (count > 1) - { - // Stop command execution if put will be equal to get ptr during the execution itself - if (UNLIKELY(count * 4 + 4 > put - get_pointer)) - { - count = (put - get_pointer) / 4 - 1; - } - - // Queue packet header - commands.push_back({ FIFO_PACKET_BEGIN, count, get_pointer }); - - // First executed command is at data[0] - get_pointer += 4; - - if (UNLIKELY((cmd & RSX_METHOD_NON_INCREMENT_CMD_MASK) == RSX_METHOD_NON_INCREMENT_CMD)) - { - const u32 reg = cmd & 0xfffc; - for (u32 i = 0; i < count; i++, get_pointer += 4) - { - commands.push_back({ reg, args[i], get_pointer }); - } - } - else - { - u32 reg = cmd & 0xfffc; - for (u32 i = 0; i < count; i++, get_pointer += 4, reg += 4) - { - commands.push_back({ reg, args[i], get_pointer }); - } - } - } - else - { - const u32 reg = cmd & 0xfffc; - commands.push_back({ reg, args[0], get_pointer }); - get_pointer += 8; - - if (reg == (NV4097_SET_BEGIN_END << 2)) - { - info.num_draw_calls++; - } - else if (reg == (NV406E_SEMAPHORE_ACQUIRE << 2)) - { - // Hard sync, stop read ahead - break; - } - } - } - - info.length = get_pointer - info.start_loc; - if (info.num_draw_calls < 2) - { - return; - } - - info.num_draw_calls /= 2; // Begin+End pairs - } - - void FIFO_control::report_branch_hit(u32 source, u32 target) - { - const auto range = m_branch_prediction_table.equal_range(source); - for (auto It = range.first; It != range.second; It++) - { - if (It->second.branch_target == target) - { - It->second.weight++; - return; - } - } - - fmt::throw_exception("Unreachable" HERE); - } - - void FIFO_control::report_branch_miss(u32 source, u32 target, u32 actual) - { - const auto range = m_branch_prediction_table.equal_range(source); - for (auto It = range.first; It != range.second; It++) - { - if (target < -1u && It->second.branch_target == target) - { - It->second.weight--; - target = -1u; - - if (actual == -1u) - return; - } - else if (actual < -1u && It->second.branch_target == actual) - { - It->second.weight++; - actual = -1u; - - if (target == -1u) - return; - } - } - - if (target != -1u) - { - branch_target_info_t info; - info.branch_origin = source; - info.branch_target = target; - info.checksum_16 = 0; - info.weight = 0; - - m_branch_prediction_table.emplace(source, info); - } - - if (actual != -1u) - { - branch_target_info_t info; - info.branch_origin = source; - info.branch_target = actual; - info.checksum_16 = 0; - info.weight = 1; - - m_branch_prediction_table.emplace(source, info); - } - } - - u32 FIFO_control::get_likely_target(u32 source) - { - s64 weight = 0; - u32 target = -1u; - - const auto range = m_branch_prediction_table.equal_range(source); - for (auto It = range.first; It != range.second; It++) - { - if (It->second.weight > weight) - { - target = It->second.branch_target; - } - } - - return target; - } - - void FIFO_control::optimize(const fifo_buffer_info_t& info, simple_array& commands) - { - if (commands.empty() || user_asked_for_frame_capture || g_cfg.video.disable_FIFO_reordering) - { - // Nothing to do - return; - } - - for (auto &opt : m_optimization_passes) - { - opt->optimize(info, commands, rsx::method_registers.registers.data()); - } - } - - bool FIFO_control::test_prefetcher_correctness(u32 target) - { - m_fifo_busy.store(true); - - if (!m_prefetched_queue.empty()) - { - const u32 guessed_target = m_prefetcher_info.start_loc; - bool result = true; - - if (guessed_target != m_ctrl->get) - { - const u32 ctrl_get = m_ctrl->get; - LOG_ERROR(RSX, "fifo::Prefetcher was seemingly wrong!, guessed=0x%x, get=0x%x", - guessed_target, ctrl_get); -// report_branch_miss(m_internal_get, guessed_target, get); - - // Kick -// m_ctrl->get = get; - m_prefetched_queue.clear(); - result = false; - } - else - { - // Nothing to do, guessed correctly -// report_branch_hit(m_internal_get, guessed_target); - } - - m_fifo_busy.store(false); - return result; - } - - m_fifo_busy.store(false); - return false; - } - - void FIFO_control::set_put(u32 put) - { - if (m_ctrl->put == put) - { - return; - } - - m_ctrl->put = put; - } - - void FIFO_control::set_get(u32 get, bool spinning) - { - if (m_ctrl->get == get) - { - if (spinning) - { - verify(HERE), !m_queue.empty(); - - const auto& last_cmd = m_queue.back(); - m_memwatch_addr = get; - m_memwatch_cmp = last_cmd.reg; - - m_ctrl_tag++; - } - - return; - } - - // Update ctrl registers - m_ctrl->get = get; - m_internal_get = get; - - // Clear memwatch spinner - m_memwatch_addr = 0; - - // Update control tag - m_ctrl_tag++; - - // NOTE: This will 'free' the prefetcher in case it was stopped by a sync command - clear_buffer(); - } - - const register_pair& FIFO_control::read_unsafe() - { - // Fast read with no processing, only safe inside a PACKET_BEGIN+count block - AUDIT(m_command_index < m_queue.size()); - return m_queue[m_command_index++]; - } - - const register_pair& FIFO_control::read() - { - bool registers_changed = false; - const auto queue_size = m_queue.size(); - - if (queue_size > 0) - { - if (UNLIKELY(m_internal_get != m_ctrl->get)) - { - // Control register changed - registers_changed = true; - clear_buffer(); - } - else if (m_command_index >= queue_size) - { - // Consumed whole queue previously - clear_buffer(); - } - else - { - const auto& inst = m_queue[m_command_index++]; - if (inst.reg == FIFO_DISABLED_COMMAND) - { - // Jump to the first safe command - for (u32 n = m_command_index; n < m_queue.size(); ++n) - { - const auto& _inst = m_queue[n]; - if (_inst.reg != FIFO_DISABLED_COMMAND) - { - m_command_index = ++n; - return _inst; - } - } - - // Whole remainder is just disabled commands - clear_buffer(); - } - else - { - // Command is 'ok' - return inst; - } - } - } - - //verify(HERE), m_queue.empty(); - - if (m_ctrl->put == m_ctrl->get) - { - // Nothing to do - return empty_cmd; - } - - if (m_memwatch_addr) - { - if (m_ctrl->get == m_memwatch_addr) - { - if (const auto addr = RSXIOMem.RealAddr(m_memwatch_addr)) - { - if (vm::read32(addr) == m_memwatch_cmp) - { - // Still spinning in place - return empty_cmd; - } - } - } - - m_memwatch_addr = 0; - m_memwatch_cmp = 0; - m_ctrl_tag++; - } - - // Lock to disable the prefetcher - if (0)//!m_prefetch_mutex.try_lock()) - { - return busy_cmd; - } - - if (UNLIKELY(registers_changed)) - { - if (!m_prefetched_queue.empty()) - { - if (m_prefetcher_info.start_loc != m_ctrl->get) - { - // Guessed wrong, discard results - m_prefetched_queue.clear(); - } - } - } - - if (!m_prefetched_queue.empty()) - { - m_ctrl->get = m_internal_get = m_prefetch_get; - m_ctrl_tag++; - - m_queue.swap(m_prefetched_queue); - } - else - { - m_internal_get = m_ctrl->get; - read_ahead(m_fifo_info, m_queue, m_internal_get); - //optimize(m_fifo_info, m_queue); - - m_ctrl->get = m_internal_get; - m_ctrl_tag++; - } - - //m_prefetch_mutex.unlock(); - - if (!m_queue.empty()) - { - // A few guarantees here.. - // First command is not really skippable even if useless - // Queue size is at least 1 - return m_queue[m_command_index++]; - } - - return empty_cmd; - } - - // Optimization passes - flattening_pass::flattening_pass() - { const std::pair skippable_ranges[] = { // Texture configuration @@ -691,7 +82,7 @@ namespace rsx { for (int i = 0; i < method.second; ++i) { - m_register_properties[method.first + i] = register_props::skippable; + m_register_properties[method.first + i] = register_props::skip_on_match; } } @@ -699,510 +90,177 @@ namespace rsx { for (int i = 0; i < method.second; ++i) { - m_register_properties[method.first + i] |= register_props::ignorable; + m_register_properties[method.first + i] |= register_props::always_ignore; } } } - void flattening_pass::optimize(const fifo_buffer_info_t& info, simple_array& commands, const u32* registers) + void FIFO_control::set_put(u32 put) { - if (info.num_draw_calls < 20) - { - // Not enough draw calls - return; - } - -#if (ENABLE_OPTIMIZATION_DEBUGGING) - auto copy = commands; -#endif - // Removes commands that have no effect on the pipeline - - register_pair* last_begin = nullptr; - register_pair* last_end = nullptr; - - u32 deferred_primitive_type = UINT32_MAX; - bool has_deferred_call = false; - - std::unordered_map register_tracker; // Tracks future register writes - auto test_register = [&](u32 reg, u32 value) - { - u32 test; - auto found = register_tracker.find(reg); - if (found == register_tracker.end()) - { - test = registers[reg]; - } - else - { - test = found->second; - } - - return (value == test); - }; - - auto set_register = [&](u32 reg, u32 value) - { - register_tracker[reg] = value; - }; - - auto patch_draw_calls = [&]() - { - if (last_end) - { - // Restore scope end - last_end->reg = (NV4097_SET_BEGIN_END << 2); - } - - if (last_begin > last_end) - { - // Dangling clause, restore scope open - last_begin->reg = (NV4097_SET_BEGIN_END << 2); - } - }; - - for (auto &command : commands) - { - bool flush_commands_flag = has_deferred_call; - bool execute_method_flag = true; - - const auto reg = command.reg >> 2; - const auto value = command.value; - - switch (reg) - { - case NV4097_SET_BEGIN_END: - { - if (value && value != deferred_primitive_type) - { - // Begin call with different primitive type - deferred_primitive_type = value; - } - else - { - // This is either an End call or another Begin with the same primitive type - has_deferred_call = true; - flush_commands_flag = false; - execute_method_flag = false; - } - break; - } - case NV4097_DRAW_ARRAYS: - { - if (has_deferred_call) - { - const auto cmd = method_registers.current_draw_clause.command; - if (cmd != rsx::draw_command::array && cmd != rsx::draw_command::none) - break; - - flush_commands_flag = false; - } - break; - } - case NV4097_DRAW_INDEX_ARRAY: - { - if (has_deferred_call) - { - const auto cmd = method_registers.current_draw_clause.command; - if (cmd != rsx::draw_command::indexed && cmd != rsx::draw_command::none) - break; - - flush_commands_flag = false; - } - break; - } - default: - { - if (reg >= m_register_properties.size()) - { - // Flow control or special command - break; - } - - const auto properties = m_register_properties[reg]; - if (properties & register_props::ignorable) - { - // These have no effect on rendering behavior or can be handled within begin/end - flush_commands_flag = false; - break; - } - - if (properties & register_props::skippable) - { - if (has_deferred_call) - { - // Safe to ignore if value has not changed - if (test_register(reg, value)) - { - execute_method_flag = false; - flush_commands_flag = false; - break; - } - } - - set_register(reg, value); - } - - break; - } - } - - if (!execute_method_flag) - { - command.reg = FIFO_DISABLED_COMMAND; - - if (reg == NV4097_SET_BEGIN_END) - { - if (command.value) - { - last_begin = &command; - command.reg = FIFO_DRAW_BARRIER; - } - else - { - last_end = &command; - } - } - } - - if (flush_commands_flag) - { - has_deferred_call = false; - deferred_primitive_type = UINT32_MAX; - - patch_draw_calls(); - } - } - - if (has_deferred_call) - { - verify(HERE), deferred_primitive_type != UINT32_MAX; - patch_draw_calls(); - } - -#if (ENABLE_OPTIMIZATION_DEBUGGING) - - bool mismatch = false; - for (int n = 0; n < commands.size(); ++n) - { - auto command = commands[n]; - auto old = copy[n]; - - if (command.reg != old.reg) - { - if (old.reg == (NV4097_SET_BEGIN_END << 2) && old.value) - { - mismatch = true; - break; - } - } - } - - if (!mismatch) + if (m_ctrl->put == put) { return; } - auto _get_method_name = [&](u32 reg) -> std::string - { - if (reg == FIFO_DISABLED_COMMAND) - { - return "COMMAND DISABLED"; - } - - if (reg == FIFO_PACKET_BEGIN) - { - return "PACKET BEGIN"; - } - - return rsx::get_method_name(reg >> 2); - }; - - LOG_ERROR(RSX, "------------------- DUMP BEGINS--------------------"); - for (int n = 0; n < commands.size(); ++n) - { - auto command = commands[n]; - auto old = copy[n]; - - if (old.reg != command.reg || command.value != command.value) - { - LOG_ERROR(RSX, "[0x%x] %s(0x%x) -> %s(0x%x)", command.loc, _get_method_name(old.reg), old.value, _get_method_name(command.reg), command.value); - } - else - { - LOG_ERROR(RSX, "[0x%x] %s(0x%x)", command.loc, _get_method_name(old.reg), old.value); - } - } - LOG_ERROR(RSX, "------------------- DUMP ENDS--------------------"); -#endif + m_ctrl->put = put; } - void reordering_pass::optimize(const fifo_buffer_info_t& info, simple_array& commands, const u32* registers) + void FIFO_control::set_get(u32 get, bool spinning) { - if (info.num_draw_calls < 8) + if (m_ctrl->get == get) { - // TODO: Better threshold checking - return; - } - - std::unordered_map register_tracker; // Tracks future register writes - auto get_register = [&](u32 reg) - { - auto found = register_tracker.find(reg); - if (found == register_tracker.end()) + if (spinning) { - return registers[reg]; + if (const auto addr = RSXIOMem.RealAddr(m_memwatch_addr)) + { + m_memwatch_addr = get; + m_memwatch_cmp = vm::read32(addr); + } } else { - return found->second; - } - }; - - auto set_register = [&](u32 reg, u32 value) - { - register_tracker[reg] = value; - }; - - bool recording_changes = false; - bool writing_draw_call = false; - bool has_merged = false; - u32 num_draws_processed = 0; - u32 num_draws_merged = 0; - - draw_call *target_bin = nullptr; - const register_pair *rollback_pos = nullptr; - - auto flush_commands = [&](const register_pair* end_pos) mutable - { - if (has_merged) - { - register_pair* mem_ptr = const_cast(bins.front().start_pos); - for (const auto& draw : bins) - { - if (draw.write_prologue) - { - for (u32 n = 0; n < draw.prologue.size(); ++n) - { - const auto e = draw.prologue.get(n); - mem_ptr->reg = e.first; - mem_ptr->value = e.second; - mem_ptr++; - } - } - - mem_ptr->reg = (NV4097_SET_BEGIN_END << 2); - mem_ptr->value = draw.primitive_type; - mem_ptr++; - - for (const auto &inst : draw.draws) - { - *mem_ptr = inst; - mem_ptr++; - } - - mem_ptr->reg = (NV4097_SET_BEGIN_END << 2); - mem_ptr->value = 0; - mem_ptr++; - } - - verify(HERE), mem_ptr <= end_pos; - - for (; mem_ptr <= end_pos; mem_ptr++) - { - mem_ptr->reg = FIFO_DISABLED_COMMAND; - } + LOG_ERROR(RSX, "Undetected spinning?"); } - bins.clear(); - has_merged = false; - }; - - auto allowed = [](u32 reg) - { - if (reg & ~0xfffc) - return false; - - if (FIFO_control::is_blocking_cmd(reg >> 2)) - return false; - - return true; - }; - -#if (ENABLE_OPTIMIZATION_DEBUGGING) - - auto _get_method_name = [&](u32 reg) -> std::string - { - if (reg == FIFO_DISABLED_COMMAND) - { - return "COMMAND DISABLED"; - } - - if (reg == FIFO_PACKET_BEGIN) - { - return "PACKET BEGIN"; - } - - return rsx::get_method_name(reg >> 2); - }; - - if (user_asked_for_frame_capture) - { - LOG_ERROR(RSX, "-----------------PRE DUMP BEGINS--------------------"); - for (const auto& command : commands) - { - LOG_ERROR(RSX, "[0x%x] %s(0x%x)", command.loc, _get_method_name(command.reg), command.value); - } - LOG_ERROR(RSX, "------------------- DUMP ENDS--------------------"); + return; } -#endif - for (const auto& command : commands) + // Update ctrl registers + m_ctrl->get = get; + m_internal_get = get; + + // Clear memwatch spinner + m_memwatch_addr = 0; + } + + bool FIFO_control::has_next() const + { + return (m_remaining_commands > 0); + } + + void FIFO_control::read_unsafe(register_pair& data) + { + // Fast read with no processing, only safe inside a PACKET_BEGIN+count block + //verify(HERE), m_remaining_commands; + + m_command_reg += m_command_inc; + m_args_ptr += 4; + m_remaining_commands--; + + data.reg = m_command_reg; + data.value = vm::read32(m_args_ptr); + } + + void FIFO_control::read(register_pair& data) + { + const u32 put = m_ctrl->put; + m_internal_get = m_ctrl->get; + + if (put == m_internal_get) { - bool write = false; - switch (const u32 reg = (command.reg >> 2)) + // Nothing to do + data.reg = FIFO_EMPTY; + return; + } + + if (m_memwatch_addr) + { + if (m_internal_get == m_memwatch_addr) { - case NV4097_INVALIDATE_VERTEX_FILE: // PSLight clears VERTEX_FILE[0-2] - case NV4097_PIPE_NOP: - case NV4097_INVALIDATE_VERTEX_FILE + 2: - case NV4097_INVALIDATE_VERTEX_CACHE_FILE: - case NV4097_INVALIDATE_L2: - case NV4097_INVALIDATE_ZCULL: - case (FIFO_DISABLED_COMMAND >> 2): - case (FIFO_PACKET_BEGIN >> 2): - case (FIFO_DRAW_BARRIER >> 2): - case (FIFO_EMPTY >> 2): - case (FIFO_BUSY >> 2): - { - break; - } - case NV4097_SET_BEGIN_END: - { - if (!command.value) + if (const auto addr = RSXIOMem.RealAddr(m_memwatch_addr)) { - target_bin = nullptr; - recording_changes = true; - writing_draw_call = false; - rollback_pos = &command; - } - else - { - if (bins.empty()) + if (vm::read32(addr) == m_memwatch_cmp) { - registers_changed.clear(); - target_bin = &bins.emplace_back(); - target_bin->write_prologue = false; - target_bin->start_pos = &command; - target_bin->primitive_type = command.value; + // Still spinning in place + data.reg = FIFO_EMPTY; + return; } - else - { - target_bin = nullptr; - - for (auto& draw : bins) - { - if (draw.matches(registers_changed, command.value)) - { - num_draws_merged++; - has_merged = true; - target_bin = &draw; - //target_bin->draws.push_back({ FIFO_DRAW_BARRIER << 2 }); - break; - } - } - - if (!target_bin) - { - target_bin = &bins.emplace_back(); - target_bin->write_prologue = true; - target_bin->prologue.swap(registers_changed); - target_bin->start_pos = &command; - target_bin->primitive_type = command.value; - } - } - - recording_changes = false; - writing_draw_call = true; - num_draws_processed++; } - - break; - } - default: - { - write = true; - - if (bins.empty()) - { - break; - } - - if (recording_changes) - { - // Stop if any of the following conditions is met - // The draw 'bin' changes more than 16 instructions (scanning performance) - // The number of unique bins is greater than 4 making it non-trivial and likely not worthwhile to scan - - if (!allowed(command.reg)) - { - // TODO: Maintain list of mergable commands - target_bin = nullptr; - - if (recording_changes) - { - recording_changes = false; - registers_changed.clear(); - } - - flush_commands(rollback_pos); - break; - } - - if (bins.size() == 1) - { - bins[0].prologue.add_cmd(command.reg, get_register(reg)); - } - - registers_changed.add_cmd(command.reg, command.value); - } - else if (writing_draw_call) - { - target_bin->draws.push_back(command); - } - - break; - } } - if (write) + m_memwatch_addr = 0; + m_memwatch_cmp = 0; + } + + u32 cmd; + + if (u32 addr = RSXIOMem.RealAddr(m_internal_get)) + { + cmd = vm::read32(addr); + } + else + { + // TODO: Optional recovery + data.reg = FIFO_ERROR; + return; + } + + if (UNLIKELY(cmd & 0xe0030003)) + { + if ((cmd & RSX_METHOD_OLD_JUMP_CMD_MASK) == RSX_METHOD_OLD_JUMP_CMD || + (cmd & RSX_METHOD_NEW_JUMP_CMD_MASK) == RSX_METHOD_NEW_JUMP_CMD || + (cmd & RSX_METHOD_CALL_CMD_MASK) == RSX_METHOD_CALL_CMD || + (cmd & RSX_METHOD_RETURN_MASK) == RSX_METHOD_RETURN_CMD) { - set_register(command.reg >> 2, command.value); + // Flow control, stop reading + data = { cmd, 0, m_internal_get }; + return; } } - flush_commands(rollback_pos); - - if (num_draws_merged) + if (UNLIKELY((cmd & RSX_METHOD_NOP_MASK) == RSX_METHOD_NOP_CMD)) { - LOG_ERROR(RSX, "Merges happened: Draws before: %d, draws merged %d", info.num_draw_calls, num_draws_merged); + m_ctrl->get.store(m_internal_get + 4); + data = { RSX_METHOD_NOP_CMD, 0, m_internal_get }; + return; + } + else if (UNLIKELY(cmd & 0x3)) + { + // Malformed command, optional recovery + data.reg = FIFO_ERROR; + return; } -#if (ENABLE_OPTIMIZATION_DEBUGGING) - if (user_asked_for_frame_capture) + // Validate the args ptr if the command attempts to read from it + m_args_ptr = RSXIOMem.RealAddr(m_internal_get + 4); + if (UNLIKELY(!m_args_ptr)) { - LOG_ERROR(RSX, "----------------POST DUMP BEGINS--------------------"); - for (const auto& command : commands) + // Optional recovery + data.reg = FIFO_ERROR; + return; + } + + verify(HERE), !m_remaining_commands; + u32 count = (cmd >> 18) & 0x7ff; + + if (count > 1) + { + // Stop command execution if put will be equal to get ptr during the execution itself + if (UNLIKELY(count * 4 + 4 > put - m_internal_get)) { - LOG_ERROR(RSX, "[0x%x] %s(0x%x)", command.loc, _get_method_name(command.reg), command.value); + count = (put - m_internal_get) / 4 - 1; } - LOG_ERROR(RSX, "------------------- DUMP ENDS--------------------"); + + // Set up readback parameters + m_command_reg = cmd & 0xfffc; + m_command_inc = ((cmd & RSX_METHOD_NON_INCREMENT_CMD_MASK) == RSX_METHOD_NON_INCREMENT_CMD) ? 0 : 4; + m_remaining_commands = count - 1; + + m_ctrl->get.store(m_internal_get + (count * 4 + 4)); + data = { m_command_reg, vm::read32(m_args_ptr), m_internal_get }; + } + else + { + m_ctrl->get.store(m_internal_get + 8); + data = { cmd & 0xfffc, vm::read32(m_args_ptr), m_internal_get }; } -#endif } } void thread::run_FIFO() { - const auto& command = fifo_ctrl->read(); + FIFO::register_pair command; + fifo_ctrl->read(command); const auto cmd = command.reg; if (cmd == FIFO::FIFO_BUSY) @@ -1218,77 +276,86 @@ namespace rsx performance_counters.FIFO_idle_timestamp = get_system_time(); performance_counters.state = FIFO_state::empty; } + else + { + std::this_thread::yield(); + } return; } // Validate put and get registers before reading the command // TODO: Who should handle graphics exceptions?? - if ((cmd & RSX_METHOD_OLD_JUMP_CMD_MASK) == RSX_METHOD_OLD_JUMP_CMD) + if (UNLIKELY(cmd & 0xe0030003)) { - const u32 offs = cmd & 0x1ffffffc; - if (offs == command.loc) + if ((cmd & RSX_METHOD_OLD_JUMP_CMD_MASK) == RSX_METHOD_OLD_JUMP_CMD) { - //Jump to self. Often preceded by NOP - if (performance_counters.state == FIFO_state::running) + const u32 offs = cmd & 0x1ffffffc; + if (offs == command.loc) { - performance_counters.FIFO_idle_timestamp = get_system_time(); + //Jump to self. Often preceded by NOP + if (performance_counters.state == FIFO_state::running) + { + performance_counters.FIFO_idle_timestamp = get_system_time(); + } + + performance_counters.state = FIFO_state::spinning; } - performance_counters.state = FIFO_state::spinning; + //LOG_WARNING(RSX, "rsx jump(0x%x) #addr=0x%x, cmd=0x%x, get=0x%x, put=0x%x", offs, m_ioAddress + get, cmd, get, put); + fifo_ctrl->set_get(offs, offs == command.loc); + return; } - - //LOG_WARNING(RSX, "rsx jump(0x%x) #addr=0x%x, cmd=0x%x, get=0x%x, put=0x%x", offs, m_ioAddress + get, cmd, get, put); - fifo_ctrl->set_get(offs, offs == command.loc); - return; - } - if ((cmd & RSX_METHOD_NEW_JUMP_CMD_MASK) == RSX_METHOD_NEW_JUMP_CMD) - { - const u32 offs = cmd & 0xfffffffc; - if (offs == command.loc) + if ((cmd & RSX_METHOD_NEW_JUMP_CMD_MASK) == RSX_METHOD_NEW_JUMP_CMD) { - //Jump to self. Often preceded by NOP - if (performance_counters.state == FIFO_state::running) + const u32 offs = cmd & 0xfffffffc; + if (offs == command.loc) { - performance_counters.FIFO_idle_timestamp = get_system_time(); + //Jump to self. Often preceded by NOP + if (performance_counters.state == FIFO_state::running) + { + performance_counters.FIFO_idle_timestamp = get_system_time(); + } + + performance_counters.state = FIFO_state::spinning; } - performance_counters.state = FIFO_state::spinning; + //LOG_WARNING(RSX, "rsx jump(0x%x) #addr=0x%x, cmd=0x%x, get=0x%x, put=0x%x", offs, m_ioAddress + get, cmd, get, put); + fifo_ctrl->set_get(offs, offs == command.loc); + return; } - - //LOG_WARNING(RSX, "rsx jump(0x%x) #addr=0x%x, cmd=0x%x, get=0x%x, put=0x%x", offs, m_ioAddress + get, cmd, get, put); - fifo_ctrl->set_get(offs, offs == command.loc); - return; - } - if ((cmd & RSX_METHOD_CALL_CMD_MASK) == RSX_METHOD_CALL_CMD) - { - if (m_return_addr != -1) + if ((cmd & RSX_METHOD_CALL_CMD_MASK) == RSX_METHOD_CALL_CMD) { - // Only one layer is allowed in the call stack. - LOG_ERROR(RSX, "FIFO: CALL found inside a subroutine. Discarding subroutine"); - fifo_ctrl->set_get(std::exchange(m_return_addr, -1)); + if (m_return_addr != -1) + { + // Only one layer is allowed in the call stack. + LOG_ERROR(RSX, "FIFO: CALL found inside a subroutine. Discarding subroutine"); + fifo_ctrl->set_get(std::exchange(m_return_addr, -1)); + return; + } + + const u32 offs = cmd & 0xfffffffc; + m_return_addr = command.loc + 4; + fifo_ctrl->set_get(offs); + return; + } + if ((cmd & RSX_METHOD_RETURN_MASK) == RSX_METHOD_RETURN_CMD) + { + if (m_return_addr == -1) + { + LOG_ERROR(RSX, "FIFO: RET found without corresponding CALL. Discarding queue"); + fifo_ctrl->set_get(ctrl->put); + return; + } + + fifo_ctrl->set_get(m_return_addr); + m_return_addr = -1; return; } - const u32 offs = cmd & 0xfffffffc; - m_return_addr = command.loc + 4; - fifo_ctrl->set_get(offs); - return; + fmt::throw_exception("Unreachable" HERE); } - if ((cmd & RSX_METHOD_RETURN_MASK) == RSX_METHOD_RETURN_CMD) - { - if (m_return_addr == -1) - { - LOG_ERROR(RSX, "FIFO: RET found without corresponding CALL. Discarding queue"); - fifo_ctrl->set_get(ctrl->put); - return; - } - - fifo_ctrl->set_get(m_return_addr); - m_return_addr = -1; - return; - } - if (cmd == RSX_METHOD_NOP_CMD) + else if (cmd == RSX_METHOD_NOP_CMD) { if (performance_counters.state == FIFO_state::running) { @@ -1298,14 +365,6 @@ namespace rsx return; } - if (cmd & 0x3) - { - // TODO: Check for more invalid bits combinations - LOG_ERROR(RSX, "FIFO: Illegal command(0x%x) was executed. Resetting...", cmd); - fifo_ctrl->set_get(restore_point.load()); - m_return_addr = restore_ret_addr; - return; - } if (performance_counters.state != FIFO_state::running) { @@ -1323,27 +382,10 @@ namespace rsx performance_counters.state = FIFO_state::running; } - u32 count = 1; - auto *command_ptr = &command; - - if (cmd == FIFO::FIFO_PACKET_BEGIN) + for (int i = 0; ; i++, fifo_ctrl->read_unsafe(command)) { - count = command.value; - command_ptr = &fifo_ctrl->read_unsafe(); - } - - for (u32 i = 0; i < count; ++i) - { - if (i) command_ptr = &fifo_ctrl->read_unsafe(); - - if (command_ptr->reg == FIFO::FIFO_DISABLED_COMMAND) - { - // Placeholder for dropped commands - continue; - } - - const u32 reg = command_ptr->reg >> 2; - const u32& value = command_ptr->value; + const u32 reg = command.reg >> 2; + const u32 value = command.value; if (capture_current_frame) { @@ -1353,7 +395,7 @@ namespace rsx { // todo: handle nv406e methods better?, do we care about call/jumps? rsx::frame_capture_data::replay_command replay_cmd; - replay_cmd.rsx_command = std::make_pair(i == 0 ? command_ptr->reg : 0, value); + replay_cmd.rsx_command = std::make_pair(i == 0 ? command.reg : 0, value); frame_capture.replay_commands.push_back(replay_cmd); @@ -1388,6 +430,11 @@ namespace rsx { method(this, reg, value); } + + if (!fifo_ctrl->has_next()) + { + break; + } } } } \ No newline at end of file diff --git a/rpcs3/Emu/RSX/RSXFIFO.h b/rpcs3/Emu/RSX/RSXFIFO.h index a832a92a62..43613d84a8 100644 --- a/rpcs3/Emu/RSX/RSXFIFO.h +++ b/rpcs3/Emu/RSX/RSXFIFO.h @@ -29,6 +29,7 @@ namespace rsx NOP = 0, FIFO_EMPTY = 0xDEADF1F0, FIFO_BUSY = 0xBABEF1F0, + FIFO_ERROR = 0xDEADBEEF, FIFO_PACKET_BEGIN = 0xF1F0, FIFO_DISABLED_COMMAND = 0xF1F4, FIFO_DRAW_BARRIER = 0xF1F8, @@ -42,193 +43,30 @@ namespace rsx u32 reserved; }; - struct fifo_buffer_info_t - { - u32 start_loc; - u32 length; - u32 num_draw_calls; - u32 reserved; - }; - - struct branch_target_info_t - { - u32 branch_target; - u32 branch_origin; - s64 weight; - u64 checksum_16; - u64 reserved; - }; - - struct optimization_pass - { - virtual void optimize(const fifo_buffer_info_t& info, simple_array& commands, const u32* registers) = 0; - }; - - struct flattening_pass : public optimization_pass + class FIFO_control { private: enum register_props : u8 { - skippable = 1, - ignorable = 2 + none = 0, + skip_on_match = 1, + always_ignore = 2 }; - std::array m_register_properties; - - public: - flattening_pass(); - void optimize(const fifo_buffer_info_t& info, simple_array& commands, const u32* registers) override; - }; - - struct reordering_pass : public optimization_pass - { private: - - struct instruction_buffer_t - { - std::unordered_map m_storage; - simple_array m_insertion_order; - - instruction_buffer_t() - { - m_insertion_order.reserve(64); - } - - void add_cmd(u32 reg, u32 value) - { - const auto is_new = std::get<1>(m_storage.insert_or_assign(reg, value)); - if (!is_new) - { - for (auto &loc : m_insertion_order) - { - if (loc == reg) - { - loc |= 0x80000000; - break; - } - } - } - - m_insertion_order.push_back(reg); - } - - void clear() - { - m_storage.clear(); - m_insertion_order.clear(); - } - - void swap(instruction_buffer_t& other) - { - m_storage.swap(other.m_storage); - m_insertion_order.swap(other.m_insertion_order); - } - - auto size() const - { - return m_storage.size(); - } - - inline std::pair get(int index) const - { - const auto key = m_insertion_order[index]; - if (key & 0x80000000) - { - // Disabled by a later write to the same register - // TODO: Track command type registers and avoid this - return { FIFO_DISABLED_COMMAND, 0 }; - } - - const auto value = m_storage.at(key); - return { key, value }; - } - - bool operator == (const instruction_buffer_t& other) const - { - if (size() == other.size()) - { - for (const auto &e : other.m_storage) - { - const auto found = m_storage.find(e.first); - if (found == m_storage.end()) - return false; - - if (found->second != e.second) - return false; - } - - return true; - } - - return false; - } - }; - - struct draw_call - { - instruction_buffer_t prologue; - std::vector draws; - bool write_prologue; - u32 primitive_type; - const register_pair* start_pos; - - bool matches(const instruction_buffer_t setup, u32 prim) const - { - if (prim != primitive_type) - return false; - - return prologue == setup; - } - }; - - instruction_buffer_t registers_changed; - std::vector bins; - - std::unordered_multimap m_results_prediction_table; - - public: - void optimize(const fifo_buffer_info_t& info, simple_array& commands, const u32* registers) override; - }; - - class FIFO_control - { RsxDmaControl* m_ctrl = nullptr; u32 m_internal_get = 0; - std::shared_ptr m_prefetcher_thread; - u32 m_prefetch_get = 0; - atomic_t m_prefetcher_busy{ false }; - atomic_t m_fifo_busy{ false }; - fifo_buffer_info_t m_prefetcher_info; - bool m_prefetcher_speculating; - - std::vector> m_optimization_passes; - - simple_array m_queue; - simple_array m_prefetched_queue; - atomic_t m_command_index{ 0 }; - - shared_mutex m_prefetch_mutex; // Guards prefetch queue - shared_mutex m_queue_mutex; // Guards primary queue - atomic_t m_ctrl_tag{ 0 }; // 'Guards' control registers - - register_pair empty_cmd { FIFO_EMPTY }; - register_pair busy_cmd { FIFO_BUSY }; - u32 m_memwatch_addr = 0; u32 m_memwatch_cmp = 0; - fifo_buffer_info_t m_fifo_info; - std::unordered_multimap m_branch_prediction_table; + u32 m_command_reg = 0; + u32 m_command_inc = 0; + u32 m_remaining_commands = 0; + u32 m_args_ptr = 0; - void read_ahead(fifo_buffer_info_t& info, simple_array& commands, u32& get_pointer); - void optimize(const fifo_buffer_info_t& info, simple_array& commands); - void clear_buffer(); - - u32 get_likely_target(u32 source); - void report_branch_miss(u32 source, u32 target, u32 actual); - void report_branch_hit(u32 source, u32 target); - bool test_prefetcher_correctness(u32 actual_target); + std::array m_register_properties; + bool has_deferred_draw = false; public: FIFO_control(rsx::thread* pctrl); @@ -237,12 +75,9 @@ namespace rsx void set_get(u32 get, bool spinning = false); void set_put(u32 put); - const register_pair& read(); - inline const register_pair& read_unsafe(); - - void register_optimization_pass(optimization_pass* pass); - - void finalize(); + void read(register_pair& data); + inline void read_unsafe(register_pair& data); + inline bool has_next() const; public: static bool is_blocking_cmd(u32 cmd); diff --git a/rpcs3/Emu/RSX/RSXThread.cpp b/rpcs3/Emu/RSX/RSXThread.cpp index 9037bc40d0..e4d8ace74c 100644 --- a/rpcs3/Emu/RSX/RSXThread.cpp +++ b/rpcs3/Emu/RSX/RSXThread.cpp @@ -554,10 +554,6 @@ namespace rsx fifo_ctrl = std::make_unique<::rsx::FIFO::FIFO_control>(this); - fifo_ctrl->register_optimization_pass(new FIFO::flattening_pass()); - //fifo_ctrl->register_optimization_pass(new FIFO::reordering_pass()); // R&C2 - Not working if flattening is also enabled!!! - //fifo_ctrl->register_optimization_pass(new FIFO::flattening_pass()); - last_flip_time = get_system_time() - 1000000; named_thread vblank_thread("VBlank Thread", [this]() @@ -682,11 +678,6 @@ namespace rsx m_decompiler_thread->join(); m_decompiler_thread.reset(); } - - if (fifo_ctrl) - { - fifo_ctrl->finalize(); - } } std::string thread::get_name() const diff --git a/rpcs3/Emu/RSX/rsx_methods.cpp b/rpcs3/Emu/RSX/rsx_methods.cpp index 6f6ab7e0e5..80a96232f7 100644 --- a/rpcs3/Emu/RSX/rsx_methods.cpp +++ b/rpcs3/Emu/RSX/rsx_methods.cpp @@ -456,6 +456,10 @@ namespace rsx rsx::method_registers.current_draw_clause.compile(); rsxthr->end(); } + else + { + rsxthr->in_begin_end = false; + } } vm::addr_t get_report_data_impl(u32 offset) @@ -2229,8 +2233,11 @@ namespace rsx { u32 result = 0; - for (const auto &barrier : draw_command_barriers[current_range_index]) + for (const auto &barrier : draw_command_barriers) { + if (barrier.draw_id != current_range_index) + continue; + switch (barrier.type) { case primitive_restart_barrier: diff --git a/rpcs3/Emu/RSX/rsx_methods.h b/rpcs3/Emu/RSX/rsx_methods.h index 5893e408e5..39b9620f03 100644 --- a/rpcs3/Emu/RSX/rsx_methods.h +++ b/rpcs3/Emu/RSX/rsx_methods.h @@ -43,6 +43,7 @@ namespace rsx struct barrier_t { + u32 draw_id; u64 timestamp; u32 address; @@ -74,7 +75,7 @@ namespace rsx simple_array draw_command_ranges; // Stores rasterization barriers for primitive types sensitive to adjacency - std::vector> draw_command_barriers; + simple_array draw_command_barriers; // Counter used to parse the commands in order u32 current_range_index; @@ -87,28 +88,27 @@ namespace rsx void append_draw_command(const draw_range_t& range) { draw_command_ranges.push_back(range); - draw_command_barriers.push_back({}); } // Insert a new draw command within the others void insert_draw_command(int index, const draw_range_t& range) { auto range_It = draw_command_ranges.begin(); - auto barrier_It = draw_command_barriers.begin(); - - // Because deque::insert fails with initializer list on MSVC - const std::set new_barrier; - while (index--) { ++range_It; - ++barrier_It; } draw_command_ranges.insert(range_It, range); - draw_command_barriers.insert(barrier_It, new_barrier); - verify(HERE), draw_command_ranges.size() == draw_command_barriers.size(); + // Update all barrier draw ids after this one + for (auto &barrier : draw_command_barriers) + { + if (barrier.draw_id >= index) + { + barrier.draw_id++; + } + } } public: @@ -125,6 +125,26 @@ namespace rsx { verify(HERE), !draw_command_ranges.empty(); + auto _do_barrier_insert = [this](barrier_t&& val) + { + if (draw_command_barriers.empty() || draw_command_barriers.back() < val) + { + draw_command_barriers.push_back(val); + return; + } + + for (auto it = draw_command_barriers.begin(); it != draw_command_barriers.end(); it++) + { + if (*it < val) + { + continue; + } + + draw_command_barriers.insert(it, val); + break; + } + }; + if (type == primitive_restart_barrier) { // Rasterization flow barrier @@ -132,7 +152,7 @@ namespace rsx const auto address = last.first + last.count; const auto command_index = draw_command_ranges.size() - 1; - draw_command_barriers[command_index].insert({ 0, address, arg, 0, type }); + _do_barrier_insert({ command_index, 0, address, arg, 0, type }); } else { @@ -140,7 +160,7 @@ namespace rsx append_draw_command({}); const auto command_index = draw_command_ranges.size() - 1; - draw_command_barriers[command_index].insert({ get_system_time(), -1u, arg, 0, type }); + _do_barrier_insert({ command_index, get_system_time(), -1u, arg, 0, type }); last_execution_barrier_index = command_index; } } @@ -243,8 +263,11 @@ namespace rsx } verify(HERE), current_range_index != -1u; - for (const auto &barrier : draw_command_barriers[current_range_index]) + for (const auto &barrier : draw_command_barriers) { + if (barrier.draw_id != current_range_index) + continue; + if (barrier.type == primitive_restart_barrier) return false; } @@ -342,8 +365,11 @@ namespace rsx u32 previous_barrier = range.first; u32 vertex_counter = 0; - for (const auto &barrier : draw_command_barriers[current_range_index]) + for (const auto &barrier : draw_command_barriers) { + if (barrier.draw_id != current_range_index) + continue; + if (barrier.type != primitive_restart_barrier) continue;