rsx: Avoid calling slow functions every draw call

- Use TSC for timing where interval duration matters.
- Use atomic counter for ordering timestamps otherwise.
This commit is contained in:
kd-11 2022-03-06 15:09:28 +03:00 committed by kd-11
parent 762b594927
commit cfecbb24ca
10 changed files with 80 additions and 31 deletions

View File

@ -147,6 +147,8 @@ namespace rsx
u8 samples_x = 1; u8 samples_x = 1;
u8 samples_y = 1; u8 samples_y = 1;
rsx::address_range memory_range;
std::unique_ptr<typename std::remove_pointer<image_storage_type>::type> resolve_surface; std::unique_ptr<typename std::remove_pointer<image_storage_type>::type> resolve_surface;
surface_sample_layout sample_layout = surface_sample_layout::null; surface_sample_layout sample_layout = surface_sample_layout::null;
surface_raster_type raster_type = surface_raster_type::linear; surface_raster_type raster_type = surface_raster_type::linear;
@ -348,7 +350,14 @@ namespace rsx
void queue_tag(u32 address) void queue_tag(u32 address)
{ {
ensure(native_pitch);
ensure(rsx_pitch);
base_addr = address; base_addr = address;
const u32 internal_height = get_surface_height<rsx::surface_metrics::samples>();
const u32 excess = (rsx_pitch - native_pitch);
memory_range = rsx::address_range::start_length(base_addr, internal_height * rsx_pitch - excess);
} }
void sync_tag() void sync_tag()
@ -394,6 +403,10 @@ namespace rsx
const auto sample_offset = (samples[n].y * rsx_pitch) + samples[n].x; const auto sample_offset = (samples[n].y * rsx_pitch) + samples[n].x;
memory_tag_samples[n].first = (sample_offset + base_addr); memory_tag_samples[n].first = (sample_offset + base_addr);
} }
const u32 internal_height = get_surface_height<rsx::surface_metrics::samples>();
const u32 excess = (rsx_pitch - native_pitch);
memory_range = rsx::address_range::start_length(base_addr, internal_height * rsx_pitch - excess);
} }
void sync_tag() void sync_tag()
@ -629,9 +642,7 @@ namespace rsx
inline rsx::address_range get_memory_range() const inline rsx::address_range get_memory_range() const
{ {
const u32 internal_height = get_surface_height<rsx::surface_metrics::samples>(); return memory_range;
const u32 excess = (rsx_pitch - native_pitch);
return rsx::address_range::start_length(base_addr, internal_height * rsx_pitch - excess);
} }
template <typename T> template <typename T>

View File

@ -0,0 +1,21 @@
#pragma once
#include <util/asm.hpp>
#include <util/sysinfo.hpp>
extern u64 get_system_time();
namespace rsx
{
/**
 * Returns a timestamp in microseconds for measuring intervals on RSX hot paths.
 *
 * If utils::get_tsc_freq() reports at least 1 MHz, the value is derived from
 * utils::get_tsc(); otherwise the call falls back to get_system_time().
 *
 * NOTE(review): assumes utils::get_tsc_freq() is in Hz and get_system_time() is in
 * microseconds - confirm against their definitions.
 * NOTE(review): the TSC path and get_system_time() presumably do not share an epoch,
 * so results should only be compared with other uclock() results - confirm.
 */
static inline u64 uclock()
{
	constexpr u64 us_per_second = 1000000;

	// Same fallback rule as before: frequencies below 1 MHz are treated as "no usable TSC".
	if (const u64 tsc_freq = utils::get_tsc_freq(); tsc_freq >= us_per_second)
	{
		const u64 tsc = utils::get_tsc();

		// Convert whole seconds and the sub-second remainder separately. Dividing by the
		// frequency pre-truncated to MHz would drop up to 999999 Hz of it and make the
		// clock run fast; this form yields exactly floor(tsc * 1e6 / tsc_freq).
		// Neither product can overflow: whole_seconds * 1e6 <= tsc because tsc_freq >= 1e6,
		// and leftover_ticks < tsc_freq keeps the second product far below 2^64.
		const u64 whole_seconds = tsc / tsc_freq;
		const u64 leftover_ticks = tsc % tsc_freq;

		return whole_seconds * us_per_second + (leftover_ticks * us_per_second) / tsc_freq;
	}

	return get_system_time();
}
}

View File

@ -3,6 +3,7 @@
#include "RSXFIFO.h" #include "RSXFIFO.h"
#include "RSXThread.h" #include "RSXThread.h"
#include "Capture/rsx_capture.h" #include "Capture/rsx_capture.h"
#include "Common/time.hpp"
#include "Emu/Cell/lv2/sys_rsx.h" #include "Emu/Cell/lv2/sys_rsx.h"
namespace rsx namespace rsx
@ -395,7 +396,7 @@ namespace rsx
{ {
if (performance_counters.state == FIFO_state::running) if (performance_counters.state == FIFO_state::running)
{ {
performance_counters.FIFO_idle_timestamp = get_system_time(); performance_counters.FIFO_idle_timestamp = rsx::uclock();
performance_counters.state = FIFO_state::nop; performance_counters.state = FIFO_state::nop;
} }
@ -405,7 +406,7 @@ namespace rsx
{ {
if (performance_counters.state == FIFO_state::running) if (performance_counters.state == FIFO_state::running)
{ {
performance_counters.FIFO_idle_timestamp = get_system_time(); performance_counters.FIFO_idle_timestamp = rsx::uclock();
performance_counters.state = FIFO_state::empty; performance_counters.state = FIFO_state::empty;
} }
else else
@ -437,7 +438,7 @@ namespace rsx
//Jump to self. Often preceded by NOP //Jump to self. Often preceded by NOP
if (performance_counters.state == FIFO_state::running) if (performance_counters.state == FIFO_state::running)
{ {
performance_counters.FIFO_idle_timestamp = get_system_time(); performance_counters.FIFO_idle_timestamp = rsx::uclock();
sync_point_request.release(true); sync_point_request.release(true);
} }
@ -456,7 +457,7 @@ namespace rsx
//Jump to self. Often preceded by NOP //Jump to self. Often preceded by NOP
if (performance_counters.state == FIFO_state::running) if (performance_counters.state == FIFO_state::running)
{ {
performance_counters.FIFO_idle_timestamp = get_system_time(); performance_counters.FIFO_idle_timestamp = rsx::uclock();
sync_point_request.release(true); sync_point_request.release(true);
} }
@ -513,7 +514,7 @@ namespace rsx
} }
// Update performance counters with time spent in idle mode // Update performance counters with time spent in idle mode
performance_counters.idle_time += (get_system_time() - performance_counters.FIFO_idle_timestamp); performance_counters.idle_time += (rsx::uclock() - performance_counters.FIFO_idle_timestamp);
} }
do do

View File

@ -7,6 +7,7 @@
#include "Common/BufferUtils.h" #include "Common/BufferUtils.h"
#include "Common/texture_cache.h" #include "Common/texture_cache.h"
#include "Common/surface_store.h" #include "Common/surface_store.h"
#include "Common/time.hpp"
#include "Capture/rsx_capture.h" #include "Capture/rsx_capture.h"
#include "rsx_methods.h" #include "rsx_methods.h"
#include "gcm_printing.h" #include "gcm_printing.h"
@ -483,6 +484,7 @@ namespace rsx
// This whole thing becomes a mess if we don't have a provoking attribute. // This whole thing becomes a mess if we don't have a provoking attribute.
const auto vertex_id = vertex_push_buffers[0].get_vertex_id(); const auto vertex_id = vertex_push_buffers[0].get_vertex_id();
vertex_push_buffers[attribute].set_vertex_data(attribute, vertex_id, subreg_index, type, size, value); vertex_push_buffers[attribute].set_vertex_data(attribute, vertex_id, subreg_index, type, size, value);
m_graphics_state |= rsx::pipeline_state::push_buffer_arrays_dirty;
} }
u32 thread::get_push_buffer_vertex_count() const u32 thread::get_push_buffer_vertex_count() const
@ -507,7 +509,9 @@ namespace rsx
void thread::end() void thread::end()
{ {
if (capture_current_frame) if (capture_current_frame)
{
capture::capture_draw_memory(this); capture::capture_draw_memory(this);
}
in_begin_end = false; in_begin_end = false;
m_frame_stats.draw_calls++; m_frame_stats.draw_calls++;
@ -517,12 +521,17 @@ namespace rsx
m_graphics_state |= rsx::pipeline_state::framebuffer_reads_dirty; m_graphics_state |= rsx::pipeline_state::framebuffer_reads_dirty;
ROP_sync_timestamp = rsx::get_shared_tag(); ROP_sync_timestamp = rsx::get_shared_tag();
for (auto & push_buf : vertex_push_buffers) if (m_graphics_state & rsx::pipeline_state::push_buffer_arrays_dirty)
{ {
//Disabled, see https://github.com/RPCS3/rpcs3/issues/1932 for (auto& push_buf : vertex_push_buffers)
//rsx::method_registers.register_vertex_info[index].size = 0; {
//Disabled, see https://github.com/RPCS3/rpcs3/issues/1932
//rsx::method_registers.register_vertex_info[index].size = 0;
push_buf.clear(); push_buf.clear();
}
m_graphics_state &= ~rsx::pipeline_state::push_buffer_arrays_dirty;
} }
element_push_buffer.clear(); element_push_buffer.clear();
@ -630,7 +639,7 @@ namespace rsx
fifo_ctrl = std::make_unique<::rsx::FIFO::FIFO_control>(this); fifo_ctrl = std::make_unique<::rsx::FIFO::FIFO_control>(this);
last_flip_time = get_system_time() - 1000000; last_flip_time = rsx::uclock() - 1000000;
vblank_count = 0; vblank_count = 0;
@ -642,7 +651,7 @@ namespace rsx
#else #else
constexpr u32 host_min_quantum = 500; constexpr u32 host_min_quantum = 500;
#endif #endif
u64 start_time = get_system_time(); u64 start_time = rsx::uclock();
u64 vblank_rate = g_cfg.video.vblank_rate; u64 vblank_rate = g_cfg.video.vblank_rate;
u64 vblank_period = 1'000'000 + u64{g_cfg.video.vblank_ntsc.get()} * 1000; u64 vblank_period = 1'000'000 + u64{g_cfg.video.vblank_ntsc.get()} * 1000;
@ -653,7 +662,7 @@ namespace rsx
while (!is_stopped()) while (!is_stopped())
{ {
// Get current time // Get current time
const u64 current = get_system_time(); const u64 current = rsx::uclock();
// Calculate the time at which we need to send a new VBLANK signal // Calculate the time at which we need to send a new VBLANK signal
const u64 post_event_time = start_time + (local_vblank_count + 1) * vblank_period / vblank_rate; const u64 post_event_time = start_time + (local_vblank_count + 1) * vblank_period / vblank_rate;
@ -715,7 +724,7 @@ namespace rsx
if (Emu.IsPaused()) if (Emu.IsPaused())
{ {
// Save the difference before pause // Save the difference before pause
start_time = get_system_time() - start_time; start_time = rsx::uclock() - start_time;
while (Emu.IsPaused() && !is_stopped()) while (Emu.IsPaused() && !is_stopped())
{ {
@ -723,7 +732,7 @@ namespace rsx
} }
// Restore difference // Restore difference
start_time = get_system_time() - start_time; start_time = rsx::uclock() - start_time;
} }
} }
}); });
@ -2602,7 +2611,7 @@ namespace rsx
void thread::recover_fifo(u32 line, u32 col, const char* file, const char* func) void thread::recover_fifo(u32 line, u32 col, const char* file, const char* func)
{ {
const u64 current_time = get_system_time(); const u64 current_time = rsx::uclock();
if (recovered_fifo_cmds_history.size() == 20u) if (recovered_fifo_cmds_history.size() == 20u)
{ {
@ -2659,7 +2668,7 @@ namespace rsx
// Some cases do not need full delay // Some cases do not need full delay
remaining = utils::aligned_div(remaining, div); remaining = utils::aligned_div(remaining, div);
const u64 until = get_system_time() + remaining; const u64 until = rsx::uclock() + remaining;
while (true) while (true)
{ {
@ -2691,7 +2700,7 @@ namespace rsx
busy_wait(100); busy_wait(100);
} }
const u64 current = get_system_time(); const u64 current = rsx::uclock();
if (current >= until) if (current >= until)
{ {
@ -2922,7 +2931,7 @@ namespace rsx
//Average load over around 30 frames //Average load over around 30 frames
if (!performance_counters.last_update_timestamp || performance_counters.sampled_frames > 30) if (!performance_counters.last_update_timestamp || performance_counters.sampled_frames > 30)
{ {
const auto timestamp = get_system_time(); const auto timestamp = rsx::uclock();
const auto idle = performance_counters.idle_time.load(); const auto idle = performance_counters.idle_time.load();
const auto elapsed = timestamp - performance_counters.last_update_timestamp; const auto elapsed = timestamp - performance_counters.last_update_timestamp;
@ -3086,7 +3095,7 @@ namespace rsx
if (limit) if (limit)
{ {
const u64 time = get_system_time() - Emu.GetPauseTime(); const u64 time = rsx::uclock() - Emu.GetPauseTime();
const u64 needed_us = static_cast<u64>(1000000 / limit); const u64 needed_us = static_cast<u64>(1000000 / limit);
if (int_flip_index == 0) if (int_flip_index == 0)
@ -3124,7 +3133,7 @@ namespace rsx
flip(m_queued_flip); flip(m_queued_flip);
last_flip_time = get_system_time() - 1000000; last_flip_time = rsx::uclock() - 1000000;
flip_status = CELL_GCM_DISPLAY_FLIP_STATUS_DONE; flip_status = CELL_GCM_DISPLAY_FLIP_STATUS_DONE;
m_queued_flip.in_progress = false; m_queued_flip.in_progress = false;
@ -3630,7 +3639,7 @@ namespace rsx
} }
} }
if (m_tsc = get_system_time(); m_tsc < m_next_tsc) if (m_tsc = rsx::uclock(); m_tsc < m_next_tsc)
{ {
return; return;
} }

View File

@ -129,6 +129,8 @@ namespace rsx
polygon_stipple_pattern_dirty = 0x8000, // Rasterizer stippling pattern changed polygon_stipple_pattern_dirty = 0x8000, // Rasterizer stippling pattern changed
line_stipple_pattern_dirty = 0x10000, // Line stippling pattern changed line_stipple_pattern_dirty = 0x10000, // Line stippling pattern changed
push_buffer_arrays_dirty = 0x20000, // Push buffers have data written to them (immediate mode vertex buffers)
fragment_program_dirty = fragment_program_ucode_dirty | fragment_program_state_dirty, fragment_program_dirty = fragment_program_ucode_dirty | fragment_program_state_dirty,
vertex_program_dirty = vertex_program_ucode_dirty | vertex_program_state_dirty, vertex_program_dirty = vertex_program_ucode_dirty | vertex_program_state_dirty,
invalidate_pipeline_bits = fragment_program_dirty | vertex_program_dirty, invalidate_pipeline_bits = fragment_program_dirty | vertex_program_dirty,

View File

@ -1085,7 +1085,7 @@ void VKGSRender::check_heap_status(u32 flags)
m_texture_upload_buffer_ring_info.reset_allocation_stats(); m_texture_upload_buffer_ring_info.reset_allocation_stats();
m_raster_env_ring_info.reset_allocation_stats(); m_raster_env_ring_info.reset_allocation_stats();
m_current_frame->reset_heap_ptrs(); m_current_frame->reset_heap_ptrs();
m_last_heap_sync_time = get_system_time(); m_last_heap_sync_time = rsx::get_shared_tag();
} }
else else
{ {

View File

@ -244,7 +244,7 @@ namespace vk
texture_upload_heap_ptr = texture_loc; texture_upload_heap_ptr = texture_loc;
rasterizer_env_heap_ptr = rasterizer_loc; rasterizer_env_heap_ptr = rasterizer_loc;
last_frame_sync_time = get_system_time(); last_frame_sync_time = rsx::get_shared_tag();
} }
void reset_heap_ptrs() void reset_heap_ptrs()

View File

@ -3,6 +3,7 @@
#include "RSXThread.h" #include "RSXThread.h"
#include "rsx_utils.h" #include "rsx_utils.h"
#include "rsx_decode.h" #include "rsx_decode.h"
#include "Common/time.hpp"
#include "Emu/Cell/PPUCallback.h" #include "Emu/Cell/PPUCallback.h"
#include "Emu/Cell/lv2/sys_rsx.h" #include "Emu/Cell/lv2/sys_rsx.h"
#include "Emu/RSX/Common/BufferUtils.h" #include "Emu/RSX/Common/BufferUtils.h"
@ -106,7 +107,7 @@ namespace rsx
rsx->flush_fifo(); rsx->flush_fifo();
} }
u64 start = get_system_time(); u64 start = rsx::uclock();
while (sema != arg) while (sema != arg)
{ {
if (rsx->is_stopped()) if (rsx->is_stopped())
@ -118,7 +119,7 @@ namespace rsx
{ {
if (rsx->is_paused()) if (rsx->is_paused())
{ {
const u64 start0 = get_system_time(); const u64 start0 = rsx::uclock();
while (rsx->is_paused()) while (rsx->is_paused())
{ {
@ -126,11 +127,11 @@ namespace rsx
} }
// Reset // Reset
start += get_system_time() - start0; start += rsx::uclock() - start0;
} }
else else
{ {
if ((get_system_time() - start) > tdr) if ((rsx::uclock() - start) > tdr)
{ {
// If longer than driver timeout force exit // If longer than driver timeout force exit
rsx_log.error("nv406e::semaphore_acquire has timed out. semaphore_address=0x%X", addr); rsx_log.error("nv406e::semaphore_acquire has timed out. semaphore_address=0x%X", addr);
@ -143,7 +144,7 @@ namespace rsx
} }
rsx->fifo_wake_delay(); rsx->fifo_wake_delay();
rsx->performance_counters.idle_time += (get_system_time() - start); rsx->performance_counters.idle_time += (rsx::uclock() - start);
} }
void semaphore_release(thread* rsx, u32 /*reg*/, u32 arg) void semaphore_release(thread* rsx, u32 /*reg*/, u32 arg)

View File

@ -479,6 +479,7 @@
<ClInclude Include="Emu\RSX\Common\bitfield.hpp" /> <ClInclude Include="Emu\RSX\Common\bitfield.hpp" />
<ClInclude Include="Emu\RSX\Common\profiling_timer.hpp" /> <ClInclude Include="Emu\RSX\Common\profiling_timer.hpp" />
<ClInclude Include="Emu\RSX\Common\simple_array.hpp" /> <ClInclude Include="Emu\RSX\Common\simple_array.hpp" />
<ClInclude Include="Emu\RSX\Common\time.hpp" />
<ClInclude Include="Emu\RSX\Overlays\overlay_edit_text.hpp" /> <ClInclude Include="Emu\RSX\Overlays\overlay_edit_text.hpp" />
<ClInclude Include="Emu\RSX\Overlays\overlay_list_view.hpp" /> <ClInclude Include="Emu\RSX\Overlays\overlay_list_view.hpp" />
<ClInclude Include="Emu\RSX\Overlays\overlay_media_list_dialog.h" /> <ClInclude Include="Emu\RSX\Overlays\overlay_media_list_dialog.h" />

View File

@ -2053,6 +2053,9 @@
<ClInclude Include="Emu\RSX\Overlays\overlay_media_list_dialog.h"> <ClInclude Include="Emu\RSX\Overlays\overlay_media_list_dialog.h">
<Filter>Emu\GPU\RSX\Overlays</Filter> <Filter>Emu\GPU\RSX\Overlays</Filter>
</ClInclude> </ClInclude>
<ClInclude Include="Emu\RSX\Common\time.hpp">
<Filter>Emu\GPU\RSX\Common</Filter>
</ClInclude>
</ItemGroup> </ItemGroup>
<ItemGroup> <ItemGroup>
<None Include="Emu\RSX\Common\Interpreter\FragmentInterpreter.glsl"> <None Include="Emu\RSX\Common\Interpreter\FragmentInterpreter.glsl">