rsx: Avoid calling slow functions every draw call

- Use TSC for timing where interval duration matters.
- Use atomic counter for ordering timestamps otherwise.
This commit is contained in:
kd-11 2022-03-06 15:09:28 +03:00 committed by kd-11
parent 762b594927
commit cfecbb24ca
10 changed files with 80 additions and 31 deletions

View File

@ -147,6 +147,8 @@ namespace rsx
u8 samples_x = 1;
u8 samples_y = 1;
rsx::address_range memory_range;
std::unique_ptr<typename std::remove_pointer<image_storage_type>::type> resolve_surface;
surface_sample_layout sample_layout = surface_sample_layout::null;
surface_raster_type raster_type = surface_raster_type::linear;
@ -348,7 +350,14 @@ namespace rsx
// Stores `address` as the surface base address and caches the address range
// derived from it in `memory_range`.
void queue_tag(u32 address)
{
// Both pitches are passed through ensure() before they are used in the range math below.
ensure(native_pitch);
ensure(rsx_pitch);
base_addr = address;
// Range length is (internal_height * rsx_pitch) - (rsx_pitch - native_pitch),
// which is algebraically (internal_height - 1) * rsx_pitch + native_pitch.
// NOTE(review): assumes rsx_pitch >= native_pitch and a non-zero height; otherwise the unsigned math wraps - confirm.
const u32 internal_height = get_surface_height<rsx::surface_metrics::samples>();
const u32 excess = (rsx_pitch - native_pitch);
memory_range = rsx::address_range::start_length(base_addr, internal_height * rsx_pitch - excess);
}
void sync_tag()
@ -394,6 +403,10 @@ namespace rsx
const auto sample_offset = (samples[n].y * rsx_pitch) + samples[n].x;
memory_tag_samples[n].first = (sample_offset + base_addr);
}
const u32 internal_height = get_surface_height<rsx::surface_metrics::samples>();
const u32 excess = (rsx_pitch - native_pitch);
memory_range = rsx::address_range::start_length(base_addr, internal_height * rsx_pitch - excess);
}
void sync_tag()
@ -629,9 +642,7 @@ namespace rsx
inline rsx::address_range get_memory_range() const
{
const u32 internal_height = get_surface_height<rsx::surface_metrics::samples>();
const u32 excess = (rsx_pitch - native_pitch);
return rsx::address_range::start_length(base_addr, internal_height * rsx_pitch - excess);
return memory_range;
}
template <typename T>

View File

@ -0,0 +1,21 @@
#pragma once
#include <util/asm.hpp>
#include <util/sysinfo.hpp>
extern u64 get_system_time();
namespace rsx
{
// Returns a timestamp for RSX interval timing.
// Uses the TSC scaled down by (TSC frequency / 1'000'000) when that divisor is
// non-zero, otherwise falls back to get_system_time().
// NOTE(review): presumably both paths yield microseconds (assumes get_tsc_freq() is in Hz) - confirm.
static inline u64 uclock()
{
	// Integer division truncates, so this is 0 whenever the reported
	// frequency is below 1'000'000 (including an unavailable/zero frequency).
	const u64 ticks_per_unit = (utils::get_tsc_freq() / 1000000);

	if (!ticks_per_unit)
	{
		// No usable TSC scale; use the system clock instead.
		return get_system_time();
	}

	return utils::get_tsc() / ticks_per_unit;
}
}

View File

@ -3,6 +3,7 @@
#include "RSXFIFO.h"
#include "RSXThread.h"
#include "Capture/rsx_capture.h"
#include "Common/time.hpp"
#include "Emu/Cell/lv2/sys_rsx.h"
namespace rsx
@ -395,7 +396,7 @@ namespace rsx
{
if (performance_counters.state == FIFO_state::running)
{
performance_counters.FIFO_idle_timestamp = get_system_time();
performance_counters.FIFO_idle_timestamp = rsx::uclock();
performance_counters.state = FIFO_state::nop;
}
@ -405,7 +406,7 @@ namespace rsx
{
if (performance_counters.state == FIFO_state::running)
{
performance_counters.FIFO_idle_timestamp = get_system_time();
performance_counters.FIFO_idle_timestamp = rsx::uclock();
performance_counters.state = FIFO_state::empty;
}
else
@ -437,7 +438,7 @@ namespace rsx
//Jump to self. Often preceded by NOP
if (performance_counters.state == FIFO_state::running)
{
performance_counters.FIFO_idle_timestamp = get_system_time();
performance_counters.FIFO_idle_timestamp = rsx::uclock();
sync_point_request.release(true);
}
@ -456,7 +457,7 @@ namespace rsx
//Jump to self. Often preceded by NOP
if (performance_counters.state == FIFO_state::running)
{
performance_counters.FIFO_idle_timestamp = get_system_time();
performance_counters.FIFO_idle_timestamp = rsx::uclock();
sync_point_request.release(true);
}
@ -513,7 +514,7 @@ namespace rsx
}
// Update performance counters with time spent in idle mode
performance_counters.idle_time += (get_system_time() - performance_counters.FIFO_idle_timestamp);
performance_counters.idle_time += (rsx::uclock() - performance_counters.FIFO_idle_timestamp);
}
do

View File

@ -7,6 +7,7 @@
#include "Common/BufferUtils.h"
#include "Common/texture_cache.h"
#include "Common/surface_store.h"
#include "Common/time.hpp"
#include "Capture/rsx_capture.h"
#include "rsx_methods.h"
#include "gcm_printing.h"
@ -483,6 +484,7 @@ namespace rsx
// This whole thing becomes a mess if we don't have a provoking attribute.
const auto vertex_id = vertex_push_buffers[0].get_vertex_id();
vertex_push_buffers[attribute].set_vertex_data(attribute, vertex_id, subreg_index, type, size, value);
m_graphics_state |= rsx::pipeline_state::push_buffer_arrays_dirty;
}
u32 thread::get_push_buffer_vertex_count() const
@ -507,7 +509,9 @@ namespace rsx
void thread::end()
{
if (capture_current_frame)
{
capture::capture_draw_memory(this);
}
in_begin_end = false;
m_frame_stats.draw_calls++;
@ -517,12 +521,17 @@ namespace rsx
m_graphics_state |= rsx::pipeline_state::framebuffer_reads_dirty;
ROP_sync_timestamp = rsx::get_shared_tag();
for (auto & push_buf : vertex_push_buffers)
if (m_graphics_state & rsx::pipeline_state::push_buffer_arrays_dirty)
{
//Disabled, see https://github.com/RPCS3/rpcs3/issues/1932
//rsx::method_registers.register_vertex_info[index].size = 0;
for (auto& push_buf : vertex_push_buffers)
{
//Disabled, see https://github.com/RPCS3/rpcs3/issues/1932
//rsx::method_registers.register_vertex_info[index].size = 0;
push_buf.clear();
push_buf.clear();
}
m_graphics_state &= ~rsx::pipeline_state::push_buffer_arrays_dirty;
}
element_push_buffer.clear();
@ -630,7 +639,7 @@ namespace rsx
fifo_ctrl = std::make_unique<::rsx::FIFO::FIFO_control>(this);
last_flip_time = get_system_time() - 1000000;
last_flip_time = rsx::uclock() - 1000000;
vblank_count = 0;
@ -642,7 +651,7 @@ namespace rsx
#else
constexpr u32 host_min_quantum = 500;
#endif
u64 start_time = get_system_time();
u64 start_time = rsx::uclock();
u64 vblank_rate = g_cfg.video.vblank_rate;
u64 vblank_period = 1'000'000 + u64{g_cfg.video.vblank_ntsc.get()} * 1000;
@ -653,7 +662,7 @@ namespace rsx
while (!is_stopped())
{
// Get current time
const u64 current = get_system_time();
const u64 current = rsx::uclock();
// Calculate the time at which we need to send a new VBLANK signal
const u64 post_event_time = start_time + (local_vblank_count + 1) * vblank_period / vblank_rate;
@ -715,7 +724,7 @@ namespace rsx
if (Emu.IsPaused())
{
// Save the difference before pause
start_time = get_system_time() - start_time;
start_time = rsx::uclock() - start_time;
while (Emu.IsPaused() && !is_stopped())
{
@ -723,7 +732,7 @@ namespace rsx
}
// Restore difference
start_time = get_system_time() - start_time;
start_time = rsx::uclock() - start_time;
}
}
});
@ -2602,7 +2611,7 @@ namespace rsx
void thread::recover_fifo(u32 line, u32 col, const char* file, const char* func)
{
const u64 current_time = get_system_time();
const u64 current_time = rsx::uclock();
if (recovered_fifo_cmds_history.size() == 20u)
{
@ -2659,7 +2668,7 @@ namespace rsx
// Some cases do not need full delay
remaining = utils::aligned_div(remaining, div);
const u64 until = get_system_time() + remaining;
const u64 until = rsx::uclock() + remaining;
while (true)
{
@ -2691,7 +2700,7 @@ namespace rsx
busy_wait(100);
}
const u64 current = get_system_time();
const u64 current = rsx::uclock();
if (current >= until)
{
@ -2922,7 +2931,7 @@ namespace rsx
//Average load over around 30 frames
if (!performance_counters.last_update_timestamp || performance_counters.sampled_frames > 30)
{
const auto timestamp = get_system_time();
const auto timestamp = rsx::uclock();
const auto idle = performance_counters.idle_time.load();
const auto elapsed = timestamp - performance_counters.last_update_timestamp;
@ -3086,7 +3095,7 @@ namespace rsx
if (limit)
{
const u64 time = get_system_time() - Emu.GetPauseTime();
const u64 time = rsx::uclock() - Emu.GetPauseTime();
const u64 needed_us = static_cast<u64>(1000000 / limit);
if (int_flip_index == 0)
@ -3124,7 +3133,7 @@ namespace rsx
flip(m_queued_flip);
last_flip_time = get_system_time() - 1000000;
last_flip_time = rsx::uclock() - 1000000;
flip_status = CELL_GCM_DISPLAY_FLIP_STATUS_DONE;
m_queued_flip.in_progress = false;
@ -3630,7 +3639,7 @@ namespace rsx
}
}
if (m_tsc = get_system_time(); m_tsc < m_next_tsc)
if (m_tsc = rsx::uclock(); m_tsc < m_next_tsc)
{
return;
}

View File

@ -129,6 +129,8 @@ namespace rsx
polygon_stipple_pattern_dirty = 0x8000, // Rasterizer stippling pattern changed
line_stipple_pattern_dirty = 0x10000, // Line stippling pattern changed
push_buffer_arrays_dirty = 0x20000, // Push buffers have data written to them (immediate mode vertex buffers)
fragment_program_dirty = fragment_program_ucode_dirty | fragment_program_state_dirty,
vertex_program_dirty = vertex_program_ucode_dirty | vertex_program_state_dirty,
invalidate_pipeline_bits = fragment_program_dirty | vertex_program_dirty,

View File

@ -1085,7 +1085,7 @@ void VKGSRender::check_heap_status(u32 flags)
m_texture_upload_buffer_ring_info.reset_allocation_stats();
m_raster_env_ring_info.reset_allocation_stats();
m_current_frame->reset_heap_ptrs();
m_last_heap_sync_time = get_system_time();
m_last_heap_sync_time = rsx::get_shared_tag();
}
else
{

View File

@ -244,7 +244,7 @@ namespace vk
texture_upload_heap_ptr = texture_loc;
rasterizer_env_heap_ptr = rasterizer_loc;
last_frame_sync_time = get_system_time();
last_frame_sync_time = rsx::get_shared_tag();
}
void reset_heap_ptrs()

View File

@ -3,6 +3,7 @@
#include "RSXThread.h"
#include "rsx_utils.h"
#include "rsx_decode.h"
#include "Common/time.hpp"
#include "Emu/Cell/PPUCallback.h"
#include "Emu/Cell/lv2/sys_rsx.h"
#include "Emu/RSX/Common/BufferUtils.h"
@ -106,7 +107,7 @@ namespace rsx
rsx->flush_fifo();
}
u64 start = get_system_time();
u64 start = rsx::uclock();
while (sema != arg)
{
if (rsx->is_stopped())
@ -118,7 +119,7 @@ namespace rsx
{
if (rsx->is_paused())
{
const u64 start0 = get_system_time();
const u64 start0 = rsx::uclock();
while (rsx->is_paused())
{
@ -126,11 +127,11 @@ namespace rsx
}
// Reset
start += get_system_time() - start0;
start += rsx::uclock() - start0;
}
else
{
if ((get_system_time() - start) > tdr)
if ((rsx::uclock() - start) > tdr)
{
// If longer than driver timeout force exit
rsx_log.error("nv406e::semaphore_acquire has timed out. semaphore_address=0x%X", addr);
@ -143,7 +144,7 @@ namespace rsx
}
rsx->fifo_wake_delay();
rsx->performance_counters.idle_time += (get_system_time() - start);
rsx->performance_counters.idle_time += (rsx::uclock() - start);
}
void semaphore_release(thread* rsx, u32 /*reg*/, u32 arg)

View File

@ -479,6 +479,7 @@
<ClInclude Include="Emu\RSX\Common\bitfield.hpp" />
<ClInclude Include="Emu\RSX\Common\profiling_timer.hpp" />
<ClInclude Include="Emu\RSX\Common\simple_array.hpp" />
<ClInclude Include="Emu\RSX\Common\time.hpp" />
<ClInclude Include="Emu\RSX\Overlays\overlay_edit_text.hpp" />
<ClInclude Include="Emu\RSX\Overlays\overlay_list_view.hpp" />
<ClInclude Include="Emu\RSX\Overlays\overlay_media_list_dialog.h" />

View File

@ -2053,6 +2053,9 @@
<ClInclude Include="Emu\RSX\Overlays\overlay_media_list_dialog.h">
<Filter>Emu\GPU\RSX\Overlays</Filter>
</ClInclude>
<ClInclude Include="Emu\RSX\Common\time.hpp">
<Filter>Emu\GPU\RSX\Common</Filter>
</ClInclude>
</ItemGroup>
<ItemGroup>
<None Include="Emu\RSX\Common\Interpreter\FragmentInterpreter.glsl">