[GPU] Change FPS limit strategy

This commit is contained in:
Mateusz Dukat 2024-03-14 20:26:39 +01:00
parent b9061e6292
commit 5122da3af7
4 changed files with 64 additions and 37 deletions

View File

@ -20,7 +20,10 @@ DEFINE_path(
DEFINE_bool(vsync, true, "Enable VSYNC.", "GPU"); DEFINE_bool(vsync, true, "Enable VSYNC.", "GPU");
DEFINE_uint64(vsync_fps, 60, "VSYNC frames per second", "GPU"); DEFINE_uint64(framerate_limit, 60,
"Maximum frames per second. 0 = Unlimited frames.\n"
"Defaults to 60, when set to 0, and VSYNC is enabled.",
"GPU");
DEFINE_bool( DEFINE_bool(
gpu_allow_invalid_fetch_constants, true, gpu_allow_invalid_fetch_constants, true,

View File

@ -18,7 +18,7 @@ DECLARE_path(dump_shaders);
DECLARE_bool(vsync); DECLARE_bool(vsync);
DECLARE_uint64(vsync_fps); DECLARE_uint64(framerate_limit);
DECLARE_bool(gpu_allow_invalid_fetch_constants); DECLARE_bool(gpu_allow_invalid_fetch_constants);

View File

@ -50,7 +50,7 @@ __declspec(dllexport) uint32_t AmdPowerXpressRequestHighPerformance = 1;
} // extern "C" } // extern "C"
#endif // XE_PLATFORM_WIN32 #endif // XE_PLATFORM_WIN32
GraphicsSystem::GraphicsSystem() : vsync_worker_running_(false) { GraphicsSystem::GraphicsSystem() : frame_limiter_worker_running_(false) {
register_file_ = reinterpret_cast<RegisterFile*>(memory::AllocFixed( register_file_ = reinterpret_cast<RegisterFile*>(memory::AllocFixed(
nullptr, sizeof(RegisterFile), memory::AllocationType::kReserveCommit, nullptr, sizeof(RegisterFile), memory::AllocationType::kReserveCommit,
memory::PageAccess::kReadWrite)); memory::PageAccess::kReadWrite));
@ -100,55 +100,79 @@ X_STATUS GraphicsSystem::Setup(cpu::Processor* processor,
reinterpret_cast<cpu::MMIOReadCallback>(ReadRegisterThunk), reinterpret_cast<cpu::MMIOReadCallback>(ReadRegisterThunk),
reinterpret_cast<cpu::MMIOWriteCallback>(WriteRegisterThunk)); reinterpret_cast<cpu::MMIOWriteCallback>(WriteRegisterThunk));
// 60hz vsync timer. // Frame limiter thread.
vsync_worker_running_ = true; frame_limiter_worker_running_ = true;
vsync_worker_thread_ = frame_limiter_worker_thread_ =
kernel::object_ref<kernel::XHostThread>(new kernel::XHostThread( kernel::object_ref<kernel::XHostThread>(new kernel::XHostThread(
kernel_state_, 128 * 1024, 0, kernel_state_, 128 * 1024, 0,
[this]() { [this]() {
uint64_t normalized_framerate_limit =
std::max<uint64_t>(0, cvars::framerate_limit);
// If VSYNC is enabled, but frames are not limited,
// lock framerate at default value of 60
if (normalized_framerate_limit == 0 && cvars::vsync)
normalized_framerate_limit = 60;
const double vsync_duration_d = const double vsync_duration_d =
cvars::vsync cvars::vsync
? std::max<double>( ? std::max<double>(5.0,
5.0, 1000.0 / static_cast<double>(cvars::vsync_fps)) 1000.0 / static_cast<double>(
normalized_framerate_limit))
: 1.0; : 1.0;
uint64_t last_frame_time = Clock::QueryGuestTickCount(); uint64_t last_frame_time = Clock::QueryGuestTickCount();
// Sleep for 90% of the vblank duration, spin for 10% // Sleep for 90% of the vblank duration, spin for 10%
const double duration_scalar = 0.90; const double duration_scalar = 0.90;
while (vsync_worker_running_) { while (frame_limiter_worker_running_) {
const uint64_t current_time = Clock::QueryGuestTickCount(); if (cvars::vsync) {
const uint64_t tick_freq = Clock::guest_tick_frequency(); const uint64_t current_time = Clock::QueryGuestTickCount();
const uint64_t time_delta = current_time - last_frame_time; const uint64_t tick_freq = Clock::guest_tick_frequency();
const double elapsed_d = const uint64_t time_delta = current_time - last_frame_time;
static_cast<double>(time_delta) / const double elapsed_d =
(static_cast<double>(tick_freq) / 1000.0); static_cast<double>(time_delta) /
if (elapsed_d >= vsync_duration_d) { (static_cast<double>(tick_freq) / 1000.0);
last_frame_time = current_time; if (elapsed_d >= vsync_duration_d) {
last_frame_time = current_time;
// TODO(disjtqz): should recalculate the remaining time to a // TODO(disjtqz): should recalculate the remaining time to a
// vblank after MarkVblank, no idea how long the guest code // vblank after MarkVblank, no idea how long the guest code
// normally takes // normally takes
MarkVblank(); MarkVblank();
if (cvars::vsync) { if (cvars::vsync) {
const uint64_t estimated_nanoseconds = static_cast<uint64_t>( const uint64_t estimated_nanoseconds =
(vsync_duration_d * 1000000.0) * static_cast<uint64_t>(
duration_scalar); // 1000 microseconds = 1 ms (vsync_duration_d * 1000000.0) *
duration_scalar); // vsync_duration_d is in ms; 1 ms = 1,000,000 ns
threading::NanoSleep(estimated_nanoseconds); threading::NanoSleep(estimated_nanoseconds);
}
} }
} }
if (!cvars::vsync) { if (!cvars::vsync) {
xe::threading::Sleep(std::chrono::milliseconds(1)); MarkVblank();
if (normalized_framerate_limit > 0) {
// framerate_limit is over 0, vsync disabled
// - No VSYNC + limited frames defined by user
uint64_t framerate_limited_sleep_time =
1000000000 / normalized_framerate_limit;
xe::threading::NanoSleep(framerate_limited_sleep_time);
} else {
// framerate_limit is 0, vsync disabled
// - No VSYNC + unlimited frames
xe::threading::Sleep(std::chrono::milliseconds(1));
}
} }
} }
return 0; return 0;
}, },
kernel_state->GetIdleProcess())); kernel_state->GetIdleProcess()));
// As we run vblank interrupts the debugger must be able to suspend us. // As we run vblank interrupts the debugger must be able to suspend us.
vsync_worker_thread_->set_can_debugger_suspend(true); frame_limiter_worker_thread_->set_can_debugger_suspend(true);
vsync_worker_thread_->set_name("GPU VSync"); frame_limiter_worker_thread_->set_name("GPU Frame limiter");
vsync_worker_thread_->Create(); frame_limiter_worker_thread_->Create();
vsync_worker_thread_->thread()->set_priority( frame_limiter_worker_thread_->thread()->set_priority(
threading::ThreadPriority::kLowest); threading::ThreadPriority::kLowest);
if (cvars::trace_gpu_stream) { if (cvars::trace_gpu_stream) {
BeginTracing(); BeginTracing();
@ -164,10 +188,10 @@ void GraphicsSystem::Shutdown() {
command_processor_.reset(); command_processor_.reset();
} }
if (vsync_worker_thread_) { if (frame_limiter_worker_thread_) {
vsync_worker_running_ = false; frame_limiter_worker_running_ = false;
vsync_worker_thread_->Wait(0, 0, 0, nullptr); frame_limiter_worker_thread_->Wait(0, 0, 0, nullptr);
vsync_worker_thread_.reset(); frame_limiter_worker_thread_.reset();
} }
if (presenter_) { if (presenter_) {

View File

@ -109,8 +109,8 @@ class GraphicsSystem {
uint32_t interrupt_callback_ = 0; uint32_t interrupt_callback_ = 0;
uint32_t interrupt_callback_data_ = 0; uint32_t interrupt_callback_data_ = 0;
std::atomic<bool> vsync_worker_running_; std::atomic<bool> frame_limiter_worker_running_;
kernel::object_ref<kernel::XHostThread> vsync_worker_thread_; kernel::object_ref<kernel::XHostThread> frame_limiter_worker_thread_;
RegisterFile* register_file_; RegisterFile* register_file_;
std::unique_ptr<CommandProcessor> command_processor_; std::unique_ptr<CommandProcessor> command_processor_;