[GPU] Change FPS limit strategy

This commit is contained in:
Mateusz Dukat 2024-03-14 20:26:39 +01:00 committed by Radosław Gliński
parent b9061e6292
commit 34ed823bfc
4 changed files with 64 additions and 37 deletions

View File

@ -20,7 +20,10 @@ DEFINE_path(
DEFINE_bool(vsync, true, "Enable VSYNC.", "GPU");
DEFINE_uint64(vsync_fps, 60, "VSYNC frames per second", "GPU");
DEFINE_uint64(framerate_limit, 60,
"Maximum frames per second. 0 = Unlimited frames.\n"
"Defaults to 60, when set to 0, and VSYNC is enabled.",
"GPU");
DEFINE_bool(
gpu_allow_invalid_fetch_constants, true,

View File

@ -18,7 +18,7 @@ DECLARE_path(dump_shaders);
DECLARE_bool(vsync);
DECLARE_uint64(vsync_fps);
DECLARE_uint64(framerate_limit);
DECLARE_bool(gpu_allow_invalid_fetch_constants);

View File

@ -50,7 +50,7 @@ __declspec(dllexport) uint32_t AmdPowerXpressRequestHighPerformance = 1;
} // extern "C"
#endif // XE_PLATFORM_WIN32
GraphicsSystem::GraphicsSystem() : vsync_worker_running_(false) {
GraphicsSystem::GraphicsSystem() : frame_limiter_worker_running_(false) {
register_file_ = reinterpret_cast<RegisterFile*>(memory::AllocFixed(
nullptr, sizeof(RegisterFile), memory::AllocationType::kReserveCommit,
memory::PageAccess::kReadWrite));
@ -100,55 +100,79 @@ X_STATUS GraphicsSystem::Setup(cpu::Processor* processor,
reinterpret_cast<cpu::MMIOReadCallback>(ReadRegisterThunk),
reinterpret_cast<cpu::MMIOWriteCallback>(WriteRegisterThunk));
// 60hz vsync timer.
vsync_worker_running_ = true;
vsync_worker_thread_ =
// Frame limiter thread.
frame_limiter_worker_running_ = true;
frame_limiter_worker_thread_ =
kernel::object_ref<kernel::XHostThread>(new kernel::XHostThread(
kernel_state_, 128 * 1024, 0,
[this]() {
uint64_t normalized_framerate_limit =
std::max<uint64_t>(0, cvars::framerate_limit);
// If VSYNC is enabled, but frames are not limited,
// lock framerate at default value of 60
if (normalized_framerate_limit == 0 && cvars::vsync)
normalized_framerate_limit = 60;
const double vsync_duration_d =
cvars::vsync
? std::max<double>(
5.0, 1000.0 / static_cast<double>(cvars::vsync_fps))
? std::max<double>(5.0,
1000.0 / static_cast<double>(
normalized_framerate_limit))
: 1.0;
uint64_t last_frame_time = Clock::QueryGuestTickCount();
// Sleep for 90% of the vblank duration, spin for 10%
const double duration_scalar = 0.90;
while (vsync_worker_running_) {
const uint64_t current_time = Clock::QueryGuestTickCount();
const uint64_t tick_freq = Clock::guest_tick_frequency();
const uint64_t time_delta = current_time - last_frame_time;
const double elapsed_d =
static_cast<double>(time_delta) /
(static_cast<double>(tick_freq) / 1000.0);
if (elapsed_d >= vsync_duration_d) {
last_frame_time = current_time;
while (frame_limiter_worker_running_) {
if (cvars::vsync) {
const uint64_t current_time = Clock::QueryGuestTickCount();
const uint64_t tick_freq = Clock::guest_tick_frequency();
const uint64_t time_delta = current_time - last_frame_time;
const double elapsed_d =
static_cast<double>(time_delta) /
(static_cast<double>(tick_freq) / 1000.0);
if (elapsed_d >= vsync_duration_d) {
last_frame_time = current_time;
// TODO(disjtqz): should recalculate the remaining time to a
// vblank after MarkVblank, no idea how long the guest code
// normally takes
MarkVblank();
if (cvars::vsync) {
const uint64_t estimated_nanoseconds = static_cast<uint64_t>(
(vsync_duration_d * 1000000.0) *
duration_scalar); // 1000 microseconds = 1 ms
// TODO(disjtqz): should recalculate the remaining time to a
// vblank after MarkVblank, no idea how long the guest code
// normally takes
MarkVblank();
if (cvars::vsync) {
const uint64_t estimated_nanoseconds =
static_cast<uint64_t>(
(vsync_duration_d * 1000000.0) *
duration_scalar); // 1000 microseconds = 1 ms
threading::NanoSleep(estimated_nanoseconds);
threading::NanoSleep(estimated_nanoseconds);
}
}
}
if (!cvars::vsync) {
xe::threading::Sleep(std::chrono::milliseconds(1));
MarkVblank();
if (normalized_framerate_limit > 0) {
// framerate_limit is over 0, vsync disabled
// - No VSYNC + limited frames defined by user
uint64_t framerate_limited_sleep_time =
1000000000 / normalized_framerate_limit;
xe::threading::NanoSleep(framerate_limited_sleep_time);
} else {
// framerate_limit is 0, vsync disabled
// - No VSYNC + unlimited frames
xe::threading::Sleep(std::chrono::milliseconds(1));
}
}
}
return 0;
},
kernel_state->GetIdleProcess()));
// As we run vblank interrupts the debugger must be able to suspend us.
vsync_worker_thread_->set_can_debugger_suspend(true);
vsync_worker_thread_->set_name("GPU VSync");
vsync_worker_thread_->Create();
vsync_worker_thread_->thread()->set_priority(
frame_limiter_worker_thread_->set_can_debugger_suspend(true);
frame_limiter_worker_thread_->set_name("GPU Frame limiter");
frame_limiter_worker_thread_->Create();
frame_limiter_worker_thread_->thread()->set_priority(
threading::ThreadPriority::kLowest);
if (cvars::trace_gpu_stream) {
BeginTracing();
@ -164,10 +188,10 @@ void GraphicsSystem::Shutdown() {
command_processor_.reset();
}
if (vsync_worker_thread_) {
vsync_worker_running_ = false;
vsync_worker_thread_->Wait(0, 0, 0, nullptr);
vsync_worker_thread_.reset();
if (frame_limiter_worker_thread_) {
frame_limiter_worker_running_ = false;
frame_limiter_worker_thread_->Wait(0, 0, 0, nullptr);
frame_limiter_worker_thread_.reset();
}
if (presenter_) {

View File

@ -109,8 +109,8 @@ class GraphicsSystem {
uint32_t interrupt_callback_ = 0;
uint32_t interrupt_callback_data_ = 0;
std::atomic<bool> vsync_worker_running_;
kernel::object_ref<kernel::XHostThread> vsync_worker_thread_;
std::atomic<bool> frame_limiter_worker_running_;
kernel::object_ref<kernel::XHostThread> frame_limiter_worker_thread_;
RegisterFile* register_file_;
std::unique_ptr<CommandProcessor> command_processor_;