From 922f1f220a2712eb1a705daea2cd4a380408fa18 Mon Sep 17 00:00:00 2001 From: Joel Linn Date: Fri, 22 Nov 2019 03:42:15 +0100 Subject: [PATCH] [CPU] Implement mftb instruction natively. When the cvars clock_no_scaling and clock_source_raw are set, tick counts will be directly calculated in the emitted code. --- src/xenia/base/clock.cc | 5 ++++ src/xenia/base/clock.h | 2 ++ src/xenia/cpu/backend/x64/x64_sequences.cc | 33 +++++++++++++++++++--- 3 files changed, 36 insertions(+), 4 deletions(-) diff --git a/src/xenia/base/clock.cc b/src/xenia/base/clock.cc index af18967f2..398f16708 100644 --- a/src/xenia/base/clock.cc +++ b/src/xenia/base/clock.cc @@ -134,6 +134,11 @@ void Clock::set_guest_time_scalar(double scalar) { RecomputeGuestTickScalar(); } +std::pair Clock::guest_tick_ratio() { + std::lock_guard lock(tick_mutex_); + return guest_tick_ratio_; +} + uint64_t Clock::guest_tick_frequency() { return guest_tick_frequency_; } void Clock::set_guest_tick_frequency(uint64_t frequency) { diff --git a/src/xenia/base/clock.h b/src/xenia/base/clock.h index 00f1ff449..9ab3728c4 100644 --- a/src/xenia/base/clock.h +++ b/src/xenia/base/clock.h @@ -43,6 +43,8 @@ class Clock { // Sets the guest time scalar, adjusting tick and wall clock speed. // Ex: 1x=normal, 2x=double speed, 1/2x=half speed. static void set_guest_time_scalar(double scalar); + // Get the tick ratio between host and guest including time scaling if set. + static std::pair guest_tick_ratio(); // Guest ticks-per-second. static uint64_t guest_tick_frequency(); // Sets the guest ticks-per-second. 
diff --git a/src/xenia/cpu/backend/x64/x64_sequences.cc b/src/xenia/cpu/backend/x64/x64_sequences.cc index 7d18cb4d0..6ae28b468 100644 --- a/src/xenia/cpu/backend/x64/x64_sequences.cc +++ b/src/xenia/cpu/backend/x64/x64_sequences.cc @@ -2,7 +2,7 @@ ****************************************************************************** * Xenia : Xbox 360 Emulator Research Project * ****************************************************************************** - * Copyright 2014 Ben Vanik. All rights reserved. * + * Copyright 2019 Ben Vanik. All rights reserved. * * Released under the BSD license - see LICENSE in the root for more details. * ****************************************************************************** */ @@ -440,9 +440,34 @@ EMITTER_OPCODE_TABLE(OPCODE_ROUND, ROUND_F32, ROUND_F64, ROUND_V128); // ============================================================================ struct LOAD_CLOCK : Sequence> { static void Emit(X64Emitter& e, const EmitArgType& i) { - // It'd be cool to call QueryPerformanceCounter directly, but w/e. - e.CallNative(LoadClock); - e.mov(i.dest, e.rax); + // When scaling is disabled and the raw clock source is selected, the code + // in the Clock class is actually just forwarding tick counts after one + // simple multiply and division. In that case we rather bake the scaling in + // here to cut extra function calls with CPU cache misses and stack frame + // overhead. + if (cvars::clock_no_scaling && cvars::clock_source_raw) { + auto ratio = Clock::guest_tick_ratio(); + // The 360 CPU is an in-order CPU, AMD64 usually isn't. Without + // mfence/lfence magic the rdtsc instruction can be executed sooner or + // later in the cache window. Since its resolution however is much higher + // than the 360's mftb instruction this can safely be ignored. + + // Read time stamp in edx (high part) and eax (low part). + e.rdtsc(); + // Make it a 64 bit number in rax. + e.shl(e.rdx, 32); + e.or_(e.rax, e.rdx); + // Apply tick frequency scaling. 
+ e.mov(e.rcx, ratio.first); + e.mul(e.rcx); + // We actually now have a 128 bit number in rdx:rax. + e.mov(e.rcx, ratio.second); + e.div(e.rcx); + e.mov(i.dest, e.rax); + } else { + e.CallNative(LoadClock); + e.mov(i.dest, e.rax); + } } static uint64_t LoadClock(void* raw_context) { return Clock::QueryGuestTickCount();