Merge remote-tracking branch 'JoelLinn/fix-cpu-clock' into canary
This commit is contained in:
parent
feb4f0c2a5
commit
03e926605a
|
@ -10,50 +10,135 @@
|
|||
#include "xenia/base/clock.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <climits>
|
||||
#include <limits>
|
||||
#include <mutex>
|
||||
|
||||
#include "xenia/base/assert.h"
|
||||
#include "xenia/base/math.h"
|
||||
|
||||
DEFINE_bool(clock_no_scaling, false,
|
||||
"Disable scaling code. Time management and locking is bypassed. "
|
||||
"Guest system time is directly pulled from host.",
|
||||
"CPU");
|
||||
DEFINE_bool(clock_source_raw, false,
|
||||
"Use the RDTSC instruction as the time source. "
|
||||
"Host CPU must support invariant TSC. ",
|
||||
"CPU");
|
||||
|
||||
namespace xe {
|
||||
|
||||
// Time scalar applied to all time operations.
|
||||
double guest_time_scalar_ = 1.0;
|
||||
// Tick frequency of guest.
|
||||
uint64_t guest_tick_frequency_ = Clock::host_tick_frequency();
|
||||
uint64_t guest_tick_frequency_ = Clock::host_tick_frequency_platform();
|
||||
// Base FILETIME of the guest system from app start.
|
||||
uint64_t guest_system_time_base_ = Clock::QueryHostSystemTime();
|
||||
// Combined time and frequency scalar (computed by RecomputeGuestTickScalar).
|
||||
double guest_tick_scalar_ = 1.0;
|
||||
// Combined time and frequency ratio between host and guest.
|
||||
// Split in numerator (first) and denominator (second).
|
||||
// Computed by RecomputeGuestTickScalar.
|
||||
std::pair<uint64_t, uint64_t> guest_tick_ratio_ = std::make_pair(1, 1);
|
||||
|
||||
// Native guest ticks.
|
||||
thread_local uint64_t guest_tick_count_ = 0;
|
||||
// 100ns ticks, relative to guest_system_time_base_.
|
||||
thread_local uint64_t guest_time_filetime_ = 0;
|
||||
uint64_t last_guest_tick_count_ = 0;
|
||||
// Last sampled host tick count.
|
||||
thread_local uint64_t last_host_tick_count_ = Clock::QueryHostTickCount();
|
||||
uint64_t last_host_tick_count_ = Clock::QueryHostTickCount();
|
||||
// Mutex to ensure last_host_tick_count_ and last_guest_tick_count_ are in sync
|
||||
std::mutex tick_mutex_;
|
||||
|
||||
void RecomputeGuestTickScalar() {
|
||||
guest_tick_scalar_ = (guest_tick_frequency_ * guest_time_scalar_) /
|
||||
static_cast<double>(Clock::host_tick_frequency());
|
||||
// Create a rational number with numerator (first) and denominator (second)
|
||||
auto frac =
|
||||
std::make_pair(guest_tick_frequency_, Clock::QueryHostTickFrequency());
|
||||
// Doing it this way ensures we don't mess up our frequency scaling and
|
||||
// precisely controls the precision the guest_time_scalar_ can have.
|
||||
if (guest_time_scalar_ > 1.0) {
|
||||
frac.first *= static_cast<uint64_t>(guest_time_scalar_ * 10.0);
|
||||
frac.second *= 10;
|
||||
} else {
|
||||
frac.first *= 10;
|
||||
frac.second *= static_cast<uint64_t>(10.0 / guest_time_scalar_);
|
||||
}
|
||||
// Keep this a rational calculation and reduce the fraction
|
||||
reduce_fraction(frac);
|
||||
|
||||
std::lock_guard<std::mutex> lock(tick_mutex_);
|
||||
guest_tick_ratio_ = frac;
|
||||
}
|
||||
|
||||
void UpdateGuestClock() {
|
||||
// Update the guest timer for all threads.
|
||||
// Return a copy of the value so locking is reduced.
|
||||
uint64_t UpdateGuestClock() {
|
||||
uint64_t host_tick_count = Clock::QueryHostTickCount();
|
||||
uint64_t host_tick_delta = host_tick_count > last_host_tick_count_
|
||||
? host_tick_count - last_host_tick_count_
|
||||
: 0;
|
||||
last_host_tick_count_ = host_tick_count;
|
||||
uint64_t guest_tick_delta = uint64_t(host_tick_delta * guest_tick_scalar_);
|
||||
guest_tick_count_ += guest_tick_delta;
|
||||
guest_time_filetime_ += (guest_tick_delta * 10000000) / guest_tick_frequency_;
|
||||
|
||||
if (cvars::clock_no_scaling) {
|
||||
// Nothing to update, calculate on the fly
|
||||
return host_tick_count * guest_tick_ratio_.first / guest_tick_ratio_.second;
|
||||
}
|
||||
|
||||
std::unique_lock<std::mutex> lock(tick_mutex_, std::defer_lock);
|
||||
if (lock.try_lock()) {
|
||||
// Translate host tick count to guest tick count.
|
||||
uint64_t host_tick_delta = host_tick_count > last_host_tick_count_
|
||||
? host_tick_count - last_host_tick_count_
|
||||
: 0;
|
||||
last_host_tick_count_ = host_tick_count;
|
||||
uint64_t guest_tick_delta =
|
||||
host_tick_delta * guest_tick_ratio_.first / guest_tick_ratio_.second;
|
||||
last_guest_tick_count_ += guest_tick_delta;
|
||||
return last_guest_tick_count_;
|
||||
} else {
|
||||
// Wait until another thread has finished updating the clock.
|
||||
lock.lock();
|
||||
return last_guest_tick_count_;
|
||||
}
|
||||
}
|
||||
|
||||
// Offset of the current guest system file time relative to the guest base time.
|
||||
inline uint64_t QueryGuestSystemTimeOffset() {
|
||||
if (cvars::clock_no_scaling) {
|
||||
return Clock::QueryHostSystemTime() - guest_system_time_base_;
|
||||
}
|
||||
|
||||
auto guest_tick_count = UpdateGuestClock();
|
||||
|
||||
uint64_t numerator = 10000000; // 100ns/10MHz resolution
|
||||
uint64_t denominator = guest_tick_frequency_;
|
||||
reduce_fraction(numerator, denominator);
|
||||
|
||||
return guest_tick_count * numerator / denominator;
|
||||
}
|
||||
|
||||
uint64_t Clock::QueryHostTickFrequency() {
|
||||
if (cvars::clock_source_raw) {
|
||||
return host_tick_frequency_raw();
|
||||
} else {
|
||||
return host_tick_frequency_platform();
|
||||
}
|
||||
}
|
||||
uint64_t Clock::QueryHostTickCount() {
|
||||
if (cvars::clock_source_raw) {
|
||||
return host_tick_count_raw();
|
||||
} else {
|
||||
return host_tick_count_platform();
|
||||
}
|
||||
}
|
||||
|
||||
// Returns the time scalar currently applied to guest time operations.
double Clock::guest_time_scalar() {
  return guest_time_scalar_;
}
|
||||
|
||||
void Clock::set_guest_time_scalar(double scalar) {
|
||||
if (cvars::clock_no_scaling) {
|
||||
return;
|
||||
}
|
||||
|
||||
guest_time_scalar_ = scalar;
|
||||
RecomputeGuestTickScalar();
|
||||
}
|
||||
|
||||
std::pair<uint64_t, uint64_t> Clock::guest_tick_ratio() {
|
||||
std::lock_guard<std::mutex> lock(tick_mutex_);
|
||||
return guest_tick_ratio_;
|
||||
}
|
||||
|
||||
// Returns the configured guest tick frequency.
uint64_t Clock::guest_tick_frequency() {
  return guest_tick_frequency_;
}
|
||||
|
||||
void Clock::set_guest_tick_frequency(uint64_t frequency) {
|
||||
|
@ -68,43 +153,58 @@ void Clock::set_guest_system_time_base(uint64_t time_base) {
|
|||
}
|
||||
|
||||
uint64_t Clock::QueryGuestTickCount() {
|
||||
UpdateGuestClock();
|
||||
return guest_tick_count_;
|
||||
auto guest_tick_count = UpdateGuestClock();
|
||||
return guest_tick_count;
|
||||
}
|
||||
|
||||
uint64_t Clock::QueryGuestSystemTime() {
|
||||
UpdateGuestClock();
|
||||
return guest_system_time_base_ + guest_time_filetime_;
|
||||
if (cvars::clock_no_scaling) {
|
||||
return Clock::QueryHostSystemTime();
|
||||
}
|
||||
|
||||
auto guest_system_time_offset = QueryGuestSystemTimeOffset();
|
||||
return guest_system_time_base_ + guest_system_time_offset;
|
||||
}
|
||||
|
||||
uint32_t Clock::QueryGuestUptimeMillis() {
|
||||
UpdateGuestClock();
|
||||
uint64_t uptime_millis = guest_tick_count_ / (guest_tick_frequency_ / 1000);
|
||||
uint32_t result = uint32_t(std::min(uptime_millis, uint64_t(UINT_MAX)));
|
||||
return result;
|
||||
}
|
||||
|
||||
void Clock::SetGuestTickCount(uint64_t tick_count) {
|
||||
last_host_tick_count_ = Clock::QueryHostTickCount();
|
||||
guest_tick_count_ = tick_count;
|
||||
return static_cast<uint32_t>(
|
||||
std::min<uint64_t>(QueryGuestSystemTimeOffset() / 10000,
|
||||
std::numeric_limits<uint32_t>::max()));
|
||||
}
|
||||
|
||||
void Clock::SetGuestSystemTime(uint64_t system_time) {
|
||||
last_host_tick_count_ = Clock::QueryHostTickCount();
|
||||
guest_time_filetime_ = system_time - guest_system_time_base_;
|
||||
if (cvars::clock_no_scaling) {
|
||||
// Time is fixed to host time.
|
||||
return;
|
||||
}
|
||||
|
||||
// Query the filetime offset to calculate a new base time.
|
||||
auto guest_system_time_offset = QueryGuestSystemTimeOffset();
|
||||
guest_system_time_base_ = system_time - guest_system_time_offset;
|
||||
}
|
||||
|
||||
uint32_t Clock::ScaleGuestDurationMillis(uint32_t guest_ms) {
|
||||
if (guest_ms == UINT_MAX) {
|
||||
return UINT_MAX;
|
||||
if (cvars::clock_no_scaling) {
|
||||
return guest_ms;
|
||||
}
|
||||
|
||||
constexpr uint64_t max = std::numeric_limits<uint32_t>::max();
|
||||
|
||||
if (guest_ms >= max) {
|
||||
return max;
|
||||
} else if (!guest_ms) {
|
||||
return 0;
|
||||
}
|
||||
uint64_t scaled_ms = uint64_t(uint64_t(guest_ms) * guest_time_scalar_);
|
||||
return uint32_t(std::min(scaled_ms, uint64_t(UINT_MAX)));
|
||||
uint64_t scaled_ms = static_cast<uint64_t>(
|
||||
(static_cast<uint64_t>(guest_ms) * guest_time_scalar_));
|
||||
return static_cast<uint32_t>(std::min(scaled_ms, max));
|
||||
}
|
||||
|
||||
int64_t Clock::ScaleGuestDurationFileTime(int64_t guest_file_time) {
|
||||
if (cvars::clock_no_scaling) {
|
||||
return static_cast<uint64_t>(guest_file_time);
|
||||
}
|
||||
|
||||
if (!guest_file_time) {
|
||||
return 0;
|
||||
} else if (guest_file_time > 0) {
|
||||
|
@ -116,17 +216,23 @@ int64_t Clock::ScaleGuestDurationFileTime(int64_t guest_file_time) {
|
|||
return static_cast<int64_t>(guest_time) + scaled_time;
|
||||
} else {
|
||||
// Relative time.
|
||||
uint64_t scaled_file_time =
|
||||
uint64_t(uint64_t(guest_file_time) * guest_time_scalar_);
|
||||
uint64_t scaled_file_time = static_cast<uint64_t>(
|
||||
(static_cast<uint64_t>(guest_file_time) * guest_time_scalar_));
|
||||
// TODO(benvanik): check for overflow?
|
||||
return scaled_file_time;
|
||||
}
|
||||
}
|
||||
|
||||
void Clock::ScaleGuestDurationTimeval(int32_t* tv_sec, int32_t* tv_usec) {
|
||||
uint64_t scaled_sec = uint64_t(uint64_t(*tv_sec) * guest_tick_scalar_);
|
||||
uint64_t scaled_usec = uint64_t(uint64_t(*tv_usec) * guest_time_scalar_);
|
||||
if (scaled_usec > UINT_MAX) {
|
||||
if (cvars::clock_no_scaling) {
|
||||
return;
|
||||
}
|
||||
|
||||
uint64_t scaled_sec = static_cast<uint64_t>(static_cast<uint64_t>(*tv_sec) *
|
||||
guest_time_scalar_);
|
||||
uint64_t scaled_usec = static_cast<uint64_t>(static_cast<uint64_t>(*tv_usec) *
|
||||
guest_time_scalar_);
|
||||
if (scaled_usec > std::numeric_limits<uint32_t>::max()) {
|
||||
uint64_t overflow_sec = scaled_usec / 1000000;
|
||||
scaled_usec -= overflow_sec * 1000000;
|
||||
scaled_sec += overflow_sec;
|
||||
|
|
|
@ -12,12 +12,25 @@
|
|||
|
||||
#include <cstdint>
|
||||
|
||||
#include "xenia/base/cvar.h"
|
||||
|
||||
DECLARE_bool(clock_no_scaling);
|
||||
DECLARE_bool(clock_source_raw);
|
||||
|
||||
namespace xe {
|
||||
|
||||
class Clock {
|
||||
public:
|
||||
// Host ticks-per-second.
|
||||
static uint64_t host_tick_frequency();
|
||||
// Host ticks-per-second. Generally QueryHostTickFrequency should be used.
|
||||
// Either from the platform-supplied time source or from hardware directly.
|
||||
static uint64_t host_tick_frequency_platform();
|
||||
static uint64_t host_tick_frequency_raw();
|
||||
// Host tick count. Generally QueryHostTickCount() should be used.
|
||||
static uint64_t host_tick_count_platform();
|
||||
static uint64_t host_tick_count_raw();
|
||||
|
||||
// Queries the host tick frequency.
|
||||
static uint64_t QueryHostTickFrequency();
|
||||
// Queries the current host tick count.
|
||||
static uint64_t QueryHostTickCount();
|
||||
// Host time, in FILETIME format.
|
||||
|
@ -30,6 +43,8 @@ class Clock {
|
|||
// Sets the guest time scalar, adjusting tick and wall clock speed.
|
||||
// Ex: 1x=normal, 2x=double speed, 1/2x=half speed.
|
||||
static void set_guest_time_scalar(double scalar);
|
||||
// Get the tick ratio between host and guest including time scaling if set.
|
||||
static std::pair<uint64_t, uint64_t> guest_tick_ratio();
|
||||
// Guest ticks-per-second.
|
||||
static uint64_t guest_tick_frequency();
|
||||
// Sets the guest ticks-per-second.
|
||||
|
@ -39,6 +54,7 @@ class Clock {
|
|||
// Sets the guest time base, used for computing the system time.
|
||||
// By default this is the current system time.
|
||||
static void set_guest_system_time_base(uint64_t time_base);
|
||||
|
||||
// Queries the current guest tick count, accounting for frequency adjustment
|
||||
// and scaling.
|
||||
static uint64_t QueryGuestTickCount();
|
||||
|
@ -47,9 +63,7 @@ class Clock {
|
|||
// Queries the milliseconds since the guest began, accounting for scaling.
|
||||
static uint32_t QueryGuestUptimeMillis();
|
||||
|
||||
// Sets the guest tick count for the current thread.
|
||||
static void SetGuestTickCount(uint64_t tick_count);
|
||||
// Sets the system time for the current thread.
|
||||
// Sets the system time of the guest.
|
||||
static void SetGuestSystemTime(uint64_t system_time);
|
||||
|
||||
// Scales a time duration in milliseconds, from guest time.
|
||||
|
|
|
@ -14,14 +14,14 @@
|
|||
|
||||
namespace xe {
|
||||
|
||||
uint64_t Clock::host_tick_frequency() {
|
||||
uint64_t Clock::host_tick_frequency_platform() {
|
||||
timespec res;
|
||||
clock_getres(CLOCK_MONOTONIC_RAW, &res);
|
||||
|
||||
return uint64_t(res.tv_sec) + uint64_t(res.tv_nsec) * 1000000000ull;
|
||||
}
|
||||
|
||||
uint64_t Clock::QueryHostTickCount() {
|
||||
uint64_t Clock::host_tick_count_platform() {
|
||||
timespec res;
|
||||
clock_gettime(CLOCK_MONOTONIC_RAW, &res);
|
||||
|
||||
|
@ -40,7 +40,7 @@ uint64_t Clock::QueryHostSystemTime() {
|
|||
}
|
||||
|
||||
uint64_t Clock::QueryHostUptimeMillis() {
|
||||
return QueryHostTickCount() / (host_tick_frequency() / 1000);
|
||||
return host_tick_count_platform() * 1000 / host_tick_frequency_platform();
|
||||
}
|
||||
|
||||
} // namespace xe
|
|
@ -13,15 +13,13 @@
|
|||
|
||||
namespace xe {
|
||||
|
||||
uint64_t Clock::host_tick_frequency() {
|
||||
static LARGE_INTEGER frequency = {{0}};
|
||||
if (!frequency.QuadPart) {
|
||||
QueryPerformanceFrequency(&frequency);
|
||||
}
|
||||
uint64_t Clock::host_tick_frequency_platform() {
|
||||
LARGE_INTEGER frequency;
|
||||
QueryPerformanceFrequency(&frequency);
|
||||
return frequency.QuadPart;
|
||||
}
|
||||
|
||||
uint64_t Clock::QueryHostTickCount() {
|
||||
uint64_t Clock::host_tick_count_platform() {
|
||||
LARGE_INTEGER counter;
|
||||
uint64_t time = 0;
|
||||
if (QueryPerformanceCounter(&counter)) {
|
||||
|
@ -37,7 +35,7 @@ uint64_t Clock::QueryHostSystemTime() {
|
|||
}
|
||||
|
||||
uint64_t Clock::QueryHostUptimeMillis() {
|
||||
return QueryHostTickCount() / (host_tick_frequency() / 1000);
|
||||
return host_tick_count_platform() * 1000 / host_tick_frequency_platform();
|
||||
}
|
||||
|
||||
} // namespace xe
|
||||
|
|
|
@ -0,0 +1,105 @@
|
|||
/**
|
||||
******************************************************************************
|
||||
* Xenia : Xbox 360 Emulator Research Project *
|
||||
******************************************************************************
|
||||
* Copyright 2019 Ben Vanik. All rights reserved. *
|
||||
* Released under the BSD license - see LICENSE in the root for more details. *
|
||||
******************************************************************************
|
||||
*/
|
||||
|
||||
#include "xenia/base/platform.h"
|
||||
|
||||
#if XE_ARCH_AMD64
|
||||
|
||||
#include "xenia/base/clock.h"
|
||||
#include "xenia/base/logging.h"
|
||||
|
||||
// Wrap all these different cpu compiler intrinsics.
|
||||
// So no inline assembler here and the compiler will remove the clutter.
|
||||
#if XE_COMPILER_MSVC
|
||||
#define xe_cpu_cpuid(level, eax, ebx, ecx, edx) \
|
||||
{ \
|
||||
int __xe_cpuid_registers_[4]; \
|
||||
__cpuid(__xe_cpuid_registers_, (level)); \
|
||||
(eax) = static_cast<uint32_t>(__xe_cpuid_registers_[0]); \
|
||||
(ebx) = static_cast<uint32_t>(__xe_cpuid_registers_[1]); \
|
||||
(ecx) = static_cast<uint32_t>(__xe_cpuid_registers_[2]); \
|
||||
(edx) = static_cast<uint32_t>(__xe_cpuid_registers_[3]); \
|
||||
}
|
||||
#define xe_cpu_rdtsc() __rdtsc()
|
||||
#elif XE_COMPILER_CLANG || XE_COMPILER_GNUC
|
||||
#include <cpuid.h>
|
||||
#define xe_cpu_cpuid(level, eax, ebx, ecx, edx) \
|
||||
__cpuid((level), (eax), (ebx), (ecx), (edx));
|
||||
#define xe_cpu_rdtsc() __rdtsc()
|
||||
#else
|
||||
#error "No cpu instruction wrappers for current compiler implemented."
|
||||
#endif
|
||||
|
||||
#define CLOCK_FATAL(msg) \
|
||||
xe::FatalError( \
|
||||
"The raw clock source is not supported on your CPU. \n" \
|
||||
"%s \n" \
|
||||
"Set the cvar 'clock_source_raw' to 'false'.", \
|
||||
(msg));
|
||||
|
||||
namespace xe {
|
||||
// Getting the TSC frequency can be a bit tricky. This method here only works on
|
||||
// Intel as it seems. There is no easy way to get the frequency outside of ring0
|
||||
// on AMD, so we fail gracefully if not possible.
|
||||
uint64_t Clock::host_tick_frequency_raw() {
|
||||
uint32_t eax, ebx, ecx, edx;
|
||||
|
||||
// 00H Get max supported cpuid level.
|
||||
xe_cpu_cpuid(0x0, eax, ebx, ecx, edx);
|
||||
auto max_cpuid = eax;
|
||||
// 80000000H Get max extended cpuid level
|
||||
xe_cpu_cpuid(0x80000000, eax, ebx, ecx, edx);
|
||||
auto max_cpuid_ex = eax;
|
||||
|
||||
// 80000007H Get extended power feature info
|
||||
if (max_cpuid_ex >= 0x80000007) {
|
||||
xe_cpu_cpuid(0x80000007, eax, ebx, ecx, edx);
|
||||
// Invariant TSC bit at position 8
|
||||
auto tsc_invariant = edx & (1 << 8);
|
||||
// If the TSC is not invariant it will change its frequency with power
|
||||
// states and across cores.
|
||||
if (!tsc_invariant) {
|
||||
CLOCK_FATAL("The CPU has no invariant TSC.");
|
||||
return 0;
|
||||
}
|
||||
} else {
|
||||
CLOCK_FATAL("Unclear if the CPU has an invariant TSC.")
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (max_cpuid >= 0x15) {
|
||||
// 15H Get TSC/Crystal ratio and Crystal Hz.
|
||||
xe_cpu_cpuid(0x15, eax, ebx, ecx, edx);
|
||||
uint64_t ratio_num = ebx;
|
||||
uint64_t ratio_den = eax;
|
||||
uint64_t cryst_freq = ecx;
|
||||
// For some CPUs, Crystal frequency is not reported.
|
||||
if (ratio_num && ratio_den && cryst_freq) {
|
||||
// If it is, calculate the TSC frequency
|
||||
auto tsc_freq = cryst_freq * ratio_num / ratio_den;
|
||||
}
|
||||
}
|
||||
|
||||
if (max_cpuid >= 0x16) {
|
||||
// 16H Get CPU base frequency MHz in EAX.
|
||||
xe_cpu_cpuid(0x16, eax, ebx, ecx, edx);
|
||||
uint64_t cpu_base_freq = static_cast<uint64_t>(eax) * 1000000;
|
||||
assert(cpu_base_freq);
|
||||
return cpu_base_freq;
|
||||
}
|
||||
|
||||
CLOCK_FATAL("The clock frequency could not be determined.");
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Reads the current host tick count through the xe_cpu_rdtsc() wrapper.
uint64_t Clock::host_tick_count_raw() {
  const uint64_t tsc = xe_cpu_rdtsc();
  return tsc;
}
|
||||
|
||||
} // namespace xe
|
||||
|
||||
#endif
|
|
@ -15,6 +15,7 @@
|
|||
#include <cstdint>
|
||||
#include <cstring>
|
||||
#include <limits>
|
||||
#include <numeric>
|
||||
#include <type_traits>
|
||||
#include "xenia/base/platform.h"
|
||||
|
||||
|
@ -59,6 +60,34 @@ T next_pow2(T value) {
|
|||
return value;
|
||||
}
|
||||
|
||||
// Returns the greatest common divisor of a and b.
// greatest_common_divisor(x, 0) == x and greatest_common_divisor(0, 0) == 0.
// Uses std::gcd when the standard library provides it, otherwise a local
// implementation of Euclid's algorithm.
// NOTE(review): for negative inputs the two implementations may differ in the
// sign of the result; the callers visible here only pass unsigned values.
#if defined(__cpp_lib_gcd_lcm)
template <typename T>
inline constexpr T greatest_common_divisor(T a, T b) {
  return std::gcd(a, b);
}
#else
template <typename T>
inline constexpr T greatest_common_divisor(T a, T b) {
  // Euclid's algorithm. Written with a plain temporary instead of
  // std::exchange so it needs no extra header and stays usable in constant
  // expressions on toolchains without a constexpr std::exchange.
  while (b) {
    const T remainder = a % b;
    a = b;
    b = remainder;
  }
  return a;
}
#endif

// Reduces the fraction numerator/denominator in place by dividing both terms
// by their greatest common divisor.
// A fraction of 0/0 has no common divisor and is left untouched instead of
// dividing by zero.
template <typename T>
inline constexpr void reduce_fraction(T& numerator, T& denominator) {
  const T gcd = greatest_common_divisor(numerator, denominator);
  if (!gcd) {
    return;
  }
  numerator /= gcd;
  denominator /= gcd;
}

// Reduces a fraction stored as (numerator, denominator) in a std::pair.
template <typename T>
inline constexpr void reduce_fraction(std::pair<T, T>& fraction) {
  reduce_fraction<T>(fraction.first, fraction.second);
}
|
||||
|
||||
// Builds a 32-bit mask with bits a through b (inclusive, bit 0 being the
// least significant) set and every other bit cleared.
constexpr uint32_t make_bitmask(uint32_t a, uint32_t b) {
  // All bits from 0 up to and including b.
  const uint32_t up_to_b = std::numeric_limits<uint32_t>::max() >> (31 - b);
  // All bits strictly below a.
  const uint32_t below_a = (1u << a) - 1;
  return up_to_b & ~below_a;
}
|
||||
|
|
|
@ -440,9 +440,34 @@ EMITTER_OPCODE_TABLE(OPCODE_ROUND, ROUND_F32, ROUND_F64, ROUND_V128);
|
|||
// ============================================================================
|
||||
struct LOAD_CLOCK : Sequence<LOAD_CLOCK, I<OPCODE_LOAD_CLOCK, I64Op>> {
|
||||
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
||||
// It'd be cool to call QueryPerformanceCounter directly, but w/e.
|
||||
e.CallNative(LoadClock);
|
||||
e.mov(i.dest, e.rax);
|
||||
// When scaling is disabled and the raw clock source is selected, the code
|
||||
// in the Clock class is actually just forwarding tick counts after one
|
||||
// simple multiply and division. In that case we rather bake the scaling in
|
||||
// here to cut extra function calls with CPU cache misses and stack frame
|
||||
// overhead.
|
||||
if (cvars::clock_no_scaling && cvars::clock_source_raw) {
|
||||
auto ratio = Clock::guest_tick_ratio();
|
||||
// The 360 CPU is an in-order CPU, AMD64 usually isn't. Without
|
||||
// mfence/lfence magic the rdtsc instruction can be executed sooner or
|
||||
// later in the cache window. Since its resolution however is much higher
|
||||
// than the 360's mftb instruction this can safely be ignored.
|
||||
|
||||
// Read time stamp in edx (high part) and eax (low part).
|
||||
e.rdtsc();
|
||||
// Make it a 64 bit number in rax.
|
||||
e.shl(e.rdx, 32);
|
||||
e.or_(e.rax, e.rdx);
|
||||
// Apply tick frequency scaling.
|
||||
e.mov(e.rcx, ratio.first);
|
||||
e.mul(e.rcx);
|
||||
// We actually now have a 128 bit number in rdx:rax.
|
||||
e.mov(e.rcx, ratio.second);
|
||||
e.div(e.rcx);
|
||||
e.mov(i.dest, e.rax);
|
||||
} else {
|
||||
e.CallNative(LoadClock);
|
||||
e.mov(i.dest, e.rax);
|
||||
}
|
||||
}
|
||||
static uint64_t LoadClock(void* raw_context) {
|
||||
return Clock::QueryGuestTickCount();
|
||||
|
|
|
@ -823,10 +823,6 @@ struct ThreadSavedState {
|
|||
bool is_main_thread; // Is this the main thread?
|
||||
bool is_running;
|
||||
|
||||
// Clock settings (invalid if not running)
|
||||
uint64_t tick_count_;
|
||||
uint64_t system_time_;
|
||||
|
||||
uint32_t apc_head;
|
||||
uint32_t tls_static_address;
|
||||
uint32_t tls_dynamic_address;
|
||||
|
@ -895,10 +891,6 @@ bool XThread::Save(ByteStream* stream) {
|
|||
state.stack_alloc_size = stack_alloc_size_;
|
||||
|
||||
if (running_) {
|
||||
state.tick_count_ = Clock::QueryGuestTickCount();
|
||||
state.system_time_ =
|
||||
Clock::QueryGuestSystemTime() - Clock::guest_system_time_base();
|
||||
|
||||
// Context information
|
||||
auto context = thread_state_->context();
|
||||
state.context.lr = context->lr;
|
||||
|
@ -1008,10 +1000,6 @@ object_ref<XThread> XThread::Restore(KernelState* kernel_state,
|
|||
// Profiler needs to know about the thread.
|
||||
xe::Profiler::ThreadEnter(thread->name().c_str());
|
||||
|
||||
// Setup the time now that we're in the thread.
|
||||
Clock::SetGuestTickCount(state.tick_count_);
|
||||
Clock::SetGuestSystemTime(state.system_time_);
|
||||
|
||||
current_xthread_tls_ = thread;
|
||||
current_thread_ = thread;
|
||||
|
||||
|
|
Loading…
Reference in New Issue