forked from ShuriZma/suyu
(wall, native)_clock: Add GetGPUTick
Allows us to directly calculate the GPU tick without double conversion to and from the host clock tick.
This commit is contained in:
parent
9dcc7bde8b
commit
907507886d
|
@ -32,6 +32,10 @@ public:
|
||||||
return GetHostTicksElapsed() * NsToCNTPCTRatio::num / NsToCNTPCTRatio::den;
|
return GetHostTicksElapsed() * NsToCNTPCTRatio::num / NsToCNTPCTRatio::den;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Guest GPU ticks elapsed on this clock: the value of GetHostTicksElapsed() scaled by
/// NsToGPUTickRatio. The multiplication by num happens before the integer division by den,
/// so the result is truncated.
u64 GetGPUTick() const override {
|
||||||
|
return GetHostTicksElapsed() * NsToGPUTickRatio::num / NsToGPUTickRatio::den;
|
||||||
|
}
|
||||||
|
|
||||||
/// Raw host tick value: the SteadyClock time-since-epoch count, cast to u64.
u64 GetHostTicksNow() const override {
|
/// Raw host tick value: the SteadyClock time-since-epoch count, cast to u64.
u64 GetHostTicksNow() const override {
|
||||||
return static_cast<u64>(SteadyClock::Now().time_since_epoch().count());
|
return static_cast<u64>(SteadyClock::Now().time_since_epoch().count());
|
||||||
}
|
}
|
||||||
|
@ -52,12 +56,12 @@ std::unique_ptr<WallClock> CreateOptimalClock() {
|
||||||
#ifdef ARCHITECTURE_x86_64
|
#ifdef ARCHITECTURE_x86_64
|
||||||
const auto& caps = GetCPUCaps();
|
const auto& caps = GetCPUCaps();
|
||||||
|
|
||||||
if (caps.invariant_tsc && caps.tsc_frequency >= WallClock::CNTFRQ) {
|
if (caps.invariant_tsc && caps.tsc_frequency >= WallClock::GPUTickFreq) {
|
||||||
return std::make_unique<X64::NativeClock>(caps.tsc_frequency);
|
return std::make_unique<X64::NativeClock>(caps.tsc_frequency);
|
||||||
} else {
|
} else {
|
||||||
// Fallback to StandardWallClock if the hardware TSC
|
// Fallback to StandardWallClock if the hardware TSC
|
||||||
// - Is not invariant
|
// - Is not invariant
|
||||||
// - Is not more precise than CNTFRQ
|
// - Is not more precise than GPUTickFreq
|
||||||
return std::make_unique<StandardWallClock>();
|
return std::make_unique<StandardWallClock>();
|
||||||
}
|
}
|
||||||
#else
|
#else
|
||||||
|
|
|
@ -13,7 +13,8 @@ namespace Common {
|
||||||
|
|
||||||
class WallClock {
|
class WallClock {
|
||||||
public:
|
public:
|
||||||
static constexpr u64 CNTFRQ = 19'200'000; // CNTPCT_EL0 Frequency = 19.2 MHz
|
static constexpr u64 CNTFRQ = 19'200'000; // CNTPCT_EL0 Frequency = 19.2 MHz
|
||||||
|
static constexpr u64 GPUTickFreq = 614'400'000; // GM20B GPU Tick Frequency = 614.4 MHz
|
||||||
|
|
||||||
virtual ~WallClock() = default;
|
virtual ~WallClock() = default;
|
||||||
|
|
||||||
|
@ -29,6 +30,9 @@ public:
|
||||||
/// @returns The guest CNTPCT ticks since the construction of this clock.
|
/// @returns The guest CNTPCT ticks since the construction of this clock.
|
||||||
virtual u64 GetCNTPCT() const = 0;
|
virtual u64 GetCNTPCT() const = 0;
|
||||||
|
|
||||||
|
/// @returns The guest GPU ticks since the construction of this clock.
|
||||||
|
virtual u64 GetGPUTick() const = 0;
|
||||||
|
|
||||||
/// @returns The raw host timer ticks since an indeterminate epoch.
|
/// @returns The raw host timer ticks since an indeterminate epoch.
|
||||||
virtual u64 GetHostTicksNow() const = 0;
|
virtual u64 GetHostTicksNow() const = 0;
|
||||||
|
|
||||||
|
@ -46,6 +50,10 @@ public:
|
||||||
return us * UsToCNTPCTRatio::num / UsToCNTPCTRatio::den;
|
return us * UsToCNTPCTRatio::num / UsToCNTPCTRatio::den;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Converts a nanosecond count to GPU ticks by applying NsToGPUTickRatio.
/// Multiplies by num first and then integer-divides by den, so the result is truncated.
static inline u64 NSToGPUTick(u64 ns) {
|
||||||
|
return ns * NsToGPUTickRatio::num / NsToGPUTickRatio::den;
|
||||||
|
}
|
||||||
|
|
||||||
/// Converts CNTPCT ticks to nanoseconds by applying NsToCNTPCTRatio in reverse
/// (multiply by den, then integer-divide by num).
static inline u64 CNTPCTToNS(u64 cntpct) {
|
/// Converts CNTPCT ticks to nanoseconds by applying NsToCNTPCTRatio in reverse
/// (multiply by den, then integer-divide by num).
static inline u64 CNTPCTToNS(u64 cntpct) {
|
||||||
return cntpct * NsToCNTPCTRatio::den / NsToCNTPCTRatio::num;
|
return cntpct * NsToCNTPCTRatio::den / NsToCNTPCTRatio::num;
|
||||||
}
|
}
|
||||||
|
@ -54,6 +62,14 @@ public:
|
||||||
return cntpct * UsToCNTPCTRatio::den / UsToCNTPCTRatio::num;
|
return cntpct * UsToCNTPCTRatio::den / UsToCNTPCTRatio::num;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Converts GPU ticks to nanoseconds by applying NsToGPUTickRatio in reverse
/// (multiply by den, then integer-divide by num).
static inline u64 GPUTickToNS(u64 gpu_tick) {
|
||||||
|
return gpu_tick * NsToGPUTickRatio::den / NsToGPUTickRatio::num;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Converts CNTPCT ticks to GPU ticks by applying CNTPCTToGPUTickRatio
/// (multiply by num, then integer-divide by den).
static inline u64 CNTPCTToGPUTick(u64 cntpct) {
|
||||||
|
return cntpct * CNTPCTToGPUTickRatio::num / CNTPCTToGPUTickRatio::den;
|
||||||
|
}
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
using NsRatio = std::nano;
|
using NsRatio = std::nano;
|
||||||
using UsRatio = std::micro;
|
using UsRatio = std::micro;
|
||||||
|
@ -63,6 +79,8 @@ protected:
|
||||||
using NsToMsRatio = std::ratio_divide<std::nano, std::milli>;
|
using NsToMsRatio = std::ratio_divide<std::nano, std::milli>;
|
||||||
using NsToCNTPCTRatio = std::ratio<CNTFRQ, std::nano::den>;
|
using NsToCNTPCTRatio = std::ratio<CNTFRQ, std::nano::den>;
|
||||||
using UsToCNTPCTRatio = std::ratio<CNTFRQ, std::micro::den>;
|
using UsToCNTPCTRatio = std::ratio<CNTFRQ, std::micro::den>;
|
||||||
|
using NsToGPUTickRatio = std::ratio<GPUTickFreq, std::nano::den>;
|
||||||
|
using CNTPCTToGPUTickRatio = std::ratio<GPUTickFreq, CNTFRQ>;
|
||||||
};
|
};
|
||||||
|
|
||||||
std::unique_ptr<WallClock> CreateOptimalClock();
|
std::unique_ptr<WallClock> CreateOptimalClock();
|
||||||
|
|
|
@ -12,7 +12,8 @@ NativeClock::NativeClock(u64 rdtsc_frequency_)
|
||||||
ns_rdtsc_factor{GetFixedPoint64Factor(NsRatio::den, rdtsc_frequency)},
|
ns_rdtsc_factor{GetFixedPoint64Factor(NsRatio::den, rdtsc_frequency)},
|
||||||
us_rdtsc_factor{GetFixedPoint64Factor(UsRatio::den, rdtsc_frequency)},
|
us_rdtsc_factor{GetFixedPoint64Factor(UsRatio::den, rdtsc_frequency)},
|
||||||
ms_rdtsc_factor{GetFixedPoint64Factor(MsRatio::den, rdtsc_frequency)},
|
ms_rdtsc_factor{GetFixedPoint64Factor(MsRatio::den, rdtsc_frequency)},
|
||||||
cntpct_rdtsc_factor{GetFixedPoint64Factor(CNTFRQ, rdtsc_frequency)} {}
|
cntpct_rdtsc_factor{GetFixedPoint64Factor(CNTFRQ, rdtsc_frequency)},
|
||||||
|
gputick_rdtsc_factor{GetFixedPoint64Factor(GPUTickFreq, rdtsc_frequency)} {}
|
||||||
|
|
||||||
std::chrono::nanoseconds NativeClock::GetTimeNS() const {
|
std::chrono::nanoseconds NativeClock::GetTimeNS() const {
|
||||||
return std::chrono::nanoseconds{MultiplyHigh(GetHostTicksElapsed(), ns_rdtsc_factor)};
|
return std::chrono::nanoseconds{MultiplyHigh(GetHostTicksElapsed(), ns_rdtsc_factor)};
|
||||||
|
@ -30,6 +31,10 @@ u64 NativeClock::GetCNTPCT() const {
|
||||||
return MultiplyHigh(GetHostTicksElapsed(), cntpct_rdtsc_factor);
|
return MultiplyHigh(GetHostTicksElapsed(), cntpct_rdtsc_factor);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Guest GPU ticks elapsed on this clock, obtained by combining the elapsed host ticks with
/// gputick_rdtsc_factor through MultiplyHigh.
/// NOTE(review): MultiplyHigh presumably yields the upper half of the widened product
/// (fixed-point scaling) — confirm against its definition.
u64 NativeClock::GetGPUTick() const {
|
||||||
|
return MultiplyHigh(GetHostTicksElapsed(), gputick_rdtsc_factor);
|
||||||
|
}
|
||||||
|
|
||||||
/// Raw host tick value, exactly as returned by FencedRDTSC().
u64 NativeClock::GetHostTicksNow() const {
|
/// Raw host tick value, exactly as returned by FencedRDTSC().
u64 NativeClock::GetHostTicksNow() const {
|
||||||
return FencedRDTSC();
|
return FencedRDTSC();
|
||||||
}
|
}
|
||||||
|
|
|
@ -19,6 +19,8 @@ public:
|
||||||
|
|
||||||
u64 GetCNTPCT() const override;
|
u64 GetCNTPCT() const override;
|
||||||
|
|
||||||
|
u64 GetGPUTick() const override;
|
||||||
|
|
||||||
u64 GetHostTicksNow() const override;
|
u64 GetHostTicksNow() const override;
|
||||||
|
|
||||||
u64 GetHostTicksElapsed() const override;
|
u64 GetHostTicksElapsed() const override;
|
||||||
|
@ -33,6 +35,7 @@ private:
|
||||||
u64 us_rdtsc_factor;
|
u64 us_rdtsc_factor;
|
||||||
u64 ms_rdtsc_factor;
|
u64 ms_rdtsc_factor;
|
||||||
u64 cntpct_rdtsc_factor;
|
u64 cntpct_rdtsc_factor;
|
||||||
|
u64 gputick_rdtsc_factor;
|
||||||
};
|
};
|
||||||
|
|
||||||
} // namespace Common::X64
|
} // namespace Common::X64
|
||||||
|
|
|
@ -197,6 +197,13 @@ u64 CoreTiming::GetClockTicks() const {
|
||||||
return ticks;
|
return ticks;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Returns the current GPU tick value.
/// When is_multicore is set (the path marked [[likely]]), the value is read directly from the
/// clock via GetGPUTick(); otherwise it is derived from the `ticks` value through
/// Common::WallClock::CNTPCTToGPUTick.
u64 CoreTiming::GetGPUTicks() const {
|
||||||
|
if (is_multicore) [[likely]] {
|
||||||
|
return clock->GetGPUTick();
|
||||||
|
}
|
||||||
|
return Common::WallClock::CNTPCTToGPUTick(ticks);
|
||||||
|
}
|
||||||
|
|
||||||
std::optional<s64> CoreTiming::Advance() {
|
std::optional<s64> CoreTiming::Advance() {
|
||||||
std::scoped_lock lock{advance_lock, basic_lock};
|
std::scoped_lock lock{advance_lock, basic_lock};
|
||||||
global_timer = GetGlobalTimeNs().count();
|
global_timer = GetGlobalTimeNs().count();
|
||||||
|
|
|
@ -119,6 +119,9 @@ public:
|
||||||
/// Returns the current CNTPCT tick value.
|
/// Returns the current CNTPCT tick value.
|
||||||
u64 GetClockTicks() const;
|
u64 GetClockTicks() const;
|
||||||
|
|
||||||
|
/// Returns the current GPU tick value.
|
||||||
|
u64 GetGPUTicks() const;
|
||||||
|
|
||||||
/// Returns current time in microseconds.
|
/// Returns current time in microseconds.
|
||||||
std::chrono::microseconds GetGlobalTimeUs() const;
|
std::chrono::microseconds GetGlobalTimeUs() const;
|
||||||
|
|
||||||
|
|
|
@ -193,18 +193,13 @@ struct GPU::Impl {
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Returns the GPU tick count computed from the global time in nanoseconds, scaled by a local
/// Ns-to-GPU-tick ratio (384/625). When use_fast_gpu_time is set, the nanosecond value is
/// divided by 256 before scaling.
[[nodiscard]] u64 GetTicks() const {
|
/// Returns the GPU tick count read from CoreTiming::GetGPUTicks(). When use_fast_gpu_time is
/// set, the tick value is divided by 256 before being returned.
[[nodiscard]] u64 GetTicks() const {
|
||||||
// These values were reverse-engineered by fincs from NVN
|
u64 gpu_tick = system.CoreTiming().GetGPUTicks();
|
||||||
// The GPU clock is 614.4 MHz
|
|
||||||
using NsToGPUTickRatio = std::ratio<614'400'000, std::nano::den>;
|
|
||||||
static_assert(NsToGPUTickRatio::num == 384 && NsToGPUTickRatio::den == 625);
|
|
||||||
|
|
||||||
u64 nanoseconds = system.CoreTiming().GetGlobalTimeNs().count();
|
|
||||||
|
|
||||||
if (Settings::values.use_fast_gpu_time.GetValue()) {
|
if (Settings::values.use_fast_gpu_time.GetValue()) {
|
||||||
nanoseconds /= 256;
|
gpu_tick /= 256;
|
||||||
}
|
}
|
||||||
|
|
||||||
return nanoseconds * NsToGPUTickRatio::num / NsToGPUTickRatio::den;
|
return gpu_tick;
|
||||||
}
|
}
|
||||||
|
|
||||||
[[nodiscard]] bool IsAsync() const {
|
[[nodiscard]] bool IsAsync() const {
|
||||||
|
|
Loading…
Reference in New Issue