[CPU] Implement mftb instruction natively.
When the cvars clock_no_scaling and clock_source_raw are set, tick counts will be directly calculated in the emitted code.
This commit is contained in:
parent
15d422d988
commit
922f1f220a
|
@ -134,6 +134,11 @@ void Clock::set_guest_time_scalar(double scalar) {
|
||||||
RecomputeGuestTickScalar();
|
RecomputeGuestTickScalar();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
std::pair<uint64_t, uint64_t> Clock::guest_tick_ratio() {
|
||||||
|
std::lock_guard<std::mutex> lock(tick_mutex_);
|
||||||
|
return guest_tick_ratio_;
|
||||||
|
}
|
||||||
|
|
||||||
uint64_t Clock::guest_tick_frequency() { return guest_tick_frequency_; }
|
uint64_t Clock::guest_tick_frequency() { return guest_tick_frequency_; }
|
||||||
|
|
||||||
void Clock::set_guest_tick_frequency(uint64_t frequency) {
|
void Clock::set_guest_tick_frequency(uint64_t frequency) {
|
||||||
|
|
|
@ -43,6 +43,8 @@ class Clock {
|
||||||
// Sets the guest time scalar, adjusting tick and wall clock speed.
|
// Sets the guest time scalar, adjusting tick and wall clock speed.
|
||||||
// Ex: 1x=normal, 2x=double speed, 1/2x=half speed.
|
// Ex: 1x=normal, 2x=double speed, 1/2x=half speed.
|
||||||
static void set_guest_time_scalar(double scalar);
|
static void set_guest_time_scalar(double scalar);
|
||||||
|
// Gets the tick ratio between host and guest, including time scaling if set.
|
||||||
|
static std::pair<uint64_t, uint64_t> guest_tick_ratio();
|
||||||
// Guest ticks-per-second.
|
// Guest ticks-per-second.
|
||||||
static uint64_t guest_tick_frequency();
|
static uint64_t guest_tick_frequency();
|
||||||
// Sets the guest ticks-per-second.
|
// Sets the guest ticks-per-second.
|
||||||
|
|
|
@ -2,7 +2,7 @@
|
||||||
******************************************************************************
|
******************************************************************************
|
||||||
* Xenia : Xbox 360 Emulator Research Project *
|
* Xenia : Xbox 360 Emulator Research Project *
|
||||||
******************************************************************************
|
******************************************************************************
|
||||||
* Copyright 2014 Ben Vanik. All rights reserved. *
|
* Copyright 2019 Ben Vanik. All rights reserved. *
|
||||||
* Released under the BSD license - see LICENSE in the root for more details. *
|
* Released under the BSD license - see LICENSE in the root for more details. *
|
||||||
******************************************************************************
|
******************************************************************************
|
||||||
*/
|
*/
|
||||||
|
@ -440,10 +440,35 @@ EMITTER_OPCODE_TABLE(OPCODE_ROUND, ROUND_F32, ROUND_F64, ROUND_V128);
|
||||||
// ============================================================================
|
// ============================================================================
|
||||||
struct LOAD_CLOCK : Sequence<LOAD_CLOCK, I<OPCODE_LOAD_CLOCK, I64Op>> {
|
struct LOAD_CLOCK : Sequence<LOAD_CLOCK, I<OPCODE_LOAD_CLOCK, I64Op>> {
|
||||||
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
||||||
// It'd be cool to call QueryPerformanceCounter directly, but w/e.
|
// When scaling is disabled and the raw clock source is selected, the code
|
||||||
|
// in the Clock class is actually just forwarding tick counts after one
|
||||||
|
// simple multiply and division. In that case we would rather bake the scaling in
|
||||||
|
// here to cut extra function calls with CPU cache misses and stack frame
|
||||||
|
// overhead.
|
||||||
|
if (cvars::clock_no_scaling && cvars::clock_source_raw) {
|
||||||
|
auto ratio = Clock::guest_tick_ratio();
|
||||||
|
// The 360 CPU is an in-order CPU, AMD64 usually isn't. Without
|
||||||
|
// mfence/lfence magic the rdtsc instruction can be executed sooner or
|
||||||
|
// later in the cache window. Since its resolution, however, is much higher
|
||||||
|
// than the 360's mftb instruction this can safely be ignored.
|
||||||
|
|
||||||
|
// Read time stamp in edx (high part) and eax (low part).
|
||||||
|
e.rdtsc();
|
||||||
|
// Make it a 64 bit number in rax.
|
||||||
|
e.shl(e.rdx, 32);
|
||||||
|
e.or_(e.rax, e.rdx);
|
||||||
|
// Apply tick frequency scaling.
|
||||||
|
e.mov(e.rcx, ratio.first);
|
||||||
|
e.mul(e.rcx);
|
||||||
|
// We actually now have a 128 bit number in rdx:rax.
|
||||||
|
e.mov(e.rcx, ratio.second);
|
||||||
|
e.div(e.rcx);
|
||||||
|
e.mov(i.dest, e.rax);
|
||||||
|
} else {
|
||||||
e.CallNative(LoadClock);
|
e.CallNative(LoadClock);
|
||||||
e.mov(i.dest, e.rax);
|
e.mov(i.dest, e.rax);
|
||||||
}
|
}
|
||||||
|
}
|
||||||
static uint64_t LoadClock(void* raw_context) {
|
static uint64_t LoadClock(void* raw_context) {
|
||||||
return Clock::QueryGuestTickCount();
|
return Clock::QueryGuestTickCount();
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue