[CPU] Implement mftb instruction natively.

When the cvars clock_no_scaling and clock_source_raw are set, tick counts will be directly calculated in the emitted code.
This commit is contained in:
Joel Linn 2019-11-22 03:42:15 +01:00 committed by Rick Gibbed
parent 15d422d988
commit 922f1f220a
3 changed files with 36 additions and 4 deletions

View File

@ -134,6 +134,11 @@ void Clock::set_guest_time_scalar(double scalar) {
RecomputeGuestTickScalar();
}
// Returns a copy of the current guest tick ratio. The copy is taken while
// holding tick_mutex_.
std::pair<uint64_t, uint64_t> Clock::guest_tick_ratio() {
  const std::lock_guard<std::mutex> guard(tick_mutex_);
  const std::pair<uint64_t, uint64_t> snapshot = guest_tick_ratio_;
  return snapshot;
}
// Returns the stored guest tick frequency.
// NOTE(review): unlike guest_tick_ratio(), this read does not take
// tick_mutex_ - confirm that is intentional.
uint64_t Clock::guest_tick_frequency() {
  return guest_tick_frequency_;
}
void Clock::set_guest_tick_frequency(uint64_t frequency) {

View File

@ -43,6 +43,8 @@ class Clock {
// Sets the guest time scalar, adjusting tick and wall clock speed.
// Ex: 1x=normal, 2x=double speed, 1/2x=half speed.
// The guest tick scalar is recomputed as part of this call.
static void set_guest_time_scalar(double scalar);
// Gets the tick ratio between host and guest, including time scaling if set.
// The natively emitted clock load applies it to a host tick count as
// host_ticks * first / second.
static std::pair<uint64_t, uint64_t> guest_tick_ratio();
// Gets the guest tick frequency, in ticks per second.
static uint64_t guest_tick_frequency();
// Sets the guest ticks-per-second.

View File

@ -2,7 +2,7 @@
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2014 Ben Vanik. All rights reserved. *
* Copyright 2019 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
@ -440,10 +440,35 @@ EMITTER_OPCODE_TABLE(OPCODE_ROUND, ROUND_F32, ROUND_F64, ROUND_V128);
// ============================================================================
struct LOAD_CLOCK : Sequence<LOAD_CLOCK, I<OPCODE_LOAD_CLOCK, I64Op>> {
static void Emit(X64Emitter& e, const EmitArgType& i) {
  // The decision is made once, at emit time. With scaling disabled and the
  // raw clock source selected, the Clock class only forwards the host tick
  // count after one multiply and one divide, so that arithmetic is baked
  // into the generated code. This avoids a native call with its extra CPU
  // cache misses and stack frame overhead.
  const bool inline_tick_math =
      cvars::clock_no_scaling && cvars::clock_source_raw;
  if (!inline_tick_math) {
    // General case: ask the Clock class through a native call; the result
    // comes back in rax.
    e.CallNative(LoadClock);
    e.mov(i.dest, e.rax);
    return;
  }

  const auto tick_ratio = Clock::guest_tick_ratio();

  // The 360 CPU is an in-order CPU, AMD64 usually isn't. Without
  // mfence/lfence the rdtsc instruction may be executed somewhat sooner or
  // later than its position in the instruction stream. Since its resolution
  // is much higher than that of the 360's mftb instruction, this can safely
  // be ignored.

  // rdtsc delivers the time stamp split across edx (high) and eax (low).
  e.rdtsc();
  // Merge both halves into a single 64 bit value in rax.
  e.shl(e.rdx, 32);
  e.or_(e.rax, e.rdx);
  // Scale to guest ticks: multiply by the first ratio component. The
  // product is a 128 bit number held in rdx:rax.
  e.mov(e.rcx, tick_ratio.first);
  e.mul(e.rcx);
  // Then divide that 128 bit value by the second ratio component.
  e.mov(e.rcx, tick_ratio.second);
  e.div(e.rcx);
  e.mov(i.dest, e.rax);
}
// Native fallback used by Emit via CallNative: returns the guest tick count
// as reported by the Clock class. raw_context is not used.
static uint64_t LoadClock(void* raw_context) {
  const uint64_t guest_ticks = Clock::QueryGuestTickCount();
  return guest_ticks;
}