Several changes for timestamp bundle:

Fully defined the structure.
There is now a single copy of it and a single timer shared across all modules; managing both is now the responsibility of KernelState.

Add global_critical_region::PrepareToAcquire, which uses PrefetchW on the global critical region's mutex. We now know PrefetchW can be used on all CPUs that have AVX.
Add KeQueryInterruptTime, which is used by some dashboards.

add threading::NanoSleep
This commit is contained in:
chss95cs@gmail.com 2023-04-16 10:08:01 -04:00
parent 12c9135843
commit ab21e1e0f0
15 changed files with 147 additions and 54 deletions

View File

@ -50,8 +50,7 @@ uint64_t last_guest_tick_count_ = 0;
// Last sampled host tick count.
uint64_t last_host_tick_count_ = Clock::QueryHostTickCount();
using tick_mutex_type = std::mutex;
using tick_mutex_type = std::mutex;
// Mutex to ensure last_host_tick_count_ and last_guest_tick_count_ are in sync
// std::mutex tick_mutex_;
@ -180,6 +179,10 @@ uint64_t Clock::QueryGuestSystemTime() {
return guest_system_time_base_ + guest_system_time_offset;
}
uint64_t Clock::QueryGuestInterruptTime() {
return Clock::QueryHostInterruptTime();
}
uint32_t Clock::QueryGuestUptimeMillis() {
return static_cast<uint32_t>(
std::min<uint64_t>(QueryGuestSystemTimeOffset() / 10000,

View File

@ -54,6 +54,8 @@ class Clock {
// Queries the milliseconds since the host began.
static uint64_t QueryHostUptimeMillis();
static uint64_t QueryHostInterruptTime();
// Guest time scalar.
static double guest_time_scalar();
// Sets the guest time scalar, adjusting tick and wall clock speed.
@ -81,6 +83,8 @@ class Clock {
// Queries the milliseconds since the guest began, accounting for scaling.
static uint32_t QueryGuestUptimeMillis();
static uint64_t QueryGuestInterruptTime();
// Sets the system time of the guest.
static void SetGuestSystemTime(uint64_t system_time);

View File

@ -47,5 +47,10 @@ uint64_t Clock::QueryHostSystemTime() {
// Queries the milliseconds since the host began, derived from the platform
// tick counter and its frequency.
uint64_t Clock::QueryHostUptimeMillis() {
  const auto elapsed_ticks = host_tick_count_platform();
  const auto ticks_per_second = host_tick_frequency_platform();
  // Multiply before dividing so sub-second precision is not truncated away.
  return elapsed_ticks * 1000 / ticks_per_second;
}
// Queries the host interrupt time by reading 64 bits directly from the
// KUSER_SHARED page at KUSER_SHARED_INTERRUPTTIME_OFFSET.
// todo: we only take the low part of interrupttime! this is actually a 96-bit
// int!
uint64_t Clock::QueryHostInterruptTime() {
  // The shared page is not written by this program, so the load goes through
  // a volatile pointer (the same way the KUSER_SHARED system time accessor
  // does) to make sure it is actually performed on every call instead of
  // being cached or hoisted by the compiler.
  const volatile uint64_t* interrupt_time =
      reinterpret_cast<const volatile uint64_t*>(
          KUserShared() + KUSER_SHARED_INTERRUPTTIME_OFFSET);
  return *interrupt_time;
}
} // namespace xe

View File

@ -25,17 +25,6 @@ class StartupCpuFeatureCheck {
"the "
"FAQ for system requirements at https://xenia.jp";
}
#if 0
if (!error_message) {
unsigned int data[4];
Xbyak::util::Cpu::getCpuid(0x80000001, data);
if (!(data[2] & (1U << 8))) {
error_message =
"Your cpu does not support PrefetchW, which Xenia Canary "
"requires.";
}
}
#endif
if (error_message == nullptr) {
return;
} else {

View File

@ -11,7 +11,7 @@
#define XENIA_BASE_MUTEX_H_
#include <mutex>
#include "platform.h"
#include "memory.h"
#define XE_ENABLE_FAST_WIN32_MUTEX 1
namespace xe {
@ -149,6 +149,12 @@ class global_critical_region {
return global_unique_lock_type(mutex());
}
// Hints that the global critical section is about to be acquired.
// On Win32 builds this issues swcache::PrefetchW on the global mutex; on
// other platforms it does nothing.
static inline void PrepareToAcquire() {
#if XE_PLATFORM_WIN32 == 1
  auto* global_mutex = &mutex();
  swcache::PrefetchW(global_mutex);
#endif
}
// Acquires a deferred lock on the global critical section.
static inline global_unique_lock_type AcquireDeferred() {
return global_unique_lock_type(mutex(), std::defer_lock);

View File

@ -35,8 +35,9 @@
#undef GetFirstChild
#define XE_USE_NTDLL_FUNCTIONS 1
//chrispy: disabling this for now, more research needs to be done imo, although it does work very well on my machine
//
// chrispy: disabling this for now, more research needs to be done imo, although
// it does work very well on my machine
//
#define XE_USE_KUSER_SHARED 0
#if XE_USE_NTDLL_FUNCTIONS == 1
/*
@ -63,7 +64,11 @@
#define XE_NTDLL_IMPORT(name, cls, clsvar) static constexpr bool clsvar = false
#endif
#if XE_USE_KUSER_SHARED==1
static constexpr size_t KSUER_SHARED_SYSTEMTIME_OFFSET = 0x14;
static constexpr size_t KUSER_SHARED_INTERRUPTTIME_OFFSET = 8;
static unsigned char* KUserShared() { return (unsigned char*)0x7FFE0000ULL; }
#if XE_USE_KUSER_SHARED == 1
// KUSER_SHARED
struct __declspec(align(4)) _KSYSTEM_TIME {
unsigned int LowPart;
@ -71,8 +76,6 @@ struct __declspec(align(4)) _KSYSTEM_TIME {
int High2Time;
};
static constexpr size_t KSUER_SHARED_SYSTEMTIME_OFFSET = 0x14;
static unsigned char* KUserShared() { return (unsigned char*)0x7FFE0000ULL; }
static volatile _KSYSTEM_TIME* GetKUserSharedSystemTime() {
return reinterpret_cast<volatile _KSYSTEM_TIME*>(
KUserShared() + KSUER_SHARED_SYSTEMTIME_OFFSET);

View File

@ -115,6 +115,7 @@ void SyncMemory();
// Sleeps the current thread for at least as long as the given duration.
void Sleep(std::chrono::microseconds duration);
void NanoSleep(int64_t ns);
template <typename Rep, typename Period>
void Sleep(std::chrono::duration<Rep, Period> duration) {
Sleep(std::chrono::duration_cast<std::chrono::microseconds>(duration));
@ -148,7 +149,7 @@ bool SetTlsValue(TlsHandle handle, uintptr_t value);
// be kept short or else all timers will be impacted. This is a simplified
// wrapper around QueueTimerRecurring which automatically cancels the timer on
// destruction.
//only used by XboxkrnlModule::XboxkrnlModule
// only used by XboxkrnlModule::XboxkrnlModule
class HighResolutionTimer {
HighResolutionTimer(std::chrono::milliseconds interval,
std::function<void()> callback) {
@ -302,14 +303,14 @@ class Event : public WaitHandle {
// the nonsignaled state after releasing the appropriate number of waiting
// threads.
virtual void Pulse() = 0;
virtual EventInfo Query() = 0;
#if XE_PLATFORM_WIN32 ==1
//SetEvent, but if there is a waiter we immediately transfer execution to it
#if XE_PLATFORM_WIN32 == 1
// SetEvent, but if there is a waiter we immediately transfer execution to it
virtual void SetBoostPriority() = 0;
#else
#else
void SetBoostPriority() { Set(); }
#endif
#endif
};
// Models a Win32-like semaphore object.

View File

@ -148,7 +148,16 @@ void MaybeYield() {
// memorybarrier is really not necessary here...
// MemoryBarrier();
}
// Delays the current thread for roughly `ns` nanoseconds via
// NtDelayExecution.
void NanoSleep(int64_t ns) {
  // The delay is expressed in 100 nanosecond increments.
  const int64_t whole_increments = ns / 100LL;
  // An explicit request of 0 ns is passed through unchanged; any other
  // request shorter than one increment is rounded up to a single 100 ns
  // increment instead of collapsing to 0.
  const int64_t increments =
      (whole_increments == 0 && ns != 0) ? 1 : whole_increments;
  // The increment count is negated before being handed to the call.
  // NOTE(review): presumably a negative interval means "relative delay" -
  // confirm against the NtDelayExecution contract.
  int64_t delay_interval = -increments;
  NtDelayExecutionPointer.invoke(0, &delay_interval);
}
// Synchronizes memory by issuing a MemoryBarrier().
void SyncMemory() {
  MemoryBarrier();
}
void Sleep(std::chrono::microseconds duration) {

View File

@ -940,6 +940,57 @@ bool KernelState::Save(ByteStream* stream) {
return true;
}
// Refreshes the guest timestamp bundle (interrupt time, system time and tick
// count) from Clock.
// This only gets triggered once per ms at most, so fields other than the tick
// count will probably not be updated in a timely manner for guest code that
// uses them.
void KernelState::UpdateKeTimestampBundle() {
  auto* bundle =
      memory_->TranslateVirtual<X_TIME_STAMP_BUNDLE*>(ke_timestamp_bundle_ptr_);
  // Sample the uptime first; it is the last field written below.
  const uint32_t guest_uptime_ms = Clock::QueryGuestUptimeMillis();

  xe::store_and_swap<uint64_t>(&bundle->interrupt_time,
                               Clock::QueryGuestInterruptTime());
  xe::store_and_swap<uint64_t>(&bundle->system_time,
                               Clock::QueryGuestSystemTime());
  xe::store_and_swap<uint32_t>(&bundle->tick_count, guest_uptime_ms);
}
// Returns the guest address of the timestamp bundle, creating it on first
// use.
uint32_t KernelState::GetKeTimestampBundle() {
  // Fast path: the bundle already exists.
  XE_LIKELY_IF(ke_timestamp_bundle_ptr_) { return ke_timestamp_bundle_ptr_; }

  // Slow path: CreateKeTimestampBundle takes the global critical region, so
  // give it a heads-up before calling into it.
  global_critical_region::PrepareToAcquire();
  return CreateKeTimestampBundle();
}
// Slow path of GetKeTimestampBundle: allocates the guest X_TIME_STAMP_BUNDLE
// on the system heap, fills it with the current clock values and starts the
// 1 ms repeating timer that keeps it updated. Returns the guest address of
// the bundle.
XE_NOINLINE
XE_COLD
uint32_t KernelState::CreateKeTimestampBundle() {
  auto crit = global_critical_region::Acquire();

  // The caller checked ke_timestamp_bundle_ptr_ without holding the lock, so
  // another thread may have created the bundle in the meantime. Re-check
  // under the lock so only one bundle and one timer are ever created.
  if (ke_timestamp_bundle_ptr_) {
    return ke_timestamp_bundle_ptr_;
  }

  uint32_t pKeTimeStampBundle =
      memory_->SystemHeapAlloc(sizeof(X_TIME_STAMP_BUNDLE));
  X_TIME_STAMP_BUNDLE* lpKeTimeStampBundle =
      memory_->TranslateVirtual<X_TIME_STAMP_BUNDLE*>(pKeTimeStampBundle);

  xe::store_and_swap<uint64_t>(&lpKeTimeStampBundle->interrupt_time,
                               Clock::QueryGuestInterruptTime());
  xe::store_and_swap<uint64_t>(&lpKeTimeStampBundle->system_time,
                               Clock::QueryGuestSystemTime());
  xe::store_and_swap<uint32_t>(&lpKeTimeStampBundle->tick_count,
                               Clock::QueryGuestUptimeMillis());
  xe::store_and_swap<uint32_t>(&lpKeTimeStampBundle->padding, 0);

  // Publish the pointer before the timer is started: the timer callback
  // reads ke_timestamp_bundle_ptr_ without taking the lock, so it must never
  // be able to observe 0 and write through guest address 0.
  ke_timestamp_bundle_ptr_ = pKeTimeStampBundle;

  timestamp_timer_ = xe::threading::HighResolutionTimer::CreateRepeating(
      std::chrono::milliseconds(1),
      [this]() { this->UpdateKeTimestampBundle(); });

  return pKeTimeStampBundle;
}
bool KernelState::Restore(ByteStream* stream) {
// Check the magic value.
if (stream->Read<uint32_t>() != kKernelSaveSignature) {

View File

@ -17,6 +17,7 @@
#include <memory>
#include <vector>
#include "achievement_manager.h"
#include "xenia/base/bit_map.h"
#include "xenia/base/cvar.h"
#include "xenia/base/mutex.h"
@ -30,7 +31,6 @@
#include "xenia/memory.h"
#include "xenia/vfs/virtual_file_system.h"
#include "xenia/xbox.h"
#include "achievement_manager.h"
namespace xe {
class ByteStream;
@ -88,6 +88,17 @@ struct TerminateNotification {
uint32_t priority;
};
// Structure for KeTimeStampBundle.
// A bit like the timers on KUSER_SHARED on normal win32:
// https://www.geoffchappell.com/studies/windows/km/ntoskrnl/inc/api/ntexapi_x/kuser_shared_data/index.htm
// The fields live in guest memory and are accessed with
// xe::store_and_swap / xe::load_and_swap.
struct X_TIME_STAMP_BUNDLE {
  uint64_t interrupt_time;
  // i assume system_time is in 100 ns intervals like on win32
  uint64_t system_time;
  // Guest uptime in milliseconds.
  uint32_t tick_count;
  uint32_t padding;
};
// The guest reads this structure directly, so its size must not drift
// (the bundle was previously a hand-sized 24 byte allocation).
static_assert(sizeof(X_TIME_STAMP_BUNDLE) == 24,
              "X_TIME_STAMP_BUNDLE must stay 24 bytes for the guest");
class KernelState {
public:
explicit KernelState(Emulator* emulator);
@ -234,6 +245,14 @@ class KernelState {
bool Restore(ByteStream* stream);
uint32_t notification_position_ = 2;
// Returns the guest address of the X_TIME_STAMP_BUNDLE, creating it on first
// use.
uint32_t GetKeTimestampBundle();
// Slow path of GetKeTimestampBundle: allocates and initializes the bundle on
// the system heap and starts the 1 ms repeating timer that refreshes it.
// Returns the guest address of the new bundle.
XE_NOINLINE
XE_COLD
uint32_t CreateKeTimestampBundle();
// Rewrites the bundle's interrupt time, system time and tick count from
// Clock. Invoked by the repeating timer.
void UpdateKeTimestampBundle();
private:
void LoadKernelModule(object_ref<KernelModule> kernel_module);
@ -271,7 +290,8 @@ class KernelState {
std::list<std::function<void()>> dispatch_queue_;
BitMap tls_bitmap_;
uint32_t ke_timestamp_bundle_ptr_ = 0;
std::unique_ptr<xe::threading::HighResolutionTimer> timestamp_timer_;
friend class XObject;
};

View File

@ -382,6 +382,7 @@ using pointer_t = const shim::TypedPointerParam<T>&;
using int_result_t = shim::ResultBase<int32_t>;
using dword_result_t = shim::ResultBase<uint32_t>;
using qword_result_t = shim::ResultBase<uint64_t>;
using pointer_result_t = shim::ResultBase<uint32_t>;
using X_HRESULT_result_t = shim::ResultBase<X_HRESULT>;
using ppc_context_t = shim::ContextParam;

View File

@ -22,7 +22,9 @@ namespace xboxkrnl {
void KeEnableFpuExceptions_entry(
const ppc_context_t& ctx) { // dword_t enabled) {
// TODO(benvanik): can we do anything about exceptions?
// theres a lot more thats supposed to happen here, the floating point state has to be saved to kthread, the irql changes, the machine state register is changed to enable exceptions
// theres a lot more thats supposed to happen here, the floating point state
// has to be saved to kthread, the irql changes, the machine state register is
// changed to enable exceptions
X_KTHREAD* kthread = ctx->TranslateVirtual<X_KTHREAD*>(
ctx->TranslateVirtualGPR<X_KPCR*>(ctx->r[13])->current_thread);
@ -117,7 +119,15 @@ void KeSaveFloatingPointState_entry(ppc_context_t& ctx) {
DECLARE_XBOXKRNL_EXPORT1(KeSaveFloatingPointState, kNone, kImplemented);
#endif
// Returns the interrupt time currently stored in the guest timestamp bundle
// (creating the bundle first if it does not exist yet).
static qword_result_t KeQueryInterruptTime_entry(const ppc_context_t& ctx) {
  const uint32_t bundle_guest_ptr = ctx->kernel_state->GetKeTimestampBundle();
  auto* host_bundle =
      ctx->TranslateVirtual<X_TIME_STAMP_BUNDLE*>(bundle_guest_ptr);
  return xe::load_and_swap<uint64_t>(&host_bundle->interrupt_time);
}
DECLARE_XBOXKRNL_EXPORT1(KeQueryInterruptTime, kNone, kImplemented);
} // namespace xboxkrnl
} // namespace kernel
} // namespace xe

View File

@ -78,8 +78,7 @@ bool XboxkrnlModule::SendPIXCommand(const char* cmd) {
}
XboxkrnlModule::XboxkrnlModule(Emulator* emulator, KernelState* kernel_state)
: KernelModule(kernel_state, "xe:\\xboxkrnl.exe"),
timestamp_timer_(nullptr) {
: KernelModule(kernel_state, "xe:\\xboxkrnl.exe") {
RegisterExportTable(export_resolver_);
// Register all exported functions.
@ -216,23 +215,9 @@ XboxkrnlModule::XboxkrnlModule(Emulator* emulator, KernelState* kernel_state)
xe::store_and_swap<uint8_t>(lpXboxKrnlVersion + 6, 0x80);
xe::store_and_swap<uint8_t>(lpXboxKrnlVersion + 7, 0x00);
// KeTimeStampBundle (ad)
// This must be updated during execution, at 1ms intevals.
// We setup a system timer here to do that.
uint32_t pKeTimeStampBundle = memory_->SystemHeapAlloc(24);
auto lpKeTimeStampBundle = memory_->TranslateVirtual(pKeTimeStampBundle);
export_resolver_->SetVariableMapping(
"xboxkrnl.exe", ordinals::KeTimeStampBundle, pKeTimeStampBundle);
xe::store_and_swap<uint64_t>(lpKeTimeStampBundle + 0, 0);
xe::store_and_swap<uint64_t>(lpKeTimeStampBundle + 8, 0);
xe::store_and_swap<uint32_t>(lpKeTimeStampBundle + 16,
Clock::QueryGuestUptimeMillis());
xe::store_and_swap<uint32_t>(lpKeTimeStampBundle + 20, 0);
timestamp_timer_ = xe::threading::HighResolutionTimer::CreateRepeating(
std::chrono::milliseconds(1), [lpKeTimeStampBundle]() {
xe::store_and_swap<uint32_t>(lpKeTimeStampBundle + 16,
Clock::QueryGuestUptimeMillis());
});
export_resolver_->SetVariableMapping("xboxkrnl.exe",
ordinals::KeTimeStampBundle,
kernel_state->GetKeTimestampBundle());
}
static auto& get_xboxkrnl_exports() {

View File

@ -40,9 +40,6 @@ class XboxkrnlModule : public KernelModule {
protected:
uint32_t pix_function_ = 0;
private:
std::unique_ptr<xe::threading::HighResolutionTimer> timestamp_timer_;
};
} // namespace xboxkrnl

View File

@ -363,9 +363,18 @@ dword_result_t NtYieldExecution_entry() {
DECLARE_XBOXKRNL_EXPORT2(NtYieldExecution, kThreading, kImplemented,
kHighFrequency);
void KeQuerySystemTime_entry(lpqword_t time_ptr) {
uint64_t time = Clock::QueryGuestSystemTime();
// Writes the current guest system time to *time_ptr (no-op when time_ptr is
// null) and mirrors the same value into the timestamp bundle.
void KeQuerySystemTime_entry(lpqword_t time_ptr, const ppc_context_t& ctx) {
  if (!time_ptr) {
    return;
  }
  // Update the timestamp bundle to the time we queried.
  // This is a race, but I don't know of any software that requires it; it
  // just seems like we ought to keep it consistent with KeTimeStampBundle in
  // case something uses this function but also reads the bundle directly.
  const uint32_t bundle_guest_ptr = ctx->kernel_state->GetKeTimestampBundle();
  const uint64_t now = Clock::QueryGuestSystemTime();
  auto* host_bundle =
      ctx->TranslateVirtual<X_TIME_STAMP_BUNDLE*>(bundle_guest_ptr);
  // todo: cmpxchg?
  xe::store_and_swap<uint64_t>(&host_bundle->system_time, now);
  *time_ptr = now;
}