From ab21e1e0f09d304ff7b03fd234315ca2c06bf66f Mon Sep 17 00:00:00 2001 From: "chss95cs@gmail.com" Date: Sun, 16 Apr 2023 10:08:01 -0400 Subject: [PATCH] Several changes for timestamp bundle: Fully defined the structure. Single copy of it + single timer across all modules, managing it is now the responsibility of KernelState. add global_critical_region::PrepareToAcquire, which uses Prefetchw on the global crit. We now know we can use Prefetchw on all cpus that have AVX. add KeQueryInterruptTime, which is used by some dashboards. add threading::NanoSleep --- src/xenia/base/clock.cc | 7 ++- src/xenia/base/clock.h | 4 ++ src/xenia/base/clock_win.cc | 7 ++- src/xenia/base/main_init_win.cc | 11 ---- src/xenia/base/mutex.h | 8 ++- src/xenia/base/platform_win.h | 13 +++-- src/xenia/base/threading.h | 13 ++--- src/xenia/base/threading_win.cc | 11 +++- src/xenia/kernel/kernel_state.cc | 51 +++++++++++++++++++ src/xenia/kernel/kernel_state.h | 24 ++++++++- src/xenia/kernel/util/shim_utils.h | 1 + src/xenia/kernel/xboxkrnl/xboxkrnl_misc.cc | 12 ++++- src/xenia/kernel/xboxkrnl/xboxkrnl_module.cc | 23 ++------- src/xenia/kernel/xboxkrnl/xboxkrnl_module.h | 3 -- .../kernel/xboxkrnl/xboxkrnl_threading.cc | 13 ++++- 15 files changed, 147 insertions(+), 54 deletions(-) diff --git a/src/xenia/base/clock.cc b/src/xenia/base/clock.cc index 593a4f39e..5cbb48bd0 100644 --- a/src/xenia/base/clock.cc +++ b/src/xenia/base/clock.cc @@ -50,8 +50,7 @@ uint64_t last_guest_tick_count_ = 0; // Last sampled host tick count. 
uint64_t last_host_tick_count_ = Clock::QueryHostTickCount(); - -using tick_mutex_type = std::mutex; +using tick_mutex_type = std::mutex; // Mutex to ensure last_host_tick_count_ and last_guest_tick_count_ are in sync // std::mutex tick_mutex_; @@ -180,6 +179,10 @@ uint64_t Clock::QueryGuestSystemTime() { return guest_system_time_base_ + guest_system_time_offset; } +uint64_t Clock::QueryGuestInterruptTime() { + return Clock::QueryHostInterruptTime(); +} + uint32_t Clock::QueryGuestUptimeMillis() { return static_cast( std::min(QueryGuestSystemTimeOffset() / 10000, diff --git a/src/xenia/base/clock.h b/src/xenia/base/clock.h index 0f27ce081..be3e4b37c 100644 --- a/src/xenia/base/clock.h +++ b/src/xenia/base/clock.h @@ -54,6 +54,8 @@ class Clock { // Queries the milliseconds since the host began. static uint64_t QueryHostUptimeMillis(); + static uint64_t QueryHostInterruptTime(); + // Guest time scalar. static double guest_time_scalar(); // Sets the guest time scalar, adjusting tick and wall clock speed. @@ -81,6 +83,8 @@ class Clock { // Queries the milliseconds since the guest began, accounting for scaling. static uint32_t QueryGuestUptimeMillis(); + static uint64_t QueryGuestInterruptTime(); + // Sets the system time of the guest. static void SetGuestSystemTime(uint64_t system_time); diff --git a/src/xenia/base/clock_win.cc b/src/xenia/base/clock_win.cc index c391731d3..466d4deac 100644 --- a/src/xenia/base/clock_win.cc +++ b/src/xenia/base/clock_win.cc @@ -47,5 +47,10 @@ uint64_t Clock::QueryHostSystemTime() { uint64_t Clock::QueryHostUptimeMillis() { return host_tick_count_platform() * 1000 / host_tick_frequency_platform(); } - +// todo: we only take the low part of interrupttime! this is actually a 96-bit +// int! 
+uint64_t Clock::QueryHostInterruptTime() { + return *reinterpret_cast(KUserShared() + + KUSER_SHARED_INTERRUPTTIME_OFFSET); +} } // namespace xe diff --git a/src/xenia/base/main_init_win.cc b/src/xenia/base/main_init_win.cc index ec4e032e6..a0065aac6 100644 --- a/src/xenia/base/main_init_win.cc +++ b/src/xenia/base/main_init_win.cc @@ -25,17 +25,6 @@ class StartupCpuFeatureCheck { "the " "FAQ for system requirements at https://xenia.jp"; } -#if 0 - if (!error_message) { - unsigned int data[4]; - Xbyak::util::Cpu::getCpuid(0x80000001, data); - if (!(data[2] & (1U << 8))) { - error_message = - "Your cpu does not support PrefetchW, which Xenia Canary " - "requires."; - } - } -#endif if (error_message == nullptr) { return; } else { diff --git a/src/xenia/base/mutex.h b/src/xenia/base/mutex.h index 5f1bc8a60..c75d2ce78 100644 --- a/src/xenia/base/mutex.h +++ b/src/xenia/base/mutex.h @@ -11,7 +11,7 @@ #define XENIA_BASE_MUTEX_H_ #include #include "platform.h" - +#include "memory.h" #define XE_ENABLE_FAST_WIN32_MUTEX 1 namespace xe { @@ -149,6 +149,12 @@ class global_critical_region { return global_unique_lock_type(mutex()); } + static inline void PrepareToAcquire() { +#if XE_PLATFORM_WIN32 == 1 + swcache::PrefetchW(&mutex()); +#endif + } + // Acquires a deferred lock on the global critical section. 
static inline global_unique_lock_type AcquireDeferred() { return global_unique_lock_type(mutex(), std::defer_lock); diff --git a/src/xenia/base/platform_win.h b/src/xenia/base/platform_win.h index a608f04b4..84d2fd150 100644 --- a/src/xenia/base/platform_win.h +++ b/src/xenia/base/platform_win.h @@ -35,8 +35,9 @@ #undef GetFirstChild #define XE_USE_NTDLL_FUNCTIONS 1 -//chrispy: disabling this for now, more research needs to be done imo, although it does work very well on my machine -// +// chrispy: disabling this for now, more research needs to be done imo, although +// it does work very well on my machine +// #define XE_USE_KUSER_SHARED 0 #if XE_USE_NTDLL_FUNCTIONS == 1 /* @@ -63,7 +64,11 @@ #define XE_NTDLL_IMPORT(name, cls, clsvar) static constexpr bool clsvar = false #endif -#if XE_USE_KUSER_SHARED==1 +static constexpr size_t KSUER_SHARED_SYSTEMTIME_OFFSET = 0x14; + +static constexpr size_t KUSER_SHARED_INTERRUPTTIME_OFFSET = 8; +static unsigned char* KUserShared() { return (unsigned char*)0x7FFE0000ULL; } +#if XE_USE_KUSER_SHARED == 1 // KUSER_SHARED struct __declspec(align(4)) _KSYSTEM_TIME { unsigned int LowPart; @@ -71,8 +76,6 @@ struct __declspec(align(4)) _KSYSTEM_TIME { int High2Time; }; -static constexpr size_t KSUER_SHARED_SYSTEMTIME_OFFSET = 0x14; -static unsigned char* KUserShared() { return (unsigned char*)0x7FFE0000ULL; } static volatile _KSYSTEM_TIME* GetKUserSharedSystemTime() { return reinterpret_cast( KUserShared() + KSUER_SHARED_SYSTEMTIME_OFFSET); diff --git a/src/xenia/base/threading.h b/src/xenia/base/threading.h index 9e80f29ad..a145c3830 100644 --- a/src/xenia/base/threading.h +++ b/src/xenia/base/threading.h @@ -115,6 +115,7 @@ void SyncMemory(); // Sleeps the current thread for at least as long as the given duration. 
void Sleep(std::chrono::microseconds duration); +void NanoSleep(int64_t ns); template void Sleep(std::chrono::duration duration) { Sleep(std::chrono::duration_cast(duration)); @@ -148,7 +149,7 @@ bool SetTlsValue(TlsHandle handle, uintptr_t value); // be kept short or else all timers will be impacted. This is a simplified // wrapper around QueueTimerRecurring which automatically cancels the timer on // destruction. -//only used by XboxkrnlModule::XboxkrnlModule +// only used by XboxkrnlModule::XboxkrnlModule class HighResolutionTimer { HighResolutionTimer(std::chrono::milliseconds interval, std::function callback) { @@ -302,14 +303,14 @@ class Event : public WaitHandle { // the nonsignaled state after releasing the appropriate number of waiting // threads. virtual void Pulse() = 0; - + virtual EventInfo Query() = 0; - #if XE_PLATFORM_WIN32 ==1 - //SetEvent, but if there is a waiter we immediately transfer execution to it +#if XE_PLATFORM_WIN32 == 1 + // SetEvent, but if there is a waiter we immediately transfer execution to it virtual void SetBoostPriority() = 0; - #else +#else void SetBoostPriority() { Set(); } - #endif +#endif }; // Models a Win32-like semaphore object. diff --git a/src/xenia/base/threading_win.cc b/src/xenia/base/threading_win.cc index c5b0cf0ad..139d69c59 100644 --- a/src/xenia/base/threading_win.cc +++ b/src/xenia/base/threading_win.cc @@ -148,7 +148,16 @@ void MaybeYield() { // memorybarrier is really not necessary here... 
// MemoryBarrier(); } - +void NanoSleep(int64_t ns) { +//nanosleep is done in 100 nanosecond increments + int64_t in_nt_increments = ns / 100LL; + if (in_nt_increments == 0 && ns != 0) { + //if we're explicitly requesting a delay of 0 ns, let it go through, otherwise if it was less than a 100ns increment we round up to 100ns + in_nt_increments = 1; + } + in_nt_increments = -in_nt_increments; + NtDelayExecutionPointer.invoke(0, &in_nt_increments); +} void SyncMemory() { MemoryBarrier(); } void Sleep(std::chrono::microseconds duration) { diff --git a/src/xenia/kernel/kernel_state.cc b/src/xenia/kernel/kernel_state.cc index b48bfd5e9..54b357e13 100644 --- a/src/xenia/kernel/kernel_state.cc +++ b/src/xenia/kernel/kernel_state.cc @@ -940,6 +940,57 @@ bool KernelState::Save(ByteStream* stream) { return true; } +// this only gets triggered once per ms at most, so fields other than tick count +// will probably not be updated in a timely manner for guest code that uses them +void KernelState::UpdateKeTimestampBundle() { + X_TIME_STAMP_BUNDLE* lpKeTimeStampBundle = + memory_->TranslateVirtual(ke_timestamp_bundle_ptr_); + uint32_t uptime_ms = Clock::QueryGuestUptimeMillis(); + xe::store_and_swap(&lpKeTimeStampBundle->interrupt_time, + Clock::QueryGuestInterruptTime()); + xe::store_and_swap(&lpKeTimeStampBundle->system_time, + Clock::QueryGuestSystemTime()); + xe::store_and_swap(&lpKeTimeStampBundle->tick_count, uptime_ms); +} + +uint32_t KernelState::GetKeTimestampBundle() { + XE_LIKELY_IF(ke_timestamp_bundle_ptr_) { + return ke_timestamp_bundle_ptr_; + } + else { + global_critical_region::PrepareToAcquire(); + return CreateKeTimestampBundle(); + } +} + +XE_NOINLINE +XE_COLD +uint32_t KernelState::CreateKeTimestampBundle() { + auto crit = global_critical_region::Acquire(); + + uint32_t pKeTimeStampBundle = + memory_->SystemHeapAlloc(sizeof(X_TIME_STAMP_BUNDLE)); + X_TIME_STAMP_BUNDLE* lpKeTimeStampBundle = + memory_->TranslateVirtual(pKeTimeStampBundle); + + 
xe::store_and_swap(&lpKeTimeStampBundle->interrupt_time, + Clock::QueryGuestInterruptTime()); + + xe::store_and_swap(&lpKeTimeStampBundle->system_time, + Clock::QueryGuestSystemTime()); + + xe::store_and_swap(&lpKeTimeStampBundle->tick_count, + Clock::QueryGuestUptimeMillis()); + + xe::store_and_swap(&lpKeTimeStampBundle->padding, 0); + + timestamp_timer_ = xe::threading::HighResolutionTimer::CreateRepeating( + std::chrono::milliseconds(1), + [this]() { this->UpdateKeTimestampBundle(); }); + ke_timestamp_bundle_ptr_ = pKeTimeStampBundle; + return pKeTimeStampBundle; +} + bool KernelState::Restore(ByteStream* stream) { // Check the magic value. if (stream->Read() != kKernelSaveSignature) { diff --git a/src/xenia/kernel/kernel_state.h b/src/xenia/kernel/kernel_state.h index e4b479ae2..c917cdfb7 100644 --- a/src/xenia/kernel/kernel_state.h +++ b/src/xenia/kernel/kernel_state.h @@ -17,6 +17,7 @@ #include #include +#include "achievement_manager.h" #include "xenia/base/bit_map.h" #include "xenia/base/cvar.h" #include "xenia/base/mutex.h" @@ -30,7 +31,6 @@ #include "xenia/memory.h" #include "xenia/vfs/virtual_file_system.h" #include "xenia/xbox.h" -#include "achievement_manager.h" namespace xe { class ByteStream; @@ -88,6 +88,17 @@ struct TerminateNotification { uint32_t priority; }; +// structure for KeTimeStampBuindle +// a bit like the timers on KUSER_SHARED on normal win32 +// https://www.geoffchappell.com/studies/windows/km/ntoskrnl/inc/api/ntexapi_x/kuser_shared_data/index.htm +struct X_TIME_STAMP_BUNDLE { + uint64_t interrupt_time; + // i assume system_time is in 100 ns intervals like on win32 + uint64_t system_time; + uint32_t tick_count; + uint32_t padding; +}; + class KernelState { public: explicit KernelState(Emulator* emulator); @@ -234,6 +245,14 @@ class KernelState { bool Restore(ByteStream* stream); uint32_t notification_position_ = 2; + + uint32_t GetKeTimestampBundle(); + + XE_NOINLINE + XE_COLD + uint32_t CreateKeTimestampBundle(); + void 
UpdateKeTimestampBundle(); + private: void LoadKernelModule(object_ref kernel_module); @@ -271,7 +290,8 @@ class KernelState { std::list> dispatch_queue_; BitMap tls_bitmap_; - + uint32_t ke_timestamp_bundle_ptr_ = 0; + std::unique_ptr timestamp_timer_; friend class XObject; }; diff --git a/src/xenia/kernel/util/shim_utils.h b/src/xenia/kernel/util/shim_utils.h index fe49fd05f..976b73514 100644 --- a/src/xenia/kernel/util/shim_utils.h +++ b/src/xenia/kernel/util/shim_utils.h @@ -382,6 +382,7 @@ using pointer_t = const shim::TypedPointerParam&; using int_result_t = shim::ResultBase; using dword_result_t = shim::ResultBase; +using qword_result_t = shim::ResultBase; using pointer_result_t = shim::ResultBase; using X_HRESULT_result_t = shim::ResultBase; using ppc_context_t = shim::ContextParam; diff --git a/src/xenia/kernel/xboxkrnl/xboxkrnl_misc.cc b/src/xenia/kernel/xboxkrnl/xboxkrnl_misc.cc index 0db0c7b56..0df1744f8 100644 --- a/src/xenia/kernel/xboxkrnl/xboxkrnl_misc.cc +++ b/src/xenia/kernel/xboxkrnl/xboxkrnl_misc.cc @@ -22,7 +22,9 @@ namespace xboxkrnl { void KeEnableFpuExceptions_entry( const ppc_context_t& ctx) { // dword_t enabled) { // TODO(benvanik): can we do anything about exceptions? 
- // theres a lot more thats supposed to happen here, the floating point state has to be saved to kthread, the irql changes, the machine state register is changed to enable exceptions + // theres a lot more thats supposed to happen here, the floating point state + // has to be saved to kthread, the irql changes, the machine state register is + // changed to enable exceptions X_KTHREAD* kthread = ctx->TranslateVirtual( ctx->TranslateVirtualGPR(ctx->r[13])->current_thread); @@ -117,7 +119,15 @@ void KeSaveFloatingPointState_entry(ppc_context_t& ctx) { DECLARE_XBOXKRNL_EXPORT1(KeSaveFloatingPointState, kNone, kImplemented); #endif +static qword_result_t KeQueryInterruptTime_entry(const ppc_context_t& ctx) { + auto kstate = ctx->kernel_state; + uint32_t ts_bundle = kstate->GetKeTimestampBundle(); + X_TIME_STAMP_BUNDLE* bundle = + ctx->TranslateVirtual(ts_bundle); + return xe::load_and_swap(&bundle->interrupt_time); +} +DECLARE_XBOXKRNL_EXPORT1(KeQueryInterruptTime, kNone, kImplemented); } // namespace xboxkrnl } // namespace kernel } // namespace xe diff --git a/src/xenia/kernel/xboxkrnl/xboxkrnl_module.cc b/src/xenia/kernel/xboxkrnl/xboxkrnl_module.cc index f70f30bd0..fd4fe805b 100644 --- a/src/xenia/kernel/xboxkrnl/xboxkrnl_module.cc +++ b/src/xenia/kernel/xboxkrnl/xboxkrnl_module.cc @@ -78,8 +78,7 @@ bool XboxkrnlModule::SendPIXCommand(const char* cmd) { } XboxkrnlModule::XboxkrnlModule(Emulator* emulator, KernelState* kernel_state) - : KernelModule(kernel_state, "xe:\\xboxkrnl.exe"), - timestamp_timer_(nullptr) { + : KernelModule(kernel_state, "xe:\\xboxkrnl.exe") { RegisterExportTable(export_resolver_); // Register all exported functions. @@ -216,23 +215,9 @@ XboxkrnlModule::XboxkrnlModule(Emulator* emulator, KernelState* kernel_state) xe::store_and_swap(lpXboxKrnlVersion + 6, 0x80); xe::store_and_swap(lpXboxKrnlVersion + 7, 0x00); - // KeTimeStampBundle (ad) - // This must be updated during execution, at 1ms intevals. - // We setup a system timer here to do that. 
- uint32_t pKeTimeStampBundle = memory_->SystemHeapAlloc(24); - auto lpKeTimeStampBundle = memory_->TranslateVirtual(pKeTimeStampBundle); - export_resolver_->SetVariableMapping( - "xboxkrnl.exe", ordinals::KeTimeStampBundle, pKeTimeStampBundle); - xe::store_and_swap(lpKeTimeStampBundle + 0, 0); - xe::store_and_swap(lpKeTimeStampBundle + 8, 0); - xe::store_and_swap(lpKeTimeStampBundle + 16, - Clock::QueryGuestUptimeMillis()); - xe::store_and_swap(lpKeTimeStampBundle + 20, 0); - timestamp_timer_ = xe::threading::HighResolutionTimer::CreateRepeating( - std::chrono::milliseconds(1), [lpKeTimeStampBundle]() { - xe::store_and_swap(lpKeTimeStampBundle + 16, - Clock::QueryGuestUptimeMillis()); - }); + export_resolver_->SetVariableMapping("xboxkrnl.exe", + ordinals::KeTimeStampBundle, + kernel_state->GetKeTimestampBundle()); } static auto& get_xboxkrnl_exports() { diff --git a/src/xenia/kernel/xboxkrnl/xboxkrnl_module.h b/src/xenia/kernel/xboxkrnl/xboxkrnl_module.h index 39a9c95b2..348f07d67 100644 --- a/src/xenia/kernel/xboxkrnl/xboxkrnl_module.h +++ b/src/xenia/kernel/xboxkrnl/xboxkrnl_module.h @@ -40,9 +40,6 @@ class XboxkrnlModule : public KernelModule { protected: uint32_t pix_function_ = 0; - - private: - std::unique_ptr timestamp_timer_; }; } // namespace xboxkrnl diff --git a/src/xenia/kernel/xboxkrnl/xboxkrnl_threading.cc b/src/xenia/kernel/xboxkrnl/xboxkrnl_threading.cc index f9a0301d3..6fbf2b0be 100644 --- a/src/xenia/kernel/xboxkrnl/xboxkrnl_threading.cc +++ b/src/xenia/kernel/xboxkrnl/xboxkrnl_threading.cc @@ -363,9 +363,18 @@ dword_result_t NtYieldExecution_entry() { DECLARE_XBOXKRNL_EXPORT2(NtYieldExecution, kThreading, kImplemented, kHighFrequency); -void KeQuerySystemTime_entry(lpqword_t time_ptr) { - uint64_t time = Clock::QueryGuestSystemTime(); +void KeQuerySystemTime_entry(lpqword_t time_ptr, const ppc_context_t& ctx) { if (time_ptr) { + // update the timestamp bundle to the time we queried. 
+ // this is a race, but i don't know of any sw that requires it, it just seems + // like we ought to keep it consistent with ketimestampbundle in case + // something uses this function, but also reads it directly + uint32_t ts_bundle = ctx->kernel_state->GetKeTimestampBundle(); + uint64_t time = Clock::QueryGuestSystemTime(); + //todo: cmpxchg? + xe::store_and_swap( + &ctx->TranslateVirtual(ts_bundle)->system_time, + time); *time_ptr = time; } }