From a3196171853c7f3d61011da0f42309bef32fb836 Mon Sep 17 00:00:00 2001 From: Triang3l Date: Sat, 14 Nov 2020 18:09:47 +0300 Subject: [PATCH] [Kernel] Thread affinity cleanup --- .../kernel/xboxkrnl/xboxkrnl_threading.cc | 10 +-- src/xenia/kernel/xthread.cc | 79 +++++++++---------- src/xenia/kernel/xthread.h | 12 ++- 3 files changed, 53 insertions(+), 48 deletions(-) diff --git a/src/xenia/kernel/xboxkrnl/xboxkrnl_threading.cc b/src/xenia/kernel/xboxkrnl/xboxkrnl_threading.cc index 29b064841..1f0cd2cc2 100644 --- a/src/xenia/kernel/xboxkrnl/xboxkrnl_threading.cc +++ b/src/xenia/kernel/xboxkrnl/xboxkrnl_threading.cc @@ -224,17 +224,17 @@ DECLARE_XBOXKRNL_EXPORT1(KeSetCurrentStackPointers, kThreading, kImplemented); dword_result_t KeSetAffinityThread(lpvoid_t thread_ptr, dword_t affinity, lpdword_t previous_affinity_ptr) { - // Xbox 360 uses additional parameter (in comparation to NT equivalent) - // which is used only for returning previous thread affinity. (Based on code - // dissasembly) + // The Xbox 360, according to disassembly of KeSetAffinityThread, unlike + // Windows NT, stores the previous affinity via the pointer provided as an + // argument, not in the return value - the return value is used for the + // result. if (!affinity) { return X_STATUS_INVALID_PARAMETER; } - auto thread = XObject::GetNativeObject(kernel_state(), thread_ptr); if (thread) { if (previous_affinity_ptr) { - *previous_affinity_ptr = 1 << thread->active_cpu(); + *previous_affinity_ptr = uint32_t(1) << thread->active_cpu(); } thread->SetAffinity(affinity); } diff --git a/src/xenia/kernel/xthread.cc b/src/xenia/kernel/xthread.cc index 1e4753053..1e723ff65 100644 --- a/src/xenia/kernel/xthread.cc +++ b/src/xenia/kernel/xthread.cc @@ -156,11 +156,17 @@ void XThread::set_name(const std::string_view name) { } } -uint8_t next_cpu = 0; -uint8_t GetFakeCpuNumber(uint8_t proc_mask) { +static uint8_t next_cpu = 0; +static uint8_t GetFakeCpuNumber(uint8_t proc_mask) { + // NOTE: proc_mask is logical processors, not physical processors or cores. if (!proc_mask) { next_cpu = (next_cpu + 1) % 6; return next_cpu; // is this reasonable? + // TODO(Triang3l): Does the following apply here? + // https://docs.microsoft.com/en-us/windows/win32/dxtecharts/coding-for-multiple-cores + // "On Xbox 360, you must explicitly assign software threads to a particular + // hardware thread by using XSetThreadProcessor. Otherwise, all child + // threads will stay on the same hardware thread as the parent." } assert_false(proc_mask & 0xC0); @@ -205,7 +211,7 @@ void XThread::InitializeGuestObject() { // 0xA88 = APC // 0x18 = timer xe::store_and_swap(p + 0x09C, 0xFDFFD7FF); - xe::store_and_swap(p + 0xBF, 0); + // current_cpu is expected to be initialized externally via SetActiveCpu. xe::store_and_swap(p + 0x0D0, stack_base_); xe::store_and_swap(p + 0x130, Clock::QueryGuestSystemTime()); xe::store_and_swap(p + 0x144, guest_object() + 0x144); @@ -347,6 +353,9 @@ X_STATUS XThread::Create() { // Exports use this to get the kernel. thread_state_->context()->kernel_state = kernel_state_; + uint8_t cpu_index = GetFakeCpuNumber( + static_cast(creation_params_.creation_flags >> 24)); + // Initialize the KTHREAD object. InitializeGuestObject(); @@ -361,10 +370,9 @@ X_STATUS XThread::Create() { pcr->dpc_active = 0; // DPC active bool? - uint8_t proc_mask = - static_cast(creation_params_.creation_flags >> 24); - // Assign cpu core used by thread on guest side - SetAffinity(1 << GetFakeCpuNumber(proc_mask)); + // Assign the thread to the logical processor, and also set up the current CPU + // in KPCR and KTHREAD. + SetActiveCpu(cpu_index); // Always retain when starting - the thread owns itself until exited. RetainHandle(); @@ -417,10 +425,6 @@ X_STATUS XThread::Create() { return X_STATUS_NO_MEMORY; } - if (!cvars::ignore_thread_affinities) { - thread_->set_affinity_mask(proc_mask); - } - // Set the thread name based on host ID (for easier debugging). if (thread_name_.empty()) { set_name(fmt::format("XThread{:04X}", thread_->system_id())); @@ -702,40 +706,33 @@ void XThread::SetPriority(int32_t increment) { } void XThread::SetAffinity(uint32_t affinity) { - // Affinity mask, as in SetThreadAffinityMask. - // Xbox thread IDs: - // 0 - core 0, thread 0 - user - // 1 - core 0, thread 1 - user - // 2 - core 1, thread 0 - sometimes xcontent - // 3 - core 1, thread 1 - user - // 4 - core 2, thread 0 - xaudio - // 5 - core 2, thread 1 - user - // TODO(benvanik): implement better thread distribution. - // NOTE: these are logical processors, not physical processors or cores. + SetActiveCpu(GetFakeCpuNumber(affinity)); +} + +uint8_t XThread::active_cpu() const { + const X_KPCR& pcr = *memory()->TranslateVirtual(pcr_address_); + return pcr.current_cpu; +} + +void XThread::SetActiveCpu(uint8_t cpu_index) { + // May be called during thread creation - don't skip if current == new. + + assert_true(cpu_index < 6); + + X_KPCR& pcr = *memory()->TranslateVirtual(pcr_address_); + pcr.current_cpu = cpu_index; + + if (is_guest_thread()) { + X_KTHREAD& thread_object = + *memory()->TranslateVirtual(guest_object()); + thread_object.current_cpu = cpu_index; + } + if (xe::threading::logical_processor_count() < 6) { XELOGW("Too few processors - scheduling will be wonky"); } - SetActiveCpu(GetFakeCpuNumber(affinity)); - if (!cvars::ignore_thread_affinities) { - thread_->set_affinity_mask(affinity); - } -} - -uint32_t XThread::active_cpu() const { - uint8_t* pcr = memory()->TranslateVirtual(pcr_address_); - return xe::load_and_swap(pcr + 0x10C); -} - -void XThread::SetActiveCpu(uint32_t cpu_index) { - assert_true(cpu_index < 6); - uint8_t* pcr = memory()->TranslateVirtual(pcr_address_); - xe::store_and_swap(pcr + 0x10C, cpu_index); - - if (is_guest_thread()) { - X_KTHREAD* thread_object = - memory()->TranslateVirtual(guest_object()); - thread_object->current_cpu = cpu_index; + thread_->set_affinity_mask(uint64_t(1) << cpu_index); } } diff --git a/src/xenia/kernel/xthread.h b/src/xenia/kernel/xthread.h index de813bb49..7ab55c686 100644 --- a/src/xenia/kernel/xthread.h +++ b/src/xenia/kernel/xthread.h @@ -166,9 +166,17 @@ class XThread : public XObject, public cpu::Thread { int32_t priority() const { return priority_; } int32_t QueryPriority(); void SetPriority(int32_t increment); + + // Xbox thread IDs: + // 0 - core 0, thread 0 - user + // 1 - core 0, thread 1 - user + // 2 - core 1, thread 0 - sometimes xcontent + // 3 - core 1, thread 1 - user + // 4 - core 2, thread 0 - xaudio + // 5 - core 2, thread 1 - user void SetAffinity(uint32_t affinity); - uint32_t active_cpu() const; - void SetActiveCpu(uint32_t cpu_index); + uint8_t active_cpu() const; + void SetActiveCpu(uint8_t cpu_index); bool GetTLSValue(uint32_t slot, uint32_t* value_out); bool SetTLSValue(uint32_t slot, uint32_t value);