From eee0bb070cbf41c08ed37e7f6f88a29345a709f7 Mon Sep 17 00:00:00 2001 From: "Dr. Chat" Date: Sun, 27 Dec 2015 13:16:08 -0600 Subject: [PATCH] Store thread TLS in guest memory rather than using host APIs --- src/xenia/kernel/kernel_state.cc | 29 ++++++++ src/xenia/kernel/kernel_state.h | 6 ++ .../kernel/xboxkrnl/xboxkrnl_threading.cc | 71 +++++++------------ src/xenia/kernel/xthread.cc | 37 +++++++--- src/xenia/kernel/xthread.h | 4 ++ 5 files changed, 95 insertions(+), 52 deletions(-) diff --git a/src/xenia/kernel/kernel_state.cc b/src/xenia/kernel/kernel_state.cc index c2d559194..7e9bd48b3 100644 --- a/src/xenia/kernel/kernel_state.cc +++ b/src/xenia/kernel/kernel_state.cc @@ -79,6 +79,9 @@ KernelState::KernelState(Emulator* emulator) // TODO(benvanik): figure out what this list is. pib->unk_54 = pib->unk_58 = 0; + // Hardcoded maximum of 2048 TLS slots. + tls_bitmap_.Resize(64 * 4); + xam::AppManager::RegisterApps(this, app_manager_.get()); } @@ -136,6 +139,14 @@ void KernelState::set_process_type(uint32_t value) { pib->process_type = uint8_t(value); } +uint32_t KernelState::AllocateTLS() { + return uint32_t(tls_bitmap_.Acquire()); +} + +void KernelState::FreeTLS(uint32_t slot) { + tls_bitmap_.Release(slot); +} + void KernelState::RegisterTitleTerminateNotification(uint32_t routine, uint32_t priority) { TerminateNotification notify; @@ -428,6 +439,9 @@ void KernelState::TerminateTitle() { // Unregister all notify listeners. notify_listeners_.clear(); + // Clear the TLS map. 
+ tls_bitmap_.Reset(); + if (XThread::IsInThread()) { threads_by_id_.erase(XThread::GetCurrentThread()->thread_id()); @@ -634,6 +648,13 @@ bool KernelState::Save(ByteStream* stream) { // Save the object table object_table_.Save(stream); + // Write the TLS allocation bitmap + const auto& tls_bitmap = tls_bitmap_.data(); + stream->Write(uint32_t(tls_bitmap.size())); + for (size_t i = 0; i < tls_bitmap.size(); i++) { + stream->Write(tls_bitmap[i]); + } + // We save XThreads absolutely first, as they will execute code upon save // (which could modify the kernel state) auto threads = object_table_.GetObjectsByType(); @@ -698,6 +719,14 @@ bool KernelState::Restore(ByteStream* stream) { // Restore the object table object_table_.Restore(stream); + // Read the TLS allocation bitmap + auto num_bitmap_entries = stream->Read(); + auto& tls_bitmap = tls_bitmap_.data(); + tls_bitmap.resize(num_bitmap_entries); + for (uint32_t i = 0; i < num_bitmap_entries; i++) { + tls_bitmap[i] = stream->Read(); + } + uint32_t num_threads = stream->Read(); XELOGD("Loading %d threads...", num_threads); for (uint32_t i = 0; i < num_threads; i++) { diff --git a/src/xenia/kernel/kernel_state.h b/src/xenia/kernel/kernel_state.h index 053e52851..691444aad 100644 --- a/src/xenia/kernel/kernel_state.h +++ b/src/xenia/kernel/kernel_state.h @@ -19,6 +19,7 @@ #include #include +#include "xenia/base/bit_map.h" #include "xenia/base/mutex.h" #include "xenia/cpu/export_resolver.h" #include "xenia/kernel/util/native_list.h" @@ -115,6 +116,9 @@ class KernelState { return process_info_block_address_; } + uint32_t AllocateTLS(); + void FreeTLS(uint32_t slot); + void RegisterTitleTerminateNotification(uint32_t routine, uint32_t priority); void RemoveTitleTerminateNotification(uint32_t routine); @@ -209,6 +213,8 @@ class KernelState { std::condition_variable_any dispatch_cond_; std::list> dispatch_queue_; + BitMap tls_bitmap_; + friend class XObject; }; diff --git a/src/xenia/kernel/xboxkrnl/xboxkrnl_threading.cc 
b/src/xenia/kernel/xboxkrnl/xboxkrnl_threading.cc index ec37bcea9..2ee7372d8 100644 --- a/src/xenia/kernel/xboxkrnl/xboxkrnl_threading.cc +++ b/src/xenia/kernel/xboxkrnl/xboxkrnl_threading.cc @@ -382,62 +382,50 @@ SHIM_CALL KeQuerySystemTime_shim(PPCContext* ppc_context, // hoping for the best. // http://msdn.microsoft.com/en-us/library/ms686801 -SHIM_CALL KeTlsAlloc_shim(PPCContext* ppc_context, KernelState* kernel_state) { - XELOGD("KeTlsAlloc()"); +dword_result_t KeTlsAlloc() { + uint32_t slot = kernel_state()->AllocateTLS(); + XThread::GetCurrentThread()->SetTLSValue(slot, 0); - auto tls_index = xe::threading::AllocateTlsHandle(); - if (tls_index == xe::threading::kInvalidTlsHandle) { - tls_index = X_TLS_OUT_OF_INDEXES; - } - - SHIM_SET_RETURN_32(tls_index); + return slot; } +DECLARE_XBOXKRNL_EXPORT(KeTlsAlloc, ExportTag::kImplemented); // http://msdn.microsoft.com/en-us/library/ms686804 -SHIM_CALL KeTlsFree_shim(PPCContext* ppc_context, KernelState* kernel_state) { - uint32_t tls_index = SHIM_GET_ARG_32(0); - - XELOGD("KeTlsFree(%.8X)", tls_index); - +dword_result_t KeTlsFree(dword_t tls_index) { if (tls_index == X_TLS_OUT_OF_INDEXES) { - SHIM_SET_RETURN_32(0); - return; + return 0; } - uint32_t result = xe::threading::FreeTlsHandle(tls_index) ? 1 : 0; - SHIM_SET_RETURN_32(result); + kernel_state()->FreeTLS(tls_index); + return 1; } +DECLARE_XBOXKRNL_EXPORT(KeTlsFree, ExportTag::kImplemented); // http://msdn.microsoft.com/en-us/library/ms686812 -SHIM_CALL KeTlsGetValue_shim(PPCContext* ppc_context, - KernelState* kernel_state) { - uint32_t tls_index = SHIM_GET_ARG_32(0); - - // Logging disabled, as some games spam this. - // XELOGD( - // "KeTlsGetValue(%.8X)", - // tls_index); - - uint32_t value = static_cast(xe::threading::GetTlsValue(tls_index)); - if (!value) { - // XELOGW("KeTlsGetValue should SetLastError if result is NULL"); - // TODO(benvanik): SetLastError? Or does user code do this? 
+dword_result_t KeTlsGetValue(dword_t tls_index) { + // xboxkrnl doesn't actually have an error branch - it always succeeds, even + // if it overflows the TLS. + uint32_t value = 0; + if (XThread::GetCurrentThread()->GetTLSValue(tls_index, &value)) { + return value; } - SHIM_SET_RETURN_32(value); + return 0; } +DECLARE_XBOXKRNL_EXPORT(KeTlsGetValue, + ExportTag::kImplemented | ExportTag::kHighFrequency); // http://msdn.microsoft.com/en-us/library/ms686818 -SHIM_CALL KeTlsSetValue_shim(PPCContext* ppc_context, - KernelState* kernel_state) { - uint32_t tls_index = SHIM_GET_ARG_32(0); - uint32_t tls_value = SHIM_GET_ARG_32(1); +dword_result_t KeTlsSetValue(dword_t tls_index, dword_t tls_value) { + // xboxkrnl doesn't actually have an error branch - it always succeeds, even + // if it overflows the TLS. + if (XThread::GetCurrentThread()->SetTLSValue(tls_index, tls_value)) { + return 1; + } - XELOGD("KeTlsSetValue(%.8X, %.8X)", tls_index, tls_value); - - uint32_t result = xe::threading::SetTlsValue(tls_index, tls_value) ? 
1 : 0; - SHIM_SET_RETURN_32(result); + return 0; } +DECLARE_XBOXKRNL_EXPORT(KeTlsSetValue, ExportTag::kImplemented); void KeInitializeEvent(pointer_t event_ptr, dword_t event_type, dword_t initial_state) { @@ -1362,11 +1350,6 @@ void RegisterThreadingExports(xe::cpu::ExportResolver* export_resolver, SHIM_SET_MAPPING("xboxkrnl.exe", NtYieldExecution, state); SHIM_SET_MAPPING("xboxkrnl.exe", KeQuerySystemTime, state); - SHIM_SET_MAPPING("xboxkrnl.exe", KeTlsAlloc, state); - SHIM_SET_MAPPING("xboxkrnl.exe", KeTlsFree, state); - SHIM_SET_MAPPING("xboxkrnl.exe", KeTlsGetValue, state); - SHIM_SET_MAPPING("xboxkrnl.exe", KeTlsSetValue, state); - SHIM_SET_MAPPING("xboxkrnl.exe", NtCreateSemaphore, state); SHIM_SET_MAPPING("xboxkrnl.exe", NtReleaseSemaphore, state); diff --git a/src/xenia/kernel/xthread.cc b/src/xenia/kernel/xthread.cc index 1c70663fc..72e7c9e74 100644 --- a/src/xenia/kernel/xthread.cc +++ b/src/xenia/kernel/xthread.cc @@ -281,7 +281,7 @@ X_STATUS XThread::Create() { module->GetOptHeader(XEX_HEADER_TLS_INFO, &tls_header); } - const uint32_t kDefaultTlsSlotCount = 32; + const uint32_t kDefaultTlsSlotCount = 1024; uint32_t tls_slots = kDefaultTlsSlotCount; uint32_t tls_extended_size = 0; if (tls_header && tls_header->slot_count) { @@ -293,16 +293,16 @@ X_STATUS XThread::Create() { // HACK: we're currently not using the extra memory allocated for TLS slots // and instead relying on native TLS slots, so don't allocate anything for // the slots. - uint32_t tls_slot_size = 0; // tls_slots * 4; - uint32_t tls_total_size = tls_slot_size + tls_extended_size; - tls_address_ = memory()->SystemHeapAlloc(tls_total_size); + uint32_t tls_slot_size = tls_slots * 4; + tls_total_size_ = tls_slot_size + tls_extended_size; + tls_address_ = memory()->SystemHeapAlloc(tls_total_size_); if (!tls_address_) { XELOGW("Unable to allocate thread local storage block"); return X_STATUS_NO_MEMORY; } // Zero all of TLS. 
- memory()->Fill(tls_address_, tls_total_size, 0); + memory()->Fill(tls_address_, tls_total_size_, 0); if (tls_extended_size) { // If game has extended data, copy in the default values. assert_not_zero(tls_header->raw_data_address); @@ -441,7 +441,6 @@ X_STATUS XThread::Exit(int exit_code) { running_ = false; Release(); - ReleaseHandle(); // NOTE: this does not return! xe::threading::Thread::Exit(exit_code); @@ -462,7 +461,6 @@ X_STATUS XThread::Terminate(int exit_code) { running_ = false; Release(); - ReleaseHandle(); thread_->Terminate(exit_code); return X_STATUS_SUCCESS; @@ -479,7 +477,7 @@ void XThread::Execute() { // All threads get a mandatory sleep. This is to deal with some buggy // games that are assuming the 360 is so slow to create threads that they // have time to initialize shared structures AFTER CreateThread (RR). - xe::threading::Sleep(std::chrono::milliseconds(100)); + xe::threading::Sleep(std::chrono::milliseconds(10)); int exit_code = 0; @@ -708,6 +706,26 @@ void XThread::SetActiveCpu(uint32_t cpu_index) { xe::store_and_swap(pcr + 0x10C, cpu_index); } +bool XThread::GetTLSValue(uint32_t slot, uint32_t* value_out) { + if (slot * 4 >= tls_total_size_) { + return false; + } + + auto mem = memory()->TranslateVirtual(tls_address_ + slot * 4); + *value_out = xe::load_and_swap(mem); + return true; +} + +bool XThread::SetTLSValue(uint32_t slot, uint32_t value) { + if (slot * 4 >= tls_total_size_) { + return false; + } + + auto mem = memory()->TranslateVirtual(tls_address_ + slot * 4); + xe::store_and_swap(mem, value); + return true; +} + uint32_t XThread::suspend_count() { return guest_object()->suspend_count; } @@ -1021,6 +1039,7 @@ struct ThreadSavedState { uint32_t apc_head; uint32_t tls_address; + uint32_t tls_total_size; uint32_t pcr_address; uint32_t stack_base; // High address uint32_t stack_limit; // Low address @@ -1076,6 +1095,7 @@ bool XThread::Save(ByteStream* stream) { state.is_running = running_; state.apc_head = apc_list_.head(); 
state.tls_address = tls_address_; + state.tls_total_size = tls_total_size_; state.pcr_address = pcr_address_; state.stack_base = stack_base_; state.stack_limit = stack_limit_; @@ -1140,6 +1160,7 @@ object_ref XThread::Restore(KernelState* kernel_state, thread->main_thread_ = state.is_main_thread; thread->apc_list_.set_head(state.apc_head); thread->tls_address_ = state.tls_address; + thread->tls_total_size_ = state.tls_total_size; thread->pcr_address_ = state.pcr_address; thread->stack_base_ = state.stack_base; thread->stack_limit_ = state.stack_limit; diff --git a/src/xenia/kernel/xthread.h b/src/xenia/kernel/xthread.h index 1d5b34ee9..a8a137702 100644 --- a/src/xenia/kernel/xthread.h +++ b/src/xenia/kernel/xthread.h @@ -175,6 +175,9 @@ class XThread : public XObject { uint32_t active_cpu() const; void SetActiveCpu(uint32_t cpu_index); + bool GetTLSValue(uint32_t slot, uint32_t* value_out); + bool SetTLSValue(uint32_t slot, uint32_t value); + uint32_t suspend_count(); X_STATUS Resume(uint32_t* out_suspend_count = nullptr); X_STATUS Suspend(uint32_t* out_suspend_count = nullptr); @@ -212,6 +215,7 @@ class XThread : public XObject { uint32_t scratch_address_ = 0; uint32_t scratch_size_ = 0; uint32_t tls_address_ = 0; + uint32_t tls_total_size_ = 0; uint32_t pcr_address_ = 0; uint32_t stack_alloc_base_ = 0; // Stack alloc base uint32_t stack_alloc_size_ = 0; // Stack alloc size