Store thread TLS in guest memory rather than using host APIs

This commit is contained in:
Dr. Chat 2015-12-27 13:16:08 -06:00 committed by Ben Vanik
parent c5ac4185ac
commit eee0bb070c
5 changed files with 95 additions and 52 deletions

View File

@ -79,6 +79,9 @@ KernelState::KernelState(Emulator* emulator)
// TODO(benvanik): figure out what this list is. // TODO(benvanik): figure out what this list is.
pib->unk_54 = pib->unk_58 = 0; pib->unk_54 = pib->unk_58 = 0;
// Hardcoded maximum of 2048 TLS slots.
tls_bitmap_.Resize(64 * 4);
xam::AppManager::RegisterApps(this, app_manager_.get()); xam::AppManager::RegisterApps(this, app_manager_.get());
} }
@ -136,6 +139,14 @@ void KernelState::set_process_type(uint32_t value) {
pib->process_type = uint8_t(value); pib->process_type = uint8_t(value);
} }
// Acquires an entry from the kernel-wide TLS allocation bitmap and returns
// its index narrowed to 32 bits.
// NOTE(review): the bitmap's failure value is not visible here; presumably an
// "all ones" result narrows to the out-of-indexes sentinel - confirm.
uint32_t KernelState::AllocateTLS() {
  const auto acquired_index = tls_bitmap_.Acquire();
  return static_cast<uint32_t>(acquired_index);
}
// Releases `slot` in the TLS allocation bitmap (the same bitmap that
// AllocateTLS() acquires entries from). No validation of `slot` is done here.
void KernelState::FreeTLS(uint32_t slot) {
  auto& allocation_bitmap = tls_bitmap_;
  allocation_bitmap.Release(slot);
}
void KernelState::RegisterTitleTerminateNotification(uint32_t routine, void KernelState::RegisterTitleTerminateNotification(uint32_t routine,
uint32_t priority) { uint32_t priority) {
TerminateNotification notify; TerminateNotification notify;
@ -428,6 +439,9 @@ void KernelState::TerminateTitle() {
// Unregister all notify listeners. // Unregister all notify listeners.
notify_listeners_.clear(); notify_listeners_.clear();
// Clear the TLS map.
tls_bitmap_.Reset();
if (XThread::IsInThread()) { if (XThread::IsInThread()) {
threads_by_id_.erase(XThread::GetCurrentThread()->thread_id()); threads_by_id_.erase(XThread::GetCurrentThread()->thread_id());
@ -634,6 +648,13 @@ bool KernelState::Save(ByteStream* stream) {
// Save the object table // Save the object table
object_table_.Save(stream); object_table_.Save(stream);
// Write the TLS allocation bitmap
auto tls_bitmap = tls_bitmap_.data();
stream->Write(uint32_t(tls_bitmap.size()));
for (size_t i = 0; i < tls_bitmap.size(); i++) {
stream->Write<uint32_t>(tls_bitmap[i]);
}
// We save XThreads absolutely first, as they will execute code upon save // We save XThreads absolutely first, as they will execute code upon save
// (which could modify the kernel state) // (which could modify the kernel state)
auto threads = object_table_.GetObjectsByType<XThread>(); auto threads = object_table_.GetObjectsByType<XThread>();
@ -698,6 +719,14 @@ bool KernelState::Restore(ByteStream* stream) {
// Restore the object table // Restore the object table
object_table_.Restore(stream); object_table_.Restore(stream);
// Read the TLS allocation bitmap
auto num_bitmap_entries = stream->Read<uint32_t>();
auto& tls_bitmap = tls_bitmap_.data();
tls_bitmap.resize(num_bitmap_entries);
for (uint32_t i = 0; i < num_bitmap_entries; i++) {
tls_bitmap[i] = stream->Read<uint32_t>();
}
uint32_t num_threads = stream->Read<uint32_t>(); uint32_t num_threads = stream->Read<uint32_t>();
XELOGD("Loading %d threads...", num_threads); XELOGD("Loading %d threads...", num_threads);
for (uint32_t i = 0; i < num_threads; i++) { for (uint32_t i = 0; i < num_threads; i++) {

View File

@ -19,6 +19,7 @@
#include <memory> #include <memory>
#include <vector> #include <vector>
#include "xenia/base/bit_map.h"
#include "xenia/base/mutex.h" #include "xenia/base/mutex.h"
#include "xenia/cpu/export_resolver.h" #include "xenia/cpu/export_resolver.h"
#include "xenia/kernel/util/native_list.h" #include "xenia/kernel/util/native_list.h"
@ -115,6 +116,9 @@ class KernelState {
return process_info_block_address_; return process_info_block_address_;
} }
uint32_t AllocateTLS();
void FreeTLS(uint32_t slot);
void RegisterTitleTerminateNotification(uint32_t routine, uint32_t priority); void RegisterTitleTerminateNotification(uint32_t routine, uint32_t priority);
void RemoveTitleTerminateNotification(uint32_t routine); void RemoveTitleTerminateNotification(uint32_t routine);
@ -209,6 +213,8 @@ class KernelState {
std::condition_variable_any dispatch_cond_; std::condition_variable_any dispatch_cond_;
std::list<std::function<void()>> dispatch_queue_; std::list<std::function<void()>> dispatch_queue_;
BitMap tls_bitmap_;
friend class XObject; friend class XObject;
}; };

View File

@ -382,62 +382,50 @@ SHIM_CALL KeQuerySystemTime_shim(PPCContext* ppc_context,
// hoping for the best. // hoping for the best.
// http://msdn.microsoft.com/en-us/library/ms686801 // http://msdn.microsoft.com/en-us/library/ms686801
SHIM_CALL KeTlsAlloc_shim(PPCContext* ppc_context, KernelState* kernel_state) { dword_result_t KeTlsAlloc() {
XELOGD("KeTlsAlloc()"); uint32_t slot = kernel_state()->AllocateTLS();
XThread::GetCurrentThread()->SetTLSValue(slot, 0);
auto tls_index = xe::threading::AllocateTlsHandle(); return slot;
if (tls_index == xe::threading::kInvalidTlsHandle) {
tls_index = X_TLS_OUT_OF_INDEXES;
}
SHIM_SET_RETURN_32(tls_index);
} }
DECLARE_XBOXKRNL_EXPORT(KeTlsAlloc, ExportTag::kImplemented);
// http://msdn.microsoft.com/en-us/library/ms686804 // http://msdn.microsoft.com/en-us/library/ms686804
SHIM_CALL KeTlsFree_shim(PPCContext* ppc_context, KernelState* kernel_state) { dword_result_t KeTlsFree(dword_t tls_index) {
uint32_t tls_index = SHIM_GET_ARG_32(0);
XELOGD("KeTlsFree(%.8X)", tls_index);
if (tls_index == X_TLS_OUT_OF_INDEXES) { if (tls_index == X_TLS_OUT_OF_INDEXES) {
SHIM_SET_RETURN_32(0); return 0;
return;
} }
uint32_t result = xe::threading::FreeTlsHandle(tls_index) ? 1 : 0; kernel_state()->FreeTLS(tls_index);
SHIM_SET_RETURN_32(result); return 1;
} }
DECLARE_XBOXKRNL_EXPORT(KeTlsFree, ExportTag::kImplemented);
// http://msdn.microsoft.com/en-us/library/ms686812 // http://msdn.microsoft.com/en-us/library/ms686812
SHIM_CALL KeTlsGetValue_shim(PPCContext* ppc_context, dword_result_t KeTlsGetValue(dword_t tls_index) {
KernelState* kernel_state) { // xboxkrnl doesn't actually have an error branch - it always succeeds, even
uint32_t tls_index = SHIM_GET_ARG_32(0); // if it overflows the TLS.
uint32_t value = 0;
// Logging disabled, as some games spam this. if (XThread::GetCurrentThread()->GetTLSValue(tls_index, &value)) {
// XELOGD( return value;
// "KeTlsGetValue(%.8X)",
// tls_index);
uint32_t value = static_cast<uint32_t>(xe::threading::GetTlsValue(tls_index));
if (!value) {
// XELOGW("KeTlsGetValue should SetLastError if result is NULL");
// TODO(benvanik): SetLastError? Or does user code do this?
} }
SHIM_SET_RETURN_32(value); return 0;
} }
DECLARE_XBOXKRNL_EXPORT(KeTlsGetValue,
ExportTag::kImplemented | ExportTag::kHighFrequency);
// http://msdn.microsoft.com/en-us/library/ms686818 // http://msdn.microsoft.com/en-us/library/ms686818
SHIM_CALL KeTlsSetValue_shim(PPCContext* ppc_context, dword_result_t KeTlsSetValue(dword_t tls_index, dword_t tls_value) {
KernelState* kernel_state) { // xboxkrnl doesn't actually have an error branch - it always succeeds, even
uint32_t tls_index = SHIM_GET_ARG_32(0); // if it overflows the TLS.
uint32_t tls_value = SHIM_GET_ARG_32(1); if (XThread::GetCurrentThread()->SetTLSValue(tls_index, tls_value)) {
return 1;
}
XELOGD("KeTlsSetValue(%.8X, %.8X)", tls_index, tls_value); return 0;
uint32_t result = xe::threading::SetTlsValue(tls_index, tls_value) ? 1 : 0;
SHIM_SET_RETURN_32(result);
} }
DECLARE_XBOXKRNL_EXPORT(KeTlsSetValue, ExportTag::kImplemented);
void KeInitializeEvent(pointer_t<X_KEVENT> event_ptr, dword_t event_type, void KeInitializeEvent(pointer_t<X_KEVENT> event_ptr, dword_t event_type,
dword_t initial_state) { dword_t initial_state) {
@ -1362,11 +1350,6 @@ void RegisterThreadingExports(xe::cpu::ExportResolver* export_resolver,
SHIM_SET_MAPPING("xboxkrnl.exe", NtYieldExecution, state); SHIM_SET_MAPPING("xboxkrnl.exe", NtYieldExecution, state);
SHIM_SET_MAPPING("xboxkrnl.exe", KeQuerySystemTime, state); SHIM_SET_MAPPING("xboxkrnl.exe", KeQuerySystemTime, state);
SHIM_SET_MAPPING("xboxkrnl.exe", KeTlsAlloc, state);
SHIM_SET_MAPPING("xboxkrnl.exe", KeTlsFree, state);
SHIM_SET_MAPPING("xboxkrnl.exe", KeTlsGetValue, state);
SHIM_SET_MAPPING("xboxkrnl.exe", KeTlsSetValue, state);
SHIM_SET_MAPPING("xboxkrnl.exe", NtCreateSemaphore, state); SHIM_SET_MAPPING("xboxkrnl.exe", NtCreateSemaphore, state);
SHIM_SET_MAPPING("xboxkrnl.exe", NtReleaseSemaphore, state); SHIM_SET_MAPPING("xboxkrnl.exe", NtReleaseSemaphore, state);

View File

@ -281,7 +281,7 @@ X_STATUS XThread::Create() {
module->GetOptHeader(XEX_HEADER_TLS_INFO, &tls_header); module->GetOptHeader(XEX_HEADER_TLS_INFO, &tls_header);
} }
const uint32_t kDefaultTlsSlotCount = 32; const uint32_t kDefaultTlsSlotCount = 1024;
uint32_t tls_slots = kDefaultTlsSlotCount; uint32_t tls_slots = kDefaultTlsSlotCount;
uint32_t tls_extended_size = 0; uint32_t tls_extended_size = 0;
if (tls_header && tls_header->slot_count) { if (tls_header && tls_header->slot_count) {
@ -293,16 +293,16 @@ X_STATUS XThread::Create() {
// HACK: we're currently not using the extra memory allocated for TLS slots // HACK: we're currently not using the extra memory allocated for TLS slots
// and instead relying on native TLS slots, so don't allocate anything for // and instead relying on native TLS slots, so don't allocate anything for
// the slots. // the slots.
uint32_t tls_slot_size = 0; // tls_slots * 4; uint32_t tls_slot_size = tls_slots * 4;
uint32_t tls_total_size = tls_slot_size + tls_extended_size; tls_total_size_ = tls_slot_size + tls_extended_size;
tls_address_ = memory()->SystemHeapAlloc(tls_total_size); tls_address_ = memory()->SystemHeapAlloc(tls_total_size_);
if (!tls_address_) { if (!tls_address_) {
XELOGW("Unable to allocate thread local storage block"); XELOGW("Unable to allocate thread local storage block");
return X_STATUS_NO_MEMORY; return X_STATUS_NO_MEMORY;
} }
// Zero all of TLS. // Zero all of TLS.
memory()->Fill(tls_address_, tls_total_size, 0); memory()->Fill(tls_address_, tls_total_size_, 0);
if (tls_extended_size) { if (tls_extended_size) {
// If game has extended data, copy in the default values. // If game has extended data, copy in the default values.
assert_not_zero(tls_header->raw_data_address); assert_not_zero(tls_header->raw_data_address);
@ -441,7 +441,6 @@ X_STATUS XThread::Exit(int exit_code) {
running_ = false; running_ = false;
Release(); Release();
ReleaseHandle();
// NOTE: this does not return! // NOTE: this does not return!
xe::threading::Thread::Exit(exit_code); xe::threading::Thread::Exit(exit_code);
@ -462,7 +461,6 @@ X_STATUS XThread::Terminate(int exit_code) {
running_ = false; running_ = false;
Release(); Release();
ReleaseHandle();
thread_->Terminate(exit_code); thread_->Terminate(exit_code);
return X_STATUS_SUCCESS; return X_STATUS_SUCCESS;
@ -479,7 +477,7 @@ void XThread::Execute() {
// All threads get a mandatory sleep. This is to deal with some buggy // All threads get a mandatory sleep. This is to deal with some buggy
// games that are assuming the 360 is so slow to create threads that they // games that are assuming the 360 is so slow to create threads that they
// have time to initialize shared structures AFTER CreateThread (RR). // have time to initialize shared structures AFTER CreateThread (RR).
xe::threading::Sleep(std::chrono::milliseconds(100)); xe::threading::Sleep(std::chrono::milliseconds(10));
int exit_code = 0; int exit_code = 0;
@ -708,6 +706,26 @@ void XThread::SetActiveCpu(uint32_t cpu_index) {
xe::store_and_swap<uint8_t>(pcr + 0x10C, cpu_index); xe::store_and_swap<uint8_t>(pcr + 0x10C, cpu_index);
} }
// Reads the 32-bit value held in TLS slot `slot` of this thread's TLS block
// (based at guest address tls_address_) and stores it in *value_out.
//
// Returns true on success. Returns false, leaving *value_out untouched, when
// the slot does not lie entirely within the tls_total_size_ bytes of the
// block.
//
// NOTE(review): tls_total_size_ is the slot area plus any extended TLS data
// (see XThread::Create), so this bound may be wider than the slot array
// itself - confirm that is intended.
bool XThread::GetTLSValue(uint32_t slot, uint32_t* value_out) {
  constexpr uint32_t kTlsSlotSize = sizeof(uint32_t);

  // Bound the slot index rather than the byte offset:
  //  - `slot * 4` wraps around in 32 bits for very large indices, which would
  //    alias a low slot instead of being rejected;
  //  - a `>` comparison on the byte offset accepts the slot that begins
  //    exactly at the end of the block, reading past the allocation.
  if (slot >= tls_total_size_ / kTlsSlotSize) {
    return false;
  }

  auto mem = memory()->TranslateVirtual(tls_address_ + slot * kTlsSlotSize);
  *value_out = xe::load_and_swap<uint32_t>(mem);
  return true;
}
// Stores the 32-bit `value` into TLS slot `slot` of this thread's TLS block
// (based at guest address tls_address_).
//
// Returns true on success. Returns false, writing nothing, when the slot does
// not lie entirely within the tls_total_size_ bytes of the block.
//
// NOTE(review): tls_total_size_ is the slot area plus any extended TLS data
// (see XThread::Create), so this bound may be wider than the slot array
// itself - confirm that is intended.
bool XThread::SetTLSValue(uint32_t slot, uint32_t value) {
  constexpr uint32_t kTlsSlotSize = sizeof(uint32_t);

  // Bound the slot index rather than the byte offset: `slot * 4` wraps around
  // in 32 bits for very large indices, so an out-of-range slot could pass a
  // byte-offset check and silently overwrite a low slot. Dividing the size
  // also rejects a trailing partial slot.
  if (slot >= tls_total_size_ / kTlsSlotSize) {
    return false;
  }

  auto mem = memory()->TranslateVirtual(tls_address_ + slot * kTlsSlotSize);
  xe::store_and_swap<uint32_t>(mem, value);
  return true;
}
uint32_t XThread::suspend_count() { uint32_t XThread::suspend_count() {
return guest_object<X_KTHREAD>()->suspend_count; return guest_object<X_KTHREAD>()->suspend_count;
} }
@ -1021,6 +1039,7 @@ struct ThreadSavedState {
uint32_t apc_head; uint32_t apc_head;
uint32_t tls_address; uint32_t tls_address;
uint32_t tls_total_size;
uint32_t pcr_address; uint32_t pcr_address;
uint32_t stack_base; // High address uint32_t stack_base; // High address
uint32_t stack_limit; // Low address uint32_t stack_limit; // Low address
@ -1076,6 +1095,7 @@ bool XThread::Save(ByteStream* stream) {
state.is_running = running_; state.is_running = running_;
state.apc_head = apc_list_.head(); state.apc_head = apc_list_.head();
state.tls_address = tls_address_; state.tls_address = tls_address_;
state.tls_total_size = tls_total_size_;
state.pcr_address = pcr_address_; state.pcr_address = pcr_address_;
state.stack_base = stack_base_; state.stack_base = stack_base_;
state.stack_limit = stack_limit_; state.stack_limit = stack_limit_;
@ -1140,6 +1160,7 @@ object_ref<XThread> XThread::Restore(KernelState* kernel_state,
thread->main_thread_ = state.is_main_thread; thread->main_thread_ = state.is_main_thread;
thread->apc_list_.set_head(state.apc_head); thread->apc_list_.set_head(state.apc_head);
thread->tls_address_ = state.tls_address; thread->tls_address_ = state.tls_address;
thread->tls_total_size_ = state.tls_total_size;
thread->pcr_address_ = state.pcr_address; thread->pcr_address_ = state.pcr_address;
thread->stack_base_ = state.stack_base; thread->stack_base_ = state.stack_base;
thread->stack_limit_ = state.stack_limit; thread->stack_limit_ = state.stack_limit;

View File

@ -175,6 +175,9 @@ class XThread : public XObject {
uint32_t active_cpu() const; uint32_t active_cpu() const;
void SetActiveCpu(uint32_t cpu_index); void SetActiveCpu(uint32_t cpu_index);
bool GetTLSValue(uint32_t slot, uint32_t* value_out);
bool SetTLSValue(uint32_t slot, uint32_t value);
uint32_t suspend_count(); uint32_t suspend_count();
X_STATUS Resume(uint32_t* out_suspend_count = nullptr); X_STATUS Resume(uint32_t* out_suspend_count = nullptr);
X_STATUS Suspend(uint32_t* out_suspend_count = nullptr); X_STATUS Suspend(uint32_t* out_suspend_count = nullptr);
@ -212,6 +215,7 @@ class XThread : public XObject {
uint32_t scratch_address_ = 0; uint32_t scratch_address_ = 0;
uint32_t scratch_size_ = 0; uint32_t scratch_size_ = 0;
uint32_t tls_address_ = 0; uint32_t tls_address_ = 0;
uint32_t tls_total_size_ = 0;
uint32_t pcr_address_ = 0; uint32_t pcr_address_ = 0;
uint32_t stack_alloc_base_ = 0; // Stack alloc base uint32_t stack_alloc_base_ = 0; // Stack alloc base
uint32_t stack_alloc_size_ = 0; // Stack alloc size uint32_t stack_alloc_size_ = 0; // Stack alloc size