Store thread TLS in guest memory rather than using host APIs
This commit is contained in:
parent
c5ac4185ac
commit
eee0bb070c
|
@ -79,6 +79,9 @@ KernelState::KernelState(Emulator* emulator)
|
|||
// TODO(benvanik): figure out what this list is.
|
||||
pib->unk_54 = pib->unk_58 = 0;
|
||||
|
||||
// Hardcoded maximum of 2048 TLS slots.
|
||||
tls_bitmap_.Resize(64 * 4);
|
||||
|
||||
xam::AppManager::RegisterApps(this, app_manager_.get());
|
||||
}
|
||||
|
||||
|
@ -136,6 +139,14 @@ void KernelState::set_process_type(uint32_t value) {
|
|||
pib->process_type = uint8_t(value);
|
||||
}
|
||||
|
||||
uint32_t KernelState::AllocateTLS() {
|
||||
return uint32_t(tls_bitmap_.Acquire());
|
||||
}
|
||||
|
||||
// Returns a previously allocated TLS slot to the pool by releasing its entry
// in tls_bitmap_. No validation of |slot| is performed here.
void KernelState::FreeTLS(uint32_t slot) {
  tls_bitmap_.Release(slot);
}
|
||||
|
||||
void KernelState::RegisterTitleTerminateNotification(uint32_t routine,
|
||||
uint32_t priority) {
|
||||
TerminateNotification notify;
|
||||
|
@ -428,6 +439,9 @@ void KernelState::TerminateTitle() {
|
|||
// Unregister all notify listeners.
|
||||
notify_listeners_.clear();
|
||||
|
||||
// Clear the TLS map.
|
||||
tls_bitmap_.Reset();
|
||||
|
||||
if (XThread::IsInThread()) {
|
||||
threads_by_id_.erase(XThread::GetCurrentThread()->thread_id());
|
||||
|
||||
|
@ -634,6 +648,13 @@ bool KernelState::Save(ByteStream* stream) {
|
|||
// Save the object table
|
||||
object_table_.Save(stream);
|
||||
|
||||
// Write the TLS allocation bitmap
|
||||
auto tls_bitmap = tls_bitmap_.data();
|
||||
stream->Write(uint32_t(tls_bitmap.size()));
|
||||
for (size_t i = 0; i < tls_bitmap.size(); i++) {
|
||||
stream->Write<uint32_t>(tls_bitmap[i]);
|
||||
}
|
||||
|
||||
// We save XThreads absolutely first, as they will execute code upon save
|
||||
// (which could modify the kernel state)
|
||||
auto threads = object_table_.GetObjectsByType<XThread>();
|
||||
|
@ -698,6 +719,14 @@ bool KernelState::Restore(ByteStream* stream) {
|
|||
// Restore the object table
|
||||
object_table_.Restore(stream);
|
||||
|
||||
// Read the TLS allocation bitmap
|
||||
auto num_bitmap_entries = stream->Read<uint32_t>();
|
||||
auto& tls_bitmap = tls_bitmap_.data();
|
||||
tls_bitmap.resize(num_bitmap_entries);
|
||||
for (uint32_t i = 0; i < num_bitmap_entries; i++) {
|
||||
tls_bitmap[i] = stream->Read<uint32_t>();
|
||||
}
|
||||
|
||||
uint32_t num_threads = stream->Read<uint32_t>();
|
||||
XELOGD("Loading %d threads...", num_threads);
|
||||
for (uint32_t i = 0; i < num_threads; i++) {
|
||||
|
|
|
@ -19,6 +19,7 @@
|
|||
#include <memory>
|
||||
#include <vector>
|
||||
|
||||
#include "xenia/base/bit_map.h"
|
||||
#include "xenia/base/mutex.h"
|
||||
#include "xenia/cpu/export_resolver.h"
|
||||
#include "xenia/kernel/util/native_list.h"
|
||||
|
@ -115,6 +116,9 @@ class KernelState {
|
|||
return process_info_block_address_;
|
||||
}
|
||||
|
||||
uint32_t AllocateTLS();
|
||||
void FreeTLS(uint32_t slot);
|
||||
|
||||
void RegisterTitleTerminateNotification(uint32_t routine, uint32_t priority);
|
||||
void RemoveTitleTerminateNotification(uint32_t routine);
|
||||
|
||||
|
@ -209,6 +213,8 @@ class KernelState {
|
|||
std::condition_variable_any dispatch_cond_;
|
||||
std::list<std::function<void()>> dispatch_queue_;
|
||||
|
||||
BitMap tls_bitmap_;
|
||||
|
||||
friend class XObject;
|
||||
};
|
||||
|
||||
|
|
|
@ -382,62 +382,50 @@ SHIM_CALL KeQuerySystemTime_shim(PPCContext* ppc_context,
|
|||
// hoping for the best.
|
||||
|
||||
// http://msdn.microsoft.com/en-us/library/ms686801
|
||||
SHIM_CALL KeTlsAlloc_shim(PPCContext* ppc_context, KernelState* kernel_state) {
|
||||
XELOGD("KeTlsAlloc()");
|
||||
dword_result_t KeTlsAlloc() {
|
||||
uint32_t slot = kernel_state()->AllocateTLS();
|
||||
XThread::GetCurrentThread()->SetTLSValue(slot, 0);
|
||||
|
||||
auto tls_index = xe::threading::AllocateTlsHandle();
|
||||
if (tls_index == xe::threading::kInvalidTlsHandle) {
|
||||
tls_index = X_TLS_OUT_OF_INDEXES;
|
||||
}
|
||||
|
||||
SHIM_SET_RETURN_32(tls_index);
|
||||
return slot;
|
||||
}
|
||||
DECLARE_XBOXKRNL_EXPORT(KeTlsAlloc, ExportTag::kImplemented);
|
||||
|
||||
// http://msdn.microsoft.com/en-us/library/ms686804
|
||||
SHIM_CALL KeTlsFree_shim(PPCContext* ppc_context, KernelState* kernel_state) {
|
||||
uint32_t tls_index = SHIM_GET_ARG_32(0);
|
||||
|
||||
XELOGD("KeTlsFree(%.8X)", tls_index);
|
||||
|
||||
dword_result_t KeTlsFree(dword_t tls_index) {
|
||||
if (tls_index == X_TLS_OUT_OF_INDEXES) {
|
||||
SHIM_SET_RETURN_32(0);
|
||||
return;
|
||||
return 0;
|
||||
}
|
||||
|
||||
uint32_t result = xe::threading::FreeTlsHandle(tls_index) ? 1 : 0;
|
||||
SHIM_SET_RETURN_32(result);
|
||||
kernel_state()->FreeTLS(tls_index);
|
||||
return 1;
|
||||
}
|
||||
DECLARE_XBOXKRNL_EXPORT(KeTlsFree, ExportTag::kImplemented);
|
||||
|
||||
// http://msdn.microsoft.com/en-us/library/ms686812
|
||||
SHIM_CALL KeTlsGetValue_shim(PPCContext* ppc_context,
|
||||
KernelState* kernel_state) {
|
||||
uint32_t tls_index = SHIM_GET_ARG_32(0);
|
||||
|
||||
// Logging disabled, as some games spam this.
|
||||
// XELOGD(
|
||||
// "KeTlsGetValue(%.8X)",
|
||||
// tls_index);
|
||||
|
||||
uint32_t value = static_cast<uint32_t>(xe::threading::GetTlsValue(tls_index));
|
||||
if (!value) {
|
||||
// XELOGW("KeTlsGetValue should SetLastError if result is NULL");
|
||||
// TODO(benvanik): SetLastError? Or does user code do this?
|
||||
dword_result_t KeTlsGetValue(dword_t tls_index) {
|
||||
// xboxkrnl doesn't actually have an error branch - it always succeeds, even
|
||||
// if it overflows the TLS.
|
||||
uint32_t value = 0;
|
||||
if (XThread::GetCurrentThread()->GetTLSValue(tls_index, &value)) {
|
||||
return value;
|
||||
}
|
||||
|
||||
SHIM_SET_RETURN_32(value);
|
||||
return 0;
|
||||
}
|
||||
DECLARE_XBOXKRNL_EXPORT(KeTlsGetValue,
|
||||
ExportTag::kImplemented | ExportTag::kHighFrequency);
|
||||
|
||||
// http://msdn.microsoft.com/en-us/library/ms686818
|
||||
SHIM_CALL KeTlsSetValue_shim(PPCContext* ppc_context,
|
||||
KernelState* kernel_state) {
|
||||
uint32_t tls_index = SHIM_GET_ARG_32(0);
|
||||
uint32_t tls_value = SHIM_GET_ARG_32(1);
|
||||
dword_result_t KeTlsSetValue(dword_t tls_index, dword_t tls_value) {
|
||||
// xboxkrnl doesn't actually have an error branch - it always succeeds, even
|
||||
// if it overflows the TLS.
|
||||
if (XThread::GetCurrentThread()->SetTLSValue(tls_index, tls_value)) {
|
||||
return 1;
|
||||
}
|
||||
|
||||
XELOGD("KeTlsSetValue(%.8X, %.8X)", tls_index, tls_value);
|
||||
|
||||
uint32_t result = xe::threading::SetTlsValue(tls_index, tls_value) ? 1 : 0;
|
||||
SHIM_SET_RETURN_32(result);
|
||||
return 0;
|
||||
}
|
||||
DECLARE_XBOXKRNL_EXPORT(KeTlsSetValue, ExportTag::kImplemented);
|
||||
|
||||
void KeInitializeEvent(pointer_t<X_KEVENT> event_ptr, dword_t event_type,
|
||||
dword_t initial_state) {
|
||||
|
@ -1362,11 +1350,6 @@ void RegisterThreadingExports(xe::cpu::ExportResolver* export_resolver,
|
|||
SHIM_SET_MAPPING("xboxkrnl.exe", NtYieldExecution, state);
|
||||
SHIM_SET_MAPPING("xboxkrnl.exe", KeQuerySystemTime, state);
|
||||
|
||||
SHIM_SET_MAPPING("xboxkrnl.exe", KeTlsAlloc, state);
|
||||
SHIM_SET_MAPPING("xboxkrnl.exe", KeTlsFree, state);
|
||||
SHIM_SET_MAPPING("xboxkrnl.exe", KeTlsGetValue, state);
|
||||
SHIM_SET_MAPPING("xboxkrnl.exe", KeTlsSetValue, state);
|
||||
|
||||
SHIM_SET_MAPPING("xboxkrnl.exe", NtCreateSemaphore, state);
|
||||
SHIM_SET_MAPPING("xboxkrnl.exe", NtReleaseSemaphore, state);
|
||||
|
||||
|
|
|
@ -281,7 +281,7 @@ X_STATUS XThread::Create() {
|
|||
module->GetOptHeader(XEX_HEADER_TLS_INFO, &tls_header);
|
||||
}
|
||||
|
||||
const uint32_t kDefaultTlsSlotCount = 32;
|
||||
const uint32_t kDefaultTlsSlotCount = 1024;
|
||||
uint32_t tls_slots = kDefaultTlsSlotCount;
|
||||
uint32_t tls_extended_size = 0;
|
||||
if (tls_header && tls_header->slot_count) {
|
||||
|
@ -293,16 +293,16 @@ X_STATUS XThread::Create() {
|
|||
// HACK: we're currently not using the extra memory allocated for TLS slots
|
||||
// and instead relying on native TLS slots, so don't allocate anything for
|
||||
// the slots.
|
||||
uint32_t tls_slot_size = 0; // tls_slots * 4;
|
||||
uint32_t tls_total_size = tls_slot_size + tls_extended_size;
|
||||
tls_address_ = memory()->SystemHeapAlloc(tls_total_size);
|
||||
uint32_t tls_slot_size = tls_slots * 4;
|
||||
tls_total_size_ = tls_slot_size + tls_extended_size;
|
||||
tls_address_ = memory()->SystemHeapAlloc(tls_total_size_);
|
||||
if (!tls_address_) {
|
||||
XELOGW("Unable to allocate thread local storage block");
|
||||
return X_STATUS_NO_MEMORY;
|
||||
}
|
||||
|
||||
// Zero all of TLS.
|
||||
memory()->Fill(tls_address_, tls_total_size, 0);
|
||||
memory()->Fill(tls_address_, tls_total_size_, 0);
|
||||
if (tls_extended_size) {
|
||||
// If game has extended data, copy in the default values.
|
||||
assert_not_zero(tls_header->raw_data_address);
|
||||
|
@ -441,7 +441,6 @@ X_STATUS XThread::Exit(int exit_code) {
|
|||
|
||||
running_ = false;
|
||||
Release();
|
||||
ReleaseHandle();
|
||||
|
||||
// NOTE: this does not return!
|
||||
xe::threading::Thread::Exit(exit_code);
|
||||
|
@ -462,7 +461,6 @@ X_STATUS XThread::Terminate(int exit_code) {
|
|||
|
||||
running_ = false;
|
||||
Release();
|
||||
ReleaseHandle();
|
||||
|
||||
thread_->Terminate(exit_code);
|
||||
return X_STATUS_SUCCESS;
|
||||
|
@ -479,7 +477,7 @@ void XThread::Execute() {
|
|||
// All threads get a mandatory sleep. This is to deal with some buggy
|
||||
// games that are assuming the 360 is so slow to create threads that they
|
||||
// have time to initialize shared structures AFTER CreateThread (RR).
|
||||
xe::threading::Sleep(std::chrono::milliseconds(100));
|
||||
xe::threading::Sleep(std::chrono::milliseconds(10));
|
||||
|
||||
int exit_code = 0;
|
||||
|
||||
|
@ -708,6 +706,26 @@ void XThread::SetActiveCpu(uint32_t cpu_index) {
|
|||
xe::store_and_swap<uint8_t>(pcr + 0x10C, cpu_index);
|
||||
}
|
||||
|
||||
bool XThread::GetTLSValue(uint32_t slot, uint32_t* value_out) {
|
||||
if (slot * 4 > tls_total_size_) {
|
||||
return false;
|
||||
}
|
||||
|
||||
auto mem = memory()->TranslateVirtual(tls_address_ + slot * 4);
|
||||
*value_out = xe::load_and_swap<uint32_t>(mem);
|
||||
return true;
|
||||
}
|
||||
|
||||
bool XThread::SetTLSValue(uint32_t slot, uint32_t value) {
|
||||
if (slot * 4 >= tls_total_size_) {
|
||||
return false;
|
||||
}
|
||||
|
||||
auto mem = memory()->TranslateVirtual(tls_address_ + slot * 4);
|
||||
xe::store_and_swap<uint32_t>(mem, value);
|
||||
return true;
|
||||
}
|
||||
|
||||
uint32_t XThread::suspend_count() {
|
||||
return guest_object<X_KTHREAD>()->suspend_count;
|
||||
}
|
||||
|
@ -1021,6 +1039,7 @@ struct ThreadSavedState {
|
|||
|
||||
uint32_t apc_head;
|
||||
uint32_t tls_address;
|
||||
uint32_t tls_total_size;
|
||||
uint32_t pcr_address;
|
||||
uint32_t stack_base; // High address
|
||||
uint32_t stack_limit; // Low address
|
||||
|
@ -1076,6 +1095,7 @@ bool XThread::Save(ByteStream* stream) {
|
|||
state.is_running = running_;
|
||||
state.apc_head = apc_list_.head();
|
||||
state.tls_address = tls_address_;
|
||||
state.tls_total_size = tls_total_size_;
|
||||
state.pcr_address = pcr_address_;
|
||||
state.stack_base = stack_base_;
|
||||
state.stack_limit = stack_limit_;
|
||||
|
@ -1140,6 +1160,7 @@ object_ref<XThread> XThread::Restore(KernelState* kernel_state,
|
|||
thread->main_thread_ = state.is_main_thread;
|
||||
thread->apc_list_.set_head(state.apc_head);
|
||||
thread->tls_address_ = state.tls_address;
|
||||
thread->tls_total_size_ = state.tls_total_size;
|
||||
thread->pcr_address_ = state.pcr_address;
|
||||
thread->stack_base_ = state.stack_base;
|
||||
thread->stack_limit_ = state.stack_limit;
|
||||
|
|
|
@ -175,6 +175,9 @@ class XThread : public XObject {
|
|||
uint32_t active_cpu() const;
|
||||
void SetActiveCpu(uint32_t cpu_index);
|
||||
|
||||
bool GetTLSValue(uint32_t slot, uint32_t* value_out);
|
||||
bool SetTLSValue(uint32_t slot, uint32_t value);
|
||||
|
||||
uint32_t suspend_count();
|
||||
X_STATUS Resume(uint32_t* out_suspend_count = nullptr);
|
||||
X_STATUS Suspend(uint32_t* out_suspend_count = nullptr);
|
||||
|
@ -212,6 +215,7 @@ class XThread : public XObject {
|
|||
uint32_t scratch_address_ = 0;
|
||||
uint32_t scratch_size_ = 0;
|
||||
uint32_t tls_address_ = 0;
|
||||
uint32_t tls_total_size_ = 0;
|
||||
uint32_t pcr_address_ = 0;
|
||||
uint32_t stack_alloc_base_ = 0; // Stack alloc base
|
||||
uint32_t stack_alloc_size_ = 0; // Stack alloc size
|
||||
|
|
Loading…
Reference in New Issue