Better arg passing for Execute and experimenting with APCs.

This commit is contained in:
Ben Vanik 2014-07-02 15:39:30 -07:00
parent d55a31e210
commit d5f27dbf34
8 changed files with 135 additions and 27 deletions

View File

@ -95,7 +95,8 @@ void AudioSystem::ThreadStart() {
uint32_t client_callback_arg = clients_[index].wrapped_callback_arg; uint32_t client_callback_arg = clients_[index].wrapped_callback_arg;
xe_mutex_unlock(lock_); xe_mutex_unlock(lock_);
if (client_callback) { if (client_callback) {
processor->Execute(thread_state_, client_callback, client_callback_arg, 0); uint64_t args[] = { client_callback_arg };
processor->Execute(thread_state_, client_callback, args, XECOUNT(args));
} }
pumped++; pumped++;
index++; index++;

View File

@ -167,25 +167,15 @@ int Processor::Execute(XenonThreadState* thread_state, uint64_t address) {
} }
uint64_t Processor::Execute( uint64_t Processor::Execute(
XenonThreadState* thread_state, uint64_t address, uint64_t arg0) { XenonThreadState* thread_state, uint64_t address, uint64_t args[],
size_t arg_count) {
SCOPE_profile_cpu_f("cpu"); SCOPE_profile_cpu_f("cpu");
PPCContext* context = thread_state->context(); PPCContext* context = thread_state->context();
context->r[3] = arg0; XEASSERT(arg_count <= 5);
if (Execute(thread_state, address)) { for (size_t i = 0; i < arg_count; ++i) {
return 0xDEADBABE; context->r[3 + i] = args[i];
} }
return context->r[3];
}
uint64_t Processor::Execute(
XenonThreadState* thread_state, uint64_t address, uint64_t arg0,
uint64_t arg1) {
SCOPE_profile_cpu_f("cpu");
PPCContext* context = thread_state->context();
context->r[3] = arg0;
context->r[4] = arg1;
if (Execute(thread_state, address)) { if (Execute(thread_state, address)) {
return 0xDEADBABE; return 0xDEADBABE;
} }
@ -193,7 +183,7 @@ uint64_t Processor::Execute(
} }
uint64_t Processor::ExecuteInterrupt( uint64_t Processor::ExecuteInterrupt(
uint32_t cpu, uint64_t address, uint64_t arg0, uint64_t arg1) { uint32_t cpu, uint64_t address, uint64_t args[], size_t arg_count) {
SCOPE_profile_cpu_f("cpu"); SCOPE_profile_cpu_f("cpu");
// Acquire lock on interrupt thread (we can only dispatch one at a time). // Acquire lock on interrupt thread (we can only dispatch one at a time).
@ -204,7 +194,7 @@ uint64_t Processor::ExecuteInterrupt(
XESETUINT8BE(p + interrupt_thread_block_ + 0x10C, cpu); XESETUINT8BE(p + interrupt_thread_block_ + 0x10C, cpu);
// Execute interrupt. // Execute interrupt.
uint64_t result = Execute(interrupt_thread_state_, address, arg0, arg1); uint64_t result = Execute(interrupt_thread_state_, address, args, arg_count);
xe_mutex_unlock(interrupt_thread_lock_); xe_mutex_unlock(interrupt_thread_lock_);
return result; return result;

View File

@ -42,13 +42,11 @@ public:
int Execute( int Execute(
XenonThreadState* thread_state, uint64_t address); XenonThreadState* thread_state, uint64_t address);
uint64_t Execute( uint64_t Execute(
XenonThreadState* thread_state, uint64_t address, uint64_t arg0); XenonThreadState* thread_state, uint64_t address, uint64_t args[],
uint64_t Execute( size_t arg_count);
XenonThreadState* thread_state, uint64_t address, uint64_t arg0,
uint64_t arg1);
uint64_t ExecuteInterrupt( uint64_t ExecuteInterrupt(
uint32_t cpu, uint64_t address, uint64_t arg0, uint64_t arg1); uint32_t cpu, uint64_t address, uint64_t args[], size_t arg_count);
virtual void OnDebugClientConnected(uint32_t client_id); virtual void OnDebugClientConnected(uint32_t client_id);
virtual void OnDebugClientDisconnected(uint32_t client_id); virtual void OnDebugClientDisconnected(uint32_t client_id);

View File

@ -195,6 +195,7 @@ void GraphicsSystem::DispatchInterruptCallback(
if (!interrupt_callback_) { if (!interrupt_callback_) {
return; return;
} }
uint64_t args[] = { source, interrupt_callback_data_ };
processor_->ExecuteInterrupt( processor_->ExecuteInterrupt(
cpu, interrupt_callback_, source, interrupt_callback_data_); cpu, interrupt_callback_, args, XECOUNT(args));
} }

View File

@ -66,3 +66,7 @@ uint32_t NativeList::Shift() {
Remove(ptr); Remove(ptr);
return ptr; return ptr;
} }
// Returns true when head_ holds anything other than the kInvalidPointer
// sentinel (presumably meaning at least one entry is queued - confirm
// against Insert/Remove, which are not shown here).
bool NativeList::HasPending() {
  const bool head_is_sentinel = (head_ == kInvalidPointer);
  return !head_is_sentinel;
}

View File

@ -38,6 +38,7 @@ public:
bool IsQueued(uint32_t list_entry_ptr); bool IsQueued(uint32_t list_entry_ptr);
void Remove(uint32_t list_entry_ptr); void Remove(uint32_t list_entry_ptr);
uint32_t Shift(); uint32_t Shift();
bool HasPending();
private: private:
const uint32_t kInvalidPointer = 0xE0FE0FFF; const uint32_t kInvalidPointer = 0xE0FE0FFF;

View File

@ -78,6 +78,9 @@ XThread::~XThread() {
if (thread_state_) { if (thread_state_) {
delete thread_state_; delete thread_state_;
} }
if (scratch_address_) {
kernel_state()->memory()->HeapFree(scratch_address_, 0);
}
if (tls_address_) { if (tls_address_) {
kernel_state()->memory()->HeapFree(tls_address_, 0); kernel_state()->memory()->HeapFree(tls_address_, 0);
} }
@ -194,6 +197,12 @@ X_STATUS XThread::Create() {
XUserModule* module = kernel_state()->GetExecutableModule(); XUserModule* module = kernel_state()->GetExecutableModule();
// Allocate thread scratch.
// This is used by interrupts/APCs/etc so we can round-trip pointers through.
scratch_size_ = 4 * 16;
scratch_address_ = (uint32_t)memory()->HeapAlloc(
0, scratch_size_, MEMORY_FLAG_ZERO);
// Allocate TLS block. // Allocate TLS block.
const xe_xex2_header_t* header = module->xex_header(); const xe_xex2_header_t* header = module->xex_header();
uint32_t tls_size = header->tls_info.slot_count * header->tls_info.data_size; uint32_t tls_size = header->tls_info.slot_count * header->tls_info.data_size;
@ -244,6 +253,7 @@ X_STATUS XThread::Exit(int exit_code) {
// TODO(benvanik); dispatch events? waiters? etc? // TODO(benvanik); dispatch events? waiters? etc?
event_->Set(0, false); event_->Set(0, false);
RundownAPCs();
// NOTE: unless PlatformExit fails, expect it to never return! // NOTE: unless PlatformExit fails, expect it to never return!
X_STATUS return_code = PlatformExit(exit_code); X_STATUS return_code = PlatformExit(exit_code);
@ -365,15 +375,21 @@ void XThread::Execute() {
// If a XapiThreadStartup value is present, we use that as a trampoline. // If a XapiThreadStartup value is present, we use that as a trampoline.
// Otherwise, we are a raw thread. // Otherwise, we are a raw thread.
if (creation_params_.xapi_thread_startup) { if (creation_params_.xapi_thread_startup) {
uint64_t args[] = {
creation_params_.start_address,
creation_params_.start_context
};
kernel_state()->processor()->Execute( kernel_state()->processor()->Execute(
thread_state_, thread_state_,
creation_params_.xapi_thread_startup, creation_params_.xapi_thread_startup, args, XECOUNT(args));
creation_params_.start_address, creation_params_.start_context);
} else { } else {
// Run user code. // Run user code.
uint64_t args[] = {
creation_params_.start_context
};
int exit_code = (int)kernel_state()->processor()->Execute( int exit_code = (int)kernel_state()->processor()->Execute(
thread_state_, thread_state_,
creation_params_.start_address, creation_params_.start_context); creation_params_.start_address, args, XECOUNT(args));
// If we got here it means the execute completed without an exit being called. // If we got here it means the execute completed without an exit being called.
// Treat the return code as an implicit exit code. // Treat the return code as an implicit exit code.
Exit(exit_code); Exit(exit_code);
@ -402,7 +418,99 @@ void XThread::LockApc() {
} }
void XThread::UnlockApc() { void XThread::UnlockApc() {
bool needs_apc = apc_list_->HasPending();
xe_mutex_unlock(apc_lock_); xe_mutex_unlock(apc_lock_);
if (needs_apc) {
QueueUserAPC(reinterpret_cast<PAPCFUNC>(DeliverAPCs),
thread_handle_,
reinterpret_cast<ULONG_PTR>(this));
}
}
void XThread::DeliverAPCs(void* data) {
// http://www.drdobbs.com/inside-nts-asynchronous-procedure-call/184416590?pgno=1
// http://www.drdobbs.com/inside-nts-asynchronous-procedure-call/184416590?pgno=7
XThread* thread = reinterpret_cast<XThread*>(data);
auto membase = thread->memory()->membase();
auto processor = thread->kernel_state()->processor();
auto apc_list = thread->apc_list();
thread->LockApc();
while (apc_list->HasPending()) {
// Get APC entry (offset for LIST_ENTRY offset) and cache what we need.
// Calling the routine may delete the memory/overwrite it.
uint32_t apc_address = apc_list->Shift() - 8;
uint8_t* apc_ptr = membase + apc_address;
uint32_t kernel_routine = XEGETUINT32BE(apc_ptr + 16);
uint32_t normal_routine = XEGETUINT32BE(apc_ptr + 24);
uint32_t normal_context = XEGETUINT32BE(apc_ptr + 28);
uint32_t system_arg1 = XEGETUINT32BE(apc_ptr + 32);
uint32_t system_arg2 = XEGETUINT32BE(apc_ptr + 36);
// Mark as uninserted so that it can be reinserted again by the routine.
uint32_t old_flags = XEGETUINT32BE(apc_ptr + 40);
XESETUINT32BE(apc_ptr + 40, old_flags & ~0xFF00);
// Call kernel routine.
// The routine can modify all of its arguments before passing it on.
// Since we need to give guest accessible pointers over, we copy things
// into and out of scratch.
uint8_t* scratch_ptr = membase + thread->scratch_address_;
XESETUINT32BE(scratch_ptr + 0, normal_routine);
XESETUINT32BE(scratch_ptr + 4, normal_context);
XESETUINT32BE(scratch_ptr + 8, system_arg1);
XESETUINT32BE(scratch_ptr + 12, system_arg2);
// kernel_routine(apc_address, &normal_routine, &normal_context, &system_arg1, &system_arg2)
uint64_t kernel_args[] = {
apc_address,
thread->scratch_address_ + 0,
thread->scratch_address_ + 4,
thread->scratch_address_ + 8,
thread->scratch_address_ + 12,
};
processor->ExecuteInterrupt(
0, kernel_routine, kernel_args, XECOUNT(kernel_args));
normal_routine = XEGETUINT32BE(scratch_ptr + 0);
normal_context = XEGETUINT32BE(scratch_ptr + 4);
system_arg1 = XEGETUINT32BE(scratch_ptr + 8);
system_arg2 = XEGETUINT32BE(scratch_ptr + 12);
// Call the normal routine. Note that it may have been killed by the kernel
// routine.
if (normal_routine) {
thread->UnlockApc();
// normal_routine(normal_context, system_arg1, system_arg2)
uint64_t normal_args[] = { normal_context, system_arg1, system_arg2 };
processor->ExecuteInterrupt(
0, normal_routine, normal_args, XECOUNT(normal_args));
thread->LockApc();
}
}
thread->UnlockApc();
}
// Empties this thread's APC list, calling each entry's rundown routine (when
// it has one) instead of its kernel/normal routines. Exit() calls this just
// before PlatformExit().
void XThread::RundownAPCs() {
  auto guest_base = memory()->membase();

  // The APC lock stays held for the entire drain, rundown calls included.
  LockApc();
  while (apc_list_->HasPending()) {
    // Shift() returns the address of the list entry inside the APC record;
    // backing up 8 bytes (the LIST_ENTRY offset) gives the record start.
    const uint32_t apc_address = apc_list_->Shift() - 8;
    uint8_t* apc = guest_base + apc_address;

    // Read the routine now: calling it may delete or overwrite the record.
    const uint32_t rundown_routine = XEGETUINT32BE(apc + 20);

    // Flag the record as uninserted so the routine is able to insert it again.
    const uint32_t flags = XEGETUINT32BE(apc + 40);
    XESETUINT32BE(apc + 40, flags & ~0xFF00);

    // Entries without a rundown routine are simply dropped.
    if (!rundown_routine) {
      continue;
    }

    // rundown_routine(apc)
    uint64_t rundown_args[] = { apc_address };
    kernel_state()->processor()->ExecuteInterrupt(
        0, rundown_routine, rundown_args, XECOUNT(rundown_args));
  }
  UnlockApc();
}
int32_t XThread::QueryPriority() { int32_t XThread::QueryPriority() {

View File

@ -73,6 +73,9 @@ private:
void PlatformDestroy(); void PlatformDestroy();
X_STATUS PlatformExit(int exit_code); X_STATUS PlatformExit(int exit_code);
static void DeliverAPCs(void* data);
void RundownAPCs();
struct { struct {
uint32_t stack_size; uint32_t stack_size;
uint32_t xapi_thread_startup; uint32_t xapi_thread_startup;
@ -83,6 +86,8 @@ private:
uint32_t thread_id_; uint32_t thread_id_;
void* thread_handle_; void* thread_handle_;
uint32_t scratch_address_;
uint32_t scratch_size_;
uint32_t tls_address_; uint32_t tls_address_;
uint32_t thread_state_address_; uint32_t thread_state_address_;
cpu::XenonThreadState* thread_state_; cpu::XenonThreadState* thread_state_;