Better arg passing for Execute and experimenting with APCs.

This commit is contained in:
Ben Vanik 2014-07-02 15:39:30 -07:00
parent d55a31e210
commit d5f27dbf34
8 changed files with 135 additions and 27 deletions

View File

@ -95,7 +95,8 @@ void AudioSystem::ThreadStart() {
uint32_t client_callback_arg = clients_[index].wrapped_callback_arg;
xe_mutex_unlock(lock_);
if (client_callback) {
processor->Execute(thread_state_, client_callback, client_callback_arg, 0);
uint64_t args[] = { client_callback_arg };
processor->Execute(thread_state_, client_callback, args, XECOUNT(args));
}
pumped++;
index++;

View File

@ -167,25 +167,15 @@ int Processor::Execute(XenonThreadState* thread_state, uint64_t address) {
}
uint64_t Processor::Execute(
XenonThreadState* thread_state, uint64_t address, uint64_t arg0) {
XenonThreadState* thread_state, uint64_t address, uint64_t args[],
size_t arg_count) {
SCOPE_profile_cpu_f("cpu");
PPCContext* context = thread_state->context();
context->r[3] = arg0;
if (Execute(thread_state, address)) {
return 0xDEADBABE;
XEASSERT(arg_count <= 5);
for (size_t i = 0; i < arg_count; ++i) {
context->r[3 + i] = args[i];
}
return context->r[3];
}
uint64_t Processor::Execute(
XenonThreadState* thread_state, uint64_t address, uint64_t arg0,
uint64_t arg1) {
SCOPE_profile_cpu_f("cpu");
PPCContext* context = thread_state->context();
context->r[3] = arg0;
context->r[4] = arg1;
if (Execute(thread_state, address)) {
return 0xDEADBABE;
}
@ -193,7 +183,7 @@ uint64_t Processor::Execute(
}
uint64_t Processor::ExecuteInterrupt(
uint32_t cpu, uint64_t address, uint64_t arg0, uint64_t arg1) {
uint32_t cpu, uint64_t address, uint64_t args[], size_t arg_count) {
SCOPE_profile_cpu_f("cpu");
// Acquire lock on interrupt thread (we can only dispatch one at a time).
@ -204,7 +194,7 @@ uint64_t Processor::ExecuteInterrupt(
XESETUINT8BE(p + interrupt_thread_block_ + 0x10C, cpu);
// Execute interrupt.
uint64_t result = Execute(interrupt_thread_state_, address, arg0, arg1);
uint64_t result = Execute(interrupt_thread_state_, address, args, arg_count);
xe_mutex_unlock(interrupt_thread_lock_);
return result;

View File

@ -42,13 +42,11 @@ public:
int Execute(
XenonThreadState* thread_state, uint64_t address);
uint64_t Execute(
XenonThreadState* thread_state, uint64_t address, uint64_t arg0);
uint64_t Execute(
XenonThreadState* thread_state, uint64_t address, uint64_t arg0,
uint64_t arg1);
XenonThreadState* thread_state, uint64_t address, uint64_t args[],
size_t arg_count);
uint64_t ExecuteInterrupt(
uint32_t cpu, uint64_t address, uint64_t arg0, uint64_t arg1);
uint32_t cpu, uint64_t address, uint64_t args[], size_t arg_count);
virtual void OnDebugClientConnected(uint32_t client_id);
virtual void OnDebugClientDisconnected(uint32_t client_id);

View File

@ -195,6 +195,7 @@ void GraphicsSystem::DispatchInterruptCallback(
if (!interrupt_callback_) {
return;
}
uint64_t args[] = { source, interrupt_callback_data_ };
processor_->ExecuteInterrupt(
cpu, interrupt_callback_, source, interrupt_callback_data_);
cpu, interrupt_callback_, args, XECOUNT(args));
}

View File

@ -66,3 +66,7 @@ uint32_t NativeList::Shift() {
Remove(ptr);
return ptr;
}
// Returns true when head_ holds anything other than the kInvalidPointer
// sentinel, i.e. the list currently has a head entry.
bool NativeList::HasPending() {
  const bool is_empty = (head_ == kInvalidPointer);
  return !is_empty;
}

View File

@ -38,6 +38,7 @@ public:
bool IsQueued(uint32_t list_entry_ptr);
void Remove(uint32_t list_entry_ptr);
uint32_t Shift();
bool HasPending();
private:
const uint32_t kInvalidPointer = 0xE0FE0FFF;

View File

@ -78,6 +78,9 @@ XThread::~XThread() {
if (thread_state_) {
delete thread_state_;
}
if (scratch_address_) {
kernel_state()->memory()->HeapFree(scratch_address_, 0);
}
if (tls_address_) {
kernel_state()->memory()->HeapFree(tls_address_, 0);
}
@ -194,6 +197,12 @@ X_STATUS XThread::Create() {
XUserModule* module = kernel_state()->GetExecutableModule();
// Allocate thread scratch.
// This is used by interrupts/APCs/etc so we can round-trip pointers through.
scratch_size_ = 4 * 16;
scratch_address_ = (uint32_t)memory()->HeapAlloc(
0, scratch_size_, MEMORY_FLAG_ZERO);
// Allocate TLS block.
const xe_xex2_header_t* header = module->xex_header();
uint32_t tls_size = header->tls_info.slot_count * header->tls_info.data_size;
@ -244,6 +253,7 @@ X_STATUS XThread::Exit(int exit_code) {
// TODO(benvanik); dispatch events? waiters? etc?
event_->Set(0, false);
RundownAPCs();
// NOTE: unless PlatformExit fails, expect it to never return!
X_STATUS return_code = PlatformExit(exit_code);
@ -365,15 +375,21 @@ void XThread::Execute() {
// If a XapiThreadStartup value is present, we use that as a trampoline.
// Otherwise, we are a raw thread.
if (creation_params_.xapi_thread_startup) {
uint64_t args[] = {
creation_params_.start_address,
creation_params_.start_context
};
kernel_state()->processor()->Execute(
thread_state_,
creation_params_.xapi_thread_startup,
creation_params_.start_address, creation_params_.start_context);
creation_params_.xapi_thread_startup, args, XECOUNT(args));
} else {
// Run user code.
uint64_t args[] = {
creation_params_.start_context
};
int exit_code = (int)kernel_state()->processor()->Execute(
thread_state_,
creation_params_.start_address, creation_params_.start_context);
creation_params_.start_address, args, XECOUNT(args));
// If we got here it means the execute completed without an exit being called.
// Treat the return code as an implicit exit code.
Exit(exit_code);
@ -402,7 +418,99 @@ void XThread::LockApc() {
}
void XThread::UnlockApc() {
bool needs_apc = apc_list_->HasPending();
xe_mutex_unlock(apc_lock_);
if (needs_apc) {
QueueUserAPC(reinterpret_cast<PAPCFUNC>(DeliverAPCs),
thread_handle_,
reinterpret_cast<ULONG_PTR>(this));
}
}
// Drains the guest APC list of an XThread, invoking each entry's kernel
// routine and then, if it is still non-zero afterwards, its normal routine.
//
// data: the XThread* whose APC list should be drained (UnlockApc passes `this`
//       when it queues this function through QueueUserAPC).
//
// Locking: the APC lock is taken on entry and held while the list is inspected
// and while the kernel routine executes. It is dropped around the normal
// routine call and re-acquired afterwards, then released once the list is
// empty.
void XThread::DeliverAPCs(void* data) {
// http://www.drdobbs.com/inside-nts-asynchronous-procedure-call/184416590?pgno=1
// http://www.drdobbs.com/inside-nts-asynchronous-procedure-call/184416590?pgno=7
XThread* thread = reinterpret_cast<XThread*>(data);
auto membase = thread->memory()->membase();
auto processor = thread->kernel_state()->processor();
auto apc_list = thread->apc_list();
thread->LockApc();
while (apc_list->HasPending()) {
// Get APC entry (offset for LIST_ENTRY offset) and cache what we need.
// Calling the routine may delete the memory/overwrite it.
// Shift() yields the address of the list entry; the APC structure itself
// begins 8 bytes before it.
uint32_t apc_address = apc_list->Shift() - 8;
uint8_t* apc_ptr = membase + apc_address;
// All fields are stored big-endian in guest memory.
uint32_t kernel_routine = XEGETUINT32BE(apc_ptr + 16);
uint32_t normal_routine = XEGETUINT32BE(apc_ptr + 24);
uint32_t normal_context = XEGETUINT32BE(apc_ptr + 28);
uint32_t system_arg1 = XEGETUINT32BE(apc_ptr + 32);
uint32_t system_arg2 = XEGETUINT32BE(apc_ptr + 36);
// Mark as uninserted so that it can be reinserted again by the routine.
// This clears bits 8-15 of the 32-bit word at offset 40 and leaves the other
// bits untouched.
uint32_t old_flags = XEGETUINT32BE(apc_ptr + 40);
XESETUINT32BE(apc_ptr + 40, old_flags & ~0xFF00);
// Call kernel routine.
// The routine can modify all of its arguments before passing it on.
// Since we need to give guest accessible pointers over, we copy things
// into and out of scratch.
// Scratch layout: +0 normal_routine, +4 normal_context, +8 system_arg1,
// +12 system_arg2.
uint8_t* scratch_ptr = membase + thread->scratch_address_;
XESETUINT32BE(scratch_ptr + 0, normal_routine);
XESETUINT32BE(scratch_ptr + 4, normal_context);
XESETUINT32BE(scratch_ptr + 8, system_arg1);
XESETUINT32BE(scratch_ptr + 12, system_arg2);
// kernel_routine(apc_address, &normal_routine, &normal_context, &system_arg1, &system_arg2)
// The last four arguments are guest addresses into the scratch block, not
// host pointers.
uint64_t kernel_args[] = {
apc_address,
thread->scratch_address_ + 0,
thread->scratch_address_ + 4,
thread->scratch_address_ + 8,
thread->scratch_address_ + 12,
};
// The first argument to ExecuteInterrupt is the cpu index; 0 is used here.
// The APC lock is still held while the kernel routine runs.
processor->ExecuteInterrupt(
0, kernel_routine, kernel_args, XECOUNT(kernel_args));
// Read back whatever the kernel routine left in scratch; these values (not
// the ones cached from the APC entry) drive the normal routine call.
normal_routine = XEGETUINT32BE(scratch_ptr + 0);
normal_context = XEGETUINT32BE(scratch_ptr + 4);
system_arg1 = XEGETUINT32BE(scratch_ptr + 8);
system_arg2 = XEGETUINT32BE(scratch_ptr + 12);
// Call the normal routine. Note that it may have been killed by the kernel
// routine.
if (normal_routine) {
// Drop the APC lock for the duration of the normal routine, then take it
// again before re-checking the list.
thread->UnlockApc();
// normal_routine(normal_context, system_arg1, system_arg2)
uint64_t normal_args[] = { normal_context, system_arg1, system_arg2 };
processor->ExecuteInterrupt(
0, normal_routine, normal_args, XECOUNT(normal_args));
thread->LockApc();
}
}
// Balances the LockApc() taken before the loop (or the re-lock after the last
// normal routine).
thread->UnlockApc();
}
// Empties this thread's APC list without delivering the APCs: every entry is
// removed and marked uninserted, and its rundown routine (when non-zero) is
// invoked with the APC's guest address as the only argument.
void XThread::RundownAPCs() {
  auto guest_base = memory()->membase();

  LockApc();
  for (; apc_list_->HasPending();) {
    // Shift() hands back the list-entry address; the APC structure starts
    // 8 bytes earlier. Everything needed is read before the routine runs,
    // since the routine may delete or overwrite the memory.
    const uint32_t entry_address = apc_list_->Shift() - 8;
    uint8_t* entry = guest_base + entry_address;
    const uint32_t rundown_fn = XEGETUINT32BE(entry + 20);

    // Flag the APC as uninserted so the routine is free to insert it again.
    const uint32_t flags = XEGETUINT32BE(entry + 40);
    XESETUINT32BE(entry + 40, flags & ~0xFF00);

    // Entries without a rundown routine are simply dropped.
    if (!rundown_fn) {
      continue;
    }

    // rundown_routine(apc)
    uint64_t rundown_args[] = { entry_address };
    kernel_state()->processor()->ExecuteInterrupt(
        0, rundown_fn, rundown_args, XECOUNT(rundown_args));
  }
  UnlockApc();
}
int32_t XThread::QueryPriority() {

View File

@ -73,6 +73,9 @@ private:
void PlatformDestroy();
X_STATUS PlatformExit(int exit_code);
static void DeliverAPCs(void* data);
void RundownAPCs();
struct {
uint32_t stack_size;
uint32_t xapi_thread_startup;
@ -83,6 +86,8 @@ private:
uint32_t thread_id_;
void* thread_handle_;
uint32_t scratch_address_;
uint32_t scratch_size_;
uint32_t tls_address_;
uint32_t thread_state_address_;
cpu::XenonThreadState* thread_state_;