From a63f424c0a3ec518b08215353883953ce9a644f4 Mon Sep 17 00:00:00 2001
From: "chss95cs@gmail.com"
Date: Sun, 4 Dec 2022 12:38:19 -0800
Subject: [PATCH] Directly check PEB for IsDebuggerAttached Add constexpr getters to magicdiv class so it can be used from jitted x64/dxbc Track the guest return address as well for guest/host sync, if multiple entries have the same guest stack find the first one with a matching guest retaddr. this fixes epic mickey 2 (which the previous guest-stack change had allowed to go ingame for a bit) and potentially also a crash in fable3. Break if under debugger when stackpoints are overflowed Add much more useful output for host exceptions, print out xenia_canary.exe relative offsets if exception is in module, formatmessage for ntstatus/win32err, strerror Minor d3d12 microoptimization, instead of doing SetEventOnCompletion + WaitForSingleObject do SetEventOnCompletion w/ nullptr so that the wait happens in kernel mode, avoiding two extra context switches add unimplemented kernel functions: ExAllocatePoolWithTag ObReferenceObject ObDereferenceObject has no return value. 
Log a message when ObDereferenceObject/Reference receive unregistered guest kernel objects gave ObLookupThreadByThreadId its correct error status hoist object_types initialization out of ObReferenceObjectByHandle Fix out parameter values on error for a few kernel funcs add note about msr to KeSetCurrentStackPointers add X_STATUS_OBJECT_TYPE_MISMATCH check for xeNtSetEvent add msr_mask field to X_KPCR --- src/xenia/base/debugging.h | 3 + src/xenia/base/debugging_win.cc | 5 +- src/xenia/base/math.h | 16 ++ src/xenia/cpu/backend/x64/x64_backend.cc | 72 ++++--- src/xenia/cpu/backend/x64/x64_backend.h | 2 +- src/xenia/cpu/backend/x64/x64_emitter.cc | 62 +++--- src/xenia/cpu/ppc/ppc_translator.cc | 43 +++- src/xenia/cpu/ppc/ppc_translator.h | 2 + .../gpu/d3d12/d3d12_command_processor.cc | 10 +- .../gpu/d3d12/d3d12_render_target_cache.cc | 17 +- src/xenia/gpu/dxbc.h | 1 + src/xenia/kernel/xam/xam_task.cc | 18 +- src/xenia/kernel/xboxkrnl/xboxkrnl_memory.cc | 4 + src/xenia/kernel/xboxkrnl/xboxkrnl_ob.cc | 52 +++-- .../kernel/xboxkrnl/xboxkrnl_threading.cc | 6 +- src/xenia/kernel/xthread.h | 3 +- .../ui/d3d12/d3d12_submission_tracker.cc | 7 +- src/xenia/ui/windowed_app_main_win.cc | 194 ++++++++++++++++-- 18 files changed, 412 insertions(+), 105 deletions(-) diff --git a/src/xenia/base/debugging.h b/src/xenia/base/debugging.h index b0a99913c..1933bdd58 100644 --- a/src/xenia/base/debugging.h +++ b/src/xenia/base/debugging.h @@ -21,6 +21,9 @@ namespace debugging { // The state may change at any time (attach after launch, etc), so do not // cache this value. Determining if the debugger is attached is expensive, // though, so avoid calling it frequently. +// chrispy: no longer expensive to check on windows, we're directly using the +// teb/peb now to find this + bool IsDebuggerAttached(); // Breaks into the debugger if it is attached. 
diff --git a/src/xenia/base/debugging_win.cc b/src/xenia/base/debugging_win.cc index fbe269fb2..9571b2600 100644 --- a/src/xenia/base/debugging_win.cc +++ b/src/xenia/base/debugging_win.cc @@ -15,7 +15,10 @@ namespace xe { namespace debugging { -bool IsDebuggerAttached() { return IsDebuggerPresent() ? true : false; } +bool IsDebuggerAttached() { + return reinterpret_cast( + __readgsqword(0x60))[2]; // get BeingDebugged field of PEB +} void Break() { __debugbreak(); } diff --git a/src/xenia/base/math.h b/src/xenia/base/math.h index b1ab4d82b..4956f4415 100644 --- a/src/xenia/base/math.h +++ b/src/xenia/base/math.h @@ -664,6 +664,22 @@ struct MagicDiv { multiplier_ = PregenerateUint32Div(original, extradata_); } + constexpr uint32_t GetRightShift() const { + IDivExtraInfo extra{}; + + extra.value_ = extradata_; + return extra.info.shift_; + } + + constexpr bool AddFlag() const { + IDivExtraInfo extra{}; + + extra.value_ = extradata_; + return extra.info.shift_; + } + + constexpr uint32_t GetMultiplier() const { return multiplier_; + } constexpr uint32_t Apply(uint32_t numerator) const { return ApplyUint32Div(numerator, multiplier_, extradata_); } diff --git a/src/xenia/cpu/backend/x64/x64_backend.cc b/src/xenia/cpu/backend/x64/x64_backend.cc index 99576ea85..b1c1ff40e 100644 --- a/src/xenia/cpu/backend/x64/x64_backend.cc +++ b/src/xenia/cpu/backend/x64/x64_backend.cc @@ -725,10 +725,11 @@ ResolveFunctionThunk X64HelperEmitter::EmitResolveFunctionThunk() { return (ResolveFunctionThunk)fn; } // r11 = size of callers stack, r8 = return address w/ adjustment -//i'm not proud of this code, but it shouldn't be executed frequently at all +// i'm not proud of this code, but it shouldn't be executed frequently at all void* X64HelperEmitter::EmitGuestAndHostSynchronizeStackHelper() { _code_offsets code_offsets = {}; code_offsets.prolog = getSize(); + push(rbx); mov(rbx, GetBackendCtxPtr(offsetof(X64BackendContext, stackpoints))); mov(eax, 
GetBackendCtxPtr(offsetof(X64BackendContext, current_stackpoint_depth))); @@ -741,8 +742,9 @@ void* X64HelperEmitter::EmitGuestAndHostSynchronizeStackHelper() { Xbyak::Label signed_underflow{}; xor_(r12d, r12d); - //todo: should use Loop instruction here if hasFastLoop, - //currently xbyak does not support it but its super easy to modify xbyak to have it + // todo: should use Loop instruction here if hasFastLoop, + // currently xbyak does not support it but its super easy to modify xbyak to + // have it L(looper); imul(edx, ecx, sizeof(X64BackendStackpoint)); mov(r10d, ptr[rbx + rdx + offsetof(X64BackendStackpoint, guest_stack_)]); @@ -760,12 +762,47 @@ void* X64HelperEmitter::EmitGuestAndHostSynchronizeStackHelper() { } js(signed_underflow, T_NEAR); // should be impossible!! - jmp(looper, T_NEAR); L(loopout); Xbyak::Label skip_adjust{}; - cmp(r12d, 1);//should never happen? + cmp(r12d, 1); // should never happen? jle(skip_adjust, T_NEAR); + Xbyak::Label we_good{}; + + // now we need to make sure that the return address matches + + // mov(r9d, ptr[GetContextReg() + offsetof(ppc::PPCContext, lr)]); + pop(r9); // guest retaddr + // r10d = the guest_stack + // while guest_stack is equal and return address is not equal, decrement + + Xbyak::Label search_for_retaddr{}; + Xbyak::Label we_good_but_increment{}; + L(search_for_retaddr); + + imul(edx, ecx, sizeof(X64BackendStackpoint)); + + cmp(r10d, ptr[rbx + rdx + offsetof(X64BackendStackpoint, guest_stack_)]); + + jnz(we_good_but_increment, T_NEAR); + + cmp(r9d, + ptr[rbx + rdx + offsetof(X64BackendStackpoint, guest_return_address_)]); + jz(we_good, T_NEAR); // stack is equal, return address is equal, we've got + // our destination stack + dec(ecx); + jmp(search_for_retaddr, T_NEAR); + Xbyak::Label checkbp{}; + + L(we_good_but_increment); + add(edx, sizeof(X64BackendStackpoint)); + inc(ecx); + jmp(checkbp, T_NEAR); + L(we_good); + //we're popping this return address, so go down by one + sub(edx, 
sizeof(X64BackendStackpoint)); + dec(ecx); + L(checkbp); mov(rsp, ptr[rbx + rdx + offsetof(X64BackendStackpoint, host_stack_)]); if (IsFeatureEnabled(kX64FlagsIndependentVars)) { inc(ecx); @@ -773,13 +810,13 @@ void* X64HelperEmitter::EmitGuestAndHostSynchronizeStackHelper() { add(ecx, 1); } - // this->DebugBreak(); sub(rsp, r11); // adjust stack mov(GetBackendCtxPtr(offsetof(X64BackendContext, current_stackpoint_depth)), ecx); // set next stackpoint index to be after the one we restored to + jmp(r8); L(skip_adjust); - + pop(rbx); jmp(r8); // return to caller code_offsets.prolog_stack_alloc = getSize(); code_offsets.body = getSize(); @@ -787,24 +824,11 @@ void* X64HelperEmitter::EmitGuestAndHostSynchronizeStackHelper() { code_offsets.tail = getSize(); L(signed_underflow); - //find a good, compact way to signal error here - // maybe an invalid opcode that we execute, then detect in an exception handler? - + // find a good, compact way to signal error here + // maybe an invalid opcode that we execute, then detect in an exception + // handler? + this->DebugBreak(); - // stack unwinding, take first entry - //actually, no reason to have this - - /*mov(rsp, ptr[rbx + offsetof(X64BackendStackpoint, host_stack_)]); - mov(ptr[rbx + offsetof(X64BackendStackpoint, guest_stack_)], r9d); - sub(rsp, r11); - xor_(eax, eax); - inc(eax); - mov(GetBackendCtxPtr(offsetof(X64BackendContext, current_stackpoint_depth)), - eax); - - jmp(r8);*/ - // this->DebugBreak(); // err, add an xe::FatalError to call for this - return EmitCurrentForOffsets(code_offsets); } diff --git a/src/xenia/cpu/backend/x64/x64_backend.h b/src/xenia/cpu/backend/x64/x64_backend.h index 92ee0f7a4..79f635722 100644 --- a/src/xenia/cpu/backend/x64/x64_backend.h +++ b/src/xenia/cpu/backend/x64/x64_backend.h @@ -48,7 +48,7 @@ struct X64BackendStackpoint { // pad to 16 bytes so we never end up having a 64 bit load/store for // host_stack_ straddling two lines. 
Consider this field reserved for future // use - unsigned unused_; + unsigned guest_return_address_; }; // located prior to the ctx register // some things it would be nice to have be per-emulator instance instead of per diff --git a/src/xenia/cpu/backend/x64/x64_emitter.cc b/src/xenia/cpu/backend/x64/x64_emitter.cc index 463b245d0..da9816a35 100644 --- a/src/xenia/cpu/backend/x64/x64_emitter.cc +++ b/src/xenia/cpu/backend/x64/x64_emitter.cc @@ -486,6 +486,7 @@ uint64_t ResolveFunction(void* raw_context, uint64_t target_address) { if (cvars::enable_host_guest_stack_synchronization) { auto processor = thread_state->processor(); + auto module_for_address = processor->LookupModule(static_cast(target_address)); @@ -498,6 +499,7 @@ uint64_t ResolveFunction(void* raw_context, uint64_t target_address) { if (flags->is_return_site) { auto ones_with_address = processor->FindFunctionsWithAddress( static_cast(target_address)); + if (ones_with_address.size() != 0) { // this loop to find a host address for the guest address is // necessary because FindFunctionsWithAddress works via a range @@ -618,21 +620,42 @@ uint64_t ResolveFunction(void* raw_context, uint64_t target_address) { and 5 bytes for the jmp with no cycles taken for the jump which will be predicted not taken. - Our handling for the check is implemented in X64HelperEmitter::EmitGuestAndHostSynchronizeStackHelper. we don't call it directly though, - instead we go through backend()->synchronize_guest_and_host_stack_helper_for_size(num_bytes_needed_to_represent_stack_size). we place the stack size after the - call instruction so we can load it in the helper and readjust the return address to point after the literal value. + Our handling for the check is implemented in + X64HelperEmitter::EmitGuestAndHostSynchronizeStackHelper. we + don't call it directly though, instead we go through + backend()->synchronize_guest_and_host_stack_helper_for_size(num_bytes_needed_to_represent_stack_size). 
+ we place the stack size after the call instruction so we can + load it in the helper and readjust the return address to point + after the literal value. - The helper is going to search the array of stackpoints to find the first one that is greater than or equal to the current stack pointer, when it finds - the entry it will set the currently host rsp to the host stack pointer value in the entry, and then subtract the stack size of the caller from that. - the current stackpoint index is adjusted to point to the one after the stackpoint we restored to. + The helper is going to search the array of + stackpoints to find the first one that is greater than or + equal to the current stack pointer, when it finds the entry it + will set the currently host rsp to the host stack pointer + value in the entry, and then subtract the stack size of the + caller from that. the current stackpoint index is adjusted to + point to the one after the stackpoint we restored to. - The helper then jumps back to the function that was longjmp'ed to, with the host stack in its proper state. it just works! + The helper then jumps back to the function + that was longjmp'ed to, with the host stack in its proper + state. it just works! */ if (num_frames_bigger > 1) { + /* + * can't do anything about this right now :( + * epic mickey is quite slow due to having to call resolve on + * every longjmp, and it longjmps a lot but if we add an + * indirection we lose our stack misalignment check + */ + /* reinterpret_cast(backend->code_cache()) + ->AddIndirection(static_cast(target_address), + static_cast(host_address)); + */ + return host_address; } } @@ -1649,6 +1672,9 @@ Xbyak::Address X64Emitter::GetBackendFlagsPtr() const { } void X64Emitter::HandleStackpointOverflowError(ppc::PPCContext* context) { + if (debugging::IsDebuggerAttached()) { + debugging::Break(); + } // context->lr // todo: show lr in message? 
xe::FatalError( @@ -1674,6 +1700,9 @@ void X64Emitter::PushStackpoint() { mov(qword[rbx + offsetof(X64BackendStackpoint, host_stack_)], rsp); mov(dword[rbx + offsetof(X64BackendStackpoint, guest_stack_)], r8d); + mov(r8d, qword[GetContextReg() + offsetof(ppc::PPCContext, lr)]); + mov(dword[rbx + offsetof(X64BackendStackpoint, guest_return_address_)], r8d); + if (IsFeatureEnabled(kX64FlagsIndependentVars)) { inc(eax); } else { @@ -1716,24 +1745,6 @@ void X64Emitter::EnsureSynchronizedGuestAndHostStack() { // need to be made // that result in the stack not being 8 byte misaligned on context reentry -#if 0 - Xbyak::Label skip{}; - mov(r8, qword[GetContextReg() + offsetof(ppc::PPCContext, r[1])]); - mov(rbx, GetBackendCtxPtr(offsetof(X64BackendContext, stackpoints))); - imul(eax, - GetBackendCtxPtr(offsetof(X64BackendContext, current_stackpoint_depth)), - sizeof(X64BackendStackpoint)); - sub(eax, sizeof(X64BackendStackpoint)); - add(rbx, rax); - - cmp(r8d, dword[rbx + offsetof(X64BackendStackpoint, guest_stack_)]); - jle(skip, T_NEAR); - Xbyak::Label skip{}; - mov(r11d, stack_size()); - call(backend_->synchronize_guest_and_host_stack_helper()); - L(skip); -#endif - Xbyak::Label& return_from_sync = this->NewCachedLabel(); // if we got here somehow from setjmp or the like we ought to have a @@ -1747,7 +1758,6 @@ void X64Emitter::EnsureSynchronizedGuestAndHostStack() { uint32_t stack32 = static_cast(e.stack_size()); auto backend = e.backend(); - if (stack32 < 256) { e.call(backend->synchronize_guest_and_host_stack_helper_for_size(1)); e.db(stack32); diff --git a/src/xenia/cpu/ppc/ppc_translator.cc b/src/xenia/cpu/ppc/ppc_translator.cc index af8159d97..69b0da4e3 100644 --- a/src/xenia/cpu/ppc/ppc_translator.cc +++ b/src/xenia/cpu/ppc/ppc_translator.cc @@ -11,6 +11,7 @@ #include "xenia/base/assert.h" #include "xenia/base/byte_order.h" +#include "xenia/base/cvar.h" #include "xenia/base/memory.h" #include "xenia/base/profiling.h" #include "xenia/base/reset_scope.h" @@ -22,6 
+23,10 @@ #include "xenia/cpu/ppc/ppc_opcode_info.h" #include "xenia/cpu/ppc/ppc_scanner.h" #include "xenia/cpu/processor.h" +#include "xenia/cpu/xex_module.h" + +DEFINE_bool(dump_translated_hir_functions, false, "dumps translated hir", + "CPU"); namespace xe { namespace cpu { @@ -107,10 +112,44 @@ class HirBuilderScope { ~HirBuilderScope() { if (builder_) { builder_->RemoveCurrent(); - } + } } }; +void PPCTranslator::DumpHIR(GuestFunction* function, PPCHIRBuilder* builder) { + if (cvars::dump_translated_hir_functions) { + StringBuffer buffer{}; + builder_->Dump(&buffer); + XexModule* mod = dynamic_cast(function->module()); + + std::wstring folder_name = L"hirdump"; + + if (mod) { + xex2_opt_execution_info* opt_exec_info = nullptr; + if (mod->GetOptHeader(XEX_HEADER_EXECUTION_INFO, &opt_exec_info)) { + folder_name = + L"hirdump_title_" + std::to_wstring(opt_exec_info->title_id); + } + } + std::filesystem::path folder_path{folder_name}; + + if (!std::filesystem::exists(folder_path)) { + std::filesystem::create_directory(folder_path); + } + + { + wchar_t tmpbuf[64]; + _snwprintf(tmpbuf, 64, L"%X", function->address()); + folder_path.append(&tmpbuf[0]); + } + + FILE* f = fopen(folder_path.generic_u8string().c_str(), "w"); + if (f) { + fputs(buffer.buffer(), f); + fclose(f); + } + } +} bool PPCTranslator::Translate(GuestFunction* function, uint32_t debug_info_flags) { SCOPE_profile_cpu_f("cpu"); @@ -203,6 +242,8 @@ bool PPCTranslator::Translate(GuestFunction* function, string_buffer_.Reset(); } + DumpHIR(function, builder_.get()); + // Assemble to backend machine code. 
if (!assembler_->Assemble(function, builder_.get(), debug_info_flags, std::move(debug_info))) { diff --git a/src/xenia/cpu/ppc/ppc_translator.h b/src/xenia/cpu/ppc/ppc_translator.h index 483be9c95..240204883 100644 --- a/src/xenia/cpu/ppc/ppc_translator.h +++ b/src/xenia/cpu/ppc/ppc_translator.h @@ -31,7 +31,9 @@ class PPCTranslator { ~PPCTranslator(); bool Translate(GuestFunction* function, uint32_t debug_info_flags); + void DumpHIR(GuestFunction* function, PPCHIRBuilder* builder); void Reset(); + private: void DumpSource(GuestFunction* function, StringBuffer* string_buffer); diff --git a/src/xenia/gpu/d3d12/d3d12_command_processor.cc b/src/xenia/gpu/d3d12/d3d12_command_processor.cc index c99afd595..fc236aa6a 100644 --- a/src/xenia/gpu/d3d12/d3d12_command_processor.cc +++ b/src/xenia/gpu/d3d12/d3d12_command_processor.cc @@ -3124,7 +3124,6 @@ bool D3D12CommandProcessor::IssueCopy_ReadbackResolvePath() { auto physaddr = memory_->TranslatePhysical(written_address); memory::vastcpy(physaddr, (uint8_t*)readback_mapping, written_length); - // XEDmaCpy(physaddr, readback_mapping, written_length); D3D12_RANGE readback_write_range = {}; readback_buffer->Unmap(0, &readback_write_range); } @@ -3152,9 +3151,7 @@ void D3D12CommandProcessor::CheckSubmissionFence(uint64_t await_submission) { direct_queue->Signal(queue_operations_since_submission_fence_, fence_value) && SUCCEEDED(queue_operations_since_submission_fence_ - ->SetEventOnCompletion(fence_value, - fence_completion_event_)))) { - WaitForSingleObject(fence_completion_event_, INFINITE); + ->SetEventOnCompletion(fence_value, nullptr)))) { queue_operations_done_since_submission_signal_ = false; } else { XELOGE( @@ -3170,9 +3167,8 @@ void D3D12CommandProcessor::CheckSubmissionFence(uint64_t await_submission) { uint64_t submission_completed_before = submission_completed_; submission_completed_ = submission_fence_->GetCompletedValue(); if (submission_completed_ < await_submission) { - if 
(SUCCEEDED(submission_fence_->SetEventOnCompletion( - await_submission, fence_completion_event_))) { - WaitForSingleObject(fence_completion_event_, INFINITE); + if (SUCCEEDED(submission_fence_->SetEventOnCompletion(await_submission, + nullptr))) { submission_completed_ = submission_fence_->GetCompletedValue(); } } diff --git a/src/xenia/gpu/d3d12/d3d12_render_target_cache.cc b/src/xenia/gpu/d3d12/d3d12_render_target_cache.cc index 238bbea05..9dadff46e 100644 --- a/src/xenia/gpu/d3d12/d3d12_render_target_cache.cc +++ b/src/xenia/gpu/d3d12/d3d12_render_target_cache.cc @@ -43,6 +43,7 @@ DEFINE_bool( "Allow stencil reference output usage on Direct3D 12 on Intel GPUs - not " "working on UHD Graphics 630 as of March 2021 (driver 27.20.0100.8336).", "GPU"); +DEFINE_bool(no_discard_stencil_in_transfer_pipelines, false, "bleh", "GPU"); // TODO(Triang3l): Make ROV the default when it's optimized better (for // instance, using static shader modifications to pass render target // parameters). @@ -2940,7 +2941,7 @@ D3D12RenderTargetCache::GetOrCreateTransferPipelines(TransferShaderKey key) { // r0.xy = destination pixel XY index within the 32bpp tile // r0.zw = 32bpp tile XY index a.OpUDiv(dxbc::Dest::R(0, 0b1100), dxbc::Dest::R(0, 0b0011), - dxbc::Src::R(0, 0b01000100), + dxbc::Src::R(0, dxbc::Src::kXYXY), dxbc::Src::LU(dest_tile_width_pixels, dest_tile_height_pixels, dest_tile_width_pixels, dest_tile_height_pixels)); @@ -4189,12 +4190,14 @@ D3D12RenderTargetCache::GetOrCreateTransferPipelines(TransferShaderKey key) { break; case TransferOutput::kStencilBit: // Discard the sample if the needed stencil bit is not set. 
- assert_true(cbuffer_index_stencil_mask != UINT32_MAX); - a.OpAnd(dxbc::Dest::R(0, 0b0001), dxbc::Src::R(1, dxbc::Src::kXXXX), - dxbc::Src::CB(cbuffer_index_stencil_mask, - kTransferCBVRegisterStencilMask, 0, - dxbc::Src::kXXXX)); - a.OpDiscard(false, dxbc::Src::R(0, dxbc::Src::kXXXX)); + if (!cvars::no_discard_stencil_in_transfer_pipelines) { + assert_true(cbuffer_index_stencil_mask != UINT32_MAX); + a.OpAnd(dxbc::Dest::R(0, 0b0001), dxbc::Src::R(1, dxbc::Src::kXXXX), + dxbc::Src::CB(cbuffer_index_stencil_mask, + kTransferCBVRegisterStencilMask, 0, + dxbc::Src::kXXXX)); + a.OpDiscard(false, dxbc::Src::R(0, dxbc::Src::kXXXX)); + } break; } } diff --git a/src/xenia/gpu/dxbc.h b/src/xenia/gpu/dxbc.h index 2c9f5eeab..a39989645 100644 --- a/src/xenia/gpu/dxbc.h +++ b/src/xenia/gpu/dxbc.h @@ -1046,6 +1046,7 @@ struct Src : OperandAddress { kYYYY = 0b01010101, kZZZZ = 0b10101010, kWWWW = 0b11111111, + kXYXY = 0b01000100 }; // Ignored for 0-component and 1-component operand types. diff --git a/src/xenia/kernel/xam/xam_task.cc b/src/xenia/kernel/xam/xam_task.cc index 7fe6eb4d3..23358dae5 100644 --- a/src/xenia/kernel/xam/xam_task.cc +++ b/src/xenia/kernel/xam/xam_task.cc @@ -37,14 +37,30 @@ struct XTASK_MESSAGE { be unknown_14; be task_handle; }; + +struct XAM_TASK_ARGS { + be value1; + be value2; + // i think there might be another value here, it might be padding +}; static_assert_size(XTASK_MESSAGE, 0x1C); dword_result_t XamTaskSchedule_entry(lpvoid_t callback, pointer_t message, - lpdword_t unknown, lpdword_t handle_ptr) { + dword_t optional_ptr, lpdword_t handle_ptr, + const ppc_context_t& ctx) { // TODO(gibbed): figure out what this is for *handle_ptr = 12345; + if (optional_ptr) { + auto option = ctx->TranslateVirtual(optional_ptr); + + auto v1 = option->value1; + auto v2 = option->value2; //typically 0? 
+ + XELOGI("Got xam task args: v1 = {:08X}, v2 = {:08X}", v1, v2); + } + uint32_t stack_size = kernel_state()->GetExecutableModule()->stack_size(); // Stack must be aligned to 16kb pages diff --git a/src/xenia/kernel/xboxkrnl/xboxkrnl_memory.cc b/src/xenia/kernel/xboxkrnl/xboxkrnl_memory.cc index 664149765..b976f8400 100644 --- a/src/xenia/kernel/xboxkrnl/xboxkrnl_memory.cc +++ b/src/xenia/kernel/xboxkrnl/xboxkrnl_memory.cc @@ -609,6 +609,10 @@ dword_result_t ExAllocatePoolTypeWithTag_entry(dword_t size, dword_t tag, return addr; } DECLARE_XBOXKRNL_EXPORT1(ExAllocatePoolTypeWithTag, kMemory, kImplemented); +dword_result_t ExAllocatePoolWithTag_entry(dword_t numbytes, dword_t tag) { + return ExAllocatePoolTypeWithTag_entry(numbytes, tag, 0); +} +DECLARE_XBOXKRNL_EXPORT1(ExAllocatePoolWithTag, kMemory, kImplemented); dword_result_t ExAllocatePool_entry(dword_t size) { const uint32_t none = 0x656E6F4E; // 'None' diff --git a/src/xenia/kernel/xboxkrnl/xboxkrnl_ob.cc b/src/xenia/kernel/xboxkrnl/xboxkrnl_ob.cc index e09096eb0..f7f0308e8 100644 --- a/src/xenia/kernel/xboxkrnl/xboxkrnl_ob.cc +++ b/src/xenia/kernel/xboxkrnl/xboxkrnl_ob.cc @@ -52,9 +52,11 @@ dword_result_t ObOpenObjectByName_entry(lpunknown_t obj_attributes_ptr, return result; } DECLARE_XBOXKRNL_EXPORT1(ObOpenObjectByName, kNone, kImplemented); - +// chrispy: investigate this, pretty certain it does not properly emulate the +// original dword_result_t ObOpenObjectByPointer_entry(lpvoid_t object_ptr, lpdword_t out_handle_ptr) { + *out_handle_ptr = 0; auto object = XObject::GetNativeObject(kernel_state(), object_ptr); if (!object) { return X_STATUS_UNSUCCESSFUL; @@ -71,7 +73,8 @@ dword_result_t ObLookupThreadByThreadId_entry(dword_t thread_id, lpdword_t out_object_ptr) { auto thread = kernel_state()->GetThreadByID(thread_id); if (!thread) { - return X_STATUS_NOT_FOUND; + *out_object_ptr = 0; + return X_STATUS_INVALID_PARAMETER; } // Retain the object. Will be released in ObDereferenceObject. 
@@ -80,16 +83,18 @@ dword_result_t ObLookupThreadByThreadId_entry(dword_t thread_id, return X_STATUS_SUCCESS; } DECLARE_XBOXKRNL_EXPORT1(ObLookupThreadByThreadId, kNone, kImplemented); - +// These values come from how Xenia handles uninitialized kernel data exports. +// D###BEEF where ### is the ordinal. +const static std::unordered_map object_types = { + {XObject::Type::Event, 0xD00EBEEF}, + {XObject::Type::Semaphore, 0xD017BEEF}, + {XObject::Type::Thread, 0xD01BBEEF}}; dword_result_t ObReferenceObjectByHandle_entry(dword_t handle, dword_t object_type_ptr, lpdword_t out_object_ptr) { - // These values come from how Xenia handles uninitialized kernel data exports. - // D###BEEF where ### is the ordinal. - const static std::unordered_map object_types = { - {XObject::Type::Event, 0xD00EBEEF}, - {XObject::Type::Semaphore, 0xD017BEEF}, - {XObject::Type::Thread, 0xD01BBEEF}}; + // chrispy: gotta preinit this to 0, kernel is expected to do that + *out_object_ptr = 0; + auto object = kernel_state()->object_table()->LookupObject(handle); if (!object) { return X_STATUS_INVALID_HANDLE; @@ -132,22 +137,43 @@ dword_result_t ObReferenceObjectByName_entry(lpstring_t name, } DECLARE_XBOXKRNL_EXPORT1(ObReferenceObjectByName, kNone, kImplemented); -dword_result_t ObDereferenceObject_entry(dword_t native_ptr) { +void ObDereferenceObject_entry(dword_t native_ptr, const ppc_context_t& ctx) { // Check if a dummy value from ObReferenceObjectByHandle. if (native_ptr == 0xDEADF00D) { - return 0; + return; } auto object = XObject::GetNativeObject( kernel_state(), kernel_memory()->TranslateVirtual(native_ptr)); if (object) { object->ReleaseHandle(); - } - return 0; + } else { + if (native_ptr) { + XELOGW("Unregistered guest object provided to ObDereferenceObject {:08X}", + native_ptr.value()); + } + } + return; } DECLARE_XBOXKRNL_EXPORT1(ObDereferenceObject, kNone, kImplemented); +void ObReferenceObject_entry(dword_t native_ptr) { + // Check if a dummy value from ObReferenceObjectByHandle. 
+ auto object = XObject::GetNativeObject( + kernel_state(), kernel_memory()->TranslateVirtual(native_ptr)); + if (object) { + object->RetainHandle(); + } else { + if (native_ptr) { + XELOGW("Unregistered guest object provided to ObReferenceObject {:08X}", + native_ptr.value()); + } + } + return; +} +DECLARE_XBOXKRNL_EXPORT1(ObReferenceObject, kNone, kImplemented); + dword_result_t ObCreateSymbolicLink_entry(pointer_t path_ptr, pointer_t target_ptr) { auto path = xe::utf8::canonicalize_guest_path( diff --git a/src/xenia/kernel/xboxkrnl/xboxkrnl_threading.cc b/src/xenia/kernel/xboxkrnl/xboxkrnl_threading.cc index 14179939e..6144731d3 100644 --- a/src/xenia/kernel/xboxkrnl/xboxkrnl_threading.cc +++ b/src/xenia/kernel/xboxkrnl/xboxkrnl_threading.cc @@ -237,7 +237,7 @@ void KeSetCurrentStackPointers_entry(lpvoid_t stack_ptr, auto current_thread = XThread::GetCurrentThread(); auto pcr = context->TranslateVirtualGPR(context->r[13]); - + //also supposed to load msr mask, and the current msr with that, and store thread->stack_alloc_base = stack_alloc_base.value(); thread->stack_base = stack_base.value(); thread->stack_limit = stack_limit.value(); @@ -500,6 +500,10 @@ uint32_t xeNtSetEvent(uint32_t handle, xe::be* previous_state_ptr) { auto ev = kernel_state()->object_table()->LookupObject(handle); if (ev) { + //d3 ros does this + if (ev->type() != XObject::Type::Event) { + return X_STATUS_OBJECT_TYPE_MISMATCH; + } int32_t was_signalled = ev->Set(0, false); if (previous_state_ptr) { *previous_state_ptr = static_cast(was_signalled); diff --git a/src/xenia/kernel/xthread.h b/src/xenia/kernel/xthread.h index 898aea006..ceadd49dd 100644 --- a/src/xenia/kernel/xthread.h +++ b/src/xenia/kernel/xthread.h @@ -70,7 +70,8 @@ struct XAPC { // Processor Control Region struct X_KPCR { xe::be tls_ptr; // 0x0 - uint8_t unk_04[0x2C]; // 0x4 + xe::be msr_mask; // 0x4 + uint8_t unk_08[0x28]; // 0x8 xe::be pcr_ptr; // 0x30 uint8_t unk_34[0x3C]; // 0x34 xe::be stack_base_ptr; // 0x70 Stack base 
address (high addr) diff --git a/src/xenia/ui/d3d12/d3d12_submission_tracker.cc b/src/xenia/ui/d3d12/d3d12_submission_tracker.cc index d8f604f7f..3a1c7938d 100644 --- a/src/xenia/ui/d3d12/d3d12_submission_tracker.cc +++ b/src/xenia/ui/d3d12/d3d12_submission_tracker.cc @@ -75,12 +75,7 @@ bool D3D12SubmissionTracker::AwaitSubmissionCompletion( fence_value = submission_signal_queued_; } if (fence_->GetCompletedValue() < fence_value) { - if (FAILED(fence_->SetEventOnCompletion(fence_value, - fence_completion_event_))) { - return false; - } - if (WaitForSingleObject(fence_completion_event_, INFINITE) != - WAIT_OBJECT_0) { + if (FAILED(fence_->SetEventOnCompletion(fence_value, nullptr))) { return false; } } diff --git a/src/xenia/ui/windowed_app_main_win.cc b/src/xenia/ui/windowed_app_main_win.cc index 115eb259f..6e14d159c 100644 --- a/src/xenia/ui/windowed_app_main_win.cc +++ b/src/xenia/ui/windowed_app_main_win.cc @@ -18,13 +18,17 @@ #include "xenia/ui/windowed_app_context_win.h" DEFINE_bool(enable_console, false, "Open a console window with the main window", - "General"); + "Logging"); + +static uintptr_t g_xenia_exe_base = 0; +static size_t g_xenia_exe_size = 0; #if XE_ARCH_AMD64 == 1 DEFINE_bool(enable_rdrand_ntdll_patch, true, "Hot-patches ntdll at the start of the process to not use rdrand " "as part of the RNG for heap randomization. 
Can reduce CPU usage " "significantly, but is untested on all Windows versions.", "Win32"); + // begin ntdll hack #include static bool g_didfailtowrite = false; @@ -77,36 +81,194 @@ static void do_ntdll_hack_this_process() { } #endif // end ntdll hack -LONG _UnhandledExceptionFilter(_EXCEPTION_POINTERS* ExceptionInfo) { - PVOID exception_addr = ExceptionInfo->ExceptionRecord->ExceptionAddress; +struct HostExceptionReport { + _EXCEPTION_POINTERS* const ExceptionInfo; + size_t Report_Scratchpos; - DWORD64 last_stackpointer = ExceptionInfo->ContextRecord->Rsp; + const DWORD last_win32_error; + const NTSTATUS last_ntstatus; - DWORD64 last_rip = ExceptionInfo->ContextRecord->Rip; + const int errno_value; + char Report_Scratchbuffer[2048]; - DWORD except_code = ExceptionInfo->ExceptionRecord->ExceptionCode; + unsigned int address_format_ring_index; - DWORD last_error = GetLastError(); + char formatted_addresses[16][128]; - NTSTATUS stat = __readgsdword(0x1250); + void AddString(const char* s); + static char* ChompNewlines(char* s); - int last_errno_value = errno; + HostExceptionReport(_EXCEPTION_POINTERS* _ExceptionInfo) + : ExceptionInfo(_ExceptionInfo), + Report_Scratchpos(0u), + last_win32_error(GetLastError()), + last_ntstatus(__readgsdword(0x1250)), + errno_value(errno), + address_format_ring_index(0) + { + memset(Report_Scratchbuffer, 0, sizeof(Report_Scratchbuffer)); + } + void DisplayExceptionMessage() { + MessageBoxA(nullptr, Report_Scratchbuffer, "Unhandled Exception in Xenia", + MB_ICONERROR); + } - char except_message_buf[1024]; + const char* GetFormattedAddress(uintptr_t address); + const char* GetFormattedAddress(PVOID address) { + return GetFormattedAddress(reinterpret_cast(address)); + } +}; +char* HostExceptionReport::ChompNewlines(char* s) { + if (!s) { + return nullptr; + } + unsigned read_pos = 0; + unsigned write_pos = 0; + + while (true) { + char current = s[read_pos++]; + if (current == '\n') { + continue; + } + s[write_pos++] = current; + if 
(!current) { + break; + } + } + return s; +} +void HostExceptionReport::AddString(const char* s) { + size_t ln = strlen(s); + + for (size_t i = 0; i < ln; ++i) { + Report_Scratchbuffer[i + Report_Scratchpos] = s[i]; + } + Report_Scratchpos += ln; +} + +const char* HostExceptionReport::GetFormattedAddress(uintptr_t address) { + char(¤t_buffer)[128] = + formatted_addresses[address_format_ring_index++ % 16]; + + if (address >= g_xenia_exe_base && + address - g_xenia_exe_base < g_xenia_exe_size) { + uintptr_t offset = address - g_xenia_exe_base; + + sprintf_s(current_buffer, "xenia_canary.exe+%llX", offset); + } else { + sprintf_s(current_buffer, "0x%llX", address); + } + return current_buffer; +} +using ExceptionInfoCategoryHandler = bool (*)(HostExceptionReport* report); +static char* Ntstatus_msg(NTSTATUS status) { + char* statusmsg = nullptr; + FormatMessageA(FORMAT_MESSAGE_ALLOCATE_BUFFER | FORMAT_MESSAGE_FROM_HMODULE | + FORMAT_MESSAGE_IGNORE_INSERTS, + GetModuleHandleA("ntdll.dll"), status, + MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT), (LPSTR)&statusmsg, + 0, NULL); + return statusmsg; +} +static bool exception_pointers_handler(HostExceptionReport* report) { + PVOID exception_addr = + report->ExceptionInfo->ExceptionRecord->ExceptionAddress; + + DWORD64 last_stackpointer = report->ExceptionInfo->ContextRecord->Rsp; + + DWORD64 last_rip = report->ExceptionInfo->ContextRecord->Rip; + DWORD except_code = report->ExceptionInfo->ExceptionRecord->ExceptionCode; + + char except_message_buf[256]; sprintf_s(except_message_buf, - "Exception encountered!\nException address: %p\nStackpointer: " - "%p\nInstruction pointer: %p\nExceptionCode: 0x%X\nLast Win32 " - "Error: 0x%X\nLast NTSTATUS: 0x%X\nLast errno value: 0x%X\n", - exception_addr, (void*)last_stackpointer, (void*)last_rip, except_code, - last_error, stat, last_errno_value); - MessageBoxA(nullptr, except_message_buf, "Unhandled Exception", MB_ICONERROR); + "Exception encountered!\nException address: %s\nStackpointer: 
" + "%s\nInstruction pointer: %s\nExceptionCode: 0x%X (%s)\n", + report->GetFormattedAddress(exception_addr), + report->GetFormattedAddress(last_stackpointer), + report->GetFormattedAddress(last_rip), except_code, + HostExceptionReport::ChompNewlines(Ntstatus_msg(except_code))); + + report->AddString(except_message_buf); + + return true; +} + +static bool exception_win32_error_handle(HostExceptionReport* report) { + if (!report->last_win32_error) { + return false; // no error, nothing to do + } + // todo: formatmessage + char win32_error_buf[512]; + // its ok if we dont free statusmsg, we're exiting anyway + char* statusmsg = nullptr; + FormatMessageA(FORMAT_MESSAGE_ALLOCATE_BUFFER | FORMAT_MESSAGE_FROM_SYSTEM | + FORMAT_MESSAGE_IGNORE_INSERTS, + NULL, report->last_win32_error, + MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT), (LPSTR)&statusmsg, + 0, NULL); + sprintf_s(win32_error_buf, "Last Win32 Error: 0x%X (%s)\n", + report->last_win32_error, + HostExceptionReport::ChompNewlines(statusmsg)); + report->AddString(win32_error_buf); + return true; +} +static bool exception_ntstatus_error_handle(HostExceptionReport* report) { + if (!report->last_ntstatus) { + return false; + } + // todo: formatmessage + char win32_error_buf[512]; + + sprintf_s(win32_error_buf, "Last NTSTATUS: 0x%X (%s)\n", + report->last_ntstatus, Ntstatus_msg(report->last_ntstatus)); + report->AddString(win32_error_buf); + return true; +} + +static bool exception_cerror_handle(HostExceptionReport* report) { + if (!report->errno_value) { + return false; + } + char errno_buffer[512]; + sprintf_s(errno_buffer, "Last errno value: 0x%X (%s)\n", report->errno_value, + strerror(report->errno_value)); + + report->AddString(errno_buffer); + return true; +} + +static ExceptionInfoCategoryHandler host_exception_category_handlers[] = { + exception_pointers_handler, exception_win32_error_handle, + exception_ntstatus_error_handle, exception_cerror_handle}; + +LONG _UnhandledExceptionFilter(_EXCEPTION_POINTERS* 
ExceptionInfo) { + HostExceptionReport report{ExceptionInfo}; + for (auto&& handler : host_exception_category_handlers) { + __try { + if (!handler(&report)) { + continue; + } + } __except (EXCEPTION_EXECUTE_HANDLER) { + report.AddString("\n"); + } + } + report.DisplayExceptionMessage(); + return EXCEPTION_CONTINUE_SEARCH; } int WINAPI wWinMain(HINSTANCE hinstance, HINSTANCE hinstance_prev, LPWSTR command_line, int show_cmd) { + MODULEINFO modinfo; + + GetModuleInformation(GetCurrentProcess(), (HMODULE)hinstance, &modinfo, + sizeof(MODULEINFO)); + + g_xenia_exe_base = reinterpret_cast(hinstance); + g_xenia_exe_size = modinfo.SizeOfImage; + int result; SetUnhandledExceptionFilter(_UnhandledExceptionFilter); {