Merge pull request #99 from chrisps/stack_sync2_fence_krnl_hostexcept

Improve stack sync, kernel fixes, better host exception reporting
This commit is contained in:
chrisps 2022-12-04 13:50:06 -08:00 committed by GitHub
commit 85723f117d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
18 changed files with 412 additions and 105 deletions

View File

@ -21,6 +21,9 @@ namespace debugging {
// The state may change at any time (attach after launch, etc), so do not // The state may change at any time (attach after launch, etc), so do not
// cache this value. Determining if the debugger is attached is expensive, // cache this value. Determining if the debugger is attached is expensive,
// though, so avoid calling it frequently. // though, so avoid calling it frequently.
// chrispy: this check is no longer expensive on Windows; the implementation
// now reads the BeingDebugged flag directly from the PEB (found via the TEB).
bool IsDebuggerAttached(); bool IsDebuggerAttached();
// Breaks into the debugger if it is attached. // Breaks into the debugger if it is attached.

View File

@ -15,7 +15,10 @@
namespace xe { namespace xe {
namespace debugging { namespace debugging {
bool IsDebuggerAttached() { return IsDebuggerPresent() ? true : false; } bool IsDebuggerAttached() {
return reinterpret_cast<const bool*>(
__readgsqword(0x60))[2]; // get BeingDebugged field of PEB
}
void Break() { __debugbreak(); } void Break() { __debugbreak(); }

View File

@ -664,6 +664,22 @@ struct MagicDiv {
multiplier_ = PregenerateUint32Div(original, extradata_); multiplier_ = PregenerateUint32Div(original, extradata_);
} }
constexpr uint32_t GetRightShift() const {
IDivExtraInfo extra{};
extra.value_ = extradata_;
return extra.info.shift_;
}
// Reports the "add" flag that is packed into extradata_ next to the shift
// amount.
constexpr bool AddFlag() const {
  IDivExtraInfo extra{};
  extra.value_ = extradata_;
  // The previous body returned info.shift_ (a verbatim copy of
  // GetRightShift), so this accessor answered "is the shift non-zero?"
  // instead of exposing the flag.
  // NOTE(review): `add_` is the flag member of IDivExtraInfo::info as defined
  // earlier in this header (outside this hunk) -- confirm the field name.
  return extra.info.add_;
}
// Returns the stored multiplier_ value unchanged.
constexpr uint32_t GetMultiplier() const {
  return multiplier_;
}
constexpr uint32_t Apply(uint32_t numerator) const { constexpr uint32_t Apply(uint32_t numerator) const {
return ApplyUint32Div(numerator, multiplier_, extradata_); return ApplyUint32Div(numerator, multiplier_, extradata_);
} }

View File

@ -725,10 +725,11 @@ ResolveFunctionThunk X64HelperEmitter::EmitResolveFunctionThunk() {
return (ResolveFunctionThunk)fn; return (ResolveFunctionThunk)fn;
} }
// r11 = size of callers stack, r8 = return address w/ adjustment // r11 = size of callers stack, r8 = return address w/ adjustment
//i'm not proud of this code, but it shouldn't be executed frequently at all // i'm not proud of this code, but it shouldn't be executed frequently at all
void* X64HelperEmitter::EmitGuestAndHostSynchronizeStackHelper() { void* X64HelperEmitter::EmitGuestAndHostSynchronizeStackHelper() {
_code_offsets code_offsets = {}; _code_offsets code_offsets = {};
code_offsets.prolog = getSize(); code_offsets.prolog = getSize();
push(rbx);
mov(rbx, GetBackendCtxPtr(offsetof(X64BackendContext, stackpoints))); mov(rbx, GetBackendCtxPtr(offsetof(X64BackendContext, stackpoints)));
mov(eax, mov(eax,
GetBackendCtxPtr(offsetof(X64BackendContext, current_stackpoint_depth))); GetBackendCtxPtr(offsetof(X64BackendContext, current_stackpoint_depth)));
@ -741,8 +742,9 @@ void* X64HelperEmitter::EmitGuestAndHostSynchronizeStackHelper() {
Xbyak::Label signed_underflow{}; Xbyak::Label signed_underflow{};
xor_(r12d, r12d); xor_(r12d, r12d);
//todo: should use Loop instruction here if hasFastLoop, // todo: should use Loop instruction here if hasFastLoop,
//currently xbyak does not support it but its super easy to modify xbyak to have it // currently xbyak does not support it but its super easy to modify xbyak to
// have it
L(looper); L(looper);
imul(edx, ecx, sizeof(X64BackendStackpoint)); imul(edx, ecx, sizeof(X64BackendStackpoint));
mov(r10d, ptr[rbx + rdx + offsetof(X64BackendStackpoint, guest_stack_)]); mov(r10d, ptr[rbx + rdx + offsetof(X64BackendStackpoint, guest_stack_)]);
@ -760,12 +762,47 @@ void* X64HelperEmitter::EmitGuestAndHostSynchronizeStackHelper() {
} }
js(signed_underflow, T_NEAR); // should be impossible!! js(signed_underflow, T_NEAR); // should be impossible!!
jmp(looper, T_NEAR); jmp(looper, T_NEAR);
L(loopout); L(loopout);
Xbyak::Label skip_adjust{}; Xbyak::Label skip_adjust{};
cmp(r12d, 1);//should never happen? cmp(r12d, 1); // should never happen?
jle(skip_adjust, T_NEAR); jle(skip_adjust, T_NEAR);
Xbyak::Label we_good{};
// now we need to make sure that the return address matches
// mov(r9d, ptr[GetContextReg() + offsetof(ppc::PPCContext, lr)]);
pop(r9); // guest retaddr
// r10d = the guest_stack
// while guest_stack is equal and return address is not equal, decrement
Xbyak::Label search_for_retaddr{};
Xbyak::Label we_good_but_increment{};
L(search_for_retaddr);
imul(edx, ecx, sizeof(X64BackendStackpoint));
cmp(r10d, ptr[rbx + rdx + offsetof(X64BackendStackpoint, guest_stack_)]);
jnz(we_good_but_increment, T_NEAR);
cmp(r9d,
ptr[rbx + rdx + offsetof(X64BackendStackpoint, guest_return_address_)]);
jz(we_good, T_NEAR); // stack is equal, return address is equal, we've got
// our destination stack
dec(ecx);
jmp(search_for_retaddr, T_NEAR);
Xbyak::Label checkbp{};
L(we_good_but_increment);
add(edx, sizeof(X64BackendStackpoint));
inc(ecx);
jmp(checkbp, T_NEAR);
L(we_good);
//we're popping this return address, so go down by one
sub(edx, sizeof(X64BackendStackpoint));
dec(ecx);
L(checkbp);
mov(rsp, ptr[rbx + rdx + offsetof(X64BackendStackpoint, host_stack_)]); mov(rsp, ptr[rbx + rdx + offsetof(X64BackendStackpoint, host_stack_)]);
if (IsFeatureEnabled(kX64FlagsIndependentVars)) { if (IsFeatureEnabled(kX64FlagsIndependentVars)) {
inc(ecx); inc(ecx);
@ -773,13 +810,13 @@ void* X64HelperEmitter::EmitGuestAndHostSynchronizeStackHelper() {
add(ecx, 1); add(ecx, 1);
} }
// this->DebugBreak();
sub(rsp, r11); // adjust stack sub(rsp, r11); // adjust stack
mov(GetBackendCtxPtr(offsetof(X64BackendContext, current_stackpoint_depth)), mov(GetBackendCtxPtr(offsetof(X64BackendContext, current_stackpoint_depth)),
ecx); // set next stackpoint index to be after the one we restored to ecx); // set next stackpoint index to be after the one we restored to
jmp(r8);
L(skip_adjust); L(skip_adjust);
pop(rbx);
jmp(r8); // return to caller jmp(r8); // return to caller
code_offsets.prolog_stack_alloc = getSize(); code_offsets.prolog_stack_alloc = getSize();
code_offsets.body = getSize(); code_offsets.body = getSize();
@ -787,24 +824,11 @@ void* X64HelperEmitter::EmitGuestAndHostSynchronizeStackHelper() {
code_offsets.tail = getSize(); code_offsets.tail = getSize();
L(signed_underflow); L(signed_underflow);
//find a good, compact way to signal error here // find a good, compact way to signal error here
// maybe an invalid opcode that we execute, then detect in an exception handler? // maybe an invalid opcode that we execute, then detect in an exception
// handler?
this->DebugBreak(); this->DebugBreak();
// stack unwinding, take first entry
//actually, no reason to have this
/*mov(rsp, ptr[rbx + offsetof(X64BackendStackpoint, host_stack_)]);
mov(ptr[rbx + offsetof(X64BackendStackpoint, guest_stack_)], r9d);
sub(rsp, r11);
xor_(eax, eax);
inc(eax);
mov(GetBackendCtxPtr(offsetof(X64BackendContext, current_stackpoint_depth)),
eax);
jmp(r8);*/
// this->DebugBreak(); // err, add an xe::FatalError to call for this
return EmitCurrentForOffsets(code_offsets); return EmitCurrentForOffsets(code_offsets);
} }

View File

@ -48,7 +48,7 @@ struct X64BackendStackpoint {
// pad to 16 bytes so we never end up having a 64 bit load/store for // pad to 16 bytes so we never end up having a 64 bit load/store for
// host_stack_ straddling two lines. Consider this field reserved for future // host_stack_ straddling two lines. Consider this field reserved for future
// use // use
unsigned unused_; unsigned guest_return_address_;
}; };
// located prior to the ctx register // located prior to the ctx register
// some things it would be nice to have be per-emulator instance instead of per // some things it would be nice to have be per-emulator instance instead of per

View File

@ -486,6 +486,7 @@ uint64_t ResolveFunction(void* raw_context, uint64_t target_address) {
if (cvars::enable_host_guest_stack_synchronization) { if (cvars::enable_host_guest_stack_synchronization) {
auto processor = thread_state->processor(); auto processor = thread_state->processor();
auto module_for_address = auto module_for_address =
processor->LookupModule(static_cast<uint32_t>(target_address)); processor->LookupModule(static_cast<uint32_t>(target_address));
@ -498,6 +499,7 @@ uint64_t ResolveFunction(void* raw_context, uint64_t target_address) {
if (flags->is_return_site) { if (flags->is_return_site) {
auto ones_with_address = processor->FindFunctionsWithAddress( auto ones_with_address = processor->FindFunctionsWithAddress(
static_cast<uint32_t>(target_address)); static_cast<uint32_t>(target_address));
if (ones_with_address.size() != 0) { if (ones_with_address.size() != 0) {
// this loop to find a host address for the guest address is // this loop to find a host address for the guest address is
// necessary because FindFunctionsWithAddress works via a range // necessary because FindFunctionsWithAddress works via a range
@ -618,21 +620,42 @@ uint64_t ResolveFunction(void* raw_context, uint64_t target_address) {
and 5 bytes for the jmp with no cycles taken for the jump and 5 bytes for the jmp with no cycles taken for the jump
which will be predicted not taken. which will be predicted not taken.
Our handling for the check is implemented in X64HelperEmitter::EmitGuestAndHostSynchronizeStackHelper. we don't call it directly though, Our handling for the check is implemented in
instead we go through backend()->synchronize_guest_and_host_stack_helper_for_size(num_bytes_needed_to_represent_stack_size). we place the stack size after the X64HelperEmitter::EmitGuestAndHostSynchronizeStackHelper. we
call instruction so we can load it in the helper and readjust the return address to point after the literal value. don't call it directly though, instead we go through
backend()->synchronize_guest_and_host_stack_helper_for_size(num_bytes_needed_to_represent_stack_size).
we place the stack size after the call instruction so we can
load it in the helper and readjust the return address to point
after the literal value.
The helper is going to search the array of stackpoints to find the first one that is greater than or equal to the current stack pointer, when it finds The helper is going to search the array of
the entry it will set the currently host rsp to the host stack pointer value in the entry, and then subtract the stack size of the caller from that. stackpoints to find the first one that is greater than or
the current stackpoint index is adjusted to point to the one after the stackpoint we restored to. equal to the current stack pointer, when it finds the entry it
will set the currently host rsp to the host stack pointer
value in the entry, and then subtract the stack size of the
caller from that. the current stackpoint index is adjusted to
point to the one after the stackpoint we restored to.
The helper then jumps back to the function that was longjmp'ed to, with the host stack in its proper state. it just works! The helper then jumps back to the function
that was longjmp'ed to, with the host stack in its proper
state. it just works!
*/ */
if (num_frames_bigger > 1) { if (num_frames_bigger > 1) {
/*
* can't do anything about this right now :(
* epic mickey is quite slow due to having to call resolve on
* every longjmp, and it longjmps a lot but if we add an
* indirection we lose our stack misalignment check
*/
/* reinterpret_cast<X64CodeCache*>(backend->code_cache())
->AddIndirection(static_cast<uint32_t>(target_address),
static_cast<uint32_t>(host_address));
*/
return host_address; return host_address;
} }
} }
@ -1649,6 +1672,9 @@ Xbyak::Address X64Emitter::GetBackendFlagsPtr() const {
} }
void X64Emitter::HandleStackpointOverflowError(ppc::PPCContext* context) { void X64Emitter::HandleStackpointOverflowError(ppc::PPCContext* context) {
if (debugging::IsDebuggerAttached()) {
debugging::Break();
}
// context->lr // context->lr
// todo: show lr in message? // todo: show lr in message?
xe::FatalError( xe::FatalError(
@ -1674,6 +1700,9 @@ void X64Emitter::PushStackpoint() {
mov(qword[rbx + offsetof(X64BackendStackpoint, host_stack_)], rsp); mov(qword[rbx + offsetof(X64BackendStackpoint, host_stack_)], rsp);
mov(dword[rbx + offsetof(X64BackendStackpoint, guest_stack_)], r8d); mov(dword[rbx + offsetof(X64BackendStackpoint, guest_stack_)], r8d);
mov(r8d, qword[GetContextReg() + offsetof(ppc::PPCContext, lr)]);
mov(dword[rbx + offsetof(X64BackendStackpoint, guest_return_address_)], r8d);
if (IsFeatureEnabled(kX64FlagsIndependentVars)) { if (IsFeatureEnabled(kX64FlagsIndependentVars)) {
inc(eax); inc(eax);
} else { } else {
@ -1716,24 +1745,6 @@ void X64Emitter::EnsureSynchronizedGuestAndHostStack() {
// need to be made // need to be made
// that result in the stack not being 8 byte misaligned on context reentry // that result in the stack not being 8 byte misaligned on context reentry
#if 0
Xbyak::Label skip{};
mov(r8, qword[GetContextReg() + offsetof(ppc::PPCContext, r[1])]);
mov(rbx, GetBackendCtxPtr(offsetof(X64BackendContext, stackpoints)));
imul(eax,
GetBackendCtxPtr(offsetof(X64BackendContext, current_stackpoint_depth)),
sizeof(X64BackendStackpoint));
sub(eax, sizeof(X64BackendStackpoint));
add(rbx, rax);
cmp(r8d, dword[rbx + offsetof(X64BackendStackpoint, guest_stack_)]);
jle(skip, T_NEAR);
Xbyak::Label skip{};
mov(r11d, stack_size());
call(backend_->synchronize_guest_and_host_stack_helper());
L(skip);
#endif
Xbyak::Label& return_from_sync = this->NewCachedLabel(); Xbyak::Label& return_from_sync = this->NewCachedLabel();
// if we got here somehow from setjmp or the like we ought to have a // if we got here somehow from setjmp or the like we ought to have a
@ -1747,7 +1758,6 @@ void X64Emitter::EnsureSynchronizedGuestAndHostStack() {
uint32_t stack32 = static_cast<uint32_t>(e.stack_size()); uint32_t stack32 = static_cast<uint32_t>(e.stack_size());
auto backend = e.backend(); auto backend = e.backend();
if (stack32 < 256) { if (stack32 < 256) {
e.call(backend->synchronize_guest_and_host_stack_helper_for_size(1)); e.call(backend->synchronize_guest_and_host_stack_helper_for_size(1));
e.db(stack32); e.db(stack32);

View File

@ -11,6 +11,7 @@
#include "xenia/base/assert.h" #include "xenia/base/assert.h"
#include "xenia/base/byte_order.h" #include "xenia/base/byte_order.h"
#include "xenia/base/cvar.h"
#include "xenia/base/memory.h" #include "xenia/base/memory.h"
#include "xenia/base/profiling.h" #include "xenia/base/profiling.h"
#include "xenia/base/reset_scope.h" #include "xenia/base/reset_scope.h"
@ -22,6 +23,10 @@
#include "xenia/cpu/ppc/ppc_opcode_info.h" #include "xenia/cpu/ppc/ppc_opcode_info.h"
#include "xenia/cpu/ppc/ppc_scanner.h" #include "xenia/cpu/ppc/ppc_scanner.h"
#include "xenia/cpu/processor.h" #include "xenia/cpu/processor.h"
#include "xenia/cpu/xex_module.h"
DEFINE_bool(dump_translated_hir_functions, false, "dumps translated hir",
"CPU");
namespace xe { namespace xe {
namespace cpu { namespace cpu {
@ -107,10 +112,44 @@ class HirBuilderScope {
~HirBuilderScope() { ~HirBuilderScope() {
if (builder_) { if (builder_) {
builder_->RemoveCurrent(); builder_->RemoveCurrent();
} }
} }
}; };
// Writes a text dump of the HIR held by |builder| for |function| to disk.
//
// Does nothing unless the dump_translated_hir_functions cvar is set. The
// output file is named after the function's guest address (uppercase hex) and
// is placed in "hirdump", or in "hirdump_title_<title id>" when the function
// belongs to a XexModule that carries an execution-info header. The folder is
// created on demand, relative to the current working directory.
//
// function: guest function whose HIR is being dumped.
// builder:  HIR builder to dump from.
void PPCTranslator::DumpHIR(GuestFunction* function, PPCHIRBuilder* builder) {
  if (!cvars::dump_translated_hir_functions) {
    return;
  }
  if (!function || !builder) {
    return;
  }

  // Dump from the builder the caller handed us. The original read the member
  // builder_ here and silently ignored the parameter.
  StringBuffer buffer{};
  builder->Dump(&buffer);

  // Pick a per-title folder when the title id is available.
  std::wstring folder_name = L"hirdump";
  XexModule* mod = dynamic_cast<XexModule*>(function->module());
  if (mod) {
    xex2_opt_execution_info* opt_exec_info = nullptr;
    if (mod->GetOptHeader(XEX_HEADER_EXECUTION_INFO, &opt_exec_info)) {
      folder_name =
          L"hirdump_title_" + std::to_wstring(opt_exec_info->title_id);
    }
  }

  std::filesystem::path dump_path{folder_name};
  if (!std::filesystem::exists(dump_path)) {
    std::filesystem::create_directory(dump_path);
  }

  // File name is the guest address in hex; 64 wide chars is far more than the
  // 8 digits a 32-bit address can need. Zero-initialised so the buffer is
  // always terminated.
  wchar_t file_name[64] = {};
  _snwprintf(file_name, 64, L"%X", function->address());
  dump_path.append(&file_name[0]);

  FILE* f = fopen(dump_path.generic_u8string().c_str(), "w");
  if (f) {
    fputs(buffer.buffer(), f);
    fclose(f);
  }
}
bool PPCTranslator::Translate(GuestFunction* function, bool PPCTranslator::Translate(GuestFunction* function,
uint32_t debug_info_flags) { uint32_t debug_info_flags) {
SCOPE_profile_cpu_f("cpu"); SCOPE_profile_cpu_f("cpu");
@ -203,6 +242,8 @@ bool PPCTranslator::Translate(GuestFunction* function,
string_buffer_.Reset(); string_buffer_.Reset();
} }
DumpHIR(function, builder_.get());
// Assemble to backend machine code. // Assemble to backend machine code.
if (!assembler_->Assemble(function, builder_.get(), debug_info_flags, if (!assembler_->Assemble(function, builder_.get(), debug_info_flags,
std::move(debug_info))) { std::move(debug_info))) {

View File

@ -31,7 +31,9 @@ class PPCTranslator {
~PPCTranslator(); ~PPCTranslator();
bool Translate(GuestFunction* function, uint32_t debug_info_flags); bool Translate(GuestFunction* function, uint32_t debug_info_flags);
void DumpHIR(GuestFunction* function, PPCHIRBuilder* builder);
void Reset(); void Reset();
private: private:
void DumpSource(GuestFunction* function, StringBuffer* string_buffer); void DumpSource(GuestFunction* function, StringBuffer* string_buffer);

View File

@ -3124,7 +3124,6 @@ bool D3D12CommandProcessor::IssueCopy_ReadbackResolvePath() {
auto physaddr = memory_->TranslatePhysical(written_address); auto physaddr = memory_->TranslatePhysical(written_address);
memory::vastcpy(physaddr, (uint8_t*)readback_mapping, memory::vastcpy(physaddr, (uint8_t*)readback_mapping,
written_length); written_length);
// XEDmaCpy(physaddr, readback_mapping, written_length);
D3D12_RANGE readback_write_range = {}; D3D12_RANGE readback_write_range = {};
readback_buffer->Unmap(0, &readback_write_range); readback_buffer->Unmap(0, &readback_write_range);
} }
@ -3152,9 +3151,7 @@ void D3D12CommandProcessor::CheckSubmissionFence(uint64_t await_submission) {
direct_queue->Signal(queue_operations_since_submission_fence_, direct_queue->Signal(queue_operations_since_submission_fence_,
fence_value) && fence_value) &&
SUCCEEDED(queue_operations_since_submission_fence_ SUCCEEDED(queue_operations_since_submission_fence_
->SetEventOnCompletion(fence_value, ->SetEventOnCompletion(fence_value, nullptr)))) {
fence_completion_event_)))) {
WaitForSingleObject(fence_completion_event_, INFINITE);
queue_operations_done_since_submission_signal_ = false; queue_operations_done_since_submission_signal_ = false;
} else { } else {
XELOGE( XELOGE(
@ -3170,9 +3167,8 @@ void D3D12CommandProcessor::CheckSubmissionFence(uint64_t await_submission) {
uint64_t submission_completed_before = submission_completed_; uint64_t submission_completed_before = submission_completed_;
submission_completed_ = submission_fence_->GetCompletedValue(); submission_completed_ = submission_fence_->GetCompletedValue();
if (submission_completed_ < await_submission) { if (submission_completed_ < await_submission) {
if (SUCCEEDED(submission_fence_->SetEventOnCompletion( if (SUCCEEDED(submission_fence_->SetEventOnCompletion(await_submission,
await_submission, fence_completion_event_))) { nullptr))) {
WaitForSingleObject(fence_completion_event_, INFINITE);
submission_completed_ = submission_fence_->GetCompletedValue(); submission_completed_ = submission_fence_->GetCompletedValue();
} }
} }

View File

@ -43,6 +43,7 @@ DEFINE_bool(
"Allow stencil reference output usage on Direct3D 12 on Intel GPUs - not " "Allow stencil reference output usage on Direct3D 12 on Intel GPUs - not "
"working on UHD Graphics 630 as of March 2021 (driver 27.20.0100.8336).", "working on UHD Graphics 630 as of March 2021 (driver 27.20.0100.8336).",
"GPU"); "GPU");
DEFINE_bool(no_discard_stencil_in_transfer_pipelines, false, "bleh", "GPU");
// TODO(Triang3l): Make ROV the default when it's optimized better (for // TODO(Triang3l): Make ROV the default when it's optimized better (for
// instance, using static shader modifications to pass render target // instance, using static shader modifications to pass render target
// parameters). // parameters).
@ -2940,7 +2941,7 @@ D3D12RenderTargetCache::GetOrCreateTransferPipelines(TransferShaderKey key) {
// r0.xy = destination pixel XY index within the 32bpp tile // r0.xy = destination pixel XY index within the 32bpp tile
// r0.zw = 32bpp tile XY index // r0.zw = 32bpp tile XY index
a.OpUDiv(dxbc::Dest::R(0, 0b1100), dxbc::Dest::R(0, 0b0011), a.OpUDiv(dxbc::Dest::R(0, 0b1100), dxbc::Dest::R(0, 0b0011),
dxbc::Src::R(0, 0b01000100), dxbc::Src::R(0, dxbc::Src::kXYXY),
dxbc::Src::LU(dest_tile_width_pixels, dest_tile_height_pixels, dxbc::Src::LU(dest_tile_width_pixels, dest_tile_height_pixels,
dest_tile_width_pixels, dest_tile_height_pixels)); dest_tile_width_pixels, dest_tile_height_pixels));
@ -4189,12 +4190,14 @@ D3D12RenderTargetCache::GetOrCreateTransferPipelines(TransferShaderKey key) {
break; break;
case TransferOutput::kStencilBit: case TransferOutput::kStencilBit:
// Discard the sample if the needed stencil bit is not set. // Discard the sample if the needed stencil bit is not set.
assert_true(cbuffer_index_stencil_mask != UINT32_MAX); if (!cvars::no_discard_stencil_in_transfer_pipelines) {
a.OpAnd(dxbc::Dest::R(0, 0b0001), dxbc::Src::R(1, dxbc::Src::kXXXX), assert_true(cbuffer_index_stencil_mask != UINT32_MAX);
dxbc::Src::CB(cbuffer_index_stencil_mask, a.OpAnd(dxbc::Dest::R(0, 0b0001), dxbc::Src::R(1, dxbc::Src::kXXXX),
kTransferCBVRegisterStencilMask, 0, dxbc::Src::CB(cbuffer_index_stencil_mask,
dxbc::Src::kXXXX)); kTransferCBVRegisterStencilMask, 0,
a.OpDiscard(false, dxbc::Src::R(0, dxbc::Src::kXXXX)); dxbc::Src::kXXXX));
a.OpDiscard(false, dxbc::Src::R(0, dxbc::Src::kXXXX));
}
break; break;
} }
} }

View File

@ -1046,6 +1046,7 @@ struct Src : OperandAddress {
kYYYY = 0b01010101, kYYYY = 0b01010101,
kZZZZ = 0b10101010, kZZZZ = 0b10101010,
kWWWW = 0b11111111, kWWWW = 0b11111111,
kXYXY = 0b01000100
}; };
// Ignored for 0-component and 1-component operand types. // Ignored for 0-component and 1-component operand types.

View File

@ -37,14 +37,30 @@ struct XTASK_MESSAGE {
be<uint32_t> unknown_14; be<uint32_t> unknown_14;
be<uint32_t> task_handle; be<uint32_t> task_handle;
}; };
// Optional argument block for XamTaskSchedule. It is read from guest memory
// through the optional pointer argument when that pointer is non-zero. Both
// fields are big-endian 32-bit values; their meaning is not known yet.
struct XAM_TASK_ARGS {
be<uint32_t> value1;
// NOTE(review): the only reader in this file remarks this is "typically 0?".
be<uint32_t> value2;
// i think there might be another value here, it might be padding
};
static_assert_size(XTASK_MESSAGE, 0x1C); static_assert_size(XTASK_MESSAGE, 0x1C);
dword_result_t XamTaskSchedule_entry(lpvoid_t callback, dword_result_t XamTaskSchedule_entry(lpvoid_t callback,
pointer_t<XTASK_MESSAGE> message, pointer_t<XTASK_MESSAGE> message,
lpdword_t unknown, lpdword_t handle_ptr) { dword_t optional_ptr, lpdword_t handle_ptr,
const ppc_context_t& ctx) {
// TODO(gibbed): figure out what this is for // TODO(gibbed): figure out what this is for
*handle_ptr = 12345; *handle_ptr = 12345;
if (optional_ptr) {
auto option = ctx->TranslateVirtual<XAM_TASK_ARGS*>(optional_ptr);
auto v1 = option->value1;
auto v2 = option->value2; //typically 0?
XELOGI("Got xam task args: v1 = {:08X}, v2 = {:08X}", v1, v2);
}
uint32_t stack_size = kernel_state()->GetExecutableModule()->stack_size(); uint32_t stack_size = kernel_state()->GetExecutableModule()->stack_size();
// Stack must be aligned to 16kb pages // Stack must be aligned to 16kb pages

View File

@ -609,6 +609,10 @@ dword_result_t ExAllocatePoolTypeWithTag_entry(dword_t size, dword_t tag,
return addr; return addr;
} }
DECLARE_XBOXKRNL_EXPORT1(ExAllocatePoolTypeWithTag, kMemory, kImplemented); DECLARE_XBOXKRNL_EXPORT1(ExAllocatePoolTypeWithTag, kMemory, kImplemented);
// Thin forwarder: allocates |numbytes| with |tag| by delegating to
// ExAllocatePoolTypeWithTag_entry, with its third argument fixed to 0.
dword_result_t ExAllocatePoolWithTag_entry(dword_t numbytes, dword_t tag) {
  const auto allocation = ExAllocatePoolTypeWithTag_entry(numbytes, tag, 0);
  return allocation;
}
DECLARE_XBOXKRNL_EXPORT1(ExAllocatePoolWithTag, kMemory, kImplemented);
dword_result_t ExAllocatePool_entry(dword_t size) { dword_result_t ExAllocatePool_entry(dword_t size) {
const uint32_t none = 0x656E6F4E; // 'None' const uint32_t none = 0x656E6F4E; // 'None'

View File

@ -52,9 +52,11 @@ dword_result_t ObOpenObjectByName_entry(lpunknown_t obj_attributes_ptr,
return result; return result;
} }
DECLARE_XBOXKRNL_EXPORT1(ObOpenObjectByName, kNone, kImplemented); DECLARE_XBOXKRNL_EXPORT1(ObOpenObjectByName, kNone, kImplemented);
// chrispy: investigate this, pretty certain it does not properly emulate the
// original
dword_result_t ObOpenObjectByPointer_entry(lpvoid_t object_ptr, dword_result_t ObOpenObjectByPointer_entry(lpvoid_t object_ptr,
lpdword_t out_handle_ptr) { lpdword_t out_handle_ptr) {
*out_handle_ptr = 0;
auto object = XObject::GetNativeObject<XObject>(kernel_state(), object_ptr); auto object = XObject::GetNativeObject<XObject>(kernel_state(), object_ptr);
if (!object) { if (!object) {
return X_STATUS_UNSUCCESSFUL; return X_STATUS_UNSUCCESSFUL;
@ -71,7 +73,8 @@ dword_result_t ObLookupThreadByThreadId_entry(dword_t thread_id,
lpdword_t out_object_ptr) { lpdword_t out_object_ptr) {
auto thread = kernel_state()->GetThreadByID(thread_id); auto thread = kernel_state()->GetThreadByID(thread_id);
if (!thread) { if (!thread) {
return X_STATUS_NOT_FOUND; *out_object_ptr = 0;
return X_STATUS_INVALID_PARAMETER;
} }
// Retain the object. Will be released in ObDereferenceObject. // Retain the object. Will be released in ObDereferenceObject.
@ -80,16 +83,18 @@ dword_result_t ObLookupThreadByThreadId_entry(dword_t thread_id,
return X_STATUS_SUCCESS; return X_STATUS_SUCCESS;
} }
DECLARE_XBOXKRNL_EXPORT1(ObLookupThreadByThreadId, kNone, kImplemented); DECLARE_XBOXKRNL_EXPORT1(ObLookupThreadByThreadId, kNone, kImplemented);
// Maps an XObject type to its placeholder value. The values follow the
// pattern Xenia uses for uninitialized kernel data exports: D###BEEF, where
// ### is the export ordinal.
static const std::unordered_map<XObject::Type, uint32_t> object_types{
    {XObject::Type::Event, 0xD00EBEEF},
    {XObject::Type::Semaphore, 0xD017BEEF},
    {XObject::Type::Thread, 0xD01BBEEF},
};
dword_result_t ObReferenceObjectByHandle_entry(dword_t handle, dword_result_t ObReferenceObjectByHandle_entry(dword_t handle,
dword_t object_type_ptr, dword_t object_type_ptr,
lpdword_t out_object_ptr) { lpdword_t out_object_ptr) {
// These values come from how Xenia handles uninitialized kernel data exports. // chrispy: gotta preinit this to 0, kernel is expected to do that
// D###BEEF where ### is the ordinal. *out_object_ptr = 0;
const static std::unordered_map<XObject::Type, uint32_t> object_types = {
{XObject::Type::Event, 0xD00EBEEF},
{XObject::Type::Semaphore, 0xD017BEEF},
{XObject::Type::Thread, 0xD01BBEEF}};
auto object = kernel_state()->object_table()->LookupObject<XObject>(handle); auto object = kernel_state()->object_table()->LookupObject<XObject>(handle);
if (!object) { if (!object) {
return X_STATUS_INVALID_HANDLE; return X_STATUS_INVALID_HANDLE;
@ -132,22 +137,43 @@ dword_result_t ObReferenceObjectByName_entry(lpstring_t name,
} }
DECLARE_XBOXKRNL_EXPORT1(ObReferenceObjectByName, kNone, kImplemented); DECLARE_XBOXKRNL_EXPORT1(ObReferenceObjectByName, kNone, kImplemented);
dword_result_t ObDereferenceObject_entry(dword_t native_ptr) { void ObDereferenceObject_entry(dword_t native_ptr, const ppc_context_t& ctx) {
// Check if a dummy value from ObReferenceObjectByHandle. // Check if a dummy value from ObReferenceObjectByHandle.
if (native_ptr == 0xDEADF00D) { if (native_ptr == 0xDEADF00D) {
return 0; return;
} }
auto object = XObject::GetNativeObject<XObject>( auto object = XObject::GetNativeObject<XObject>(
kernel_state(), kernel_memory()->TranslateVirtual(native_ptr)); kernel_state(), kernel_memory()->TranslateVirtual(native_ptr));
if (object) { if (object) {
object->ReleaseHandle(); object->ReleaseHandle();
}
return 0; } else {
if (native_ptr) {
XELOGW("Unregistered guest object provided to ObDereferenceObject {:08X}",
native_ptr.value());
}
}
return;
} }
DECLARE_XBOXKRNL_EXPORT1(ObDereferenceObject, kNone, kImplemented); DECLARE_XBOXKRNL_EXPORT1(ObDereferenceObject, kNone, kImplemented);
// Retains a handle reference on the XObject that backs the guest object at
// |native_ptr|. Logs a warning when a non-null pointer does not resolve to a
// known object.
void ObReferenceObject_entry(dword_t native_ptr) {
  // 0xDEADF00D is the dummy value associated with ObReferenceObjectByHandle
  // and is not a real guest pointer. Skip it, exactly as ObDereferenceObject
  // does, instead of translating and reading through it. (The original only
  // carried the comment for this check, not the check itself.)
  if (native_ptr == 0xDEADF00D) {
    return;
  }

  auto object = XObject::GetNativeObject<XObject>(
      kernel_state(), kernel_memory()->TranslateVirtual(native_ptr));
  if (object) {
    object->RetainHandle();
    return;
  }

  // A null pointer is ignored silently; anything else is worth a warning.
  if (native_ptr) {
    XELOGW("Unregistered guest object provided to ObReferenceObject {:08X}",
           native_ptr.value());
  }
}
DECLARE_XBOXKRNL_EXPORT1(ObReferenceObject, kNone, kImplemented);
dword_result_t ObCreateSymbolicLink_entry(pointer_t<X_ANSI_STRING> path_ptr, dword_result_t ObCreateSymbolicLink_entry(pointer_t<X_ANSI_STRING> path_ptr,
pointer_t<X_ANSI_STRING> target_ptr) { pointer_t<X_ANSI_STRING> target_ptr) {
auto path = xe::utf8::canonicalize_guest_path( auto path = xe::utf8::canonicalize_guest_path(

View File

@ -237,7 +237,7 @@ void KeSetCurrentStackPointers_entry(lpvoid_t stack_ptr,
auto current_thread = XThread::GetCurrentThread(); auto current_thread = XThread::GetCurrentThread();
auto pcr = context->TranslateVirtualGPR<X_KPCR*>(context->r[13]); auto pcr = context->TranslateVirtualGPR<X_KPCR*>(context->r[13]);
//also supposed to load msr mask, and the current msr with that, and store
thread->stack_alloc_base = stack_alloc_base.value(); thread->stack_alloc_base = stack_alloc_base.value();
thread->stack_base = stack_base.value(); thread->stack_base = stack_base.value();
thread->stack_limit = stack_limit.value(); thread->stack_limit = stack_limit.value();
@ -500,6 +500,10 @@ uint32_t xeNtSetEvent(uint32_t handle, xe::be<uint32_t>* previous_state_ptr) {
auto ev = kernel_state()->object_table()->LookupObject<XEvent>(handle); auto ev = kernel_state()->object_table()->LookupObject<XEvent>(handle);
if (ev) { if (ev) {
//d3 ros does this
if (ev->type() != XObject::Type::Event) {
return X_STATUS_OBJECT_TYPE_MISMATCH;
}
int32_t was_signalled = ev->Set(0, false); int32_t was_signalled = ev->Set(0, false);
if (previous_state_ptr) { if (previous_state_ptr) {
*previous_state_ptr = static_cast<uint32_t>(was_signalled); *previous_state_ptr = static_cast<uint32_t>(was_signalled);

View File

@ -70,7 +70,8 @@ struct XAPC {
// Processor Control Region // Processor Control Region
struct X_KPCR { struct X_KPCR {
xe::be<uint32_t> tls_ptr; // 0x0 xe::be<uint32_t> tls_ptr; // 0x0
uint8_t unk_04[0x2C]; // 0x4 xe::be<uint32_t> msr_mask; // 0x4
uint8_t unk_08[0x28]; // 0x8
xe::be<uint32_t> pcr_ptr; // 0x30 xe::be<uint32_t> pcr_ptr; // 0x30
uint8_t unk_34[0x3C]; // 0x34 uint8_t unk_34[0x3C]; // 0x34
xe::be<uint32_t> stack_base_ptr; // 0x70 Stack base address (high addr) xe::be<uint32_t> stack_base_ptr; // 0x70 Stack base address (high addr)

View File

@ -75,12 +75,7 @@ bool D3D12SubmissionTracker::AwaitSubmissionCompletion(
fence_value = submission_signal_queued_; fence_value = submission_signal_queued_;
} }
if (fence_->GetCompletedValue() < fence_value) { if (fence_->GetCompletedValue() < fence_value) {
if (FAILED(fence_->SetEventOnCompletion(fence_value, if (FAILED(fence_->SetEventOnCompletion(fence_value, nullptr))) {
fence_completion_event_))) {
return false;
}
if (WaitForSingleObject(fence_completion_event_, INFINITE) !=
WAIT_OBJECT_0) {
return false; return false;
} }
} }

View File

@ -18,13 +18,17 @@
#include "xenia/ui/windowed_app_context_win.h" #include "xenia/ui/windowed_app_context_win.h"
DEFINE_bool(enable_console, false, "Open a console window with the main window", DEFINE_bool(enable_console, false, "Open a console window with the main window",
"General"); "Logging");
static uintptr_t g_xenia_exe_base = 0;
static size_t g_xenia_exe_size = 0;
#if XE_ARCH_AMD64 == 1 #if XE_ARCH_AMD64 == 1
DEFINE_bool(enable_rdrand_ntdll_patch, true, DEFINE_bool(enable_rdrand_ntdll_patch, true,
"Hot-patches ntdll at the start of the process to not use rdrand " "Hot-patches ntdll at the start of the process to not use rdrand "
"as part of the RNG for heap randomization. Can reduce CPU usage " "as part of the RNG for heap randomization. Can reduce CPU usage "
"significantly, but is untested on all Windows versions.", "significantly, but is untested on all Windows versions.",
"Win32"); "Win32");
// begin ntdll hack // begin ntdll hack
#include <psapi.h> #include <psapi.h>
static bool g_didfailtowrite = false; static bool g_didfailtowrite = false;
@ -77,36 +81,194 @@ static void do_ntdll_hack_this_process() {
} }
#endif #endif
// end ntdll hack // end ntdll hack
LONG _UnhandledExceptionFilter(_EXCEPTION_POINTERS* ExceptionInfo) { struct HostExceptionReport {
PVOID exception_addr = ExceptionInfo->ExceptionRecord->ExceptionAddress; _EXCEPTION_POINTERS* const ExceptionInfo;
size_t Report_Scratchpos;
DWORD64 last_stackpointer = ExceptionInfo->ContextRecord->Rsp; const DWORD last_win32_error;
const NTSTATUS last_ntstatus;
DWORD64 last_rip = ExceptionInfo->ContextRecord->Rip; const int errno_value;
char Report_Scratchbuffer[2048];
DWORD except_code = ExceptionInfo->ExceptionRecord->ExceptionCode; unsigned int address_format_ring_index;
DWORD last_error = GetLastError(); char formatted_addresses[16][128];
NTSTATUS stat = __readgsdword(0x1250); void AddString(const char* s);
static char* ChompNewlines(char* s);
int last_errno_value = errno; HostExceptionReport(_EXCEPTION_POINTERS* _ExceptionInfo)
: ExceptionInfo(_ExceptionInfo),
Report_Scratchpos(0u),
last_win32_error(GetLastError()),
last_ntstatus(__readgsdword(0x1250)),
errno_value(errno),
address_format_ring_index(0)
{
memset(Report_Scratchbuffer, 0, sizeof(Report_Scratchbuffer));
}
void DisplayExceptionMessage() {
MessageBoxA(nullptr, Report_Scratchbuffer, "Unhandled Exception in Xenia",
MB_ICONERROR);
}
char except_message_buf[1024]; const char* GetFormattedAddress(uintptr_t address);
const char* GetFormattedAddress(PVOID address) {
return GetFormattedAddress(reinterpret_cast<uintptr_t>(address));
}
};
// Removes every line-break character ('\n' and '\r') from the NUL-terminated
// string `s` in place and returns `s`. System message text normally ends in
// "\r\n"; stripping only '\n' would leave a stray carriage return in the
// report. A null `s` is returned unchanged so the result of a failed lookup
// can be passed straight through.
char* HostExceptionReport::ChompNewlines(char* s) {
  if (!s) {
    return nullptr;
  }
  char* out = s;
  for (const char* in = s; *in; ++in) {
    if (*in == '\n' || *in == '\r') {
      continue;
    }
    *out++ = *in;
  }
  *out = '\0';
  return s;
}
// Appends `s` to the report text in Report_Scratchbuffer and advances
// Report_Scratchpos. Text that does not fit is truncated instead of being
// written past the end of the buffer, and the buffer is always left
// NUL-terminated. A null `s` is ignored.
void HostExceptionReport::AddString(const char* s) {
  if (!s) {
    return;
  }
  // The final byte is reserved for the terminator.
  const size_t capacity = sizeof(Report_Scratchbuffer) - 1;
  if (Report_Scratchpos >= capacity) {
    return;
  }
  const size_t remaining = capacity - Report_Scratchpos;
  size_t length = strlen(s);
  if (length > remaining) {
    length = remaining;
  }
  memcpy(&Report_Scratchbuffer[Report_Scratchpos], s, length);
  Report_Scratchpos += length;
  Report_Scratchbuffer[Report_Scratchpos] = '\0';
}
// Renders `address` as text and returns a pointer to it. Addresses inside the
// main executable image are written as "xenia_canary.exe+<offset>"; all other
// addresses are written as plain hex. The text lives in one of 16 rotating
// buffers, so a returned pointer is overwritten by the 16th later call.
const char* HostExceptionReport::GetFormattedAddress(uintptr_t address) {
  const unsigned int slot = address_format_ring_index % 16;
  ++address_format_ring_index;
  // Keep the array type so sprintf_s can deduce the buffer size.
  char(&out)[128] = formatted_addresses[slot];

  const bool inside_image = address >= g_xenia_exe_base &&
                            (address - g_xenia_exe_base) < g_xenia_exe_size;
  if (!inside_image) {
    sprintf_s(out, "0x%llX", address);
    return out;
  }
  sprintf_s(out, "xenia_canary.exe+%llX", address - g_xenia_exe_base);
  return out;
}
// Signature of a function that contributes one section to a
// HostExceptionReport. The handlers in this file return false when they have
// nothing to report and true after appending their text.
using ExceptionInfoCategoryHandler = bool (*)(HostExceptionReport* report);
// Looks up the message text for `status` in ntdll.dll's message table.
// Returns a buffer allocated by FormatMessageA (never freed by this file), or
// nullptr if FormatMessageA did not produce one.
static char* Ntstatus_msg(NTSTATUS status) {
  const DWORD flags = FORMAT_MESSAGE_ALLOCATE_BUFFER |
                      FORMAT_MESSAGE_FROM_HMODULE |
                      FORMAT_MESSAGE_IGNORE_INSERTS;
  const DWORD language = MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT);
  char* text = nullptr;
  // With FORMAT_MESSAGE_ALLOCATE_BUFFER the buffer argument receives a
  // pointer, hence the address-of plus cast.
  FormatMessageA(flags, GetModuleHandleA("ntdll.dll"), status, language,
                 reinterpret_cast<LPSTR>(&text), 0, nullptr);
  return text;
}
// Appends the core exception facts to the report: faulting address, stack
// pointer, instruction pointer, and the exception code with ntdll's
// description of it. Always returns true.
static bool exception_pointers_handler(HostExceptionReport* report) {
  PVOID exception_addr =
      report->ExceptionInfo->ExceptionRecord->ExceptionAddress;
  DWORD64 last_stackpointer = report->ExceptionInfo->ContextRecord->Rsp;
  DWORD64 last_rip = report->ExceptionInfo->ContextRecord->Rip;
  DWORD except_code = report->ExceptionInfo->ExceptionRecord->ExceptionCode;

  // The lookup can fail and return null; never hand a null pointer to %s.
  const char* code_text =
      HostExceptionReport::ChompNewlines(Ntstatus_msg(except_code));
  if (!code_text) {
    code_text = "no description available";
  }

  // The description has no fixed length. snprintf truncates on overflow,
  // whereas sprintf_s would invoke the CRT invalid-parameter handler while
  // we are already handling a crash.
  char except_message_buf[512];
  snprintf(except_message_buf, sizeof(except_message_buf),
           "Exception encountered!\nException address: %s\nStackpointer: "
           "%s\nInstruction pointer: %s\nExceptionCode: 0x%X (%s)\n",
           report->GetFormattedAddress(exception_addr),
           report->GetFormattedAddress(last_stackpointer),
           report->GetFormattedAddress(last_rip), except_code, code_text);
  report->AddString(except_message_buf);
  return true;
}
// Appends the Win32 last-error value stored in the report, together with the
// system's description of it. Returns false and adds nothing when the stored
// value is 0.
static bool exception_win32_error_handle(HostExceptionReport* report) {
  if (!report->last_win32_error) {
    return false;  // no error, nothing to do
  }
  // FormatMessageA allocates statusmsg. It is intentionally never freed:
  // this runs from the unhandled-exception path and we're exiting anyway.
  char* statusmsg = nullptr;
  FormatMessageA(FORMAT_MESSAGE_ALLOCATE_BUFFER | FORMAT_MESSAGE_FROM_SYSTEM |
                     FORMAT_MESSAGE_IGNORE_INSERTS,
                 NULL, report->last_win32_error,
                 MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT), (LPSTR)&statusmsg,
                 0, NULL);

  // The lookup can fail and leave statusmsg null; never pass null to %s.
  const char* description = HostExceptionReport::ChompNewlines(statusmsg);
  if (!description) {
    description = "no description available";
  }

  // snprintf truncates an over-long description instead of invoking the CRT
  // invalid-parameter handler the way sprintf_s does on overflow.
  char win32_error_buf[512];
  snprintf(win32_error_buf, sizeof(win32_error_buf),
           "Last Win32 Error: 0x%X (%s)\n", report->last_win32_error,
           description);
  report->AddString(win32_error_buf);
  return true;
}
// Appends the NTSTATUS value stored in the report, together with ntdll's
// description of it. Returns false and adds nothing when the stored value
// is 0.
static bool exception_ntstatus_error_handle(HostExceptionReport* report) {
  if (!report->last_ntstatus) {
    return false;
  }

  // Strip line breaks like the other handlers do, and substitute a
  // placeholder when the lookup fails and returns null.
  const char* description =
      HostExceptionReport::ChompNewlines(Ntstatus_msg(report->last_ntstatus));
  if (!description) {
    description = "no description available";
  }

  // snprintf truncates an over-long description instead of invoking the CRT
  // invalid-parameter handler the way sprintf_s does on overflow.
  char ntstatus_buf[512];
  snprintf(ntstatus_buf, sizeof(ntstatus_buf), "Last NTSTATUS: 0x%X (%s)\n",
           report->last_ntstatus, description);
  report->AddString(ntstatus_buf);
  return true;
}
// Appends the errno value stored in the report, together with its strerror
// text. Returns false and adds nothing when the stored value is 0.
static bool exception_cerror_handle(HostExceptionReport* report) {
  const int saved_errno = report->errno_value;
  if (saved_errno == 0) {
    return false;
  }
  char line[512];
  sprintf_s(line, "Last errno value: 0x%X (%s)\n", saved_errno,
            strerror(saved_errno));
  report->AddString(line);
  return true;
}
// Report sections, in the order they appear in the final message.
static ExceptionInfoCategoryHandler host_exception_category_handlers[] = {
    exception_pointers_handler,       // addresses and exception code
    exception_win32_error_handle,     // last Win32 error
    exception_ntstatus_error_handle,  // last NTSTATUS
    exception_cerror_handle,          // last errno
};
// Top-level unhandled-exception filter. Builds a HostExceptionReport by
// running every category handler, shows it in a message box, and then returns
// EXCEPTION_CONTINUE_SEARCH. Each handler runs under its own SEH guard, so a
// fault in one only adds a marker line and the remaining handlers still run.
LONG _UnhandledExceptionFilter(_EXCEPTION_POINTERS* ExceptionInfo) {
  HostExceptionReport report{ExceptionInfo};
  for (ExceptionInfoCategoryHandler handler :
       host_exception_category_handlers) {
    __try {
      // The return value only says whether the handler added text; nothing
      // here depends on it.
      handler(&report);
    } __except (EXCEPTION_EXECUTE_HANDLER) {
      report.AddString("<Nested Exception Encountered>\n");
    }
  }
  report.DisplayExceptionMessage();
  return EXCEPTION_CONTINUE_SEARCH;
}
int WINAPI wWinMain(HINSTANCE hinstance, HINSTANCE hinstance_prev, int WINAPI wWinMain(HINSTANCE hinstance, HINSTANCE hinstance_prev,
LPWSTR command_line, int show_cmd) { LPWSTR command_line, int show_cmd) {
MODULEINFO modinfo;
GetModuleInformation(GetCurrentProcess(), (HMODULE)hinstance, &modinfo,
sizeof(MODULEINFO));
g_xenia_exe_base = reinterpret_cast<uintptr_t>(hinstance);
g_xenia_exe_size = modinfo.SizeOfImage;
int result; int result;
SetUnhandledExceptionFilter(_UnhandledExceptionFilter); SetUnhandledExceptionFilter(_UnhandledExceptionFilter);
{ {