Merge pull request #85 from chrisps/canary_experimental

Kernel improvements, "fix" crash on Sandy Bridge/Ivy Bridge
This commit is contained in:
Radosław Gliński 2022-10-21 14:18:03 +02:00 committed by GitHub
commit 7c375879bc
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
12 changed files with 88 additions and 58 deletions

2
.gitignore vendored
View File

@ -101,3 +101,5 @@ node_modules/.bin/
/third_party/binutils/binutils*
/third_party/vasm/
/tools/shader-playground/*.dll
/profile_print_times.py
/profile_times.txt

2
.gitmodules vendored
View File

@ -36,7 +36,7 @@
url = https://github.com/skystrife/cpptoml.git
[submodule "third_party/cxxopts"]
path = third_party/cxxopts
url = https://github.com/jarro2783/cxxopts.git
url = https://github.com/chrisps/cxxopts.git
[submodule "third_party/SDL2"]
path = third_party/SDL2
url = https://github.com/libsdl-org/SDL.git

View File

@ -177,14 +177,7 @@ void XmaDecoder::WorkerThreadMain() {
} else {
idle_loop_count = 0;
}
if (idle_loop_count > 500) {
// Idle for an extended period. Introduce a 20ms wait.
xe::threading::Wait(work_event_.get(), false,
std::chrono::milliseconds(20));
}
xe::threading::MaybeYield();
xe::threading::Wait(work_event_.get(), false);
}
}
@ -316,7 +309,7 @@ void XmaDecoder::WriteRegister(uint32_t addr, uint32_t value) {
}
}
// Signal the decoder thread to start processing.
work_event_->Set();
work_event_->SetBoostPriority();
} else if (r >= XmaRegister::Context0Lock && r <= XmaRegister::Context9Lock) {
// Context lock command.
// This requests a lock by flagging the context.

View File

@ -405,9 +405,10 @@ void X64Emitter::EmitProfilerEpilogue() {
if (cvars::instrument_call_times) {
uint64_t* profiler_entry =
backend()->GetProfilerRecordForFunction(current_guest_function_);
mov(ecx, 0x7ffe0014);
mov(rdx, qword[rcx]);
mov(rbx, (uintptr_t)profiler_entry);
mov(r10, (uintptr_t)profiler_entry);
sub(rdx, qword[rsp + StackLayout::GUEST_PROFILER_START]);
// atomic add our time to the profiler entry
@ -416,7 +417,8 @@ void X64Emitter::EmitProfilerEpilogue() {
// this a few cycles less intrusive, but it's good enough for now
// actually... let's just try without atomics lol
// lock();
add(qword[rbx], rdx);
add(qword[r10], rdx);
}
#endif
}

View File

@ -1872,20 +1872,20 @@ Value* HIRBuilder::AndNot(Value* value1, Value* value2) {
ASSERT_NON_FLOAT_TYPE(value1);
ASSERT_NON_FLOAT_TYPE(value2);
ASSERT_TYPES_EQUAL(value1, value2);
if (value1 == value2) {
// only other type it can be used with is INT64_TYPE (andc)
if (value1->type != VEC128_TYPE) {
return this->And(this->Not(value2), value1);
} else if (value1 == value2) {
return LoadZero(value1->type);
} else if (value1->IsConstantZero()) {
return value1;
} else if (value2->IsConstantZero()) {
} else if (value1->IsConstantZero() || value2->IsConstantZero()) {
return value1;
} else {
Instr* i = AppendInstr(OPCODE_AND_NOT_info, 0, AllocValue(value1->type));
i->set_src1(value1);
i->set_src2(value2);
i->src3.value = NULL;
return i->dest;
}
Instr* i = AppendInstr(OPCODE_AND_NOT_info, 0, AllocValue(value1->type));
i->set_src1(value1);
i->set_src2(value2);
i->src3.value = NULL;
return i->dest;
}
Value* HIRBuilder::Or(Value* value1, Value* value2) {

View File

@ -4355,7 +4355,7 @@ bool D3D12CommandProcessor::UpdateBindings(
uint32_t float_constant_index;
while (xe::bit_scan_forward(float_constant_map_entry,
&float_constant_index)) {
float_constant_map_entry &= ~(1ull << float_constant_index);
float_constant_map_entry = xe::clear_lowest_bit(float_constant_map_entry);
std::memcpy(float_constants,
&regs[XE_GPU_REG_SHADER_CONSTANT_000_X + (i << 8) +
(float_constant_index << 2)]
@ -4386,7 +4386,7 @@ bool D3D12CommandProcessor::UpdateBindings(
uint32_t float_constant_index;
while (xe::bit_scan_forward(float_constant_map_entry,
&float_constant_index)) {
float_constant_map_entry &= ~(1ull << float_constant_index);
float_constant_map_entry = xe::clear_lowest_bit(float_constant_map_entry);
std::memcpy(float_constants,
&regs[XE_GPU_REG_SHADER_CONSTANT_256_X + (i << 8) +
(float_constant_index << 2)]
@ -4877,7 +4877,7 @@ bool D3D12CommandProcessor::UpdateBindings_BindfulPath(
bool& retflag) {
retflag = true;
auto& provider = this->GetD3D12Provider();
size_t texture_count_pixel = textures_pixel->size();
size_t texture_count_pixel = textures_pixel ? textures_pixel->size() : 0;
size_t texture_count_vertex = textures_vertex.size();
//
// Bindful descriptors path.

View File

@ -680,9 +680,6 @@ class D3D12CommandProcessor final : public CommandProcessor {
ID3D12Resource* readback_buffer_ = nullptr;
uint32_t readback_buffer_size_ = 0;
std::atomic<bool> pix_capture_requested_ = false;
bool pix_capturing_;
// The current fixed-function drawing state.
D3D12_VIEWPORT ff_viewport_;
D3D12_RECT ff_scissor_;
@ -776,6 +773,9 @@ class D3D12CommandProcessor final : public CommandProcessor {
// scratch memexport data
MemExportRange memexport_ranges_[512];
uint32_t memexport_range_count_ = 0;
std::atomic<bool> pix_capture_requested_ = false;
bool pix_capturing_;
};
} // namespace d3d12

View File

@ -150,8 +150,8 @@ SharedMemory::WatchHandle SharedMemory::WatchMemoryRange(
watch_page_first << page_size_log2_ >> kWatchBucketSizeLog2;
uint32_t bucket_last =
watch_page_last << page_size_log2_ >> kWatchBucketSizeLog2;
auto global_lock = global_critical_region_.Acquire();
// chrispy: Not required; the global lock is always held by the caller.
// auto global_lock = global_critical_region_.Acquire();
// Allocate the range.
WatchRange* range = watch_range_first_free_;

View File

@ -188,6 +188,12 @@ void RtlInitAnsiString_entry(pointer_t<X_ANSI_STRING> destination,
destination->pointer = source.guest_address();
}
DECLARE_XBOXKRNL_EXPORT1(RtlInitAnsiString, kNone, kImplemented);
// https://learn.microsoft.com/en-us/windows-hardware/drivers/ddi/wdm/nf-wdm-rtlupcaseunicodechar
// Returns the uppercase form of SourceCharacter, treated as a single 16-bit
// character, using the current global std::locale.
//
// The lookup goes through std::ctype<wchar_t>, which the C++ standard requires
// every std::locale to contain. std::ctype<char16_t> is not one of the
// required facets, so std::use_facet on it may throw std::bad_cast on
// standard libraries that do not install it.
dword_result_t RtlUpcaseUnicodeChar_entry(dword_t SourceCharacter) {
  // Narrow to one code unit first, as the char16_t-based lookup did implicitly.
  const char16_t source_unit = static_cast<char16_t>(SourceCharacter);
  const auto& ctype_facet = std::use_facet<std::ctype<wchar_t>>(std::locale());
  const wchar_t upper = ctype_facet.toupper(static_cast<wchar_t>(source_unit));
  // wchar_t may be wider than 16 bits; hand back a single code unit.
  return static_cast<char16_t>(upper);
}
DECLARE_XBOXKRNL_EXPORT1(RtlUpcaseUnicodeChar, kNone, kImplemented);
// https://msdn.microsoft.com/en-us/library/ff561899
void RtlFreeAnsiString_entry(pointer_t<X_ANSI_STRING> string) {

View File

@ -957,13 +957,14 @@ static void PrefetchForCAS(const void* value) {
}
}
uint32_t xeKeKfAcquireSpinLock(uint32_t* lock) {
uint32_t xeKeKfAcquireSpinLock(uint32_t* lock, uint64_t r13 = 1) {
// XELOGD(
// "KfAcquireSpinLock({:08X})",
// lock_ptr);
PrefetchForCAS(lock);
assert_true(*lock != static_cast<uint32_t>(r13));
// Lock.
while (!xe::atomic_cas(0, 1, lock)) {
while (!xe::atomic_cas(0, static_cast<uint32_t>(r13), lock)) {
// Spin!
// TODO(benvanik): error on deadlock?
xe::threading::MaybeYield();
@ -976,34 +977,51 @@ uint32_t xeKeKfAcquireSpinLock(uint32_t* lock) {
return old_irql;
}
dword_result_t KfAcquireSpinLock_entry(lpdword_t lock_ptr) {
dword_result_t KfAcquireSpinLock_entry(lpdword_t lock_ptr,
ppc_context_t& ppc_context) {
auto lock = reinterpret_cast<uint32_t*>(lock_ptr.host_address());
return xeKeKfAcquireSpinLock(lock);
return xeKeKfAcquireSpinLock(lock, ppc_context->r[13]);
}
DECLARE_XBOXKRNL_EXPORT3(KfAcquireSpinLock, kThreading, kImplemented, kBlocking,
kHighFrequency);
void xeKeKfReleaseSpinLock(uint32_t* lock, dword_t old_irql) {
// Unlock.
*lock = 0;
if (old_irql >= 2) {
return;
}
// Restore IRQL.
XThread* thread = XThread::GetCurrentThread();
thread->LowerIrql(old_irql);
// Unlock.
xe::atomic_dec(lock);
}
void KfReleaseSpinLock_entry(lpdword_t lock_ptr, dword_t old_irql) {
void KfReleaseSpinLock_entry(lpdword_t lock_ptr, dword_t old_irql,
ppc_context_t& ppc_ctx) {
auto lock = reinterpret_cast<uint32_t*>(lock_ptr.host_address());
xeKeKfReleaseSpinLock(lock, old_irql);
assert_true(*lock_ptr == static_cast<uint32_t>(ppc_ctx->r[13]));
*lock_ptr = 0;
if (old_irql >= 2) {
return;
}
// Restore IRQL.
XThread* thread = XThread::GetCurrentThread();
thread->LowerIrql(old_irql);
}
DECLARE_XBOXKRNL_EXPORT2(KfReleaseSpinLock, kThreading, kImplemented,
kHighFrequency);
// todo: this is not accurate
void KeAcquireSpinLockAtRaisedIrql_entry(lpdword_t lock_ptr) {
void KeAcquireSpinLockAtRaisedIrql_entry(lpdword_t lock_ptr,
ppc_context_t& ppc_ctx) {
// Lock.
auto lock = reinterpret_cast<uint32_t*>(lock_ptr.host_address());
// must not be our own thread
assert_true(*lock_ptr != static_cast<uint32_t>(ppc_ctx->r[13]));
PrefetchForCAS(lock);
while (!xe::atomic_cas(0, 1, lock)) {
while (!xe::atomic_cas(0, static_cast<uint32_t>(ppc_ctx->r[13]), lock)) {
#if XE_ARCH_AMD64 == 1
// todo: this is just a nop if they don't have SMT, which is not great
// either...
@ -1017,11 +1035,13 @@ void KeAcquireSpinLockAtRaisedIrql_entry(lpdword_t lock_ptr) {
DECLARE_XBOXKRNL_EXPORT3(KeAcquireSpinLockAtRaisedIrql, kThreading,
kImplemented, kBlocking, kHighFrequency);
dword_result_t KeTryToAcquireSpinLockAtRaisedIrql_entry(lpdword_t lock_ptr) {
dword_result_t KeTryToAcquireSpinLockAtRaisedIrql_entry(
lpdword_t lock_ptr, ppc_context_t& ppc_ctx) {
// Lock.
auto lock = reinterpret_cast<uint32_t*>(lock_ptr.host_address());
assert_true(*lock_ptr != static_cast<uint32_t>(ppc_ctx->r[13]));
PrefetchForCAS(lock);
if (!xe::atomic_cas(0, 1, lock)) {
if (!xe::atomic_cas(0, static_cast<uint32_t>(ppc_ctx->r[13]), lock)) {
return 0;
}
return 1;
@ -1029,10 +1049,12 @@ dword_result_t KeTryToAcquireSpinLockAtRaisedIrql_entry(lpdword_t lock_ptr) {
DECLARE_XBOXKRNL_EXPORT4(KeTryToAcquireSpinLockAtRaisedIrql, kThreading,
kImplemented, kBlocking, kHighFrequency, kSketchy);
void KeReleaseSpinLockFromRaisedIrql_entry(lpdword_t lock_ptr) {
void KeReleaseSpinLockFromRaisedIrql_entry(lpdword_t lock_ptr,
ppc_context_t& ppc_ctx) {
// Unlock.
assert_true(*lock_ptr == static_cast<uint32_t>(ppc_ctx->r[13]));
auto lock = reinterpret_cast<uint32_t*>(lock_ptr.host_address());
xe::atomic_dec(lock);
*lock_ptr = 0;
}
DECLARE_XBOXKRNL_EXPORT2(KeReleaseSpinLockFromRaisedIrql, kThreading,
kImplemented, kHighFrequency);
@ -1261,8 +1283,8 @@ void ExInitializeReadWriteLock_entry(pointer_t<X_ERWLOCK> lock_ptr) {
}
DECLARE_XBOXKRNL_EXPORT1(ExInitializeReadWriteLock, kThreading, kImplemented);
void ExAcquireReadWriteLockExclusive_entry(pointer_t<X_ERWLOCK> lock_ptr) {
auto old_irql = xeKeKfAcquireSpinLock(&lock_ptr->spin_lock);
void ExAcquireReadWriteLockExclusive_entry(pointer_t<X_ERWLOCK> lock_ptr, ppc_context_t& ppc_context) {
auto old_irql = xeKeKfAcquireSpinLock(&lock_ptr->spin_lock, ppc_context->r[13]);
int32_t lock_count = ++lock_ptr->lock_count;
if (!lock_count) {
@ -1279,8 +1301,9 @@ DECLARE_XBOXKRNL_EXPORT2(ExAcquireReadWriteLockExclusive, kThreading,
kImplemented, kBlocking);
dword_result_t ExTryToAcquireReadWriteLockExclusive_entry(
pointer_t<X_ERWLOCK> lock_ptr) {
auto old_irql = xeKeKfAcquireSpinLock(&lock_ptr->spin_lock);
pointer_t<X_ERWLOCK> lock_ptr, ppc_context_t& ppc_context) {
auto old_irql =
xeKeKfAcquireSpinLock(&lock_ptr->spin_lock, ppc_context->r[13]);
uint32_t result;
if (lock_ptr->lock_count < 0) {
@ -1296,8 +1319,9 @@ dword_result_t ExTryToAcquireReadWriteLockExclusive_entry(
DECLARE_XBOXKRNL_EXPORT1(ExTryToAcquireReadWriteLockExclusive, kThreading,
kImplemented);
void ExAcquireReadWriteLockShared_entry(pointer_t<X_ERWLOCK> lock_ptr) {
auto old_irql = xeKeKfAcquireSpinLock(&lock_ptr->spin_lock);
void ExAcquireReadWriteLockShared_entry(pointer_t<X_ERWLOCK> lock_ptr,
ppc_context_t& ppc_context) {
auto old_irql = xeKeKfAcquireSpinLock(&lock_ptr->spin_lock, ppc_context->r[13]);
int32_t lock_count = ++lock_ptr->lock_count;
if (!lock_count ||
@ -1316,8 +1340,9 @@ DECLARE_XBOXKRNL_EXPORT2(ExAcquireReadWriteLockShared, kThreading, kImplemented,
kBlocking);
dword_result_t ExTryToAcquireReadWriteLockShared_entry(
pointer_t<X_ERWLOCK> lock_ptr) {
auto old_irql = xeKeKfAcquireSpinLock(&lock_ptr->spin_lock);
pointer_t<X_ERWLOCK> lock_ptr, ppc_context_t& ppc_context) {
auto old_irql =
xeKeKfAcquireSpinLock(&lock_ptr->spin_lock, ppc_context->r[13]);
uint32_t result;
if (lock_ptr->lock_count < 0 ||
@ -1335,8 +1360,10 @@ dword_result_t ExTryToAcquireReadWriteLockShared_entry(
DECLARE_XBOXKRNL_EXPORT1(ExTryToAcquireReadWriteLockShared, kThreading,
kImplemented);
void ExReleaseReadWriteLock_entry(pointer_t<X_ERWLOCK> lock_ptr) {
auto old_irql = xeKeKfAcquireSpinLock(&lock_ptr->spin_lock);
void ExReleaseReadWriteLock_entry(pointer_t<X_ERWLOCK> lock_ptr,
ppc_context_t& ppc_context) {
auto old_irql =
xeKeKfAcquireSpinLock(&lock_ptr->spin_lock, ppc_context->r[13]);
int32_t lock_count = --lock_ptr->lock_count;

View File

@ -100,7 +100,7 @@ struct X_KTHREAD {
uint8_t unk_58[0x4]; // 0x58
xe::be<uint32_t> stack_base; // 0x5C
xe::be<uint32_t> stack_limit; // 0x60
uint8_t unk_64[0x4]; // 0x64
xe::be<uint32_t> stack_kernel; // 0x64
xe::be<uint32_t> tls_address; // 0x68
uint8_t unk_6C; // 0x6C
uint8_t unk_6D[0x7]; // 0x6D

2
third_party/cxxopts vendored

@ -1 +1 @@
Subproject commit 2e3c6991d33811878ebcc0839d3815850d129b3a
Subproject commit b2b8cf2f50a449720874f43445e23d75b77dcc43