Merge pull request #85 from chrisps/canary_experimental
Kernel improvements, "fix" crash on sandy bridge/ivy bridge
commit 7c375879bc

@@ -101,3 +101,5 @@ node_modules/.bin/
 /third_party/binutils/binutils*
 /third_party/vasm/
 /tools/shader-playground/*.dll
+/profile_print_times.py
+/profile_times.txt

@@ -36,7 +36,7 @@
 	url = https://github.com/skystrife/cpptoml.git
 [submodule "third_party/cxxopts"]
 	path = third_party/cxxopts
-	url = https://github.com/jarro2783/cxxopts.git
+	url = https://github.com/chrisps/cxxopts.git
 [submodule "third_party/SDL2"]
 	path = third_party/SDL2
 	url = https://github.com/libsdl-org/SDL.git

@@ -177,14 +177,7 @@ void XmaDecoder::WorkerThreadMain() {
     } else {
       idle_loop_count = 0;
     }
 
-    if (idle_loop_count > 500) {
-      // Idle for an extended period. Introduce a 20ms wait.
-      xe::threading::Wait(work_event_.get(), false,
-                          std::chrono::milliseconds(20));
-    }
-
-    xe::threading::MaybeYield();
+    xe::threading::Wait(work_event_.get(), false);
   }
 }

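The worker-loop change above replaces the idle_loop_count heuristic (spin, yield, and fall back to a 20 ms timed wait) with a single blocking wait on work_event_, so the decoder thread sleeps until WriteRegister signals it. A minimal sketch of that producer/consumer shape in standard C++; WorkEvent is an illustrative stand-in, not the xe::threading::Event API:

#include <condition_variable>
#include <mutex>

// Illustrative stand-in for an auto-reset event plus the worker loop change:
// the consumer blocks instead of polling with periodic timed waits.
struct WorkEvent {
  std::mutex m;
  std::condition_variable cv;
  bool signaled = false;

  void Set() {  // producer side (WriteRegister): wake the worker
    { std::lock_guard<std::mutex> lk(m); signaled = true; }
    cv.notify_one();
  }
  void Wait() {  // worker side: sleep until signaled, then consume the signal
    std::unique_lock<std::mutex> lk(m);
    cv.wait(lk, [this] { return signaled; });
    signaled = false;  // auto-reset semantics
  }
};
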
@@ -316,7 +309,7 @@ void XmaDecoder::WriteRegister(uint32_t addr, uint32_t value) {
       }
     }
     // Signal the decoder thread to start processing.
-    work_event_->Set();
+    work_event_->SetBoostPriority();
   } else if (r >= XmaRegister::Context0Lock && r <= XmaRegister::Context9Lock) {
     // Context lock command.
     // This requests a lock by flagging the context.

@@ -405,9 +405,10 @@ void X64Emitter::EmitProfilerEpilogue() {
   if (cvars::instrument_call_times) {
     uint64_t* profiler_entry =
         backend()->GetProfilerRecordForFunction(current_guest_function_);
 
     mov(ecx, 0x7ffe0014);
     mov(rdx, qword[rcx]);
-    mov(rbx, (uintptr_t)profiler_entry);
+    mov(r10, (uintptr_t)profiler_entry);
     sub(rdx, qword[rsp + StackLayout::GUEST_PROFILER_START]);
 
     // atomic add our time to the profiler entry

@@ -416,7 +417,8 @@ void X64Emitter::EmitProfilerEpilogue() {
     // this a few cycles less intrusive, but its good enough for now
     // actually... lets just try without atomics lol
     // lock();
-    add(qword[rbx], rdx);
+    add(qword[r10], rdx);
 
   }
 #endif
 }

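Both profiler hunks move the profiler-record pointer from rbx to r10, a volatile scratch register in the x64 calling convention, so the epilogue no longer disturbs a preserved register, and the elapsed time (apparently read from the shared user data page at 0x7FFE0014 and diffed against the start timestamp saved on the stack frame) is accumulated with a plain, non-atomic add. A hedged C++ sketch of that bookkeeping; ProfilerRecord and AccumulateCallTime are illustrative names, and std::chrono stands in for the shared time counter:

#include <chrono>
#include <cstdint>

// Illustrative per-function profiler record; the real entry comes from
// backend()->GetProfilerRecordForFunction(...).
struct ProfilerRecord {
  uint64_t total_time = 0;  // accumulated ticks, plain add (no atomics)
};

// Sketch of what the emitted epilogue does: subtract the start timestamp
// saved when the function was entered and add the delta to the record.
inline void AccumulateCallTime(ProfilerRecord* entry, uint64_t start_ticks) {
  uint64_t now = static_cast<uint64_t>(
      std::chrono::steady_clock::now().time_since_epoch().count());
  entry->total_time += now - start_ticks;  // matches add(qword[r10], rdx)
}
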
@@ -1872,20 +1872,20 @@ Value* HIRBuilder::AndNot(Value* value1, Value* value2) {
   ASSERT_NON_FLOAT_TYPE(value1);
   ASSERT_NON_FLOAT_TYPE(value2);
   ASSERT_TYPES_EQUAL(value1, value2);
 
-  if (value1 == value2) {
+  // only other type it can be used with is INT64_TYPE (andc)
+  if (value1->type != VEC128_TYPE) {
+    return this->And(this->Not(value2), value1);
+  } else if (value1 == value2) {
     return LoadZero(value1->type);
-  } else if (value1->IsConstantZero()) {
-    return value1;
-  } else if (value2->IsConstantZero()) {
+  } else if (value1->IsConstantZero() || value2->IsConstantZero()) {
     return value1;
-  } else {
-    Instr* i = AppendInstr(OPCODE_AND_NOT_info, 0, AllocValue(value1->type));
-    i->set_src1(value1);
-    i->set_src2(value2);
-    i->src3.value = NULL;
-    return i->dest;
   }
+
+  Instr* i = AppendInstr(OPCODE_AND_NOT_info, 0, AllocValue(value1->type));
+  i->set_src1(value1);
+  i->set_src2(value2);
+  i->src3.value = NULL;
+  return i->dest;
 }
 
 Value* HIRBuilder::Or(Value* value1, Value* value2) {

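AndNot(value1, value2) produces value1 & ~value2. After this change the builder only emits OPCODE_AND_NOT for VEC128 values (the vector andc case); scalar integers are lowered to And(Not(value2), value1), identical operands fold to zero, and either constant-zero operand folds to value1. A small standalone model of those folding rules for 64-bit integers (illustrative only, not the HIR value types):

#include <cassert>
#include <cstdint>

// Scalar model of the folding rules above: result = a & ~b.
//  - same value:  a & ~a == 0
//  - a == 0:      0 & ~b == 0   (returning value1 returns that zero constant)
//  - b == 0:      a & ~0 == a   (returning value1 is the identity result)
inline uint64_t and_not(uint64_t a, uint64_t b) { return a & ~b; }

int main() {
  assert(and_not(0xF0F0, 0xF0F0) == 0);
  assert(and_not(0, 0x1234) == 0);
  assert(and_not(0x1234, 0) == 0x1234);
  return 0;
}
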
@@ -4355,7 +4355,7 @@ bool D3D12CommandProcessor::UpdateBindings(
         uint32_t float_constant_index;
         while (xe::bit_scan_forward(float_constant_map_entry,
                                     &float_constant_index)) {
-          float_constant_map_entry &= ~(1ull << float_constant_index);
+          float_constant_map_entry = xe::clear_lowest_bit(float_constant_map_entry);
           std::memcpy(float_constants,
                       &regs[XE_GPU_REG_SHADER_CONSTANT_000_X + (i << 8) +
                             (float_constant_index << 2)]

@@ -4386,7 +4386,7 @@ bool D3D12CommandProcessor::UpdateBindings(
         uint32_t float_constant_index;
         while (xe::bit_scan_forward(float_constant_map_entry,
                                     &float_constant_index)) {
-          float_constant_map_entry &= ~(1ull << float_constant_index);
+          float_constant_map_entry = xe::clear_lowest_bit(float_constant_map_entry);
           std::memcpy(float_constants,
                       &regs[XE_GPU_REG_SHADER_CONSTANT_256_X + (i << 8) +
                             (float_constant_index << 2)]

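Both constant-upload loops now clear the bit that bit_scan_forward just reported with xe::clear_lowest_bit instead of rebuilding a mask with ~(1ull << index); clearing the lowest set bit is the x & (x - 1) idiom (BLSR on x86). A portable sketch of that set-bit iteration pattern, assuming C++20 <bit>; the xe:: helpers themselves are not reproduced here:

#include <bit>
#include <cstdint>
#include <cstdio>

// Iterate over the set bits of a mask, lowest first, clearing each bit with
// x & (x - 1) instead of recomputing a shifted mask from the index.
inline void for_each_set_bit(uint64_t mask) {
  while (mask) {
    unsigned index = static_cast<unsigned>(std::countr_zero(mask));  // bit_scan_forward
    mask &= mask - 1;  // clear_lowest_bit (BLSR)
    std::printf("constant slot %u is dirty\n", index);
  }
}
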
@@ -4877,7 +4877,7 @@ bool D3D12CommandProcessor::UpdateBindings_BindfulPath(
     bool& retflag) {
   retflag = true;
   auto& provider = this->GetD3D12Provider();
-  size_t texture_count_pixel = textures_pixel->size();
+  size_t texture_count_pixel = textures_pixel ? textures_pixel->size() : 0;
   size_t texture_count_vertex = textures_vertex.size();
   //
   // Bindful descriptors path.

@@ -680,9 +680,6 @@ class D3D12CommandProcessor final : public CommandProcessor {
   ID3D12Resource* readback_buffer_ = nullptr;
   uint32_t readback_buffer_size_ = 0;
 
-  std::atomic<bool> pix_capture_requested_ = false;
-  bool pix_capturing_;
-
   // The current fixed-function drawing state.
   D3D12_VIEWPORT ff_viewport_;
   D3D12_RECT ff_scissor_;

@@ -776,6 +773,9 @@ class D3D12CommandProcessor final : public CommandProcessor {
   // scratch memexport data
   MemExportRange memexport_ranges_[512];
   uint32_t memexport_range_count_ = 0;
+
+  std::atomic<bool> pix_capture_requested_ = false;
+  bool pix_capturing_;
 };
 
 }  // namespace d3d12

@@ -150,8 +150,8 @@ SharedMemory::WatchHandle SharedMemory::WatchMemoryRange(
       watch_page_first << page_size_log2_ >> kWatchBucketSizeLog2;
   uint32_t bucket_last =
       watch_page_last << page_size_log2_ >> kWatchBucketSizeLog2;
 
-  auto global_lock = global_critical_region_.Acquire();
-
+  //chrispy: Not required the global lock is always held by the caller
+  // auto global_lock = global_critical_region_.Acquire();
   // Allocate the range.
   WatchRange* range = watch_range_first_free_;

@@ -188,6 +188,12 @@ void RtlInitAnsiString_entry(pointer_t<X_ANSI_STRING> destination,
   destination->pointer = source.guest_address();
 }
 DECLARE_XBOXKRNL_EXPORT1(RtlInitAnsiString, kNone, kImplemented);
+
+// https://learn.microsoft.com/en-us/windows-hardware/drivers/ddi/wdm/nf-wdm-rtlupcaseunicodechar
+dword_result_t RtlUpcaseUnicodeChar_entry(dword_t SourceCharacter) {
+  return std::use_facet<std::ctype<char16_t>>(std::locale()).toupper(SourceCharacter);
+}
+DECLARE_XBOXKRNL_EXPORT1(RtlUpcaseUnicodeChar, kNone, kImplemented);
 
 // https://msdn.microsoft.com/en-us/library/ff561899
 void RtlFreeAnsiString_entry(pointer_t<X_ANSI_STRING> string) {

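The new RtlUpcaseUnicodeChar export upcases a single UTF-16 code unit through the std::ctype<char16_t> facet of the default locale. A minimal usage sketch; note that a char16_t ctype facet is not mandated by the C++ standard, so this only illustrates the call rather than guaranteeing portability:

#include <cassert>
#include <locale>

int main() {
  // use_facet throws std::bad_cast if the locale lacks a char16_t ctype facet.
  const std::ctype<char16_t>& ct =
      std::use_facet<std::ctype<char16_t>>(std::locale());
  // 'a' (U+0061) -> 'A' (U+0041); characters without an uppercase mapping
  // are returned unchanged.
  assert(ct.toupper(u'a') == u'A');
  assert(ct.toupper(u'7') == u'7');
  return 0;
}
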
@@ -957,13 +957,14 @@ static void PrefetchForCAS(const void* value) {
   }
 }
 
-uint32_t xeKeKfAcquireSpinLock(uint32_t* lock) {
+uint32_t xeKeKfAcquireSpinLock(uint32_t* lock, uint64_t r13 = 1) {
   // XELOGD(
   //     "KfAcquireSpinLock({:08X})",
   //     lock_ptr);
   PrefetchForCAS(lock);
+  assert_true(*lock != static_cast<uint32_t>(r13));
   // Lock.
-  while (!xe::atomic_cas(0, 1, lock)) {
+  while (!xe::atomic_cas(0, static_cast<uint32_t>(r13), lock)) {
     // Spin!
     // TODO(benvanik): error on deadlock?
     xe::threading::MaybeYield();

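From here on, the spin-lock word stores the guest's r13 (which the guest kernel appears to use as a per-thread/processor context pointer) instead of the literal 1, so a held lock identifies its owner and the new assert_true calls can catch a thread re-acquiring a lock it already holds. A rough std::atomic model of that idea, with an opaque owner token standing in for r13 (names are illustrative, not the emulator's API):

#include <atomic>
#include <cassert>
#include <cstdint>

// Spinlock whose lock word doubles as an owner tag (0 == free).
struct OwnerTaggedSpinLock {
  std::atomic<uint32_t> word{0};

  void Acquire(uint32_t owner_token) {  // owner_token plays the role of r13
    assert(word.load(std::memory_order_relaxed) != owner_token &&
           "recursive acquire by the same thread");
    uint32_t expected = 0;
    while (!word.compare_exchange_weak(expected, owner_token,
                                       std::memory_order_acquire)) {
      expected = 0;  // CAS wrote the observed value back; reset before retrying
    }
  }

  void Release(uint32_t owner_token) {
    assert(word.load(std::memory_order_relaxed) == owner_token);
    word.store(0, std::memory_order_release);
  }
};
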
@@ -976,34 +977,51 @@ uint32_t xeKeKfAcquireSpinLock(uint32_t* lock) {
   return old_irql;
 }
 
-dword_result_t KfAcquireSpinLock_entry(lpdword_t lock_ptr) {
+dword_result_t KfAcquireSpinLock_entry(lpdword_t lock_ptr,
+                                       ppc_context_t& ppc_context) {
   auto lock = reinterpret_cast<uint32_t*>(lock_ptr.host_address());
-  return xeKeKfAcquireSpinLock(lock);
+  return xeKeKfAcquireSpinLock(lock, ppc_context->r[13]);
 }
 DECLARE_XBOXKRNL_EXPORT3(KfAcquireSpinLock, kThreading, kImplemented, kBlocking,
                          kHighFrequency);
 
 void xeKeKfReleaseSpinLock(uint32_t* lock, dword_t old_irql) {
+  // Unlock.
+  *lock = 0;
+  if (old_irql >= 2) {
+    return;
+  }
   // Restore IRQL.
   XThread* thread = XThread::GetCurrentThread();
   thread->LowerIrql(old_irql);
-
-  // Unlock.
-  xe::atomic_dec(lock);
 }
 
-void KfReleaseSpinLock_entry(lpdword_t lock_ptr, dword_t old_irql) {
-  auto lock = reinterpret_cast<uint32_t*>(lock_ptr.host_address());
-  xeKeKfReleaseSpinLock(lock, old_irql);
+void KfReleaseSpinLock_entry(lpdword_t lock_ptr, dword_t old_irql,
+                             ppc_context_t& ppc_ctx) {
+  assert_true(*lock_ptr == static_cast<uint32_t>(ppc_ctx->r[13]));
+
+  *lock_ptr = 0;
+  if (old_irql >= 2) {
+    return;
+  }
+  // Restore IRQL.
+  XThread* thread = XThread::GetCurrentThread();
+  thread->LowerIrql(old_irql);
 }
 DECLARE_XBOXKRNL_EXPORT2(KfReleaseSpinLock, kThreading, kImplemented,
                          kHighFrequency);
 // todo: this is not accurate
-void KeAcquireSpinLockAtRaisedIrql_entry(lpdword_t lock_ptr) {
+void KeAcquireSpinLockAtRaisedIrql_entry(lpdword_t lock_ptr,
+                                         ppc_context_t& ppc_ctx) {
   // Lock.
   auto lock = reinterpret_cast<uint32_t*>(lock_ptr.host_address());
+  // must not be our own thread
+  assert_true(*lock_ptr != static_cast<uint32_t>(ppc_ctx->r[13]));
+
   PrefetchForCAS(lock);
-  while (!xe::atomic_cas(0, 1, lock)) {
+  while (!xe::atomic_cas(0, static_cast<uint32_t>(ppc_ctx->r[13]), lock)) {
 #if XE_ARCH_AMD64 == 1
     // todo: this is just a nop if they don't have SMT, which is not great
     // either...

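The rewritten release path drops the lock with a plain store of 0 (replacing the old xe::atomic_dec) and only lowers the IRQL when the saved value was below DISPATCH_LEVEL (2). A hedged sketch of that ordering; lower_irql here is a placeholder for XThread::GetCurrentThread()->LowerIrql(), not a real API:

#include <atomic>
#include <cstdint>

constexpr uint32_t kDispatchLevel = 2;

// Sketch of the release ordering used above: clear the lock word first,
// then restore the IRQL only if it was previously below DISPATCH_LEVEL.
inline void ReleaseSpinLockAndRestoreIrql(std::atomic<uint32_t>* lock,
                                          uint32_t old_irql,
                                          void (*lower_irql)(uint32_t)) {
  lock->store(0, std::memory_order_release);  // *lock = 0
  if (old_irql >= kDispatchLevel) {
    return;  // stayed at or above DISPATCH_LEVEL; nothing to restore
  }
  lower_irql(old_irql);  // restore the saved IRQL
}
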
@@ -1017,11 +1035,13 @@ void KeAcquireSpinLockAtRaisedIrql_entry(lpdword_t lock_ptr) {
 DECLARE_XBOXKRNL_EXPORT3(KeAcquireSpinLockAtRaisedIrql, kThreading,
                          kImplemented, kBlocking, kHighFrequency);
 
-dword_result_t KeTryToAcquireSpinLockAtRaisedIrql_entry(lpdword_t lock_ptr) {
+dword_result_t KeTryToAcquireSpinLockAtRaisedIrql_entry(
+    lpdword_t lock_ptr, ppc_context_t& ppc_ctx) {
   // Lock.
   auto lock = reinterpret_cast<uint32_t*>(lock_ptr.host_address());
+  assert_true(*lock_ptr != static_cast<uint32_t>(ppc_ctx->r[13]));
   PrefetchForCAS(lock);
-  if (!xe::atomic_cas(0, 1, lock)) {
+  if (!xe::atomic_cas(0, static_cast<uint32_t>(ppc_ctx->r[13]), lock)) {
     return 0;
   }
   return 1;

@@ -1029,10 +1049,12 @@ dword_result_t KeTryToAcquireSpinLockAtRaisedIrql_entry(lpdword_t lock_ptr) {
 DECLARE_XBOXKRNL_EXPORT4(KeTryToAcquireSpinLockAtRaisedIrql, kThreading,
                          kImplemented, kBlocking, kHighFrequency, kSketchy);
 
-void KeReleaseSpinLockFromRaisedIrql_entry(lpdword_t lock_ptr) {
+void KeReleaseSpinLockFromRaisedIrql_entry(lpdword_t lock_ptr,
+                                           ppc_context_t& ppc_ctx) {
   // Unlock.
+  assert_true(*lock_ptr == static_cast<uint32_t>(ppc_ctx->r[13]));
   auto lock = reinterpret_cast<uint32_t*>(lock_ptr.host_address());
-  xe::atomic_dec(lock);
+  *lock_ptr = 0;
 }
 DECLARE_XBOXKRNL_EXPORT2(KeReleaseSpinLockFromRaisedIrql, kThreading,
                          kImplemented, kHighFrequency);

@@ -1261,8 +1283,8 @@ void ExInitializeReadWriteLock_entry(pointer_t<X_ERWLOCK> lock_ptr) {
 }
 DECLARE_XBOXKRNL_EXPORT1(ExInitializeReadWriteLock, kThreading, kImplemented);
 
-void ExAcquireReadWriteLockExclusive_entry(pointer_t<X_ERWLOCK> lock_ptr) {
-  auto old_irql = xeKeKfAcquireSpinLock(&lock_ptr->spin_lock);
+void ExAcquireReadWriteLockExclusive_entry(pointer_t<X_ERWLOCK> lock_ptr, ppc_context_t& ppc_context) {
+  auto old_irql = xeKeKfAcquireSpinLock(&lock_ptr->spin_lock, ppc_context->r[13]);
 
   int32_t lock_count = ++lock_ptr->lock_count;
   if (!lock_count) {

@@ -1279,8 +1301,9 @@ DECLARE_XBOXKRNL_EXPORT2(ExAcquireReadWriteLockExclusive, kThreading,
                          kImplemented, kBlocking);
 
 dword_result_t ExTryToAcquireReadWriteLockExclusive_entry(
-    pointer_t<X_ERWLOCK> lock_ptr) {
-  auto old_irql = xeKeKfAcquireSpinLock(&lock_ptr->spin_lock);
+    pointer_t<X_ERWLOCK> lock_ptr, ppc_context_t& ppc_context) {
+  auto old_irql =
+      xeKeKfAcquireSpinLock(&lock_ptr->spin_lock, ppc_context->r[13]);
 
   uint32_t result;
   if (lock_ptr->lock_count < 0) {

@@ -1296,8 +1319,9 @@ dword_result_t ExTryToAcquireReadWriteLockExclusive_entry(
 DECLARE_XBOXKRNL_EXPORT1(ExTryToAcquireReadWriteLockExclusive, kThreading,
                          kImplemented);
 
-void ExAcquireReadWriteLockShared_entry(pointer_t<X_ERWLOCK> lock_ptr) {
-  auto old_irql = xeKeKfAcquireSpinLock(&lock_ptr->spin_lock);
+void ExAcquireReadWriteLockShared_entry(pointer_t<X_ERWLOCK> lock_ptr,
+                                        ppc_context_t& ppc_context) {
+  auto old_irql = xeKeKfAcquireSpinLock(&lock_ptr->spin_lock, ppc_context->r[13]);
 
   int32_t lock_count = ++lock_ptr->lock_count;
   if (!lock_count ||

@@ -1316,8 +1340,9 @@ DECLARE_XBOXKRNL_EXPORT2(ExAcquireReadWriteLockShared, kThreading, kImplemented,
                          kBlocking);
 
 dword_result_t ExTryToAcquireReadWriteLockShared_entry(
-    pointer_t<X_ERWLOCK> lock_ptr) {
-  auto old_irql = xeKeKfAcquireSpinLock(&lock_ptr->spin_lock);
+    pointer_t<X_ERWLOCK> lock_ptr, ppc_context_t& ppc_context) {
+  auto old_irql =
+      xeKeKfAcquireSpinLock(&lock_ptr->spin_lock, ppc_context->r[13]);
 
   uint32_t result;
   if (lock_ptr->lock_count < 0 ||

@@ -1335,8 +1360,10 @@ dword_result_t ExTryToAcquireReadWriteLockShared_entry(
 DECLARE_XBOXKRNL_EXPORT1(ExTryToAcquireReadWriteLockShared, kThreading,
                          kImplemented);
 
-void ExReleaseReadWriteLock_entry(pointer_t<X_ERWLOCK> lock_ptr) {
-  auto old_irql = xeKeKfAcquireSpinLock(&lock_ptr->spin_lock);
+void ExReleaseReadWriteLock_entry(pointer_t<X_ERWLOCK> lock_ptr,
+                                  ppc_context_t& ppc_context) {
+  auto old_irql =
+      xeKeKfAcquireSpinLock(&lock_ptr->spin_lock, ppc_context->r[13]);
 
   int32_t lock_count = --lock_ptr->lock_count;
 

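All of the X_ERWLOCK entries above share one shape: acquire the embedded spin lock (now tagged with r13), adjust the signed lock_count, and decide from its value whether the caller got the lock or must wait. A greatly simplified reader/writer model of that structure; it is not the exact X_ERWLOCK protocol (which also parks waiters on kernel events), and all names here are illustrative:

#include <cstdint>
#include <mutex>

// std::mutex stands in for the xeKeKfAcquireSpinLock/ReleaseSpinLock pair
// that guards the counters in the real implementation.
struct TinyRwState {
  std::mutex guard;
  int32_t readers = 0;   // > 0: shared holders
  bool writer = false;   // exclusive holder present

  bool TryAcquireShared() {
    std::lock_guard<std::mutex> lk(guard);
    if (writer) return false;
    ++readers;
    return true;
  }
  bool TryAcquireExclusive() {
    std::lock_guard<std::mutex> lk(guard);
    if (writer || readers) return false;
    writer = true;
    return true;
  }
  void Release() {
    std::lock_guard<std::mutex> lk(guard);
    if (writer) writer = false; else --readers;
  }
};
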
@@ -100,7 +100,7 @@ struct X_KTHREAD {
   uint8_t unk_58[0x4];            // 0x58
   xe::be<uint32_t> stack_base;    // 0x5C
   xe::be<uint32_t> stack_limit;   // 0x60
-  uint8_t unk_64[0x4];            // 0x64
+  xe::be<uint32_t> stack_kernel;  // 0x64
   xe::be<uint32_t> tls_address;   // 0x68
   uint8_t unk_6C;                 // 0x6C
   uint8_t unk_6D[0x7];            // 0x6D

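The X_KTHREAD edit only turns four bytes of unknown padding at offset 0x64 into a typed stack_kernel field, so the neighbouring offsets (0x5C stack_base through 0x68 tls_address) are unchanged. A sketch of pinning such layout assumptions with static_assert; the fragment models xe::be<uint32_t> as a plain 32-bit value and is illustrative only:

#include <cstddef>
#include <cstdint>

// Minimal fragment mirroring the commented offsets above.
struct KThreadFragment {
  uint8_t pad_00[0x5C];   // ...fields up to 0x5C
  uint32_t stack_base;    // 0x5C
  uint32_t stack_limit;   // 0x60
  uint32_t stack_kernel;  // 0x64 (was unk_64[0x4])
  uint32_t tls_address;   // 0x68
  uint8_t unk_6C;         // 0x6C
};

static_assert(offsetof(KThreadFragment, stack_kernel) == 0x64,
              "stack_kernel must stay at offset 0x64");
static_assert(offsetof(KThreadFragment, tls_address) == 0x68,
              "tls_address must stay at offset 0x68");
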
@@ -1 +1 @@
-Subproject commit 2e3c6991d33811878ebcc0839d3815850d129b3a
+Subproject commit b2b8cf2f50a449720874f43445e23d75b77dcc43