From 6a08208dc8832c0fdcdd22afe9673c82855e2ae1 Mon Sep 17 00:00:00 2001
From: disjtqz
Date: Sat, 14 Oct 2023 09:08:02 -0400
Subject: [PATCH] Proper misalignment for AllocatePool, add guest object table

---
 src/xenia/base/atomic.h                      |  11 +
 src/xenia/base/math.h                        |  43 ++-
 src/xenia/cpu/ppc/ppc_context.h              |  14 +-
 src/xenia/cpu/processor.cc                   |  56 ++++
 src/xenia/cpu/processor.h                    |  13 +
 src/xenia/kernel/util/guest_object_table.cc  | 262 ++++++++++++++++++
 src/xenia/kernel/util/guest_object_table.h   |  52 ++++
 src/xenia/kernel/xboxkrnl/xboxkrnl_memory.cc |  82 ++++--
 src/xenia/kernel/xboxkrnl/xboxkrnl_memory.h  |   5 +
 .../kernel/xboxkrnl/xboxkrnl_threading.cc    |  13 +-
 src/xenia/memory.h                           |   7 +
 11 files changed, 513 insertions(+), 45 deletions(-)
 create mode 100644 src/xenia/kernel/util/guest_object_table.cc
 create mode 100644 src/xenia/kernel/util/guest_object_table.h

diff --git a/src/xenia/base/atomic.h b/src/xenia/base/atomic.h
index e34a6f1e6..1f37d085d 100644
--- a/src/xenia/base/atomic.h
+++ b/src/xenia/base/atomic.h
@@ -24,6 +24,17 @@ inline int32_t atomic_inc(volatile int32_t* value) {
 inline int32_t atomic_dec(volatile int32_t* value) {
   return _InterlockedDecrement(reinterpret_cast<volatile long*>(value));
 }
+inline int32_t atomic_or(volatile int32_t* value, int32_t nv) {
+  return _InterlockedOr(reinterpret_cast<volatile long*>(value), nv);
+}
+
+inline int32_t atomic_and(volatile int32_t* value, int32_t nv) {
+  return _InterlockedAnd(reinterpret_cast<volatile long*>(value), nv);
+}
+
+inline int32_t atomic_xor(volatile int32_t* value, int32_t nv) {
+  return _InterlockedXor(reinterpret_cast<volatile long*>(value), nv);
+}
 
 inline int32_t atomic_exchange(int32_t new_value, volatile int32_t* value) {
   return _InterlockedExchange(reinterpret_cast<volatile long*>(value),
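
Note: the three new helpers mirror the existing _Interlocked* wrappers and only
cover the MSVC path of this header; the guest-facing callers added later in
this patch (Processor::GuestAtomicOr32 and friends) byte-swap their operands
before calling them. A minimal usage sketch, assuming xe::byte_swap from
xenia/base/byte_order.h; the storage variable is purely illustrative:

    // Set a flag bit in a word that guest code reads big-endian: swap the
    // mask once, then let the host-side atomic OR work on the raw bytes.
    volatile int32_t guest_word = 0;  // stands in for translated guest memory
    void set_guest_flag(uint32_t mask_in_host_order) {
      xe::atomic_or(&guest_word,
                    static_cast<int32_t>(xe::byte_swap(mask_in_host_order)));
    }
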
diff --git a/src/xenia/base/math.h b/src/xenia/base/math.h
index 4956f4415..0b2e4b536 100644
--- a/src/xenia/base/math.h
+++ b/src/xenia/base/math.h
@@ -45,17 +45,19 @@ constexpr bool is_pow2(T value) {
   return (value & (value - 1)) == 0;
 }
 /*
-  Use this in place of the shift + and not sequence that is being used currently in bit iteration code. This is more efficient
-  because it does not introduce a dependency on to the previous bit scanning operation. The shift and not sequence does get translated to a single instruction (the bit test and reset instruction),
-  but this code can be executed alongside the scan
+  Use this in place of the shift + and not sequence that is being used
+  currently in bit iteration code. This is more efficient because it does not
+  introduce a dependency on to the previous bit scanning operation. The shift
+  and not sequence does get translated to a single instruction (the bit test
+  and reset instruction), but this code can be executed alongside the scan
 */
-template<typename T>
+template <typename T>
 constexpr T clear_lowest_bit(T value) {
   static_assert(std::is_integral_v<T>);
   return (value - static_cast<T>(1)) & value;
 }
 
-  // Rounds up the given value to the given alignment.
+// Rounds up the given value to the given alignment.
 template <typename T>
 constexpr T align(T value, T alignment) {
   return (value + alignment - 1) & ~(alignment - 1);
@@ -319,7 +321,14 @@ inline T log2_ceil(T v) {
 
 template <typename T>
 inline T rotate_left(T v, uint8_t sh) {
-  return (T(v) << sh) | (T(v) >> ((sizeof(T) * 8) - sh));
+  return (T(v) << sh) | (T(v) >> ((sizeof(T) * CHAR_BIT) - sh));
+}
+template <typename T>
+inline T rotate_right(T v, uint8_t sh) {
+  constexpr unsigned char SHIFT_MASK = (CHAR_BIT * sizeof(T)) - 1;
+  uint8_t rshr = sh & SHIFT_MASK;
+  uint8_t lshl = static_cast<uint8_t>(-static_cast<int8_t>(sh)) & SHIFT_MASK;
+  return (v >> rshr) | (v << lshl);
 }
 #if XE_PLATFORM_WIN32
 template <>
@@ -338,6 +347,22 @@ template <>
 inline uint64_t rotate_left(uint64_t v, uint8_t sh) {
   return _rotl64(v, sh);
 }
+template <>
+inline uint8_t rotate_right(uint8_t v, uint8_t sh) {
+  return _rotr8(v, sh);
+}
+template <>
+inline uint16_t rotate_right(uint16_t v, uint8_t sh) {
+  return _rotr16(v, sh);
+}
+template <>
+inline uint32_t rotate_right(uint32_t v, uint8_t sh) {
+  return _rotr(v, sh);
+}
+template <>
+inline uint64_t rotate_right(uint64_t v, uint8_t sh) {
+  return _rotr64(v, sh);
+}
 #endif  // XE_PLATFORM_WIN32
 
 template <typename T>
@@ -410,7 +435,6 @@ static float ArchReciprocal(float den) {
   return _mm_cvtss_f32(_mm_rcp_ss(_mm_set_ss(den)));
 }
 
-
 using ArchFloatMask = __m128;
 
 XE_FORCEINLINE
@@ -437,7 +461,7 @@ static uint32_t ArchFloatMaskSignbit(ArchFloatMask x) {
 }
 
 constexpr ArchFloatMask floatmask_zero{.0f};
-
+
 #else
 static float ArchMin(float x, float y) { return std::min(x, y); }
 static float ArchMax(float x, float y) { return std::max(x, y); }
@@ -464,7 +488,6 @@ static ArchFloatMask ArchANDFloatMask(ArchFloatMask x, ArchFloatMask y) {
 }
 
 constexpr ArchFloatMask floatmask_zero = 0;
-
 XE_FORCEINLINE
 static uint32_t ArchFloatMaskSignbit(ArchFloatMask x) { return x >> 31; }
 
@@ -634,7 +657,7 @@ static constexpr uint32_t PregenerateUint32Div(uint32_t _denom,
 }
 
 static constexpr uint32_t ApplyUint32Div(uint32_t num, uint32_t mul,
-                                        uint32_t extradata) {
+                                         uint32_t extradata) {
   IDivExtraInfo extra{};
 
   extra.value_ = extradata;
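
Note: rotate_right masks the shift amount so that sh == 0 (or sh >= the bit
width) never produces an undefined shift: for a 32-bit T, SHIFT_MASK is 31 and
(-sh) & 31 equals (32 - sh) % 32 without a branch. A small self-check of that
property:

    #include <cassert>
    #include <cstdint>
    int main() {
      assert(xe::rotate_right<uint32_t>(0x80000001u, 1) == 0xC0000000u);
      // a shift of zero is the identity, with no UB from shifting by 32
      assert(xe::rotate_right<uint32_t>(0xDEADBEEFu, 0) == 0xDEADBEEFu);
      return 0;
    }
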
diff --git a/src/xenia/cpu/ppc/ppc_context.h b/src/xenia/cpu/ppc/ppc_context.h
index 37c14d10e..55a44fc22 100644
--- a/src/xenia/cpu/ppc/ppc_context.h
+++ b/src/xenia/cpu/ppc/ppc_context.h
@@ -432,9 +432,11 @@ typedef struct alignas(64) PPCContext_s {
 
   template <typename T>
   inline T TranslateVirtual(uint32_t guest_address) XE_RESTRICT const {
+    static_assert(std::is_pointer_v<T>);
 #if XE_PLATFORM_WIN32 == 1
     uint8_t* host_address = virtual_membase + guest_address;
-    if (guest_address >= static_cast<uint32_t>(reinterpret_cast<uintptr_t>(this))) {
+    if (guest_address >=
+        static_cast<uint32_t>(reinterpret_cast<uintptr_t>(this))) {
       host_address += 0x1000;
     }
     return reinterpret_cast<T>(host_address);
@@ -443,11 +445,17 @@ typedef struct alignas(64) PPCContext_s {
 #endif
   }
 
-  //for convenience in kernel functions, version that auto narrows to uint32
+  template <typename T>
+  inline xe::be<T>* TranslateVirtualBE(uint32_t guest_address)
+      XE_RESTRICT const {
+    static_assert(!std::is_pointer_v<T> &&
+                  sizeof(T) > 1);  // maybe assert is_integral?
+    return TranslateVirtual<xe::be<T>*>(guest_address);
+  }
+  // for convenience in kernel functions, version that auto narrows to uint32
   template <typename T>
   inline T TranslateVirtualGPR(uint64_t guest_address) XE_RESTRICT const {
     return TranslateVirtual<T>(static_cast<uint32_t>(guest_address));
-
   }
 
   template <typename T>
diff --git a/src/xenia/cpu/processor.cc b/src/xenia/cpu/processor.cc
index c3e46af2a..ce69c6b70 100644
--- a/src/xenia/cpu/processor.cc
+++ b/src/xenia/cpu/processor.cc
@@ -1291,6 +1291,62 @@ uint32_t Processor::CalculateNextGuestInstruction(ThreadDebugInfo* thread_info,
     return current_pc + 4;
   }
 }
+uint32_t Processor::GuestAtomicIncrement32(ppc::PPCContext* context,
+                                           uint32_t guest_address) {
+  uint32_t* host_address = context->TranslateVirtual<uint32_t*>(guest_address);
+  uint32_t result;
+  while (true) {
+    result = xe::byte_swap(*host_address);
+    // todo: should call a processor->backend function that acquires a
+    // reservation instead of using host atomics
+    if (xe::atomic_cas(xe::byte_swap(result), xe::byte_swap(result + 1),
+                       host_address)) {
+      break;
+    }
+  }
+  return result;
+}
+uint32_t Processor::GuestAtomicDecrement32(ppc::PPCContext* context,
+                                           uint32_t guest_address) {
+  uint32_t* host_address = context->TranslateVirtual<uint32_t*>(guest_address);
+
+  uint32_t result;
+  while (true) {
+    result = xe::byte_swap(*host_address);
+    // todo: should call a processor->backend function that acquires a
+    // reservation instead of using host atomics
+    if (xe::atomic_cas(xe::byte_swap(result), xe::byte_swap(result - 1),
+                       host_address)) {
+      break;
+    }
+  }
+  return result;
+}
+
+uint32_t Processor::GuestAtomicOr32(ppc::PPCContext* context,
+                                    uint32_t guest_address, uint32_t mask) {
+  return xe::atomic_or(
+      context->TranslateVirtual<volatile int32_t*>(guest_address),
+      xe::byte_swap(mask));
+}
+uint32_t Processor::GuestAtomicXor32(ppc::PPCContext* context,
+                                     uint32_t guest_address, uint32_t mask) {
+  return xe::atomic_xor(
+      context->TranslateVirtual<volatile int32_t*>(guest_address),
+      xe::byte_swap(mask));
+}
+uint32_t Processor::GuestAtomicAnd32(ppc::PPCContext* context,
+                                     uint32_t guest_address, uint32_t mask) {
+  return xe::atomic_and(
+      context->TranslateVirtual<volatile int32_t*>(guest_address),
+      xe::byte_swap(mask));
+}
+
+bool Processor::GuestAtomicCAS32(ppc::PPCContext* context, uint32_t old_value,
+                                 uint32_t new_value, uint32_t guest_address) {
+  return xe::atomic_cas(
+      xe::byte_swap(old_value), xe::byte_swap(new_value),
+      context->TranslateVirtual<volatile uint32_t*>(guest_address));
+}
 
 }  // namespace cpu
 }  // namespace xe
diff --git a/src/xenia/cpu/processor.h b/src/xenia/cpu/processor.h
index c0985672b..782d7e52b 100644
--- a/src/xenia/cpu/processor.h
+++ b/src/xenia/cpu/processor.h
@@ -184,6 +184,19 @@ class Processor {
   // Returns the new PC guest address.
   uint32_t StepToGuestSafePoint(uint32_t thread_id, bool ignore_host = false);
 
+  uint32_t GuestAtomicIncrement32(ppc::PPCContext* context,
+                                  uint32_t guest_address);
+  uint32_t GuestAtomicDecrement32(ppc::PPCContext* context,
+                                  uint32_t guest_address);
+  uint32_t GuestAtomicOr32(ppc::PPCContext* context, uint32_t guest_address,
+                           uint32_t mask);
+  uint32_t GuestAtomicXor32(ppc::PPCContext* context, uint32_t guest_address,
+                            uint32_t mask);
+  uint32_t GuestAtomicAnd32(ppc::PPCContext* context, uint32_t guest_address,
+                            uint32_t mask);
+  bool GuestAtomicCAS32(ppc::PPCContext* context, uint32_t old_value,
+                        uint32_t new_value, uint32_t guest_address);
+
  public:
   // TODO(benvanik): hide.
   void OnThreadCreated(uint32_t handle, ThreadState* thread_state,
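
Note: all six helpers take and return values in host byte order and do the
swapping internally, so callers never handle raw guest bytes. The threading
change later in this patch uses GuestAtomicCAS32 exactly this way; a sketch of
that calling pattern (names are illustrative):

    // Try to take a guest spinlock: 0 (unowned) -> r13 (owning PRCB).
    // Both the comparand and the exchange value are passed in host order.
    bool try_acquire(xe::cpu::ppc::PPCContext* ctx, uint32_t lock_guest_addr) {
      return ctx->processor->GuestAtomicCAS32(
          ctx, 0, static_cast<uint32_t>(ctx->r[13]), lock_guest_addr);
    }
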
diff --git a/src/xenia/kernel/util/guest_object_table.cc b/src/xenia/kernel/util/guest_object_table.cc
new file mode 100644
index 000000000..843643e85
--- /dev/null
+++ b/src/xenia/kernel/util/guest_object_table.cc
@@ -0,0 +1,262 @@
+/**
+ ******************************************************************************
+ * Xenia : Xbox 360 Emulator Research Project                                 *
+ ******************************************************************************
+ * Copyright 2023 Xenia Canary. All rights reserved.                          *
+ * Released under the BSD license - see LICENSE in the root for more details. *
+ ******************************************************************************
+ */
+
+#include "xenia/kernel/util/guest_object_table.h"
+
+#include "xenia/base/atomic.h"
+#include "xenia/cpu/processor.h"
+#include "xenia/kernel/xboxkrnl/xboxkrnl_memory.h"
+#include "xenia/kernel/xboxkrnl/xboxkrnl_threading.h"
+
+namespace xe {
+namespace kernel {
+namespace util {
+
+static constexpr uint32_t NUM_HANDLES_PER_BUCKET = 64;
+static constexpr uint32_t SIZE_PER_HANDLE_BUCKET =
+    sizeof(guest_handle_t) * NUM_HANDLES_PER_BUCKET;
+
+// every time we need to reallocate the list of buckets, we allocate an
+// additional BUCKET_SLOT_GROWTH slots
+static constexpr uint32_t BUCKET_SLOT_GROWTH = 8;
+
+// if set, the element is a reference to the next free slot, not an object
+static constexpr uint32_t ELEMENT_IS_FREE_FLAG = 1;
+static constexpr uint32_t HANDLE_MAX = 0xFFFFFF;
+
+static constexpr uint32_t HandleToBucketOffset(guest_handle_t handle) {
+  // yes, this does not divide by SIZE_PER_HANDLE_BUCKET, but the mask has the
+  // low 2 bits clear and we shr by 6 so its really handle >> 8
+  return (((handle & 0xFFFFFC) >> 6) & 0x3FFFFFC);
+}
+
+static constexpr uint32_t HandleToBucketElementOffset(guest_handle_t handle) {
+  return handle & 0xFC;
+}
+
+void InitializeNewHandleRange(X_HANDLE_TABLE* table, PPCContext* context,
+                              uint32_t bucket_base_handle,
+                              uint32_t new_bucket) {
+  uint32_t bucket_slot_addr =
+      HandleToBucketOffset(bucket_base_handle) + table->table_dynamic_buckets;
+
+  // insert the new bucket into its slot
+  *context->TranslateVirtualBE<uint32_t>(bucket_slot_addr) = new_bucket;
+
+  table->free_offset = bucket_base_handle;
+  table->highest_allocated_offset =
+      bucket_base_handle + SIZE_PER_HANDLE_BUCKET;
+
+  auto bucket = context->TranslateVirtualBE<uint32_t>(new_bucket);
+
+  /*
+    initialize each bucket slot with a handle to the next free slot
+    (bucket_handle_index+1) this is so we can read back the slot, update free
+    ptr to that, and then store an object in NewObjectHandle
+  */
+  for (uint32_t bucket_handle_index = 0;
+       bucket_handle_index < NUM_HANDLES_PER_BUCKET; ++bucket_handle_index) {
+    bucket[bucket_handle_index] = (bucket_base_handle | ELEMENT_IS_FREE_FLAG) +
+                                  ((bucket_handle_index + 1) * 4);
+  }
+}
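
Note: a worked example of the offset math above, which the comment in
HandleToBucketOffset alludes to. Take handle 0x10C: bits 8..23 select bucket
slot 1 (byte offset 4 in the bucket list), and bits 2..7 select element 3
(byte offset 0xC within that bucket). Both are compile-time checkable:

    static_assert(HandleToBucketOffset(0x10C) == 0x4);         // slot 1 * 4
    static_assert(HandleToBucketElementOffset(0x10C) == 0xC);  // entry 3 * 4
    // the two low bits are flag bits and never reach either offset
    static_assert(HandleToBucketOffset(0x10C | 1) == 0x4);
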
+
+bool GrowHandleTable(uint32_t table_ptr, PPCContext* context) {
+  X_HANDLE_TABLE* table =
+      context->TranslateVirtual<X_HANDLE_TABLE*>(table_ptr);
+
+  guest_handle_t new_bucket_handle_base = table->highest_allocated_offset;
+  if (new_bucket_handle_base >= HANDLE_MAX) {
+    return false;
+  }
+
+  uint32_t new_bucket = xboxkrnl::xeAllocatePoolTypeWithTag(
+      context, SIZE_PER_HANDLE_BUCKET, 'tHbO', table->unk_pool_arg_34);
+  if (!new_bucket) {
+    return false;
+  }
+  // this is exactly equal to (SIZE_PER_HANDLE_BUCKET *
+  // countof(table_static_buckets)) - 1
+  if ((new_bucket_handle_base & 0x7FF) != 0) {
+    InitializeNewHandleRange(table, context, new_bucket_handle_base,
+                             new_bucket);
+    return true;
+  }
+  if (new_bucket_handle_base) {
+    // bucket list realloc logic starts here
+    uint32_t new_dynamic_buckets = xboxkrnl::xeAllocatePoolTypeWithTag(
+        context,
+        sizeof(uint32_t) * ((new_bucket_handle_base / SIZE_PER_HANDLE_BUCKET) +
+                            BUCKET_SLOT_GROWTH),
+        'rHbO', table->unk_pool_arg_34);
+    if (new_dynamic_buckets) {
+      /*
+        copy old bucket list contents to new, larger bucket list
+      */
+      memcpy(context->TranslateVirtual<uint8_t*>(new_dynamic_buckets),
+             context->TranslateVirtual<uint8_t*>(table->table_dynamic_buckets),
+             sizeof(uint32_t) *
+                 (new_bucket_handle_base / SIZE_PER_HANDLE_BUCKET));
+
+      if (context->TranslateVirtualBE<uint32_t>(
+              table->table_dynamic_buckets) !=
+          &table->table_static_buckets[0]) {
+        xboxkrnl::xeFreePool(context, table->table_dynamic_buckets);
+      }
+      table->table_dynamic_buckets = new_dynamic_buckets;
+      InitializeNewHandleRange(table, context, new_bucket_handle_base,
+                               new_bucket);
+      return true;
+    }
+    xboxkrnl::xeFreePool(context, new_bucket);
+    return false;
+  }
+  table->table_dynamic_buckets =
+      table_ptr + offsetof(X_HANDLE_TABLE, table_static_buckets);
+  InitializeNewHandleRange(table, context, new_bucket_handle_base, new_bucket);
+  return true;
+}
+
+uint32_t NewObjectHandle(uint32_t table_guest, uint32_t object_guest,
+                         PPCContext* context) {
+  X_HANDLE_TABLE* table =
+      context->TranslateVirtual<X_HANDLE_TABLE*>(table_guest);
+
+  X_OBJECT_HEADER* object = context->TranslateVirtual<X_OBJECT_HEADER*>(
+      object_guest - sizeof(X_OBJECT_HEADER));
+
+  guest_handle_t new_handle;
+
+  xboxkrnl::xeKeKfAcquireSpinLock(context, &table->table_lock, false);
+  {
+    if (table->unk_36 ||
+        (table->free_offset == table->highest_allocated_offset &&
+         !GrowHandleTable(table_guest, context))) {
+      new_handle = 0;
+    } else {
+      guest_handle_t new_handle_offset = table->free_offset;
+      uint32_t bucket = *context->TranslateVirtualBE<uint32_t>(
+          HandleToBucketOffset(new_handle_offset) +
+          table->table_dynamic_buckets);
+      auto object_ptr_dest = context->TranslateVirtualBE<uint32_t>(
+          bucket + HandleToBucketElementOffset(new_handle_offset));
+
+      // see end of InitializeNewHandleRange, each slot contains the offset of
+      // the next free slot
+      uint32_t next_free_slot = *object_ptr_dest;
+
+      table->free_offset = next_free_slot & ~ELEMENT_IS_FREE_FLAG;
+      table->num_handles++;
+
+      // this object header field is not atomic, because we're already under
+      // the table lock whenever we make changes to it
+      ++object->handle_count;
+
+      *object_ptr_dest = object_guest;
+      new_handle = (static_cast<uint32_t>(table->handle_high_byte) << 24) |
+                   new_handle_offset;
+    }
+  }
+  xboxkrnl::xeKeKfReleaseSpinLock(context, &table->table_lock, 0, false);
+
+  return new_handle;
+}
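
Note: a standalone sketch of the free-list scheme NewObjectHandle relies on.
Every free slot stores the offset of the next free slot with
ELEMENT_IS_FREE_FLAG set, so allocation is pop-from-head and destruction
(below) is push-to-head. Plain host integers here; the real table stores
xe::be<uint32_t> in guest memory:

    #include <cassert>
    #include <cstdint>
    int main() {
      uint32_t slots[4] = {0x04 | 1, 0x08 | 1, 0x0C | 1, 0x10 | 1};
      uint32_t free_offset = 0;
      // allocate: read the slot, advance the free head, store the object
      uint32_t handle = free_offset;
      free_offset = slots[handle / 4] & ~1u;
      slots[handle / 4] = 0xCAFE0000;  // "object pointer"
      assert(handle == 0x00 && free_offset == 0x04);
      // destroy: thread the slot back onto the head of the free list
      slots[handle / 4] = free_offset | 1;
      free_offset = handle;
      assert(free_offset == 0x00 && slots[0] == (0x04 | 1));
      return 0;
    }
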
+
+uint32_t DestroyObjectHandle(uint32_t table_guest, guest_handle_t handle,
+                             PPCContext* context) {
+  X_HANDLE_TABLE* table =
+      context->TranslateVirtual<X_HANDLE_TABLE*>(table_guest);
+
+  xboxkrnl::xeKeKfAcquireSpinLock(context, &table->table_lock, false);
+  unsigned int result = 0;
+  {
+    if ((handle >> 24) != table->handle_high_byte) {
+      xenia_assert(false);
+    } else {
+      uint32_t handle_sans_flags_and_high = handle & 0xFFFFFC;
+
+      if (handle_sans_flags_and_high < table->highest_allocated_offset) {
+        uint32_t bucket_for_handle = *context->TranslateVirtualBE<uint32_t>(
+            HandleToBucketOffset(handle_sans_flags_and_high) +
+            table->table_dynamic_buckets);
+
+        uint32_t bucket_element_guest_ptr =
+            bucket_for_handle + HandleToBucketElementOffset(handle);
+        if (bucket_element_guest_ptr) {
+          auto bucket_element_ptr =
+              context->TranslateVirtualBE<uint32_t>(bucket_element_guest_ptr);
+
+          uint32_t bucket_element = *bucket_element_ptr;
+          if ((bucket_element & ELEMENT_IS_FREE_FLAG) == 0) {
+            result = bucket_element & ~2;
+            *bucket_element_ptr = table->free_offset | ELEMENT_IS_FREE_FLAG;
+            table->free_offset = handle_sans_flags_and_high;
+            table->num_handles--;
+          }
+        }
+      } else {
+        xenia_assert(false);
+      }
+    }
+  }
+
+  xboxkrnl::xeKeKfReleaseSpinLock(context, &table->table_lock, 0, false);
+
+  return result;
+}
+
+uint32_t LookupHandleUnlocked(X_HANDLE_TABLE* table, guest_handle_t handle,
+                              bool reference_object, PPCContext* context) {
+  uint32_t result_object = 0;
+
+  if ((handle >> 24) != table->handle_high_byte) {
+    return 0U;
+  }
+  if ((handle & 0xFFFFFC) >= table->highest_allocated_offset) {
+    return 0U;
+  }
+  uint32_t bucket_element_guest_ptr =
+      *context->TranslateVirtualBE<uint32_t>(HandleToBucketOffset(handle) +
+                                             table->table_dynamic_buckets) +
+      HandleToBucketElementOffset(handle);
+  if (bucket_element_guest_ptr != 0) {
+    uint32_t bucket_element =
+        *context->TranslateVirtualBE<uint32_t>(bucket_element_guest_ptr);
+    result_object = bucket_element & ~2U;
+
+    if ((bucket_element & ELEMENT_IS_FREE_FLAG) == 0) {
+      if (reference_object) {
+        X_OBJECT_HEADER* header = context->TranslateVirtual<X_OBJECT_HEADER*>(
+            result_object - sizeof(X_OBJECT_HEADER));
+
+        context->processor->GuestAtomicIncrement32(
+            context, context->HostToGuestVirtual(&header->pointer_count));
+      }
+    } else {
+      result_object = 0;
+    }
+  } else {
+    result_object = 0;
+  }
+  return result_object;
+}
+
+uint32_t LookupHandle(uint32_t table, guest_handle_t handle,
+                      uint32_t reference_object, PPCContext* context) {
+  X_HANDLE_TABLE* table_ptr =
+      context->TranslateVirtual<X_HANDLE_TABLE*>(table);
+  uint32_t old_irql =
+      xboxkrnl::xeKeKfAcquireSpinLock(context, &table_ptr->table_lock);
+
+  uint32_t result =
+      LookupHandleUnlocked(table_ptr, handle, reference_object, context);
+
+  xboxkrnl::xeKeKfReleaseSpinLock(context, &table_ptr->table_lock, old_irql);
+
+  return result;
+}
+
+}  // namespace util
+}  // namespace kernel
+}  // namespace xe
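
Note: a hypothetical end-to-end flow through this API (the table and object
addresses are illustrative; in practice they come from the kernel's own
object-creation paths):

    uint32_t handle = util::NewObjectHandle(table_guest, object_guest, ctx);
    if (handle) {
      // takes a pointer_count reference because the third argument is nonzero
      uint32_t obj = util::LookupHandle(table_guest, handle, 1, ctx);
      // ... use obj, then drop the reference via the object's own path ...
      util::DestroyObjectHandle(table_guest, handle, ctx);
    }
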
diff --git a/src/xenia/kernel/util/guest_object_table.h b/src/xenia/kernel/util/guest_object_table.h
new file mode 100644
index 000000000..c2995ef4c
--- /dev/null
+++ b/src/xenia/kernel/util/guest_object_table.h
@@ -0,0 +1,52 @@
+/**
+ ******************************************************************************
+ * Xenia : Xbox 360 Emulator Research Project                                 *
+ ******************************************************************************
+ * Copyright 2023 Xenia Canary. All rights reserved.                          *
+ * Released under the BSD license - see LICENSE in the root for more details. *
+ ******************************************************************************
+ */
+
+#ifndef XENIA_KERNEL_UTIL_GUEST_OBJECT_TABLE_H_
+#define XENIA_KERNEL_UTIL_GUEST_OBJECT_TABLE_H_
+
+#include "xenia/kernel/kernel_state.h"
+#include "xenia/xbox.h"
+
+namespace xe {
+namespace kernel {
+namespace util {
+// use this to make it clearer in the code whether a uint32_t is a handle or
+// not
+using guest_handle_t = uint32_t;
+// not normally api visible, but used so we can accurately recreate how the 360
+// os allocated handles
+struct X_HANDLE_TABLE {
+  xe::be<uint32_t> num_handles;
+  xe::be<uint32_t> free_offset;
+  xe::be<uint32_t> highest_allocated_offset;
+  xe::be<uint32_t> table_dynamic_buckets;
+  xe::be<uint32_t> table_static_buckets[8];
+  X_KSPINLOCK table_lock;
+  // used as unknown arg 3 to pool allocations
+  uint8_t unk_pool_arg_34;
+  uint8_t handle_high_byte;
+  uint8_t unk_36;
+  uint8_t unk_38;
+};
+
+static_assert_size(X_HANDLE_TABLE, 0x38);
+
+bool GrowHandleTable(uint32_t table_ptr, cpu::ppc::PPCContext* context);
+uint32_t NewObjectHandle(uint32_t table_guest, uint32_t object_guest,
+                         cpu::ppc::PPCContext* context);
+uint32_t DestroyObjectHandle(uint32_t table_guest, guest_handle_t handle,
+                             cpu::ppc::PPCContext* context);
+uint32_t LookupHandleUnlocked(X_HANDLE_TABLE* table, guest_handle_t handle,
+                              bool reference_object,
+                              cpu::ppc::PPCContext* context);
+uint32_t LookupHandle(uint32_t table, guest_handle_t handle,
+                      uint32_t reference_object,
+                      cpu::ppc::PPCContext* context);
+}  // namespace util
+}  // namespace kernel
+}  // namespace xe
+
+#endif  // XENIA_KERNEL_UTIL_GUEST_OBJECT_TABLE_H_
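
Note: the unk_* names appear to encode their byte offsets. Assuming
X_KSPINLOCK occupies 4 bytes, the layout pinned down by static_assert_size
works out as below (unk_38 actually lands at +0x37); these could be checked at
compile time:

    #include <cstddef>
    static_assert(offsetof(X_HANDLE_TABLE, table_static_buckets) == 0x10);
    static_assert(offsetof(X_HANDLE_TABLE, table_lock) == 0x30);
    static_assert(offsetof(X_HANDLE_TABLE, unk_pool_arg_34) == 0x34);
    static_assert(offsetof(X_HANDLE_TABLE, handle_high_byte) == 0x35);
    static_assert(offsetof(X_HANDLE_TABLE, unk_36) == 0x36);
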
diff --git a/src/xenia/kernel/xboxkrnl/xboxkrnl_memory.cc b/src/xenia/kernel/xboxkrnl/xboxkrnl_memory.cc
index c74fef18b..8cc80bc8f 100644
--- a/src/xenia/kernel/xboxkrnl/xboxkrnl_memory.cc
+++ b/src/xenia/kernel/xboxkrnl/xboxkrnl_memory.cc
@@ -14,9 +14,9 @@
 #include "xenia/base/math.h"
 #include "xenia/kernel/kernel_state.h"
 #include "xenia/kernel/util/shim_utils.h"
+#include "xenia/kernel/xboxkrnl/xboxkrnl_memory.h"
 #include "xenia/kernel/xboxkrnl/xboxkrnl_private.h"
 #include "xenia/xbox.h"
-#include "xenia/kernel/xboxkrnl/xboxkrnl_memory.h"
 
 DEFINE_bool(
     ignore_offset_for_ranged_allocations, false,
     "Allows to ignore 4k offset for physical allocations with provided range. "
@@ -380,10 +380,10 @@ dword_result_t NtAllocateEncryptedMemory_entry(dword_t unk, dword_t region_size,
 DECLARE_XBOXKRNL_EXPORT1(NtAllocateEncryptedMemory, kMemory, kImplemented);
 
 uint32_t xeMmAllocatePhysicalMemoryEx(uint32_t flags, uint32_t region_size,
-                                     uint32_t protect_bits,
-                                     uint32_t min_addr_range,
-                                     uint32_t max_addr_range,
-                                     uint32_t alignment) {
+                                      uint32_t protect_bits,
+                                      uint32_t min_addr_range,
+                                      uint32_t max_addr_range,
+                                      uint32_t alignment) {
   // Type will usually be 0 (user request?), where 1 and 2 are sometimes made
   // by D3D/etc.
@@ -463,7 +463,7 @@ dword_result_t MmAllocatePhysicalMemory_entry(dword_t flags,
                                               dword_t region_size,
                                               dword_t protect_bits) {
   return xeMmAllocatePhysicalMemoryEx(flags, region_size, protect_bits, 0,
-                                      0xFFFFFFFFu, 0);
+                                       0xFFFFFFFFu, 0);
 }
 DECLARE_XBOXKRNL_EXPORT1(MmAllocatePhysicalMemory, kMemory, kImplemented);
 
@@ -642,35 +642,64 @@ dword_result_t MmMapIoSpace_entry(dword_t unk0, lpvoid_t src_address,
 }
 DECLARE_XBOXKRNL_EXPORT1(MmMapIoSpace, kMemory, kImplemented);
 
-dword_result_t ExAllocatePoolTypeWithTag_entry(dword_t size, dword_t tag,
-                                               dword_t zero) {
-  uint32_t alignment = 8;
-  uint32_t adjusted_size = size;
-  if (adjusted_size < 4 * 1024) {
-    adjusted_size = xe::round_up(adjusted_size, 4 * 1024);
+struct X_POOL_ALLOC_HEADER {
+  uint8_t unk_0;
+  uint8_t unk_1;
+  uint8_t unk_2;  // set this to 170
+  uint8_t unk_3;
+  xe::be<uint32_t> tag;
+};
+
+uint32_t xeAllocatePoolTypeWithTag(PPCContext* context, uint32_t size,
+                                   uint32_t tag, uint32_t zero) {
+  if (size <= 0xFD8) {
+    uint32_t adjusted_size = size + sizeof(X_POOL_ALLOC_HEADER);
+
+    uint32_t addr =
+        kernel_state()->memory()->SystemHeapAlloc(adjusted_size, 64);
+
+    auto result_ptr = context->TranslateVirtual<X_POOL_ALLOC_HEADER*>(addr);
+    result_ptr->unk_2 = 170;
+    result_ptr->tag = tag;
+
+    return addr + sizeof(X_POOL_ALLOC_HEADER);
   } else {
-    alignment = 4 * 1024;
+    return kernel_state()->memory()->SystemHeapAlloc(size, 4096);
   }
+}
 
-  uint32_t addr =
-      kernel_state()->memory()->SystemHeapAlloc(adjusted_size, alignment);
-
-  return addr;
+dword_result_t ExAllocatePoolTypeWithTag_entry(dword_t size, dword_t tag,
+                                               dword_t zero,
+                                               const ppc_context_t& context) {
+  return xeAllocatePoolTypeWithTag(context, size, tag, zero);
 }
 DECLARE_XBOXKRNL_EXPORT1(ExAllocatePoolTypeWithTag, kMemory, kImplemented);
 
-dword_result_t ExAllocatePoolWithTag_entry(dword_t numbytes, dword_t tag) {
-  return ExAllocatePoolTypeWithTag_entry(numbytes, tag, 0);
+dword_result_t ExAllocatePoolWithTag_entry(dword_t numbytes, dword_t tag,
+                                           const ppc_context_t& context) {
+  return xeAllocatePoolTypeWithTag(context, numbytes, tag, 0);
 }
 DECLARE_XBOXKRNL_EXPORT1(ExAllocatePoolWithTag, kMemory, kImplemented);
 
-dword_result_t ExAllocatePool_entry(dword_t size) {
+dword_result_t ExAllocatePool_entry(dword_t size,
+                                    const ppc_context_t& context) {
   const uint32_t none = 0x656E6F4E;  // 'None'
-  return ExAllocatePoolTypeWithTag_entry(size, none, 0);
+  return xeAllocatePoolTypeWithTag(context, size, none, 0);
 }
 DECLARE_XBOXKRNL_EXPORT1(ExAllocatePool, kMemory, kImplemented);
 
-void ExFreePool_entry(lpvoid_t base_address) {
-  kernel_state()->memory()->SystemHeapFree(base_address);
+void xeFreePool(PPCContext* context, uint32_t base_address) {
+  auto memory = context->kernel_state->memory();
+  // if 4kb aligned, there is no pool header!
+  if ((base_address & (4096 - 1)) == 0) {
+    memory->SystemHeapFree(base_address);
+  } else {
+    memory->SystemHeapFree(base_address - sizeof(X_POOL_ALLOC_HEADER));
+  }
+}
+
+void ExFreePool_entry(lpvoid_t base_address, const ppc_context_t& context) {
+  xeFreePool(context, base_address.guest_address());
 }
 DECLARE_XBOXKRNL_EXPORT1(ExFreePool, kMemory, kImplemented);
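
Note: this is the "proper misalignment" of the patch title. Small allocations
(size <= 0xFD8) now carry an 8-byte X_POOL_ALLOC_HEADER, so the pointer handed
back is 8 past a 64-byte-aligned block instead of page-aligned, matching the
360 kernel's small-pool behavior, and xeFreePool can rely on alignment alone
to tell the two cases apart. A sketch of the address relationship it depends
on (addresses are illustrative):

    #include <cassert>
    #include <cstdint>
    int main() {
      uint32_t base = 0x40000040;  // 64-byte-aligned SystemHeapAlloc result
      uint32_t user = base + 8;    // what the guest receives
      assert((user & (4096 - 1)) != 0);  // never page-aligned -> has a header
      assert(user - 8 == base);          // header recovered by subtraction
      return 0;
    }
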
@@ -710,9 +739,7 @@ DECLARE_XBOXKRNL_EXPORT1(KeLockL2, kMemory, kStub);
 void KeUnlockL2_entry() {}
 DECLARE_XBOXKRNL_EXPORT1(KeUnlockL2, kMemory, kStub);
 
-dword_result_t MmCreateKernelStack_entry(dword_t stack_size, dword_t r4) {
-  assert_zero(r4);  // Unknown argument.
-
+uint32_t xeMmCreateKernelStack(uint32_t stack_size, uint32_t r4) {
   auto stack_size_aligned = (stack_size + 0xFFF) & 0xFFFFF000;
   uint32_t stack_alignment = (stack_size & 0xF000) ? 0x1000 : 0x10000;
 
@@ -725,6 +752,9 @@ dword_result_t MmCreateKernelStack_entry(dword_t stack_size, dword_t r4) {
       &stack_address);
   return stack_address + stack_size;
 }
+dword_result_t MmCreateKernelStack_entry(dword_t stack_size, dword_t r4) {
+  return xeMmCreateKernelStack(stack_size, r4);
+}
 DECLARE_XBOXKRNL_EXPORT1(MmCreateKernelStack, kMemory, kImplemented);
 
 dword_result_t MmDeleteKernelStack_entry(lpvoid_t stack_base,
diff --git a/src/xenia/kernel/xboxkrnl/xboxkrnl_memory.h b/src/xenia/kernel/xboxkrnl/xboxkrnl_memory.h
index ed7905b59..839aaabab 100644
--- a/src/xenia/kernel/xboxkrnl/xboxkrnl_memory.h
+++ b/src/xenia/kernel/xboxkrnl/xboxkrnl_memory.h
@@ -24,7 +24,12 @@ uint32_t xeMmAllocatePhysicalMemoryEx(uint32_t flags, uint32_t region_size,
                                       uint32_t min_addr_range,
                                       uint32_t max_addr_range,
                                       uint32_t alignment);
+uint32_t xeAllocatePoolTypeWithTag(PPCContext* context, uint32_t size,
+                                   uint32_t tag, uint32_t zero);
+void xeFreePool(PPCContext* context, uint32_t base_address);
+
+uint32_t xeMmCreateKernelStack(uint32_t size, uint32_t r4);
 
 }  // namespace xboxkrnl
 }  // namespace kernel
 }  // namespace xe
diff --git a/src/xenia/kernel/xboxkrnl/xboxkrnl_threading.cc b/src/xenia/kernel/xboxkrnl/xboxkrnl_threading.cc
index 49512dc23..511f25bd9 100644
--- a/src/xenia/kernel/xboxkrnl/xboxkrnl_threading.cc
+++ b/src/xenia/kernel/xboxkrnl/xboxkrnl_threading.cc
@@ -1132,8 +1132,8 @@ dword_result_t KfAcquireSpinLock_entry(pointer_t<X_KSPINLOCK> lock_ptr,
 DECLARE_XBOXKRNL_EXPORT3(KfAcquireSpinLock, kThreading, kImplemented,
                          kBlocking, kHighFrequency);
 
-void xeKeKfReleaseSpinLock(PPCContext* ctx, X_KSPINLOCK* lock, uint32_t old_irql,
-                           bool change_irql) {
+void xeKeKfReleaseSpinLock(PPCContext* ctx, X_KSPINLOCK* lock,
+                           uint32_t old_irql, bool change_irql) {
   assert_true(lock->prcb_of_owner == static_cast<uint32_t>(ctx->r[13]));
   // Unlock.
   lock->prcb_of_owner.value = 0;
@@ -1170,8 +1170,9 @@ dword_result_t KeTryToAcquireSpinLockAtRaisedIrql_entry(
   auto lock = reinterpret_cast<uint32_t*>(lock_ptr.host_address());
   assert_true(lock_ptr->prcb_of_owner != static_cast<uint32_t>(ppc_ctx->r[13]));
   PrefetchForCAS(lock);
-  if (!xe::atomic_cas(0, xe::byte_swap(static_cast<uint32_t>(ppc_ctx->r[13])),
-                      lock)) {
+  if (!ppc_ctx->processor->GuestAtomicCAS32(
+          ppc_ctx, 0, static_cast<uint32_t>(ppc_ctx->r[13]),
+          lock_ptr.guest_address())) {
     return 0;
   }
   return 1;
@@ -1361,8 +1362,8 @@ X_STATUS xeProcessUserApcs(PPCContext* ctx) {
   return alert_status;
 }
 
-static void YankApcList(PPCContext* ctx, X_KTHREAD* current_thread, unsigned apc_mode,
-                        bool rundown) {
+static void YankApcList(PPCContext* ctx, X_KTHREAD* current_thread,
+                        unsigned apc_mode, bool rundown) {
   uint32_t unlocked_irql =
       xeKeKfAcquireSpinLock(ctx, &current_thread->apc_lock);
 
diff --git a/src/xenia/memory.h b/src/xenia/memory.h
index b4195fcd4..185b46cd7 100644
--- a/src/xenia/memory.h
+++ b/src/xenia/memory.h
@@ -373,6 +373,13 @@ class Memory {
   inline T* TranslateVirtual(TypedGuestPointer<T> guest_address) {
     return TranslateVirtual<T*>(guest_address.m_ptr);
   }
+  template <typename T>
+  inline xe::be<T>* TranslateVirtualBE(uint32_t guest_address)
+      XE_RESTRICT const {
+    static_assert(!std::is_pointer_v<T> &&
+                  sizeof(T) > 1);  // maybe assert is_integral?
+    return TranslateVirtual<xe::be<T>*>(guest_address);
+  }
 
   // Base address of physical memory in the host address space.
   // This is often something like 0x200000000.
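
Note: Memory::TranslateVirtualBE mirrors the PPCContext helper added earlier
in this patch, for code that holds a Memory* rather than a guest context. A
usage sketch:

    // Read a big-endian guest uint32_t without spelling out xe::be<uint32_t>*.
    uint32_t read_guest_u32(xe::Memory* memory, uint32_t guest_addr) {
      return *memory->TranslateVirtualBE<uint32_t>(guest_addr);  // swaps on read
    }
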