Proper misalignment for AllocatePool, add guest object table

disjtqz 2023-10-14 09:08:02 -04:00 committed by Radosław Gliński
parent ee424ae14a
commit 6a08208dc8
11 changed files with 513 additions and 45 deletions

View File

@@ -24,6 +24,17 @@ inline int32_t atomic_inc(volatile int32_t* value) {
inline int32_t atomic_dec(volatile int32_t* value) {
return _InterlockedDecrement(reinterpret_cast<volatile long*>(value));
}
inline int32_t atomic_or(volatile int32_t* value, int32_t nv) {
return _InterlockedOr(reinterpret_cast<volatile long*>(value), nv);
}
inline int32_t atomic_and(volatile int32_t* value, int32_t nv) {
return _InterlockedAnd(reinterpret_cast<volatile long*>(value), nv);
}
inline int32_t atomic_xor(volatile int32_t* value, int32_t nv) {
return _InterlockedXor(reinterpret_cast<volatile long*>(value), nv);
}
inline int32_t atomic_exchange(int32_t new_value, volatile int32_t* value) {
return _InterlockedExchange(reinterpret_cast<volatile long*>(value),
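As a usage note for the three new wrappers: like the underlying _Interlocked* intrinsics, each returns the value the target held before the operation. A minimal sketch (values are made up, not from this commit):

#include <cstdint>
#include "xenia/base/atomic.h"

volatile int32_t flags = 0x3;

void atomic_flags_example() {
  int32_t before_or = xe::atomic_or(&flags, 0x4);     // returns 0x3, flags == 0x7
  int32_t before_and = xe::atomic_and(&flags, ~0x1);  // returns 0x7, flags == 0x6
  int32_t before_xor = xe::atomic_xor(&flags, 0x2);   // returns 0x6, flags == 0x4
  (void)before_or;
  (void)before_and;
  (void)before_xor;
}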

View File

@@ -45,17 +45,19 @@ constexpr bool is_pow2(T value) {
return (value & (value - 1)) == 0;
}
/*
Use this in place of the shift + and not sequence that is being used
currently in bit iteration code. This is more efficient because it does not
introduce a dependency on the previous bit scanning operation. The shift
and not sequence does get translated to a single instruction (the bit test
and reset instruction), but this code can be executed alongside the scan.
*/
template <typename T>
constexpr T clear_lowest_bit(T value) {
static_assert(std::is_integral_v<T>);
return (value - static_cast<T>(1)) & value;
}
// Rounds up the given value to the given alignment.
template <typename T>
constexpr T align(T value, T alignment) {
return (value + alignment - 1) & ~(alignment - 1);
@@ -319,7 +321,14 @@ inline T log2_ceil(T v) {
template <typename T>
inline T rotate_left(T v, uint8_t sh) {
return (T(v) << sh) | (T(v) >> ((sizeof(T) * CHAR_BIT) - sh));
}
template <typename T>
inline T rotate_right(T v, uint8_t sh) {
constexpr unsigned char SHIFT_MASK = (CHAR_BIT * sizeof(T)) - 1;
uint8_t rshr = sh & SHIFT_MASK;
// (-sh) & SHIFT_MASK is the complementary left-shift amount, mod bit width
uint8_t lshl = static_cast<uint8_t>(-static_cast<int8_t>(sh)) & SHIFT_MASK;
return (v >> rshr) | (v << lshl);
}
#if XE_PLATFORM_WIN32
template <>
@@ -338,6 +347,22 @@ template <>
inline uint64_t rotate_left(uint64_t v, uint8_t sh) {
return _rotl64(v, sh);
}
template <>
inline uint8_t rotate_right(uint8_t v, uint8_t sh) {
return _rotr8(v, sh);
}
template <>
inline uint16_t rotate_right(uint16_t v, uint8_t sh) {
return _rotr16(v, sh);
}
template <>
inline uint32_t rotate_right(uint32_t v, uint8_t sh) {
return _rotr(v, sh);
}
template <>
inline uint64_t rotate_right(uint64_t v, uint8_t sh) {
return _rotr64(v, sh);
}
#endif  // XE_PLATFORM_WIN32
template <typename T>
@@ -410,7 +435,6 @@ static float ArchReciprocal(float den) {
return _mm_cvtss_f32(_mm_rcp_ss(_mm_set_ss(den)));
}
using ArchFloatMask = __m128;
XE_FORCEINLINE
@@ -464,7 +488,6 @@ static ArchFloatMask ArchANDFloatMask(ArchFloatMask x, ArchFloatMask y) {
}
constexpr ArchFloatMask floatmask_zero = 0;
XE_FORCEINLINE
static uint32_t ArchFloatMaskSignbit(ArchFloatMask x) { return x >> 31; }
@@ -634,7 +657,7 @@ static constexpr uint32_t PregenerateUint32Div(uint32_t _denom, uint32_t& out_ex
}
static constexpr uint32_t ApplyUint32Div(uint32_t num, uint32_t mul,
uint32_t extradata) {
IDivExtraInfo extra{};
extra.value_ = extradata;
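A quick sanity check of the portable rotate_right fallback added above (hand-picked 8-bit values; the masked shifts make sh == 0 well-defined, which the naive (v >> sh) | (v << (8 - sh)) form is not):

#include <cassert>
#include <cstdint>
#include "xenia/base/math.h"

void rotate_right_example() {
  uint8_t v = 0b00000110;  // 6
  // rotating right by 1: the low zero bit wraps to the top
  assert(xe::rotate_right<uint8_t>(v, 1) == 0b00000011);
  // rotating right by 3: bits 1 and 2 wrap around to bits 6 and 7
  assert(xe::rotate_right<uint8_t>(v, 3) == 0b11000000);
  // a shift of 0 stays defined thanks to the SHIFT_MASK trick
  assert(xe::rotate_right<uint8_t>(v, 0) == v);
}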

View File

@@ -432,9 +432,11 @@ typedef struct alignas(64) PPCContext_s {
template <typename T = uint8_t*>
inline T TranslateVirtual(uint32_t guest_address) XE_RESTRICT const {
static_assert(std::is_pointer_v<T>);
#if XE_PLATFORM_WIN32 == 1
uint8_t* host_address = virtual_membase + guest_address;
if (guest_address >=
static_cast<uint32_t>(reinterpret_cast<uintptr_t>(this))) {
host_address += 0x1000;
}
return reinterpret_cast<T>(host_address);
@@ -443,11 +445,17 @@ typedef struct alignas(64) PPCContext_s {
#endif
}
template <typename T>
inline xe::be<T>* TranslateVirtualBE(uint32_t guest_address)
XE_RESTRICT const {
static_assert(!std::is_pointer_v<T> &&
sizeof(T) > 1); // maybe assert is_integral?
return TranslateVirtual<xe::be<T>*>(guest_address);
}
// for convenience in kernel functions, version that auto narrows to uint32
template <typename T = uint8_t*>
inline T TranslateVirtualGPR(uint64_t guest_address) XE_RESTRICT const {
return TranslateVirtual<T>(static_cast<uint32_t>(guest_address));
}
template <typename T>
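TranslateVirtualBE mostly saves explicit byte swaps in kernel code: the returned xe::be<T>* swaps on every load and store. A hypothetical helper (not part of this commit) to illustrate:

uint32_t ReadGuestU32(xe::cpu::ppc::PPCContext* context, uint32_t guest_ptr) {
  xe::be<uint32_t>* value = context->TranslateVirtualBE<uint32_t>(guest_ptr);
  return *value;  // the implicit conversion performs the byte swap
}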

View File

@@ -1291,6 +1291,62 @@ uint32_t Processor::CalculateNextGuestInstruction(ThreadDebugInfo* thread_info,
return current_pc + 4;
}
}
uint32_t Processor::GuestAtomicIncrement32(ppc::PPCContext* context,
uint32_t guest_address) {
uint32_t* host_address = context->TranslateVirtual<uint32_t*>(guest_address);
uint32_t result;
while (true) {
// read the raw big-endian value and swap it to the guest's logical value
result = xe::byte_swap(*host_address);
// todo: should call a processor->backend function that acquires a
// reservation instead of using host atomics
if (xe::atomic_cas(xe::byte_swap(result), xe::byte_swap(result + 1),
host_address)) {
break;
}
}
return result;
}
uint32_t Processor::GuestAtomicDecrement32(ppc::PPCContext* context,
uint32_t guest_address) {
uint32_t* host_address = context->TranslateVirtual<uint32_t*>(guest_address);
uint32_t result;
while (true) {
// read the raw big-endian value and swap it to the guest's logical value
result = xe::byte_swap(*host_address);
// todo: should call a processor->backend function that acquires a
// reservation instead of using host atomics
if (xe::atomic_cas(xe::byte_swap(result), xe::byte_swap(result - 1),
host_address)) {
break;
}
}
return result;
}
uint32_t Processor::GuestAtomicOr32(ppc::PPCContext* context,
uint32_t guest_address, uint32_t mask) {
return xe::atomic_or(
context->TranslateVirtual<volatile int32_t*>(guest_address),
xe::byte_swap(mask));
}
uint32_t Processor::GuestAtomicXor32(ppc::PPCContext* context,
uint32_t guest_address, uint32_t mask) {
return xe::atomic_xor(
context->TranslateVirtual<volatile int32_t*>(guest_address),
xe::byte_swap(mask));
}
uint32_t Processor::GuestAtomicAnd32(ppc::PPCContext* context,
uint32_t guest_address, uint32_t mask) {
return xe::atomic_and(
context->TranslateVirtual<volatile int32_t*>(guest_address),
xe::byte_swap(mask));
}
bool Processor::GuestAtomicCAS32(ppc::PPCContext* context, uint32_t old_value,
uint32_t new_value, uint32_t guest_address) {
return xe::atomic_cas(xe::byte_swap(old_value), xe::byte_swap(new_value),
context->TranslateVirtual<uint32_t*>(guest_address));
}
}  // namespace cpu
}  // namespace xe
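The swap discipline in these helpers is easy to get backwards: guest memory is big-endian, the host is little-endian, and xe::atomic_cas compares raw bytes, so both the comparand and the exchange value must be raw big-endian encodings while the value returned to the caller is the logical one. A hand-worked sketch (assumed values; guest_counter stands in for translated guest memory):

void guest_cas_example() {
  uint32_t guest_counter = 0x01000000;  // raw big-endian encoding of 1
  uint32_t logical = xe::byte_swap(guest_counter);  // 0x00000001
  // increment: compare against the raw encoding of `logical`, store the raw
  // encoding of `logical + 1`
  xe::atomic_cas(xe::byte_swap(logical), xe::byte_swap(logical + 1),
                 &guest_counter);
  // guest_counter is now 0x02000000, the big-endian encoding of 2
}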

View File

@@ -184,6 +184,19 @@ class Processor {
// Returns the new PC guest address.
uint32_t StepToGuestSafePoint(uint32_t thread_id, bool ignore_host = false);
uint32_t GuestAtomicIncrement32(ppc::PPCContext* context,
uint32_t guest_address);
uint32_t GuestAtomicDecrement32(ppc::PPCContext* context,
uint32_t guest_address);
uint32_t GuestAtomicOr32(ppc::PPCContext* context, uint32_t guest_address,
uint32_t mask);
uint32_t GuestAtomicXor32(ppc::PPCContext* context, uint32_t guest_address,
uint32_t mask);
uint32_t GuestAtomicAnd32(ppc::PPCContext* context, uint32_t guest_address,
uint32_t mask);
bool GuestAtomicCAS32(ppc::PPCContext* context, uint32_t old_value,
uint32_t new_value, uint32_t guest_address);
public:
// TODO(benvanik): hide.
void OnThreadCreated(uint32_t handle, ThreadState* thread_state,

View File

@@ -0,0 +1,262 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2023 Xenia Canary. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#include "xenia/kernel/util/guest_object_table.h"
#include "xenia/base/atomic.h"
#include "xenia/cpu/processor.h"
#include "xenia/kernel/xboxkrnl/xboxkrnl_memory.h"
#include "xenia/kernel/xboxkrnl/xboxkrnl_threading.h"
namespace xe {
namespace kernel {
namespace util {
static constexpr uint32_t NUM_HANDLES_PER_BUCKET = 64;
static constexpr uint32_t SIZE_PER_HANDLE_BUCKET =
sizeof(guest_handle_t) * NUM_HANDLES_PER_BUCKET;
// every time we need to reallocate the list of buckets, we allocate an
// additional BUCKET_SLOT_GROWTH slots
static constexpr uint32_t BUCKET_SLOT_GROWTH = 8;
// if set, the element is a reference to the next free slot, not an object
static constexpr uint32_t ELEMENT_IS_FREE_FLAG = 1;
static constexpr uint32_t HANDLE_MAX = 0xFFFFFF;
static constexpr uint32_t HandleToBucketOffset(guest_handle_t handle) {
// yes, this does not divide by SIZE_PER_HANDLE_BUCKET, but the mask has the
// low 2 bits clear and we shr by 6, so it's really (handle >> 8) * 4: the
// byte offset of the bucket's pointer slot
return (((handle & 0xFFFFFC) >> 6) & 0x3FFFFFC);
}
static constexpr uint32_t HandleToBucketElementOffset(guest_handle_t handle) {
return handle & 0xFC;
}
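To make the bit-twiddling concrete, a hand-worked handle (the value is chosen for illustration): each bucket spans 0x100 of handle space (64 handles * 4 bytes), and bucket pointers are themselves 4 bytes wide.

// handle 0x214 -> bucket index 2, element index 5 within that bucket
static_assert(HandleToBucketOffset(0x214) == 2 * sizeof(uint32_t),
              "byte offset of the bucket's pointer slot");
static_assert(HandleToBucketElementOffset(0x214) == 5 * sizeof(uint32_t),
              "byte offset of the element within the bucket");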
void InitializeNewHandleRange(X_HANDLE_TABLE* table, PPCContext* context,
uint32_t bucket_base_handle,
uint32_t new_bucket) {
uint32_t bucket_slot_addr =
HandleToBucketOffset(bucket_base_handle) + table->table_dynamic_buckets;
// insert the new bucket into its slot
*context->TranslateVirtualBE<uint32_t>(bucket_slot_addr) = new_bucket;
table->free_offset = bucket_base_handle;
table->highest_allocated_offset = bucket_base_handle + SIZE_PER_HANDLE_BUCKET;
auto bucket = context->TranslateVirtualBE<guest_handle_t>(new_bucket);
/*
initialize each bucket slot with a handle to the next free slot
(bucket_handle_index+1) this is so we can read back the slot, update free
ptr to that, and then store an object in NewObjectHandle
*/
for (uint32_t bucket_handle_index = 0;
bucket_handle_index < NUM_HANDLES_PER_BUCKET; ++bucket_handle_index) {
bucket[bucket_handle_index] = (bucket_base_handle | ELEMENT_IS_FREE_FLAG) +
((bucket_handle_index + 1) * 4);
}
}
bool GrowHandleTable(uint32_t table_ptr, PPCContext* context) {
X_HANDLE_TABLE* table = context->TranslateVirtual<X_HANDLE_TABLE*>(table_ptr);
guest_handle_t new_bucket_handle_base = table->highest_allocated_offset;
if (new_bucket_handle_base >= HANDLE_MAX) {
return false;
}
uint32_t new_bucket = xboxkrnl::xeAllocatePoolTypeWithTag(
context, SIZE_PER_HANDLE_BUCKET, 'tHbO', table->unk_pool_arg_34);
if (!new_bucket) {
return false;
}
// this is exactly equal to (SIZE_PER_HANDLE_BUCKET*
// countof(table_static_buckets)) - 1
if ((new_bucket_handle_base & 0x7FF) != 0) {
InitializeNewHandleRange(table, context, new_bucket_handle_base,
new_bucket);
return true;
}
if (new_bucket_handle_base) {
// bucket list realloc logic starts here
uint32_t new_dynamic_buckets = xboxkrnl::xeAllocatePoolTypeWithTag(
context,
sizeof(uint32_t) * ((new_bucket_handle_base / SIZE_PER_HANDLE_BUCKET) +
BUCKET_SLOT_GROWTH),
'rHbO', table->unk_pool_arg_34);
if (new_dynamic_buckets) {
/*
copy old bucket list contents to new, larger bucket list
*/
memcpy(context->TranslateVirtual(new_dynamic_buckets),
context->TranslateVirtual(table->table_dynamic_buckets),
sizeof(uint32_t) * (new_bucket_handle_base / SIZE_PER_HANDLE_BUCKET));
if (context->TranslateVirtualBE<uint32_t>(table->table_dynamic_buckets) !=
&table->table_static_buckets[0]) {
xboxkrnl::xeFreePool(context, table->table_dynamic_buckets);
}
table->table_dynamic_buckets = new_dynamic_buckets;
InitializeNewHandleRange(table, context, new_bucket_handle_base,
new_bucket);
return true;
}
xboxkrnl::xeFreePool(context, new_bucket);
return false;
}
table->table_dynamic_buckets =
table_ptr + offsetof(X_HANDLE_TABLE, table_static_buckets);
InitializeNewHandleRange(table, context, new_bucket_handle_base, new_bucket);
return true;
}
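The growth cadence falls out of the constants above; a worked timeline (hand-computed, not from the commit):

// base 0x000: first bucket; table_dynamic_buckets is pointed at the static
//             slot array inside the table itself (the final branch above)
// base 0x100..0x700: (base & 0x7FF) != 0, the existing slot array is reused
// base 0x800: static slots are full, realloc with 0x800/0x100 + 8 = 16 slots
// base 0x1000: realloc again, 0x1000/0x100 + 8 = 24 slots, and so on
static_assert(SIZE_PER_HANDLE_BUCKET == 0x100,
              "one bucket covers 0x100 of handle space");
static_assert(8 * SIZE_PER_HANDLE_BUCKET - 1 == 0x7FF,
              "0x7FF masks the span of the 8 static bucket slots");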
uint32_t NewObjectHandle(uint32_t table_guest, uint32_t object_guest,
PPCContext* context) {
X_HANDLE_TABLE* table =
context->TranslateVirtual<X_HANDLE_TABLE*>(table_guest);
X_OBJECT_HEADER* object = context->TranslateVirtual<X_OBJECT_HEADER*>(
object_guest - sizeof(X_OBJECT_HEADER));
guest_handle_t new_handle;
xboxkrnl::xeKeKfAcquireSpinLock(context, &table->table_lock, false);
{
if (table->unk_36 ||
(table->free_offset == table->highest_allocated_offset &&
!GrowHandleTable(table_guest, context))) {
new_handle = 0;
} else {
guest_handle_t new_handle_offset = table->free_offset;
uint32_t bucket = *context->TranslateVirtualBE<uint32_t>(
HandleToBucketOffset(new_handle_offset) +
table->table_dynamic_buckets);
auto object_ptr_dest = context->TranslateVirtualBE<uint32_t>(
bucket + HandleToBucketElementOffset(new_handle_offset));
// see end of InitializeNewHandleRange, each slot contains the offset of
// the next free slot
uint32_t next_free_slot = *object_ptr_dest;
table->free_offset = next_free_slot & ~ELEMENT_IS_FREE_FLAG;
table->num_handles++;
// this object header field is not atomic, because we're already under the
// table lock whenever we make changes to it
++object->handle_count;
*object_ptr_dest = object_guest;
new_handle = (static_cast<uint32_t>(table->handle_high_byte) << 24) |
new_handle_offset;
}
}
xboxkrnl::xeKeKfReleaseSpinLock(context, &table->table_lock, 0, false);
return new_handle;
}
uint32_t DestroyObjectHandle(uint32_t table_guest, uint32_t handle,
PPCContext* context) {
X_HANDLE_TABLE* table =
context->TranslateVirtual<X_HANDLE_TABLE*>(table_guest);
xboxkrnl::xeKeKfAcquireSpinLock(context, &table->table_lock, false);
unsigned int result = 0;
{
if ((handle >> 24) != table->handle_high_byte) {
xenia_assert(false);
} else {
uint32_t handle_sans_flags_and_high = handle & 0xFFFFFC;
if (handle_sans_flags_and_high < table->highest_allocated_offset) {
uint32_t bucket_for_handle = *context->TranslateVirtualBE<uint32_t>(
HandleToBucketOffset(handle_sans_flags_and_high) +
table->table_dynamic_buckets);
uint32_t bucket_element_guest_ptr =
bucket_for_handle + HandleToBucketElementOffset(handle);
if (bucket_element_guest_ptr) {
auto bucket_element_ptr =
context->TranslateVirtualBE<uint32_t>(bucket_element_guest_ptr);
uint32_t bucket_element = *bucket_element_ptr;
if ((bucket_element & ELEMENT_IS_FREE_FLAG) == 0) {
result = bucket_element & ~2;
*bucket_element_ptr = table->free_offset | ELEMENT_IS_FREE_FLAG;
table->free_offset = handle_sans_flags_and_high;
table->num_handles--;
}
}
} else {
xenia_assert(false);
}
}
}
xboxkrnl::xeKeKfReleaseSpinLock(context, &table->table_lock, 0, false);
return result;
}
uint32_t LookupHandleUnlocked(X_HANDLE_TABLE* table, guest_handle_t handle,
bool reference_object, PPCContext* context) {
uint32_t result_object = 0;
if ((handle >> 24) != table->handle_high_byte) {
return 0U;
}
if ((handle & 0xFFFFFC) >= table->highest_allocated_offset) {
return 0U;
}
uint32_t bucket_element_guest_ptr =
*context->TranslateVirtualBE<uint32_t>(HandleToBucketOffset(handle) +
table->table_dynamic_buckets) +
HandleToBucketElementOffset(handle);
if (bucket_element_guest_ptr != 0) {
uint32_t bucket_element =
*context->TranslateVirtualBE<uint32_t>(bucket_element_guest_ptr);
result_object = bucket_element & ~2U;
if ((bucket_element & ELEMENT_IS_FREE_FLAG) == 0) {
if (reference_object) {
X_OBJECT_HEADER* header = context->TranslateVirtual<X_OBJECT_HEADER*>(
result_object - sizeof(X_OBJECT_HEADER));
context->processor->GuestAtomicIncrement32(
context, context->HostToGuestVirtual(&header->pointer_count));
}
} else {
result_object = 0;
}
} else {
result_object = 0;
}
return result_object;
}
uint32_t LookupHandle(uint32_t table, uint32_t handle,
uint32_t reference_object, PPCContext* context) {
X_HANDLE_TABLE* table_ptr = context->TranslateVirtual<X_HANDLE_TABLE*>(table);
uint32_t old_irql =
xboxkrnl::xeKeKfAcquireSpinLock(context, &table_ptr->table_lock);
uint32_t result =
LookupHandleUnlocked(table_ptr, handle, reference_object, context);
xboxkrnl::xeKeKfReleaseSpinLock(context, &table_ptr->table_lock, old_irql);
return result;
}
} // namespace util
} // namespace kernel
} // namespace xe
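A hypothetical end-to-end flow (variable names are illustrative) showing how the pieces above combine:

uint32_t ExampleCreateAndResolve(xe::cpu::ppc::PPCContext* context,
                                 uint32_t table_guest, uint32_t object_guest) {
  // claim a free slot, growing the bucket list on demand; returns 0 if the
  // table cannot grow or unk_36 is set
  uint32_t handle = NewObjectHandle(table_guest, object_guest, context);
  if (!handle) {
    return 0;
  }
  // resolve the handle back to the object, bumping the guest-side refcount
  uint32_t resolved =
      LookupHandle(table_guest, handle, /*reference_object=*/1, context);
  // free the slot; returns the object that was stored in it
  DestroyObjectHandle(table_guest, handle, context);
  return resolved;  // == object_guest
}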

View File

@@ -0,0 +1,52 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2023 Xenia Canary. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#ifndef XENIA_KERNEL_UTIL_GUEST_OBJECT_TABLE_H_
#define XENIA_KERNEL_UTIL_GUEST_OBJECT_TABLE_H_
#include "xenia/kernel/kernel_state.h"
#include "xenia/xbox.h"
namespace xe {
namespace kernel {
namespace util {
// use this to make it clearer in the code whether a uint32_t is a handle or not
using guest_handle_t = uint32_t;
// not normally API-visible, but used so we can accurately recreate how the
// 360 OS allocated handles
struct X_HANDLE_TABLE {
xe::be<uint32_t> num_handles;
xe::be<guest_handle_t> free_offset;
xe::be<uint32_t> highest_allocated_offset;
xe::be<uint32_t> table_dynamic_buckets;
xe::be<uint32_t> table_static_buckets[8];
X_KSPINLOCK table_lock;
// used as unknown arg 3 to pool allocations
uint8_t unk_pool_arg_34;
uint8_t handle_high_byte;
uint8_t unk_36;
uint8_t unk_38;
};
static_assert_size(X_HANDLE_TABLE, 0x38);
bool GrowHandleTable(uint32_t table_ptr, cpu::ppc::PPCContext* context);
uint32_t NewObjectHandle(uint32_t table_guest, uint32_t object_guest,
cpu::ppc::PPCContext* context);
uint32_t DestroyObjectHandle(uint32_t table_guest, guest_handle_t handle,
cpu::ppc::PPCContext* context);
uint32_t LookupHandleUnlocked(X_HANDLE_TABLE* table, guest_handle_t handle,
bool reference_object,
cpu::ppc::PPCContext* context);
uint32_t LookupHandle(uint32_t table, guest_handle_t handle,
uint32_t reference_object, cpu::ppc::PPCContext* context);
} // namespace util
} // namespace kernel
} // namespace xe
#endif // XENIA_KERNEL_UTIL_GUEST_OBJECT_TABLE_H_

View File

@@ -14,9 +14,9 @@
#include "xenia/base/math.h"
#include "xenia/kernel/kernel_state.h"
#include "xenia/kernel/util/shim_utils.h"
#include "xenia/kernel/xboxkrnl/xboxkrnl_memory.h"
#include "xenia/kernel/xboxkrnl/xboxkrnl_private.h"
#include "xenia/xbox.h"
DEFINE_bool(
ignore_offset_for_ranged_allocations, false,
"Allows to ignore 4k offset for physical allocations with provided range. "
@@ -380,10 +380,10 @@ dword_result_t NtAllocateEncryptedMemory_entry(dword_t unk, dword_t region_size,
DECLARE_XBOXKRNL_EXPORT1(NtAllocateEncryptedMemory, kMemory, kImplemented);
uint32_t xeMmAllocatePhysicalMemoryEx(uint32_t flags, uint32_t region_size,
uint32_t protect_bits,
uint32_t min_addr_range,
uint32_t max_addr_range,
uint32_t alignment) {
// Type will usually be 0 (user request?), where 1 and 2 are sometimes made
// by D3D/etc.
@@ -463,7 +463,7 @@ dword_result_t MmAllocatePhysicalMemory_entry(dword_t flags,
dword_t region_size,
dword_t protect_bits) {
return xeMmAllocatePhysicalMemoryEx(flags, region_size, protect_bits, 0,
0xFFFFFFFFu, 0);
}
DECLARE_XBOXKRNL_EXPORT1(MmAllocatePhysicalMemory, kMemory, kImplemented);
@@ -642,35 +642,64 @@ dword_result_t MmMapIoSpace_entry(dword_t unk0, lpvoid_t src_address,
}
DECLARE_XBOXKRNL_EXPORT1(MmMapIoSpace, kMemory, kImplemented);
struct X_POOL_ALLOC_HEADER {
uint8_t unk_0;
uint8_t unk_1;
uint8_t unk_2;  // set this to 170
uint8_t unk_3;
xe::be<uint32_t> tag;
};
uint32_t xeAllocatePoolTypeWithTag(PPCContext* context, uint32_t size,
uint32_t tag, uint32_t zero) {
if (size <= 0xFD8) {
uint32_t adjusted_size = size + sizeof(X_POOL_ALLOC_HEADER);
uint32_t addr =
kernel_state()->memory()->SystemHeapAlloc(adjusted_size, 64);
auto result_ptr = context->TranslateVirtual<X_POOL_ALLOC_HEADER*>(addr);
result_ptr->unk_2 = 170;
result_ptr->tag = tag;
return addr + sizeof(X_POOL_ALLOC_HEADER);
} else {
return kernel_state()->memory()->SystemHeapAlloc(size, 4096);
}
}
dword_result_t ExAllocatePoolTypeWithTag_entry(dword_t size, dword_t tag,
dword_t zero,
const ppc_context_t& context) {
return xeAllocatePoolTypeWithTag(context, size, tag, zero);
}
DECLARE_XBOXKRNL_EXPORT1(ExAllocatePoolTypeWithTag, kMemory, kImplemented);
dword_result_t ExAllocatePoolWithTag_entry(dword_t numbytes, dword_t tag,
const ppc_context_t& context) {
return xeAllocatePoolTypeWithTag(context, numbytes, tag, 0);
}
DECLARE_XBOXKRNL_EXPORT1(ExAllocatePoolWithTag, kMemory, kImplemented);
dword_result_t ExAllocatePool_entry(dword_t size,
const ppc_context_t& context) {
const uint32_t none = 0x656E6F4E;  // 'None'
return xeAllocatePoolTypeWithTag(context, size, none, 0);
}
DECLARE_XBOXKRNL_EXPORT1(ExAllocatePool, kMemory, kImplemented);
void xeFreePool(PPCContext* context, uint32_t base_address) {
auto memory = context->kernel_state->memory();
// if 4 KiB aligned, there is no pool header!
if ((base_address & (4096 - 1)) == 0) {
memory->SystemHeapFree(base_address);
} else {
memory->SystemHeapFree(base_address - sizeof(X_POOL_ALLOC_HEADER));
}
}
void ExFreePool_entry(lpvoid_t base_address, const ppc_context_t& context) {
xeFreePool(context, base_address.guest_address());
}
DECLARE_XBOXKRNL_EXPORT1(ExFreePool, kMemory, kImplemented);
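This is the "proper misalignment" of the commit title: the old implementation handed out naturally aligned system-heap blocks, whereas the 360 kernel offsets small pool allocations by a header. Small requests (<= 0xFD8 bytes) now get an 8-byte X_POOL_ALLOC_HEADER inside a 64-byte-aligned block, so the user pointer is deliberately congruent to 8 mod 64 and never 4 KiB aligned, which is exactly how xeFreePool tells the two cases apart. A sketch of the invariant (addr stands in for whatever SystemHeapAlloc returns):

#include <cassert>

void pool_alignment_example() {
  constexpr uint32_t kHeaderSize = sizeof(X_POOL_ALLOC_HEADER);  // 8 bytes
  uint32_t addr = 0x40010000;  // assumed 64-byte-aligned heap block
  uint32_t user_ptr = addr + kHeaderSize;
  assert(user_ptr % 64 == kHeaderSize);  // misaligned on purpose
  assert((user_ptr & 0xFFF) != 0);       // not 4 KiB aligned -> has a header
}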
@@ -710,9 +739,7 @@ DECLARE_XBOXKRNL_EXPORT1(KeLockL2, kMemory, kStub);
void KeUnlockL2_entry() {}
DECLARE_XBOXKRNL_EXPORT1(KeUnlockL2, kMemory, kStub);
uint32_t xeMmCreateKernelStack(uint32_t stack_size, uint32_t r4) {
auto stack_size_aligned = (stack_size + 0xFFF) & 0xFFFFF000;
uint32_t stack_alignment = (stack_size & 0xF000) ? 0x1000 : 0x10000;
@@ -725,6 +752,9 @@ dword_result_t MmCreateKernelStack_entry(dword_t stack_size, dword_t r4) {
&stack_address);
return stack_address + stack_size;
}
dword_result_t MmCreateKernelStack_entry(dword_t stack_size, dword_t r4) {
return xeMmCreateKernelStack(stack_size, r4);
}
DECLARE_XBOXKRNL_EXPORT1(MmCreateKernelStack, kMemory, kImplemented);
dword_result_t MmDeleteKernelStack_entry(lpvoid_t stack_base,

View File

@@ -24,7 +24,12 @@ uint32_t xeMmAllocatePhysicalMemoryEx(uint32_t flags, uint32_t region_size,
uint32_t min_addr_range,
uint32_t max_addr_range,
uint32_t alignment);
uint32_t xeAllocatePoolTypeWithTag(PPCContext* context, uint32_t size,
uint32_t tag, uint32_t zero);
void xeFreePool(PPCContext* context, uint32_t base_address);
uint32_t xeMmCreateKernelStack(uint32_t size, uint32_t r4);
}  // namespace xboxkrnl
}  // namespace kernel
}  // namespace xe

View File

@@ -1132,8 +1132,8 @@ dword_result_t KfAcquireSpinLock_entry(pointer_t<X_KSPINLOCK> lock_ptr,
DECLARE_XBOXKRNL_EXPORT3(KfAcquireSpinLock, kThreading, kImplemented, kBlocking,
kHighFrequency);
void xeKeKfReleaseSpinLock(PPCContext* ctx, X_KSPINLOCK* lock,
uint32_t old_irql, bool change_irql) {
assert_true(lock->prcb_of_owner == static_cast<uint32_t>(ctx->r[13]));
// Unlock.
lock->prcb_of_owner.value = 0;
@@ -1170,8 +1170,9 @@ dword_result_t KeTryToAcquireSpinLockAtRaisedIrql_entry(
auto lock = reinterpret_cast<uint32_t*>(lock_ptr.host_address());
assert_true(lock_ptr->prcb_of_owner != static_cast<uint32_t>(ppc_ctx->r[13]));
PrefetchForCAS(lock);
if (!ppc_ctx->processor->GuestAtomicCAS32(
ppc_ctx, 0, static_cast<uint32_t>(ppc_ctx->r[13]),
lock_ptr.guest_address())) {
return 0;
}
return 1;
@@ -1361,8 +1362,8 @@ X_STATUS xeProcessUserApcs(PPCContext* ctx) {
return alert_status;
}
static void YankApcList(PPCContext* ctx, X_KTHREAD* current_thread,
unsigned apc_mode, bool rundown) {
uint32_t unlocked_irql =
xeKeKfAcquireSpinLock(ctx, &current_thread->apc_lock);

View File

@@ -373,6 +373,13 @@ class Memory {
inline T* TranslateVirtual(TypedGuestPointer<T> guest_address) {
return TranslateVirtual<T*>(guest_address.m_ptr);
}
template <typename T>
inline xe::be<T>* TranslateVirtualBE(uint32_t guest_address)
XE_RESTRICT const {
static_assert(!std::is_pointer_v<T> &&
sizeof(T) > 1); // maybe assert is_integral?
return TranslateVirtual<xe::be<T>*>(guest_address);
}
// Base address of physical memory in the host address space.
// This is often something like 0x200000000.