diff --git a/src/xenia/apu/audio_system.cc b/src/xenia/apu/audio_system.cc index 8949173f9..da1d8cab2 100644 --- a/src/xenia/apu/audio_system.cc +++ b/src/xenia/apu/audio_system.cc @@ -14,6 +14,7 @@ #include "xenia/base/math.h" #include "xenia/cpu/processor.h" #include "xenia/cpu/thread_state.h" +#include "xenia/kernel/objects/xthread.h" #include "xenia/emulator.h" #include "xenia/profiling.h" @@ -47,6 +48,7 @@ namespace xe { namespace apu { using namespace xe::cpu; +using namespace xe::kernel; // Size of a hardware XMA context. const uint32_t kXmaContextSize = 64; @@ -74,7 +76,7 @@ X_STATUS AudioSystem::Setup() { processor_ = emulator_->processor(); // Let the processor know we want register access callbacks. - emulator_->memory()->AddMappedRange( + emulator_->memory()->AddVirtualMappedRange( 0x7FEA0000, 0xFFFF0000, 0x0000FFFF, this, reinterpret_cast(MMIOReadRegisterThunk), reinterpret_cast(MMIOWriteRegisterThunk)); @@ -89,25 +91,23 @@ X_STATUS AudioSystem::Setup() { } registers_.next_context = 1; - // Setup worker thread state. This lets us make calls into guest code. - thread_state_ = new ThreadState(emulator_->processor(), 0, 0, 128 * 1024, 0); - thread_state_->set_name("Audio Worker"); - thread_block_ = memory()->SystemHeapAlloc(2048); - thread_state_->context()->r[13] = thread_block_; + // Setup our worker thread + std::function thread_fn = [this]() { + this->ThreadStart(); + return 0; + }; - // Create worker thread. - // This will initialize the audio system. - // Init needs to happen there so that any thread-local stuff - // is created on the right thread. running_ = true; - thread_ = std::thread(std::bind(&AudioSystem::ThreadStart, this)); + + thread_ = std::make_unique(emulator()->kernel_state(), + 128 * 1024, 0, thread_fn); + thread_->Create(); return X_STATUS_SUCCESS; } void AudioSystem::ThreadStart() { xe::threading::set_name("Audio Worker"); - xe::Profiler::ThreadEnter("Audio Worker"); // Initialize driver and ringbuffer. Initialize(); @@ -135,7 +135,7 @@ void AudioSystem::ThreadStart() { lock_.unlock(); if (client_callback) { uint64_t args[] = {client_callback_arg}; - processor->Execute(thread_state_, client_callback, args, + processor->Execute(thread_->thread_state(), client_callback, args, xe::countof(args)); } pumped++; @@ -157,8 +157,6 @@ void AudioSystem::ThreadStart() { running_ = false; // TODO(benvanik): call module API to kill? - - xe::Profiler::ThreadExit(); } void AudioSystem::Initialize() {} @@ -166,10 +164,7 @@ void AudioSystem::Initialize() {} void AudioSystem::Shutdown() { running_ = false; ResetEvent(client_wait_handles_[maximum_client_count_]); - thread_.join(); - - delete thread_state_; - memory()->SystemHeapFree(thread_block_); + thread_->Wait(0, 0, 0, NULL); memory()->SystemHeapFree(registers_.xma_context_array_ptr); } @@ -252,7 +247,7 @@ void AudioSystem::UnregisterClient(size_t index) { // piece of hardware: // https://github.com/Free60Project/libxenon/blob/master/libxenon/drivers/xenon_sound/sound.c -uint64_t AudioSystem::ReadRegister(uint64_t addr) { +uint64_t AudioSystem::ReadRegister(uint32_t addr) { uint32_t r = addr & 0xFFFF; XELOGAPU("ReadRegister(%.4X)", r); // 1800h is read on startup and stored -- context? buffers? 
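// ----------------------------------------------------------------------------
// Aside: a minimal standalone sketch of the worker-thread pattern the hunk
// above switches to. The entry point is captured as a std::function<int()> so
// the thread object (XHostThread in the real code) can register the thread
// with the kernel before invoking it. All names below (HostThread, the pump
// body) are hypothetical stand-ins, not xenia APIs.
#include <atomic>
#include <cstdio>
#include <functional>
#include <thread>

class HostThread {
 public:
  explicit HostThread(std::function<int()> entry) : entry_(std::move(entry)) {}
  void Create() { thread_ = std::thread([this]() { entry_(); }); }
  void Wait() { if (thread_.joinable()) thread_.join(); }

 private:
  std::function<int()> entry_;
  std::thread thread_;
};

int main() {
  std::atomic<bool> running{true};
  HostThread worker([&]() {
    // Pump loop: the real worker calls processor->Execute() with the
    // registered client callback here instead of printing.
    while (running) {
      std::puts("pump client callback");
      running = false;  // single iteration for the sketch
    }
    return 0;
  });
  worker.Create();
  worker.Wait();
  return 0;
}
// ----------------------------------------------------------------------------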
@@ -277,7 +272,7 @@ uint64_t AudioSystem::ReadRegister(uint64_t addr) { return value; } -void AudioSystem::WriteRegister(uint64_t addr, uint64_t value) { +void AudioSystem::WriteRegister(uint32_t addr, uint64_t value) { uint32_t r = addr & 0xFFFF; value = xe::byte_swap(uint32_t(value)); XELOGAPU("WriteRegister(%.4X, %.8X)", r, value); diff --git a/src/xenia/apu/audio_system.h b/src/xenia/apu/audio_system.h index b385f90a2..fc2f61381 100644 --- a/src/xenia/apu/audio_system.h +++ b/src/xenia/apu/audio_system.h @@ -19,6 +19,9 @@ #include "xenia/xbox.h" namespace xe { + +namespace kernel { class XHostThread; } + namespace apu { class AudioDriver; @@ -49,8 +52,8 @@ class AudioSystem { AudioDriver** out_driver) = 0; virtual void DestroyDriver(AudioDriver* driver) = 0; - virtual uint64_t ReadRegister(uint64_t addr); - virtual void WriteRegister(uint64_t addr, uint64_t value); + virtual uint64_t ReadRegister(uint32_t addr); + virtual void WriteRegister(uint32_t addr, uint64_t value); protected: virtual void Initialize(); @@ -58,10 +61,10 @@ class AudioSystem { private: void ThreadStart(); - static uint64_t MMIOReadRegisterThunk(AudioSystem* as, uint64_t addr) { + static uint64_t MMIOReadRegisterThunk(AudioSystem* as, uint32_t addr) { return as->ReadRegister(addr); } - static void MMIOWriteRegisterThunk(AudioSystem* as, uint64_t addr, + static void MMIOWriteRegisterThunk(AudioSystem* as, uint32_t addr, uint64_t value) { as->WriteRegister(addr, value); } @@ -73,9 +76,7 @@ class AudioSystem { Memory* memory_; cpu::Processor* processor_; - std::thread thread_; - cpu::ThreadState* thread_state_; - uint32_t thread_block_; + std::unique_ptr thread_; std::atomic running_; std::mutex lock_; diff --git a/src/xenia/base/memory_generic.cc b/src/xenia/base/memory_generic.cc index 3682fcc5d..a48fc6c13 100644 --- a/src/xenia/base/memory_generic.cc +++ b/src/xenia/base/memory_generic.cc @@ -23,7 +23,7 @@ size_t page_size() { #if XE_PLATFORM_WIN32 SYSTEM_INFO si; GetSystemInfo(&si); - value = si.dwPageSize; + value = si.dwAllocationGranularity; #else value = getpagesize(); #endif // XE_PLATFORM_WIN32 diff --git a/src/xenia/cpu/backend/x64/x64_sequences.cc b/src/xenia/cpu/backend/x64/x64_sequences.cc index fd3c09e98..60410d248 100644 --- a/src/xenia/cpu/backend/x64/x64_sequences.cc +++ b/src/xenia/cpu/backend/x64/x64_sequences.cc @@ -1673,6 +1673,49 @@ EMITTER_OPCODE_TABLE( PREFETCH); +// ============================================================================ +// OPCODE_MEMSET +// ============================================================================ +EMITTER(MEMSET_I64_I8_I64, MATCH(I, I8<>, I64<>>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + assert_true(i.src2.is_constant); + assert_true(i.src3.is_constant); + assert_true(i.src2.constant() == 0); + e.vpxor(e.xmm0, e.xmm0); + auto addr = ComputeMemoryAddress(e, i.src1); + switch (i.src3.constant()) { + case 32: + e.vmovaps(e.ptr[addr + 0 * 16], e.xmm0); + e.vmovaps(e.ptr[addr + 1 * 16], e.xmm0); + break; + case 128: + e.vmovaps(e.ptr[addr + 0 * 16], e.xmm0); + e.vmovaps(e.ptr[addr + 1 * 16], e.xmm0); + e.vmovaps(e.ptr[addr + 2 * 16], e.xmm0); + e.vmovaps(e.ptr[addr + 3 * 16], e.xmm0); + e.vmovaps(e.ptr[addr + 4 * 16], e.xmm0); + e.vmovaps(e.ptr[addr + 5 * 16], e.xmm0); + e.vmovaps(e.ptr[addr + 6 * 16], e.xmm0); + e.vmovaps(e.ptr[addr + 7 * 16], e.xmm0); + break; + default: + assert_unhandled_case(i.src3.constant()); + break; + } + if (IsTracingData()) { + addr = ComputeMemoryAddress(e, i.src1); + e.mov(e.r9, i.src3.constant()); + 
e.mov(e.r8, i.src2.constant()); + e.lea(e.rdx, e.ptr[addr]); + e.CallNative(reinterpret_cast(TraceMemset)); + } + } +}; +EMITTER_OPCODE_TABLE( + OPCODE_MEMSET, + MEMSET_I64_I8_I64); + + // ============================================================================ // OPCODE_MAX // ============================================================================ @@ -6335,6 +6378,7 @@ void RegisterSequences() { REGISTER_EMITTER_OPCODE_TABLE(OPCODE_STORE_CONTEXT); REGISTER_EMITTER_OPCODE_TABLE(OPCODE_LOAD); REGISTER_EMITTER_OPCODE_TABLE(OPCODE_STORE); + REGISTER_EMITTER_OPCODE_TABLE(OPCODE_MEMSET); REGISTER_EMITTER_OPCODE_TABLE(OPCODE_PREFETCH); REGISTER_EMITTER_OPCODE_TABLE(OPCODE_MAX); REGISTER_EMITTER_OPCODE_TABLE(OPCODE_VECTOR_MAX); diff --git a/src/xenia/cpu/backend/x64/x64_tracers.cc b/src/xenia/cpu/backend/x64/x64_tracers.cc index e6e250a7c..e3da7a440 100644 --- a/src/xenia/cpu/backend/x64/x64_tracers.cc +++ b/src/xenia/cpu/backend/x64/x64_tracers.cc @@ -28,10 +28,11 @@ namespace x64 { #define TARGET_THREAD 1 -#define IFLUSH() fflush(stdout) +#define IFLUSH() \ + if (thread_state->thread_id() == TARGET_THREAD) fflush(stdout) #define IPRINT \ if (thread_state->thread_id() == TARGET_THREAD) printf -#define DFLUSH() fflush(stdout) +#define DFLUSH() IFLUSH() #define DPRINT \ DFLUSH(); \ if (thread_state->thread_id() == TARGET_THREAD) printf @@ -194,6 +195,13 @@ void TraceMemoryStoreV128(void* raw_context, uint32_t address, __m128 value) { xe::m128_i32<3>(value)); } +void TraceMemset(void* raw_context, uint32_t address, uint8_t value, + uint32_t length) { + auto thread_state = *((ThreadState**)raw_context); + DPRINT("memset %.8X-%.8X (%d) = %.2X", address, address + length, length, + value); +} + } // namespace x64 } // namespace backend } // namespace cpu diff --git a/src/xenia/cpu/backend/x64/x64_tracers.h b/src/xenia/cpu/backend/x64/x64_tracers.h index 1642f7920..c53b7b51d 100644 --- a/src/xenia/cpu/backend/x64/x64_tracers.h +++ b/src/xenia/cpu/backend/x64/x64_tracers.h @@ -64,6 +64,9 @@ void TraceMemoryStoreF32(void* raw_context, uint32_t address, __m128 value); void TraceMemoryStoreF64(void* raw_context, uint32_t address, __m128 value); void TraceMemoryStoreV128(void* raw_context, uint32_t address, __m128 value); +void TraceMemset(void* raw_context, uint32_t address, uint8_t value, + uint32_t length); + } // namespace x64 } // namespace backend } // namespace cpu diff --git a/src/xenia/cpu/frontend/ppc_emit_memory.cc b/src/xenia/cpu/frontend/ppc_emit_memory.cc index cf83bd736..38ef567e3 100644 --- a/src/xenia/cpu/frontend/ppc_emit_memory.cc +++ b/src/xenia/cpu/frontend/ppc_emit_memory.cc @@ -984,11 +984,24 @@ XEEMITTER(dcbtst, 0x7C0001EC, X)(PPCHIRBuilder& f, InstrData& i) { } XEEMITTER(dcbz, 0x7C0007EC, X)(PPCHIRBuilder& f, InstrData& i) { - // No-op for now. 
- // TODO(benvanik): use prefetch // or dcbz128 0x7C2007EC - // XEINSTRNOTIMPLEMENTED(); - f.Nop(); + // EA <- (RA) + (RB) + // memset(EA & ~31, 0, 32) + Value* ea = CalculateEA_0(f, i.X.RA, i.X.RB); + int block_size; + int address_mask; + if (i.X.RT == 1) { + // dcbz128 - 128 byte set + block_size = 128; + address_mask = ~127; + } + else { + // dcbz - 32 byte set + block_size = 32; + address_mask = ~31; + } + f.Memset(f.And(ea, f.LoadConstant(int64_t(address_mask))), + f.LoadZero(INT8_TYPE), f.LoadConstant(int64_t(block_size))); return 0; } diff --git a/src/xenia/cpu/frontend/test/xe-cpu-ppc-test.cc b/src/xenia/cpu/frontend/test/xe-cpu-ppc-test.cc index a357ef6f6..63c99ec95 100644 --- a/src/xenia/cpu/frontend/test/xe-cpu-ppc-test.cc +++ b/src/xenia/cpu/frontend/test/xe-cpu-ppc-test.cc @@ -197,9 +197,10 @@ class TestRunner { // Simulate a thread. uint32_t stack_size = 64 * 1024; uint32_t stack_address = START_ADDRESS - stack_size; - uint32_t thread_state_address = stack_address - 0x1000; - thread_state.reset(new ThreadState(processor.get(), 0x100, stack_address, - stack_size, thread_state_address)); + uint32_t pcr_address = stack_address - 0x1000; + thread_state.reset(new ThreadState(processor.get(), 0x100, + ThreadStackType::kUserStack, + stack_address, stack_size, pcr_address)); return true; } diff --git a/src/xenia/cpu/hir/hir_builder.cc b/src/xenia/cpu/hir/hir_builder.cc index f98d81331..dd81767b5 100644 --- a/src/xenia/cpu/hir/hir_builder.cc +++ b/src/xenia/cpu/hir/hir_builder.cc @@ -20,15 +20,27 @@ namespace xe { namespace cpu { namespace hir { -#define ASSERT_ADDRESS_TYPE(value) -#define ASSERT_INTEGER_TYPE(value) -#define ASSERT_FLOAT_TYPE(value) -#define ASSERT_NON_VECTOR_TYPE(value) -#define ASSERT_VECTOR_TYPE(value) +#define ASSERT_ADDRESS_TYPE(value) \ + \ +assert_true((value->type) == INT32_TYPE || (value->type) == INT64_TYPE) +#define ASSERT_INTEGER_TYPE(value) \ + \ +assert_true((value->type) == INT8_TYPE || (value->type) == INT16_TYPE || \ + (value->type) == INT32_TYPE || (value->type) == INT64_TYPE) +#define ASSERT_FLOAT_TYPE(value) \ + assert_true((value->type) == FLOAT32_TYPE || (value->type) == FLOAT64_TYPE) +#define ASSERT_NON_FLOAT_TYPE(value) \ + \ +assert_true((value->type) != FLOAT32_TYPE && (value->type) != FLOAT64_TYPE) +#define ASSERT_NON_VECTOR_TYPE(value) assert_false((value->type) == VEC128_TYPE) +#define ASSERT_VECTOR_TYPE(value) assert_true((value->type) == VEC128_TYPE) +#define ASSERT_FLOAT_OR_VECTOR_TYPE(value) \ + assert_true((value->type) == FLOAT32_TYPE || \ + (value->type) == FLOAT64_TYPE || (value->type) == VEC128_TYPE) #define ASSERT_TYPES_EQUAL(value1, value2) \ assert_true((value1->type) == (value2->type)) -HIRBuilder::HIRBuilder() { + HIRBuilder::HIRBuilder() { arena_ = new Arena(); Reset(); } @@ -755,7 +767,7 @@ void HIRBuilder::ReturnTrue(Value* cond) { return; } - ASSERT_ADDRESS_TYPE(value); + ASSERT_ADDRESS_TYPE(cond); Instr* i = AppendInstr(OPCODE_RETURN_TRUE_info, 0); i->set_src1(cond); i->src2.value = i->src3.value = NULL; @@ -873,8 +885,9 @@ Value* HIRBuilder::SignExtend(Value* value, TypeName target_type) { } Value* HIRBuilder::Truncate(Value* value, TypeName target_type) { - ASSERT_INTEGER_TYPE(value->type); - ASSERT_INTEGER_TYPE(target_type); + ASSERT_INTEGER_TYPE(value); + assert_true(target_type == INT8_TYPE || target_type == INT16_TYPE || + target_type == INT32_TYPE || target_type == INT64_TYPE); if (value->type == target_type) { return value; @@ -908,7 +921,7 @@ Value* HIRBuilder::Convert(Value* value, TypeName target_type, } 
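// ----------------------------------------------------------------------------
// Aside: guest-visible effect of the dcbz/dcbz128 lowering above. The
// effective address is rounded down to the block size (32 bytes for dcbz,
// 128 for dcbz128) and the whole block is zeroed; the HIR Memset then becomes
// vmovaps stores of a zeroed xmm register in the x64 backend. A standalone
// model of that semantics (guest memory is just a vector here):
#include <cassert>
#include <cstdint>
#include <cstring>
#include <vector>

void EmulateDcbz(std::vector<uint8_t>& guest_mem, uint64_t ra, uint64_t rb,
                 bool is_dcbz128) {
  const uint64_t block_size = is_dcbz128 ? 128 : 32;
  const uint64_t ea = (ra + rb) & ~(block_size - 1);  // align down to block
  assert(ea + block_size <= guest_mem.size());
  std::memset(guest_mem.data() + ea, 0, block_size);  // OPCODE_MEMSET, value 0
}

int main() {
  std::vector<uint8_t> mem(4096, 0xBE);
  EmulateDcbz(mem, 0x100, 0x1F, /*is_dcbz128=*/false);  // zeroes 0x100..0x11F
  assert(mem[0xFF] == 0xBE && mem[0x100] == 0 && mem[0x11F] == 0 &&
         mem[0x120] == 0xBE);
  return 0;
}
// ----------------------------------------------------------------------------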
Value* HIRBuilder::Round(Value* value, RoundMode round_mode) { - ASSERT_FLOAT_TYPE(value); + ASSERT_FLOAT_OR_VECTOR_TYPE(value); if (value->IsConstant()) { Value* dest = CloneValue(value); @@ -1090,6 +1103,16 @@ void HIRBuilder::Store(Value* address, Value* value, uint32_t store_flags) { i->src3.value = NULL; } +void HIRBuilder::Memset(Value* address, Value* value, Value* length) { + ASSERT_ADDRESS_TYPE(address); + ASSERT_TYPES_EQUAL(address, length); + assert_true(value->type == INT8_TYPE); + Instr* i = AppendInstr(OPCODE_MEMSET_info, 0); + i->set_src1(address); + i->set_src2(value); + i->set_src3(length); +} + void HIRBuilder::Prefetch(Value* address, size_t length, uint32_t prefetch_flags) { ASSERT_ADDRESS_TYPE(address); @@ -1471,8 +1494,6 @@ Value* HIRBuilder::MulSub(Value* value1, Value* value2, Value* value3) { } Value* HIRBuilder::Neg(Value* value) { - ASSERT_NON_VECTOR_TYPE(value); - Instr* i = AppendInstr(OPCODE_NEG_info, 0, AllocValue(value->type)); i->set_src1(value); i->src2.value = i->src3.value = NULL; @@ -1480,7 +1501,7 @@ Value* HIRBuilder::Neg(Value* value) { } Value* HIRBuilder::Abs(Value* value) { - ASSERT_NON_VECTOR_TYPE(value); + ASSERT_FLOAT_OR_VECTOR_TYPE(value); Instr* i = AppendInstr(OPCODE_ABS_info, 0, AllocValue(value->type)); i->set_src1(value); @@ -1489,7 +1510,7 @@ Value* HIRBuilder::Abs(Value* value) { } Value* HIRBuilder::Sqrt(Value* value) { - ASSERT_FLOAT_TYPE(value); + ASSERT_FLOAT_OR_VECTOR_TYPE(value); Instr* i = AppendInstr(OPCODE_SQRT_info, 0, AllocValue(value->type)); i->set_src1(value); @@ -1498,7 +1519,7 @@ Value* HIRBuilder::Sqrt(Value* value) { } Value* HIRBuilder::RSqrt(Value* value) { - ASSERT_FLOAT_TYPE(value); + ASSERT_FLOAT_OR_VECTOR_TYPE(value); Instr* i = AppendInstr(OPCODE_RSQRT_info, 0, AllocValue(value->type)); i->set_src1(value); @@ -1507,7 +1528,7 @@ Value* HIRBuilder::RSqrt(Value* value) { } Value* HIRBuilder::Pow2(Value* value) { - ASSERT_FLOAT_TYPE(value); + ASSERT_FLOAT_OR_VECTOR_TYPE(value); Instr* i = AppendInstr(OPCODE_POW2_info, 0, AllocValue(value->type)); i->set_src1(value); @@ -1516,7 +1537,7 @@ Value* HIRBuilder::Pow2(Value* value) { } Value* HIRBuilder::Log2(Value* value) { - ASSERT_FLOAT_TYPE(value); + ASSERT_FLOAT_OR_VECTOR_TYPE(value); Instr* i = AppendInstr(OPCODE_LOG2_info, 0, AllocValue(value->type)); i->set_src1(value); @@ -1551,8 +1572,8 @@ Value* HIRBuilder::DotProduct4(Value* value1, Value* value2) { } Value* HIRBuilder::And(Value* value1, Value* value2) { - ASSERT_INTEGER_TYPE(value1); - ASSERT_INTEGER_TYPE(value2); + ASSERT_NON_FLOAT_TYPE(value1); + ASSERT_NON_FLOAT_TYPE(value2); ASSERT_TYPES_EQUAL(value1, value2); if (value1 == value2) { @@ -1571,8 +1592,8 @@ Value* HIRBuilder::And(Value* value1, Value* value2) { } Value* HIRBuilder::Or(Value* value1, Value* value2) { - ASSERT_INTEGER_TYPE(value1); - ASSERT_INTEGER_TYPE(value2); + ASSERT_NON_FLOAT_TYPE(value1); + ASSERT_NON_FLOAT_TYPE(value2); ASSERT_TYPES_EQUAL(value1, value2); if (value1 == value2) { @@ -1591,8 +1612,8 @@ Value* HIRBuilder::Or(Value* value1, Value* value2) { } Value* HIRBuilder::Xor(Value* value1, Value* value2) { - ASSERT_INTEGER_TYPE(value1); - ASSERT_INTEGER_TYPE(value2); + ASSERT_NON_FLOAT_TYPE(value1); + ASSERT_NON_FLOAT_TYPE(value2); ASSERT_TYPES_EQUAL(value1, value2); if (value1 == value2) { @@ -1607,7 +1628,7 @@ Value* HIRBuilder::Xor(Value* value1, Value* value2) { } Value* HIRBuilder::Not(Value* value) { - ASSERT_INTEGER_TYPE(value); + ASSERT_NON_FLOAT_TYPE(value); if (value->IsConstant()) { Value* dest = CloneValue(value); @@ 
-1657,7 +1678,7 @@ Value* HIRBuilder::VectorShl(Value* value1, Value* value2, TypeName part_type) { } Value* HIRBuilder::Shr(Value* value1, Value* value2) { - ASSERT_INTEGER_TYPE(value1); + ASSERT_NON_FLOAT_TYPE(value1); ASSERT_INTEGER_TYPE(value2); if (value2->IsConstantZero()) { diff --git a/src/xenia/cpu/hir/hir_builder.h b/src/xenia/cpu/hir/hir_builder.h index c90eebcf9..e189f65c2 100644 --- a/src/xenia/cpu/hir/hir_builder.h +++ b/src/xenia/cpu/hir/hir_builder.h @@ -132,6 +132,7 @@ class HIRBuilder { Value* Load(Value* address, TypeName type, uint32_t load_flags = 0); void Store(Value* address, Value* value, uint32_t store_flags = 0); + void Memset(Value* address, Value* value, Value* length); void Prefetch(Value* address, size_t length, uint32_t prefetch_flags = 0); Value* Max(Value* value1, Value* value2); diff --git a/src/xenia/cpu/hir/opcodes.h b/src/xenia/cpu/hir/opcodes.h index 609a990a9..e904903a2 100644 --- a/src/xenia/cpu/hir/opcodes.h +++ b/src/xenia/cpu/hir/opcodes.h @@ -142,6 +142,7 @@ enum Opcode { OPCODE_STORE_CONTEXT, OPCODE_LOAD, OPCODE_STORE, + OPCODE_MEMSET, OPCODE_PREFETCH, OPCODE_MAX, OPCODE_VECTOR_MAX, diff --git a/src/xenia/cpu/hir/opcodes.inl b/src/xenia/cpu/hir/opcodes.inl index 4a6b2a3f7..a023e21e2 100644 --- a/src/xenia/cpu/hir/opcodes.inl +++ b/src/xenia/cpu/hir/opcodes.inl @@ -224,6 +224,12 @@ DEFINE_OPCODE( OPCODE_SIG_X_V_V, OPCODE_FLAG_MEMORY) +DEFINE_OPCODE( + OPCODE_MEMSET, + "memset", + OPCODE_SIG_X_V_V_V, + 0) + DEFINE_OPCODE( OPCODE_PREFETCH, "prefetch", diff --git a/src/xenia/cpu/mmio_handler.cc b/src/xenia/cpu/mmio_handler.cc index b9e58fff6..0be69a72b 100644 --- a/src/xenia/cpu/mmio_handler.cc +++ b/src/xenia/cpu/mmio_handler.cc @@ -12,6 +12,7 @@ #include "xenia/base/assert.h" #include "xenia/base/byte_order.h" #include "xenia/base/math.h" +#include "xenia/base/memory.h" namespace BE { #include @@ -23,9 +24,11 @@ namespace cpu { MMIOHandler* MMIOHandler::global_handler_ = nullptr; // Implemented in the platform cc file. -std::unique_ptr CreateMMIOHandler(uint8_t* mapping_base); +std::unique_ptr CreateMMIOHandler(uint8_t* virtual_membase, + uint8_t* physical_membase); -std::unique_ptr MMIOHandler::Install(uint8_t* mapping_base) { +std::unique_ptr MMIOHandler::Install(uint8_t* virtual_membase, + uint8_t* physical_membase) { // There can be only one handler at a time. assert_null(global_handler_); if (global_handler_) { @@ -33,7 +36,7 @@ std::unique_ptr MMIOHandler::Install(uint8_t* mapping_base) { } // Create the platform-specific handler. - auto handler = CreateMMIOHandler(mapping_base); + auto handler = CreateMMIOHandler(virtual_membase, physical_membase); // Platform-specific initialization for the handler. 
if (!handler->Initialize()) { @@ -49,45 +52,50 @@ MMIOHandler::~MMIOHandler() { global_handler_ = nullptr; } -bool MMIOHandler::RegisterRange(uint64_t address, uint64_t mask, uint64_t size, - void* context, MMIOReadCallback read_callback, +bool MMIOHandler::RegisterRange(uint32_t virtual_address, uint32_t mask, + uint32_t size, void* context, + MMIOReadCallback read_callback, MMIOWriteCallback write_callback) { mapped_ranges_.push_back({ - reinterpret_cast(mapping_base_) | address, - 0xFFFFFFFF00000000ull | mask, size, context, read_callback, - write_callback, + virtual_address, mask, size, context, read_callback, write_callback, }); return true; } -bool MMIOHandler::CheckLoad(uint64_t address, uint64_t* out_value) { +bool MMIOHandler::CheckLoad(uint32_t virtual_address, uint64_t* out_value) { for (const auto& range : mapped_ranges_) { - if (((address | (uint64_t)mapping_base_) & range.mask) == range.address) { - *out_value = static_cast(range.read(range.context, address)); + if ((virtual_address & range.mask) == range.address) { + *out_value = + static_cast(range.read(range.context, virtual_address)); return true; } } return false; } -bool MMIOHandler::CheckStore(uint64_t address, uint64_t value) { +bool MMIOHandler::CheckStore(uint32_t virtual_address, uint64_t value) { for (const auto& range : mapped_ranges_) { - if (((address | (uint64_t)mapping_base_) & range.mask) == range.address) { - range.write(range.context, address, value); + if ((virtual_address & range.mask) == range.address) { + range.write(range.context, virtual_address, value); return true; } } return false; } -uintptr_t MMIOHandler::AddWriteWatch(uint32_t guest_address, size_t length, - WriteWatchCallback callback, - void* callback_context, - void* callback_data) { +uintptr_t MMIOHandler::AddPhysicalWriteWatch(uint32_t guest_address, + size_t length, + WriteWatchCallback callback, + void* callback_context, + void* callback_data) { uint32_t base_address = guest_address; - if (base_address > 0xA0000000) { - base_address -= 0xA0000000; - } + assert_true(base_address < 0x1FFFFFFF); + + // Can only protect sizes matching system page size. + // This means we need to round up, which will cause spurious access + // violations and invalidations. + // TODO(benvanik): only invalidate if actually within the region? + length = xe::round_up(length, xe::page_size()); // Add to table. The slot reservation may evict a previous watch, which // could include our target, so we do it first. @@ -102,29 +110,29 @@ uintptr_t MMIOHandler::AddWriteWatch(uint32_t guest_address, size_t length, write_watch_mutex_.unlock(); // Make the desired range read only under all address spaces. 
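// ----------------------------------------------------------------------------
// Aside: how the 32-bit range matching above behaves. A range registered as
// (address=0x7FEA0000, mask=0xFFFF0000) — the values AudioSystem::Setup()
// passes — claims every access whose top 16 bits equal 0x7FEA, i.e.
// 0x7FEA0000..0x7FEAFFFF. Standalone check of that predicate:
#include <cassert>
#include <cstdint>

struct Range {
  uint32_t address;
  uint32_t mask;
};

bool Matches(const Range& r, uint32_t virtual_address) {
  return (virtual_address & r.mask) == r.address;
}

int main() {
  Range apu{0x7FEA0000u, 0xFFFF0000u};
  assert(Matches(apu, 0x7FEA0000u));
  assert(Matches(apu, 0x7FEA1800u));   // a register offset within the block
  assert(!Matches(apu, 0x7FEB0000u));  // next 64 KiB block, not claimed
  return 0;
}
// ----------------------------------------------------------------------------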
- auto host_address = mapping_base_ + base_address; DWORD old_protect; - VirtualProtect(host_address, length, PAGE_READONLY, &old_protect); - VirtualProtect(host_address + 0xA0000000, length, PAGE_READONLY, - &old_protect); - VirtualProtect(host_address + 0xC0000000, length, PAGE_READONLY, - &old_protect); - VirtualProtect(host_address + 0xE0000000, length, PAGE_READONLY, - &old_protect); + VirtualProtect(physical_membase_ + entry->address, entry->length, + PAGE_READONLY, &old_protect); + VirtualProtect(virtual_membase_ + 0xA0000000 + entry->address, entry->length, + PAGE_READONLY, &old_protect); + VirtualProtect(virtual_membase_ + 0xC0000000 + entry->address, entry->length, + PAGE_READONLY, &old_protect); + VirtualProtect(virtual_membase_ + 0xE0000000 + entry->address, entry->length, + PAGE_READONLY, &old_protect); return reinterpret_cast(entry); } void MMIOHandler::ClearWriteWatch(WriteWatchEntry* entry) { - auto host_address = mapping_base_ + entry->address; DWORD old_protect; - VirtualProtect(host_address, entry->length, PAGE_READWRITE, &old_protect); - VirtualProtect(host_address + 0xA0000000, entry->length, PAGE_READWRITE, - &old_protect); - VirtualProtect(host_address + 0xC0000000, entry->length, PAGE_READWRITE, - &old_protect); - VirtualProtect(host_address + 0xE0000000, entry->length, PAGE_READWRITE, - &old_protect); + VirtualProtect(physical_membase_ + entry->address, entry->length, + PAGE_READWRITE, &old_protect); + VirtualProtect(virtual_membase_ + 0xA0000000 + entry->address, entry->length, + PAGE_READWRITE, &old_protect); + VirtualProtect(virtual_membase_ + 0xC0000000 + entry->address, entry->length, + PAGE_READWRITE, &old_protect); + VirtualProtect(virtual_membase_ + 0xE0000000 + entry->address, entry->length, + PAGE_READWRITE, &old_protect); } void MMIOHandler::CancelWriteWatch(uintptr_t watch_handle) { @@ -145,17 +153,16 @@ void MMIOHandler::CancelWriteWatch(uintptr_t watch_handle) { } bool MMIOHandler::CheckWriteWatch(void* thread_state, uint64_t fault_address) { - uint32_t guest_address = uint32_t(fault_address - uintptr_t(mapping_base_)); - uint32_t base_address = guest_address; - if (base_address > 0xA0000000) { - base_address -= 0xA0000000; + uint32_t physical_address = uint32_t(fault_address); + if (physical_address > 0x1FFFFFFF) { + physical_address &= 0x1FFFFFFF; } std::list pending_invalidates; write_watch_mutex_.lock(); for (auto it = write_watches_.begin(); it != write_watches_.end();) { auto entry = *it; - if (entry->address <= base_address && - entry->address + entry->length > base_address) { + if (entry->address <= physical_address && + entry->address + entry->length > physical_address) { // Hit! pending_invalidates.push_back(entry); // TODO(benvanik): outside of lock? @@ -176,7 +183,7 @@ bool MMIOHandler::CheckWriteWatch(void* thread_state, uint64_t fault_address) { auto entry = pending_invalidates.back(); pending_invalidates.pop_back(); entry->callback(entry->callback_context, entry->callback_data, - guest_address); + physical_address); delete entry; } // Range was watched, so lets eat this access violation. @@ -185,18 +192,21 @@ bool MMIOHandler::CheckWriteWatch(void* thread_state, uint64_t fault_address) { bool MMIOHandler::HandleAccessFault(void* thread_state, uint64_t fault_address) { - if (fault_address < uint64_t(mapping_base_)) { + if (fault_address < uint64_t(virtual_membase_)) { // Quick kill anything below our mapping base. return false; } // Access violations are pretty rare, so we can do a linear search here. 
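// ----------------------------------------------------------------------------
// Aside: the address math behind the physical write watches above. The same
// physical page is visible through several virtual mirrors (physical address
// plus 0xA0000000 / 0xC0000000 / 0xE0000000), so a faulting address from any
// mirror is masked down to its physical address before the watch list is
// searched. The watched range below is hypothetical.
#include <cassert>
#include <cstdint>

struct WatchEntry {
  uint32_t address;  // physical start
  uint32_t length;   // rounded up to the host page size by the caller
};

uint32_t ToPhysical(uint32_t faulting_address) {
  return faulting_address & 0x1FFFFFFFu;  // strip the mirror base
}

bool HitsWatch(const WatchEntry& w, uint32_t faulting_address) {
  uint32_t physical = ToPhysical(faulting_address);
  return w.address <= physical && physical < w.address + w.length;
}

int main() {
  WatchEntry texture_watch{0x00120000u, 0x2000u};
  assert(HitsWatch(texture_watch, 0xA0120800u));   // write via the 0xA... mirror
  assert(HitsWatch(texture_watch, 0xE0121FFFu));   // last watched byte
  assert(!HitsWatch(texture_watch, 0xC0122000u));  // one past the end
  return 0;
}
// ----------------------------------------------------------------------------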
+ // Only check if in the virtual range, as we only support virtual ranges. const MMIORange* range = nullptr; - for (const auto& test_range : mapped_ranges_) { - if ((fault_address & test_range.mask) == test_range.address) { - // Address is within the range of this mapping. - range = &test_range; - break; + if (fault_address < uint64_t(physical_membase_)) { + for (const auto& test_range : mapped_ranges_) { + if ((uint32_t(fault_address) & test_range.mask) == test_range.address) { + // Address is within the range of this mapping. + range = &test_range; + break; + } } } if (!range) { diff --git a/src/xenia/cpu/mmio_handler.h b/src/xenia/cpu/mmio_handler.h index b872a81cb..70646b85b 100644 --- a/src/xenia/cpu/mmio_handler.h +++ b/src/xenia/cpu/mmio_handler.h @@ -18,8 +18,8 @@ namespace xe { namespace cpu { -typedef uint64_t (*MMIOReadCallback)(void* context, uint64_t addr); -typedef void (*MMIOWriteCallback)(void* context, uint64_t addr, uint64_t value); +typedef uint64_t (*MMIOReadCallback)(void* context, uint32_t addr); +typedef void (*MMIOWriteCallback)(void* context, uint32_t addr, uint64_t value); typedef void (*WriteWatchCallback)(void* context_ptr, void* data_ptr, uint32_t address); @@ -29,19 +29,20 @@ class MMIOHandler { public: virtual ~MMIOHandler(); - static std::unique_ptr Install(uint8_t* mapping_base); + static std::unique_ptr Install(uint8_t* virtual_membase, + uint8_t* physical_membase); static MMIOHandler* global_handler() { return global_handler_; } - bool RegisterRange(uint64_t address, uint64_t mask, uint64_t size, + bool RegisterRange(uint32_t virtual_address, uint32_t mask, uint32_t size, void* context, MMIOReadCallback read_callback, MMIOWriteCallback write_callback); - bool CheckLoad(uint64_t address, uint64_t* out_value); - bool CheckStore(uint64_t address, uint64_t value); + bool CheckLoad(uint32_t virtual_address, uint64_t* out_value); + bool CheckStore(uint32_t virtual_address, uint64_t value); - uintptr_t AddWriteWatch(uint32_t guest_address, size_t length, - WriteWatchCallback callback, void* callback_context, - void* callback_data); + uintptr_t AddPhysicalWriteWatch(uint32_t guest_address, size_t length, + WriteWatchCallback callback, + void* callback_context, void* callback_data); void CancelWriteWatch(uintptr_t watch_handle); public: @@ -56,7 +57,9 @@ class MMIOHandler { void* callback_data; }; - MMIOHandler(uint8_t* mapping_base) : mapping_base_(mapping_base) {} + MMIOHandler(uint8_t* virtual_membase, uint8_t* physical_membase) + : virtual_membase_(virtual_membase), + physical_membase_(physical_membase) {} virtual bool Initialize() = 0; @@ -68,12 +71,13 @@ class MMIOHandler { virtual uint64_t* GetThreadStateRegPtr(void* thread_state_ptr, int32_t be_reg_index) = 0; - uint8_t* mapping_base_; + uint8_t* virtual_membase_; + uint8_t* physical_membase_; struct MMIORange { - uint64_t address; - uint64_t mask; - uint64_t size; + uint32_t address; + uint32_t mask; + uint32_t size; void* context; MMIOReadCallback read; MMIOWriteCallback write; diff --git a/src/xenia/cpu/mmio_handler_win.cc b/src/xenia/cpu/mmio_handler_win.cc index 2711ed167..86beca9ec 100644 --- a/src/xenia/cpu/mmio_handler_win.cc +++ b/src/xenia/cpu/mmio_handler_win.cc @@ -11,6 +11,10 @@ #include +namespace xe { +void CrashDump(); +} // namespace xe + namespace xe { namespace cpu { @@ -18,7 +22,8 @@ LONG CALLBACK MMIOExceptionHandler(PEXCEPTION_POINTERS ex_info); class WinMMIOHandler : public MMIOHandler { public: - WinMMIOHandler(uint8_t* mapping_base) : MMIOHandler(mapping_base) {} + 
WinMMIOHandler(uint8_t* virtual_membase, uint8_t* physical_membase) + : MMIOHandler(virtual_membase, physical_membase) {} ~WinMMIOHandler() override; protected: @@ -30,8 +35,9 @@ class WinMMIOHandler : public MMIOHandler { int32_t be_reg_index) override; }; -std::unique_ptr CreateMMIOHandler(uint8_t* mapping_base) { - return std::make_unique(mapping_base); +std::unique_ptr CreateMMIOHandler(uint8_t* virtual_membase, + uint8_t* physical_membase) { + return std::make_unique(virtual_membase, physical_membase); } bool WinMMIOHandler::Initialize() { @@ -67,6 +73,7 @@ LONG CALLBACK MMIOExceptionHandler(PEXCEPTION_POINTERS ex_info) { } else { // Failed to handle; continue search for a handler (and die if no other // handler is found). + xe::CrashDump(); return EXCEPTION_CONTINUE_SEARCH; } } diff --git a/src/xenia/cpu/processor.cc b/src/xenia/cpu/processor.cc index d811e2f07..04f90c497 100644 --- a/src/xenia/cpu/processor.cc +++ b/src/xenia/cpu/processor.cc @@ -148,10 +148,16 @@ bool Processor::Setup() { backend_ = std::move(backend); frontend_ = std::move(frontend); - interrupt_thread_state_ = new ThreadState(this, 0, 0, 128 * 1024, 0); + interrupt_thread_state_ = + new ThreadState(this, 0, ThreadStackType::kKernelStack, 0, 128 * 1024, 0); interrupt_thread_state_->set_name("Interrupt"); interrupt_thread_block_ = memory_->SystemHeapAlloc(2048); interrupt_thread_state_->context()->r[13] = interrupt_thread_block_; + XELOGI("Interrupt Thread %X Stack: %.8X-%.8X", + interrupt_thread_state_->thread_id(), + interrupt_thread_state_->stack_address(), + interrupt_thread_state_->stack_address() + + interrupt_thread_state_->stack_size()); return true; } @@ -325,16 +331,20 @@ bool Processor::Execute(ThreadState* thread_state, uint32_t address) { PPCContext* context = thread_state->context(); - // Setup registers. - uint64_t previous_lr = context->lr; + // Pad out stack a bit, as some games seem to overwrite the caller by about + // 16 to 32b. + context->r[1] -= 64 + 112; + // This could be set to anything to give us a unique identifier to track // re-entrancy/etc. + uint64_t previous_lr = context->lr; context->lr = 0xBEBEBEBE; // Execute the function. auto result = fn->Call(thread_state, uint32_t(context->lr)); context->lr = previous_lr; + context->r[1] += 64 + 112; return result; } diff --git a/src/xenia/cpu/raw_module.cc b/src/xenia/cpu/raw_module.cc index e42b355f2..3142958f2 100644 --- a/src/xenia/cpu/raw_module.cc +++ b/src/xenia/cpu/raw_module.cc @@ -30,8 +30,11 @@ bool RawModule::LoadFile(uint32_t base_address, const std::wstring& path) { // Allocate memory. // Since we have no real heap just load it wherever. base_address_ = base_address; + memory_->LookupHeap(base_address_) + ->AllocFixed(base_address_, file_length, 0, + kMemoryAllocationReserve | kMemoryAllocationCommit, + kMemoryProtectRead | kMemoryProtectWrite); uint8_t* p = memory_->TranslateVirtual(base_address_); - std::memset(p, 0, file_length); // Read into memory. 
fread(p, file_length, 1, file); diff --git a/src/xenia/cpu/test/util.h b/src/xenia/cpu/test/util.h index 2a4819d6f..c566a906f 100644 --- a/src/xenia/cpu/test/util.h +++ b/src/xenia/cpu/test/util.h @@ -64,17 +64,15 @@ class TestFunction { void Run(std::function pre_call, std::function post_call) { for (auto& processor : processors) { - memory->Zero(0, memory_size); - xe::cpu::Function* fn; processor->ResolveFunction(0x1000, &fn); uint32_t stack_size = 64 * 1024; uint32_t stack_address = memory_size - stack_size; uint32_t thread_state_address = stack_address - 0x1000; - auto thread_state = - std::make_unique(processor.get(), 0x100, stack_address, - stack_size, thread_state_address); + auto thread_state = std::make_unique( + processor.get(), 0x100, ThreadStackType::kUserStack, stack_address, + stack_size, thread_state_address); auto ctx = thread_state->context(); ctx->lr = 0xBEBEBEBE; diff --git a/src/xenia/cpu/thread_state.cc b/src/xenia/cpu/thread_state.cc index e8c17c295..0e2b62097 100644 --- a/src/xenia/cpu/thread_state.cc +++ b/src/xenia/cpu/thread_state.cc @@ -10,6 +10,7 @@ #include "xenia/cpu/thread_state.h" #include "xenia/base/assert.h" +#include "xenia/base/logging.h" #include "xenia/base/threading.h" #include "xenia/cpu/processor.h" #include "xenia/debug/debugger.h" @@ -26,15 +27,16 @@ using PPCContext = xe::cpu::frontend::PPCContext; thread_local ThreadState* thread_state_ = nullptr; ThreadState::ThreadState(Processor* processor, uint32_t thread_id, - uint32_t stack_address, uint32_t stack_size, - uint32_t thread_state_address) + ThreadStackType stack_type, uint32_t stack_address, + uint32_t stack_size, uint32_t pcr_address) : processor_(processor), memory_(processor->memory()), thread_id_(thread_id), + stack_type_(stack_type), name_(""), backend_data_(0), stack_size_(stack_size), - thread_state_address_(thread_state_address) { + pcr_address_(pcr_address) { if (thread_id_ == UINT_MAX) { // System thread. Assign the system thread ID with a high bit // set so people know what's up. @@ -43,22 +45,44 @@ ThreadState::ThreadState(Processor* processor, uint32_t thread_id, } backend_data_ = processor->backend()->AllocThreadData(); - uint32_t stack_position; if (!stack_address) { + // We must always allocate 64K as a guard region before stacks, as we can + // only Protect() on system page granularity. stack_size = (stack_size + 0xFFF) & 0xFFFFF000; uint32_t stack_alignment = (stack_size & 0xF000) ? 0x1000 : 0x10000; - uint32_t stack_padding = stack_alignment * 1; + uint32_t stack_padding = uint32_t(xe::page_size()); // Host page size. 
uint32_t actual_stack_size = stack_padding + stack_size; - stack_address_ = memory()->SystemHeapAlloc(actual_stack_size, stack_alignment); - assert_true(!(stack_address & 0xFFF)); // just to be safe - stack_position = stack_address_ + actual_stack_size; + bool top_down; + switch (stack_type) { + case ThreadStackType::kKernelStack: + top_down = true; + break; + case ThreadStackType::kUserStack: + top_down = false; + break; + default: + assert_unhandled_case(stack_type); + break; + } + memory() + ->LookupHeap(0x70000000) + ->AllocRange(0x70000000, 0x7FFFFFFF, actual_stack_size, stack_alignment, + kMemoryAllocationReserve | kMemoryAllocationCommit, + kMemoryProtectRead | kMemoryProtectWrite, top_down, + &stack_address_); + assert_true(!(stack_address_ & 0xFFF)); // just to be safe stack_allocated_ = true; - memset(memory()->TranslateVirtual(stack_address_), 0xBE, actual_stack_size); - memory()->Protect(stack_address_, stack_padding, X_PAGE_NOACCESS); + stack_base_ = stack_address_ + actual_stack_size; + stack_limit_ = stack_address_ + stack_padding; + memory()->Fill(stack_address_, actual_stack_size, 0xBE); + memory() + ->LookupHeap(stack_address_) + ->Protect(stack_address_, stack_padding, kMemoryProtectNoAccess); } else { stack_address_ = stack_address; - stack_position = stack_address_ + stack_size; stack_allocated_ = false; + stack_base_ = stack_address_ + stack_size; + stack_limit_ = stack_address_; } assert_not_zero(stack_address_); @@ -78,12 +102,8 @@ ThreadState::ThreadState(Processor* processor, uint32_t thread_id, context_->thread_id = thread_id_; // Set initial registers. - context_->r[1] = stack_position; - context_->r[13] = thread_state_address_; - - // Pad out stack a bit, as some games seem to overwrite the caller by about - // 16 to 32b. 
- context_->r[1] -= 64; + context_->r[1] = stack_base_; + context_->r[13] = pcr_address_; processor_->debugger()->OnThreadCreated(this); } @@ -100,7 +120,7 @@ ThreadState::~ThreadState() { _aligned_free(context_); if (stack_allocated_) { - memory()->SystemHeapFree(stack_address_); + memory()->LookupHeap(stack_address_)->Decommit(stack_address_, stack_size_); } } diff --git a/src/xenia/cpu/thread_state.h b/src/xenia/cpu/thread_state.h index 542ea4638..31df73b39 100644 --- a/src/xenia/cpu/thread_state.h +++ b/src/xenia/cpu/thread_state.h @@ -19,21 +19,30 @@ namespace cpu { class Processor; +enum class ThreadStackType { + kKernelStack, + kUserStack, +}; + class ThreadState { public: - ThreadState(Processor* processor, uint32_t thread_id, uint32_t stack_address, - uint32_t stack_size, uint32_t thread_state_address); + ThreadState(Processor* processor, uint32_t thread_id, + ThreadStackType stack_type, uint32_t stack_address, + uint32_t stack_size, uint32_t pcr_address); ~ThreadState(); Processor* processor() const { return processor_; } Memory* memory() const { return memory_; } uint32_t thread_id() const { return thread_id_; } + ThreadStackType stack_type() const { return stack_type_; } const std::string& name() const { return name_; } void set_name(const std::string& value) { name_ = value; } void* backend_data() const { return backend_data_; } uint32_t stack_address() const { return stack_address_; } uint32_t stack_size() const { return stack_size_; } - uint32_t thread_state_address() const { return thread_state_address_; } + uint32_t stack_base() const { return stack_base_; } + uint32_t stack_limit() const { return stack_limit_; } + uint32_t pcr_address() const { return pcr_address_; } xe::cpu::frontend::PPCContext* context() const { return context_; } bool Suspend() { return Suspend(~0); } @@ -48,12 +57,15 @@ class ThreadState { Processor* processor_; Memory* memory_; uint32_t thread_id_; + ThreadStackType stack_type_; std::string name_; void* backend_data_; uint32_t stack_address_; bool stack_allocated_; uint32_t stack_size_; - uint32_t thread_state_address_; + uint32_t stack_base_; + uint32_t stack_limit_; + uint32_t pcr_address_; // NOTE: must be 64b aligned for SSE ops. 
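// ----------------------------------------------------------------------------
// Aside: the stack layout produced by the ThreadState changes above (concrete
// values here are hypothetical). [stack_address_, stack_limit_) is a
// no-access guard region one host allocation unit long, [stack_limit_,
// stack_base_) is the usable stack, and r1 starts at stack_base_ and grows
// downward.
#include <cassert>
#include <cstdint>
#include <cstdio>

int main() {
  const uint32_t page_size = 0x10000;         // host allocation granularity
  const uint32_t stack_size = 0x20000;        // requested 128 KiB stack
  const uint32_t stack_address = 0x70010000;  // returned by the 0x70000000 heap

  uint32_t actual_size = page_size + stack_size;  // guard + usable stack
  uint32_t stack_limit = stack_address + page_size;
  uint32_t stack_base = stack_address + actual_size;

  assert(stack_base - stack_limit == stack_size);
  std::printf("guard %.8X-%.8X, stack %.8X-%.8X, initial r1 = %.8X\n",
              stack_address, stack_limit, stack_limit, stack_base, stack_base);
  return 0;
}
// ----------------------------------------------------------------------------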
xe::cpu::frontend::PPCContext* context_; diff --git a/src/xenia/cpu/xex_module.cc b/src/xenia/cpu/xex_module.cc index f3fbcb3e3..3fbdebc17 100644 --- a/src/xenia/cpu/xex_module.cc +++ b/src/xenia/cpu/xex_module.cc @@ -18,21 +18,25 @@ #include "xenia/cpu/cpu-private.h" #include "xenia/cpu/export_resolver.h" #include "xenia/cpu/processor.h" +#include "xenia/kernel/kernel_state.h" +#include "xenia/kernel/objects/xmodule.h" namespace xe { namespace cpu { using namespace xe::cpu; +using namespace xe::kernel; using PPCContext = xe::cpu::frontend::PPCContext; void UndefinedImport(PPCContext* ppc_state, void* arg0, void* arg1) { - XELOGE("call to undefined kernel import"); + XELOGE("call to undefined import"); } -XexModule::XexModule(Processor* processor) +XexModule::XexModule(Processor* processor, KernelState* state) : Module(processor), processor_(processor), + kernel_state_(state), xex_(nullptr), base_address_(0), low_address_(0), @@ -104,8 +108,25 @@ bool XexModule::SetupLibraryImports(const xe_xex2_import_library_t* library) { for (size_t n = 0; n < import_info_count; n++) { const xe_xex2_import_info_t* info = &import_infos[n]; - KernelExport* kernel_export = - export_resolver->GetExportByOrdinal(library->name, info->ordinal); + // Strip off the extension (for the symbol name) + std::string libname = library->name; + auto dot = libname.find_last_of('.'); + if (dot != libname.npos) { + libname = libname.substr(0, dot); + } + + KernelExport* kernel_export = NULL; // kernel export info + uint32_t user_export_addr = 0; // user export address + + if (kernel_state_->IsKernelModule(library->name)) { + kernel_export = + export_resolver->GetExportByOrdinal(library->name, info->ordinal); + } else { + XModule* module = kernel_state_->GetModule(library->name); + if (module) { + user_export_addr = module->GetProcAddressByOrdinal(info->ordinal); + } + } if (kernel_export) { if (info->thunk_address) { @@ -114,7 +135,7 @@ bool XexModule::SetupLibraryImports(const xe_xex2_import_library_t* library) { snprintf(name, xe::countof(name), "%s", kernel_export->name); } } else { - snprintf(name, xe::countof(name), "__imp_%s_%.3X", library->name, + snprintf(name, xe::countof(name), "__imp_%s_%.3X", libname, info->ordinal); } @@ -127,8 +148,8 @@ bool XexModule::SetupLibraryImports(const xe_xex2_import_library_t* library) { var_info->set_status(SymbolInfo::STATUS_DEFINED); // Grab, if available. + auto slot = memory_->TranslateVirtual(info->value_address); if (kernel_export) { - auto slot = memory_->TranslateVirtual(info->value_address); if (kernel_export->type == KernelExport::Function) { // Not exactly sure what this should be... if (info->thunk_address) { @@ -151,53 +172,80 @@ bool XexModule::SetupLibraryImports(const xe_xex2_import_library_t* library) { kernel_export->name); } } + } else if (user_export_addr) { + xe::store_and_swap(slot, user_export_addr); + } else { + // No module found. 
+ XELOGE("kernel import not found: %s", name); + if (info->thunk_address) { + *slot = xe::byte_swap(info->thunk_address); + } else { + *slot = xe::byte_swap(0xF00DF00D); + } } if (info->thunk_address) { if (kernel_export) { snprintf(name, xe::countof(name), "%s", kernel_export->name); + } else if (user_export_addr) { + snprintf(name, xe::countof(name), "__%s_%.3X", libname, info->ordinal); } else { - snprintf(name, xe::countof(name), "__kernel_%s_%.3X", library->name, + snprintf(name, xe::countof(name), "__kernel_%s_%.3X", libname, info->ordinal); } - // On load we have something like this in memory: - // li r3, 0 - // li r4, 0x1F5 - // mtspr CTR, r11 - // bctr - // Real consoles rewrite this with some code that sets r11. - // If we did that we'd still have to put a thunk somewhere and do the - // dynamic lookup. Instead, we rewrite it to use syscalls, as they - // aren't used on the 360. CPU backends can either take the syscall - // or do something smarter. - // sc - // blr - // nop - // nop - uint8_t* p = memory()->TranslateVirtual(info->thunk_address); - xe::store_and_swap(p + 0x0, 0x44000002); - xe::store_and_swap(p + 0x4, 0x4E800020); - xe::store_and_swap(p + 0x8, 0x60000000); - xe::store_and_swap(p + 0xC, 0x60000000); + if (user_export_addr) { + // Rewrite PPC code to set r11 to the target address + // So we'll have: + // lis r11, user_export_addr + // ori r11, r11, user_export_addr + // mtspr CTR, r11 + // bctr + uint16_t hi_addr = (user_export_addr >> 16) & 0xFFFF; + uint16_t low_addr = user_export_addr & 0xFFFF; - FunctionInfo::ExternHandler handler = 0; - void* handler_data = 0; - if (kernel_export) { - handler = - (FunctionInfo::ExternHandler)kernel_export->function_data.shim; - handler_data = kernel_export->function_data.shim_data; + uint8_t* p = memory()->TranslateVirtual(info->thunk_address); + xe::store_and_swap(p + 0x0, 0x3D600000 | hi_addr); + xe::store_and_swap(p + 0x4, 0x616B0000 | low_addr); } else { - handler = (FunctionInfo::ExternHandler)UndefinedImport; - handler_data = this; - } + // On load we have something like this in memory: + // li r3, 0 + // li r4, 0x1F5 + // mtspr CTR, r11 + // bctr + // Real consoles rewrite this with some code that sets r11. + // If we did that we'd still have to put a thunk somewhere and do the + // dynamic lookup. Instead, we rewrite it to use syscalls, as they + // aren't used on the 360. CPU backends can either take the syscall + // or do something smarter. 
+ // sc + // blr + // nop + // nop + uint8_t* p = memory()->TranslateVirtual(info->thunk_address); + xe::store_and_swap(p + 0x0, 0x44000002); + xe::store_and_swap(p + 0x4, 0x4E800020); + xe::store_and_swap(p + 0x8, 0x60000000); + xe::store_and_swap(p + 0xC, 0x60000000); - FunctionInfo* fn_info; - DeclareFunction(info->thunk_address, &fn_info); - fn_info->set_end_address(info->thunk_address + 16 - 4); - fn_info->set_name(name); - fn_info->SetupExtern(handler, handler_data, NULL); - fn_info->set_status(SymbolInfo::STATUS_DECLARED); + FunctionInfo::ExternHandler handler = 0; + void* handler_data = 0; + if (kernel_export) { + handler = + (FunctionInfo::ExternHandler)kernel_export->function_data.shim; + handler_data = kernel_export->function_data.shim_data; + } else { + handler = (FunctionInfo::ExternHandler)UndefinedImport; + handler_data = this; + } + + FunctionInfo* fn_info; + DeclareFunction(info->thunk_address, &fn_info); + fn_info->set_end_address(info->thunk_address + 16 - 4); + fn_info->set_name(name); + fn_info->SetupExtern(handler, handler_data, NULL); + fn_info->set_status(SymbolInfo::STATUS_DECLARED); + } } } diff --git a/src/xenia/cpu/xex_module.h b/src/xenia/cpu/xex_module.h index 0856ad59b..6806b912c 100644 --- a/src/xenia/cpu/xex_module.h +++ b/src/xenia/cpu/xex_module.h @@ -16,13 +16,17 @@ #include "xenia/kernel/util/xex2.h" namespace xe { + +// KernelState forward decl. +namespace kernel { class KernelState; } + namespace cpu { class Runtime; class XexModule : public xe::cpu::Module { public: - XexModule(Processor* processor); + XexModule(Processor* processor, kernel::KernelState* state); virtual ~XexModule(); xe_xex2_ref xex() const { return xex_; } @@ -40,6 +44,7 @@ class XexModule : public xe::cpu::Module { private: Processor* processor_; + kernel::KernelState* kernel_state_; std::string name_; std::string path_; xe_xex2_ref xex_; diff --git a/src/xenia/emulator.cc b/src/xenia/emulator.cc index a909f2ba8..a88760980 100644 --- a/src/xenia/emulator.cc +++ b/src/xenia/emulator.cc @@ -110,10 +110,6 @@ X_STATUS Emulator::Setup() { if (!processor_->Setup()) { return result; } - result = audio_system_->Setup(); - if (result) { - return result; - } result = graphics_system_->Setup(processor_.get(), main_window_->loop(), main_window_.get()); if (result) { @@ -130,6 +126,11 @@ X_STATUS Emulator::Setup() { // Shared kernel state. kernel_state_ = std::make_unique(this); + result = audio_system_->Setup(); + if (result) { + return result; + } + // HLE kernel modules. xboxkrnl_ = std::make_unique(this, kernel_state_.get()); xam_ = std::make_unique(this, kernel_state_.get()); diff --git a/src/xenia/emulator.h b/src/xenia/emulator.h index ab8145c2e..6d389a7f5 100644 --- a/src/xenia/emulator.h +++ b/src/xenia/emulator.h @@ -66,6 +66,8 @@ class Emulator { } kernel::fs::FileSystem* file_system() const { return file_system_.get(); } + kernel::KernelState* kernel_state() const { return kernel_state_.get(); } + kernel::XboxkrnlModule* xboxkrnl() const { return xboxkrnl_.get(); } kernel::XamModule* xam() const { return xam_.get(); } diff --git a/src/xenia/gpu/gl4/command_processor.cc b/src/xenia/gpu/gl4/command_processor.cc index aa80ac1aa..7308b35e9 100644 --- a/src/xenia/gpu/gl4/command_processor.cc +++ b/src/xenia/gpu/gl4/command_processor.cc @@ -456,7 +456,7 @@ void CommandProcessor::EnableReadPointerWriteBack(uint32_t ptr, uint32_t block_size) { // CP_RB_RPTR_ADDR Ring Buffer Read Pointer Address 0x70C // ptr = RB_RPTR_ADDR, pointer to write back the address to. 
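// ----------------------------------------------------------------------------
// Aside: the encoding behind the user-export thunk patch above. 0x3D600000 is
// "lis r11, imm" (addis r11, 0, imm) and 0x616B0000 is "ori r11, r11, imm",
// so the pair loads the 32-bit export address into r11 before the thunk's
// existing mtspr CTR, r11 / bctr. The export address below is hypothetical.
#include <cassert>
#include <cstdint>
#include <cstdio>

int main() {
  const uint32_t user_export_addr = 0x82014B20u;  // hypothetical target
  uint16_t hi_addr = (user_export_addr >> 16) & 0xFFFF;
  uint16_t low_addr = user_export_addr & 0xFFFF;

  uint32_t lis_r11 = 0x3D600000u | hi_addr;  // lis  r11, 0x8201
  uint32_t ori_r11 = 0x616B0000u | low_addr; // ori  r11, r11, 0x4B20

  // The two immediates reassemble into the original address.
  uint32_t rebuilt = ((lis_r11 & 0xFFFFu) << 16) | (ori_r11 & 0xFFFFu);
  assert(rebuilt == user_export_addr);
  std::printf("%.8X %.8X -> r11 = %.8X\n", lis_r11, ori_r11, rebuilt);
  return 0;
}
// ----------------------------------------------------------------------------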
- read_ptr_writeback_ptr_ = (primary_buffer_ptr_ & ~0x1FFFFFFF) + ptr; + read_ptr_writeback_ptr_ = ptr; // CP_RB_CNTL Ring Buffer Control 0x704 // block_size = RB_BLKSZ, number of quadwords read between updates of the // read pointer. @@ -966,7 +966,7 @@ bool CommandProcessor::ExecutePacketType3_XE_SWAP(RingbufferReader* reader, bool CommandProcessor::ExecutePacketType3_INDIRECT_BUFFER( RingbufferReader* reader, uint32_t packet, uint32_t count) { // indirect buffer dispatch - uint32_t list_ptr = reader->Read(); + uint32_t list_ptr = CpuToGpu(reader->Read()); uint32_t list_length = reader->Read(); ExecuteIndirectBuffer(GpuToCpu(list_ptr), list_length); return true; @@ -993,7 +993,7 @@ bool CommandProcessor::ExecutePacketType3_WAIT_REG_MEM(RingbufferReader* reader, poll_reg_addr &= ~0x3; value = xe::load(memory_->TranslatePhysical(poll_reg_addr)); value = GpuSwap(value, endianness); - trace_writer_.WriteMemoryRead(poll_reg_addr, 4); + trace_writer_.WriteMemoryRead(CpuToGpu(poll_reg_addr), 4); } else { // Register. assert_true(poll_reg_addr < RegisterFile::kRegisterCount); @@ -1093,7 +1093,7 @@ bool CommandProcessor::ExecutePacketType3_COND_WRITE(RingbufferReader* reader, // Memory. auto endianness = static_cast(poll_reg_addr & 0x3); poll_reg_addr &= ~0x3; - trace_writer_.WriteMemoryRead(poll_reg_addr, 4); + trace_writer_.WriteMemoryRead(CpuToGpu(poll_reg_addr), 4); value = xe::load(memory_->TranslatePhysical(poll_reg_addr)); value = GpuSwap(value, endianness); } else { @@ -1136,7 +1136,7 @@ bool CommandProcessor::ExecutePacketType3_COND_WRITE(RingbufferReader* reader, write_reg_addr &= ~0x3; write_data = GpuSwap(write_data, endianness); xe::store(memory_->TranslatePhysical(write_reg_addr), write_data); - trace_writer_.WriteMemoryWrite(write_reg_addr, 4); + trace_writer_.WriteMemoryWrite(CpuToGpu(write_reg_addr), 4); } else { // Register. 
WriteRegister(write_reg_addr, write_data); @@ -1182,7 +1182,7 @@ bool CommandProcessor::ExecutePacketType3_EVENT_WRITE_SHD( address &= ~0x3; data_value = GpuSwap(data_value, endianness); xe::store(memory_->TranslatePhysical(address), data_value); - trace_writer_.WriteMemoryWrite(address, 4); + trace_writer_.WriteMemoryWrite(CpuToGpu(address), 4); return true; } @@ -1208,7 +1208,7 @@ bool CommandProcessor::ExecutePacketType3_EVENT_WRITE_EXT( xe::copy_and_swap_16_aligned( reinterpret_cast(memory_->TranslatePhysical(address)), extents, xe::countof(extents)); - trace_writer_.WriteMemoryWrite(address, sizeof(extents)); + trace_writer_.WriteMemoryWrite(CpuToGpu(address), sizeof(extents)); return true; } @@ -1364,7 +1364,7 @@ bool CommandProcessor::ExecutePacketType3_LOAD_ALU_CONSTANT( assert_always(); return true; } - trace_writer_.WriteMemoryRead(address, size_dwords * 4); + trace_writer_.WriteMemoryRead(CpuToGpu(address), size_dwords * 4); for (uint32_t n = 0; n < size_dwords; n++, index++) { uint32_t data = xe::load_and_swap( memory_->TranslatePhysical(address + n * 4)); @@ -1395,7 +1395,7 @@ bool CommandProcessor::ExecutePacketType3_IM_LOAD(RingbufferReader* reader, uint32_t start = start_size >> 16; uint32_t size_dwords = start_size & 0xFFFF; // dwords assert_true(start == 0); - trace_writer_.WriteMemoryRead(addr, size_dwords * 4); + trace_writer_.WriteMemoryRead(CpuToGpu(addr), size_dwords * 4); LoadShader(shader_type, addr, memory_->TranslatePhysical(addr), size_dwords); return true; @@ -2106,29 +2106,31 @@ CommandProcessor::UpdateStatus CommandProcessor::UpdateBlendState() { draw_batcher_.Flush(DrawBatcher::FlushMode::kStateChange); - static const GLenum blend_map[] = {/* 0 */ GL_ZERO, - /* 1 */ GL_ONE, - /* 2 */ GL_ZERO, // ? - /* 3 */ GL_ZERO, // ? - /* 4 */ GL_SRC_COLOR, - /* 5 */ GL_ONE_MINUS_SRC_COLOR, - /* 6 */ GL_SRC_ALPHA, - /* 7 */ GL_ONE_MINUS_SRC_ALPHA, - /* 8 */ GL_DST_COLOR, - /* 9 */ GL_ONE_MINUS_DST_COLOR, - /* 10 */ GL_DST_ALPHA, - /* 11 */ GL_ONE_MINUS_DST_ALPHA, - /* 12 */ GL_CONSTANT_COLOR, - /* 13 */ GL_ONE_MINUS_CONSTANT_COLOR, - /* 14 */ GL_CONSTANT_ALPHA, - /* 15 */ GL_ONE_MINUS_CONSTANT_ALPHA, - /* 16 */ GL_SRC_ALPHA_SATURATE, + static const GLenum blend_map[] = { + /* 0 */ GL_ZERO, + /* 1 */ GL_ONE, + /* 2 */ GL_ZERO, // ? + /* 3 */ GL_ZERO, // ? 
+ /* 4 */ GL_SRC_COLOR, + /* 5 */ GL_ONE_MINUS_SRC_COLOR, + /* 6 */ GL_SRC_ALPHA, + /* 7 */ GL_ONE_MINUS_SRC_ALPHA, + /* 8 */ GL_DST_COLOR, + /* 9 */ GL_ONE_MINUS_DST_COLOR, + /* 10 */ GL_DST_ALPHA, + /* 11 */ GL_ONE_MINUS_DST_ALPHA, + /* 12 */ GL_CONSTANT_COLOR, + /* 13 */ GL_ONE_MINUS_CONSTANT_COLOR, + /* 14 */ GL_CONSTANT_ALPHA, + /* 15 */ GL_ONE_MINUS_CONSTANT_ALPHA, + /* 16 */ GL_SRC_ALPHA_SATURATE, }; - static const GLenum blend_op_map[] = {/* 0 */ GL_FUNC_ADD, - /* 1 */ GL_FUNC_SUBTRACT, - /* 2 */ GL_MIN, - /* 3 */ GL_MAX, - /* 4 */ GL_FUNC_REVERSE_SUBTRACT, + static const GLenum blend_op_map[] = { + /* 0 */ GL_FUNC_ADD, + /* 1 */ GL_FUNC_SUBTRACT, + /* 2 */ GL_MIN, + /* 3 */ GL_MAX, + /* 4 */ GL_FUNC_REVERSE_SUBTRACT, }; for (int i = 0; i < xe::countof(regs.rb_blendcontrol); ++i) { uint32_t blend_control = regs.rb_blendcontrol[i]; @@ -2181,23 +2183,25 @@ CommandProcessor::UpdateStatus CommandProcessor::UpdateDepthStencilState() { draw_batcher_.Flush(DrawBatcher::FlushMode::kStateChange); - static const GLenum compare_func_map[] = {/* 0 */ GL_NEVER, - /* 1 */ GL_LESS, - /* 2 */ GL_EQUAL, - /* 3 */ GL_LEQUAL, - /* 4 */ GL_GREATER, - /* 5 */ GL_NOTEQUAL, - /* 6 */ GL_GEQUAL, - /* 7 */ GL_ALWAYS, + static const GLenum compare_func_map[] = { + /* 0 */ GL_NEVER, + /* 1 */ GL_LESS, + /* 2 */ GL_EQUAL, + /* 3 */ GL_LEQUAL, + /* 4 */ GL_GREATER, + /* 5 */ GL_NOTEQUAL, + /* 6 */ GL_GEQUAL, + /* 7 */ GL_ALWAYS, }; - static const GLenum stencil_op_map[] = {/* 0 */ GL_KEEP, - /* 1 */ GL_ZERO, - /* 2 */ GL_REPLACE, - /* 3 */ GL_INCR_WRAP, - /* 4 */ GL_DECR_WRAP, - /* 5 */ GL_INVERT, - /* 6 */ GL_INCR, - /* 7 */ GL_DECR, + static const GLenum stencil_op_map[] = { + /* 0 */ GL_KEEP, + /* 1 */ GL_ZERO, + /* 2 */ GL_REPLACE, + /* 3 */ GL_INCR_WRAP, + /* 4 */ GL_DECR_WRAP, + /* 5 */ GL_INVERT, + /* 6 */ GL_INCR, + /* 7 */ GL_DECR, }; // A2XX_RB_DEPTHCONTROL_Z_ENABLE if (regs.rb_depthcontrol & 0x00000002) { diff --git a/src/xenia/gpu/gl4/gl4_graphics_system.cc b/src/xenia/gpu/gl4/gl4_graphics_system.cc index a533cb091..bd2ee1f87 100644 --- a/src/xenia/gpu/gl4/gl4_graphics_system.cc +++ b/src/xenia/gpu/gl4/gl4_graphics_system.cc @@ -74,7 +74,7 @@ X_STATUS GL4GraphicsSystem::Setup(cpu::Processor* processor, [this](const SwapParameters& swap_params) { SwapHandler(swap_params); }); // Let the processor know we want register access callbacks. 
- memory_->AddMappedRange( + memory_->AddVirtualMappedRange( 0x7FC80000, 0xFFFF0000, 0x0000FFFF, this, reinterpret_cast(MMIOReadRegisterThunk), reinterpret_cast(MMIOWriteRegisterThunk)); @@ -275,7 +275,7 @@ void GL4GraphicsSystem::SwapHandler(const SwapParameters& swap_params) { }); } -uint64_t GL4GraphicsSystem::ReadRegister(uint64_t addr) { +uint64_t GL4GraphicsSystem::ReadRegister(uint32_t addr) { uint32_t r = addr & 0xFFFF; switch (r) { @@ -295,7 +295,7 @@ uint64_t GL4GraphicsSystem::ReadRegister(uint64_t addr) { return register_file_.values[r].u32; } -void GL4GraphicsSystem::WriteRegister(uint64_t addr, uint64_t value) { +void GL4GraphicsSystem::WriteRegister(uint32_t addr, uint64_t value) { uint32_t r = addr & 0xFFFF; switch (r) { diff --git a/src/xenia/gpu/gl4/gl4_graphics_system.h b/src/xenia/gpu/gl4/gl4_graphics_system.h index d1af6bcf0..558af2c1c 100644 --- a/src/xenia/gpu/gl4/gl4_graphics_system.h +++ b/src/xenia/gpu/gl4/gl4_graphics_system.h @@ -50,13 +50,13 @@ class GL4GraphicsSystem : public GraphicsSystem { private: void MarkVblank(); void SwapHandler(const SwapParameters& swap_params); - uint64_t ReadRegister(uint64_t addr); - void WriteRegister(uint64_t addr, uint64_t value); + uint64_t ReadRegister(uint32_t addr); + void WriteRegister(uint32_t addr, uint64_t value); - static uint64_t MMIOReadRegisterThunk(GL4GraphicsSystem* gs, uint64_t addr) { + static uint64_t MMIOReadRegisterThunk(GL4GraphicsSystem* gs, uint32_t addr) { return gs->ReadRegister(addr); } - static void MMIOWriteRegisterThunk(GL4GraphicsSystem* gs, uint64_t addr, + static void MMIOWriteRegisterThunk(GL4GraphicsSystem* gs, uint32_t addr, uint64_t value) { gs->WriteRegister(addr, value); } diff --git a/src/xenia/gpu/gl4/texture_cache.cc b/src/xenia/gpu/gl4/texture_cache.cc index adbda5f12..e34839590 100644 --- a/src/xenia/gpu/gl4/texture_cache.cc +++ b/src/xenia/gpu/gl4/texture_cache.cc @@ -490,7 +490,7 @@ TextureCache::TextureEntry* TextureCache::LookupOrInsertTexture( // Add a write watch. If any data in the given range is touched we'll get a // callback and evict the texture. We could reuse the storage, though the // driver is likely in a better position to pool that kind of stuff. 
- entry->write_watch_handle = memory_->AddWriteWatch( + entry->write_watch_handle = memory_->AddPhysicalWriteWatch( texture_info.guest_address, texture_info.input_length, [](void* context_ptr, void* data_ptr, uint32_t address) { auto self = reinterpret_cast(context_ptr); @@ -710,7 +710,9 @@ bool TextureCache::UploadTexture2D(GLuint texture, uint8_t* dest = reinterpret_cast(allocation.host_ptr); uint32_t pitch = std::min(texture_info.size_2d.input_pitch, texture_info.size_2d.output_pitch); - for (uint32_t y = 0; y < texture_info.size_2d.block_height; y++) { + for (uint32_t y = 0; y < std::min(texture_info.size_2d.block_height, + texture_info.size_2d.logical_height); + y++) { TextureSwap(texture_info.endianness, dest, src, pitch); src += texture_info.size_2d.input_pitch; dest += texture_info.size_2d.output_pitch; @@ -735,7 +737,8 @@ bool TextureCache::UploadTexture2D(GLuint texture, auto bpp = (bytes_per_block >> 2) + ((bytes_per_block >> 1) >> (bytes_per_block >> 2)); for (uint32_t y = 0, output_base_offset = 0; - y < texture_info.size_2d.block_height; + y < std::min(texture_info.size_2d.block_height, + texture_info.size_2d.logical_height); y++, output_base_offset += texture_info.size_2d.output_pitch) { auto input_base_offset = TextureInfo::TiledOffset2DOuter( offset_y + y, (texture_info.size_2d.input_width / diff --git a/src/xenia/gpu/xe-gpu-trace-viewer.cc b/src/xenia/gpu/xe-gpu-trace-viewer.cc index 39e94666d..5ca724c4d 100644 --- a/src/xenia/gpu/xe-gpu-trace-viewer.cc +++ b/src/xenia/gpu/xe-gpu-trace-viewer.cc @@ -771,7 +771,15 @@ class TracePlayer : public TraceReader { : loop_(loop), graphics_system_(graphics_system), current_frame_index_(0), - current_command_index_(-1) {} + current_command_index_(-1) { + // Need to allocate all of physical memory so that we can write to it + // during playback. + graphics_system_->memory() + ->LookupHeapByType(true, 4096) + ->AllocFixed(0, 0x1FFFFFFF, 4096, + kMemoryAllocationReserve | kMemoryAllocationCommit, + kMemoryProtectRead | kMemoryProtectWrite); + } ~TracePlayer() = default; GraphicsSystem* graphics_system() const { return graphics_system_; } diff --git a/src/xenia/kernel/fs/devices/host_path_entry.cc b/src/xenia/kernel/fs/devices/host_path_entry.cc index 7acd0fb5b..48a5883ca 100644 --- a/src/xenia/kernel/fs/devices/host_path_entry.cc +++ b/src/xenia/kernel/fs/devices/host_path_entry.cc @@ -140,9 +140,25 @@ X_STATUS HostPathEntry::Open(KernelState* kernel_state, Mode mode, bool async, // TODO(benvanik): plumb through proper disposition/access mode. DWORD desired_access = is_read_only() ? GENERIC_READ : (GENERIC_READ | GENERIC_WRITE); - // mode == Mode::READ ? GENERIC_READ : (GENERIC_READ | GENERIC_WRITE); + if (mode == Mode::READ_APPEND) { + desired_access |= FILE_APPEND_DATA; + } DWORD share_mode = FILE_SHARE_READ; - DWORD creation_disposition = mode == Mode::READ ? OPEN_EXISTING : OPEN_ALWAYS; + DWORD creation_disposition; + switch (mode) { + case Mode::READ: + creation_disposition = OPEN_EXISTING; + break; + case Mode::READ_WRITE: + creation_disposition = OPEN_ALWAYS; + break; + case Mode::READ_APPEND: + creation_disposition = OPEN_EXISTING; + break; + default: + assert_unhandled_case(mode); + break; + } DWORD flags_and_attributes = async ? 
FILE_FLAG_OVERLAPPED : 0; HANDLE file = CreateFile(local_path_.c_str(), desired_access, share_mode, NULL, @@ -150,7 +166,7 @@ X_STATUS HostPathEntry::Open(KernelState* kernel_state, Mode mode, bool async, flags_and_attributes | FILE_FLAG_BACKUP_SEMANTICS, NULL); if (file == INVALID_HANDLE_VALUE) { // TODO(benvanik): pick correct response. - return X_STATUS_ACCESS_DENIED; + return X_STATUS_NO_SUCH_FILE; } *out_file = new HostPathFile(kernel_state, mode, this, file); diff --git a/src/xenia/kernel/fs/entry.h b/src/xenia/kernel/fs/entry.h index 441dcd981..0cb5bacbf 100644 --- a/src/xenia/kernel/fs/entry.h +++ b/src/xenia/kernel/fs/entry.h @@ -35,6 +35,7 @@ class Device; enum class Mode { READ, READ_WRITE, + READ_APPEND, }; class MemoryMapping { diff --git a/src/xenia/kernel/kernel_state.cc b/src/xenia/kernel/kernel_state.cc index 4a6676cbf..4d64d7f2e 100644 --- a/src/xenia/kernel/kernel_state.cc +++ b/src/xenia/kernel/kernel_state.cc @@ -97,6 +97,19 @@ void KernelState::RegisterModule(XModule* module) {} void KernelState::UnregisterModule(XModule* module) {} +bool KernelState::IsKernelModule(const char* name) { + if (!name) { + // executing module isn't a kernel module + return false; + } else if (strcasecmp(name, "xam.xex") == 0) { + return true; + } else if (strcasecmp(name, "xboxkrnl.exe") == 0) { + return true; + } + + return false; +} + XModule* KernelState::GetModule(const char* name) { if (!name) { // NULL name = self. @@ -114,7 +127,7 @@ XModule* KernelState::GetModule(const char* name) { // Some games request this, for some reason. wtf. return nullptr; } else { - std::lock_guard lock(object_mutex_); + std::lock_guard lock(object_mutex_); for (XUserModule* module : user_modules_) { if ((strcasecmp(xe::find_name_from_path(module->path()).c_str(), name) == @@ -163,7 +176,7 @@ XUserModule* KernelState::LoadUserModule(const char* raw_name) { XUserModule* module = nullptr; { - std::lock_guard lock(object_mutex_); + std::lock_guard lock(object_mutex_); // See if we've already loaded it for (XUserModule* existing_module : user_modules_) { @@ -205,12 +218,12 @@ XUserModule* KernelState::LoadUserModule(const char* raw_name) { } void KernelState::RegisterThread(XThread* thread) { - std::lock_guard lock(object_mutex_); + std::lock_guard lock(object_mutex_); threads_by_id_[thread->thread_id()] = thread; } void KernelState::UnregisterThread(XThread* thread) { - std::lock_guard lock(object_mutex_); + std::lock_guard lock(object_mutex_); auto it = threads_by_id_.find(thread->thread_id()); if (it != threads_by_id_.end()) { threads_by_id_.erase(it); @@ -218,7 +231,7 @@ void KernelState::UnregisterThread(XThread* thread) { } void KernelState::OnThreadExecute(XThread* thread) { - std::lock_guard lock(object_mutex_); + std::lock_guard lock(object_mutex_); // Must be called on executing thread. assert_true(XThread::GetCurrentThread() == thread); @@ -241,7 +254,7 @@ void KernelState::OnThreadExecute(XThread* thread) { } void KernelState::OnThreadExit(XThread* thread) { - std::lock_guard lock(object_mutex_); + std::lock_guard lock(object_mutex_); // Must be called on executing thread. 
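The host_path_entry::Open() change above maps the new READ_APPEND mode onto Win32 semantics: append adds FILE_APPEND_DATA to the access mask while keeping OPEN_EXISTING as the disposition, and READ_WRITE still uses OPEN_ALWAYS. A small platform-neutral sketch of the same decision table; the constants are spelled out with the standard Win32 values so it compiles without windows.h:

#include <cstdint>
#include <cstdio>

enum class Mode { READ, READ_WRITE, READ_APPEND };

// Stand-ins for the Win32 flags (standard values) so this builds anywhere.
constexpr uint32_t kGenericRead = 0x80000000u;   // GENERIC_READ
constexpr uint32_t kGenericWrite = 0x40000000u;  // GENERIC_WRITE
constexpr uint32_t kFileAppendData = 0x00000004u;  // FILE_APPEND_DATA
constexpr uint32_t kOpenExisting = 3;  // OPEN_EXISTING
constexpr uint32_t kOpenAlways = 4;    // OPEN_ALWAYS

struct OpenParams {
  uint32_t desired_access;
  uint32_t creation_disposition;
};

OpenParams TranslateMode(Mode mode, bool read_only) {
  OpenParams p{};
  p.desired_access = read_only ? kGenericRead : (kGenericRead | kGenericWrite);
  if (mode == Mode::READ_APPEND) {
    p.desired_access |= kFileAppendData;
  }
  switch (mode) {
    case Mode::READ:
    case Mode::READ_APPEND:
      // Appending still requires the file to already exist.
      p.creation_disposition = kOpenExisting;
      break;
    case Mode::READ_WRITE:
      p.creation_disposition = kOpenAlways;
      break;
  }
  return p;
}

int main() {
  OpenParams p = TranslateMode(Mode::READ_APPEND, /*read_only=*/false);
  std::printf("access=%.8X disposition=%u\n", p.desired_access,
              p.creation_disposition);
}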
assert_true(XThread::GetCurrentThread() == thread); @@ -264,7 +277,7 @@ void KernelState::OnThreadExit(XThread* thread) { } XThread* KernelState::GetThreadByID(uint32_t thread_id) { - std::lock_guard lock(object_mutex_); + std::lock_guard lock(object_mutex_); XThread* thread = nullptr; auto it = threads_by_id_.find(thread_id); if (it != threads_by_id_.end()) { @@ -276,7 +289,7 @@ XThread* KernelState::GetThreadByID(uint32_t thread_id) { } void KernelState::RegisterNotifyListener(XNotifyListener* listener) { - std::lock_guard lock(object_mutex_); + std::lock_guard lock(object_mutex_); notify_listeners_.push_back(listener); // Games seem to expect a few notifications on startup, only for the first @@ -300,7 +313,7 @@ void KernelState::RegisterNotifyListener(XNotifyListener* listener) { } void KernelState::UnregisterNotifyListener(XNotifyListener* listener) { - std::lock_guard lock(object_mutex_); + std::lock_guard lock(object_mutex_); for (auto it = notify_listeners_.begin(); it != notify_listeners_.end(); ++it) { if (*it == listener) { @@ -311,7 +324,7 @@ void KernelState::UnregisterNotifyListener(XNotifyListener* listener) { } void KernelState::BroadcastNotification(XNotificationID id, uint32_t data) { - std::lock_guard lock(object_mutex_); + std::lock_guard lock(object_mutex_); for (auto it = notify_listeners_.begin(); it != notify_listeners_.end(); ++it) { (*it)->EnqueueNotification(id, data); diff --git a/src/xenia/kernel/kernel_state.h b/src/xenia/kernel/kernel_state.h index 5e705eb08..f70f123e6 100644 --- a/src/xenia/kernel/kernel_state.h +++ b/src/xenia/kernel/kernel_state.h @@ -63,13 +63,14 @@ class KernelState { ContentManager* content_manager() const { return content_manager_.get(); } ObjectTable* object_table() const { return object_table_; } - std::mutex& object_mutex() { return object_mutex_; } + std::recursive_mutex& object_mutex() { return object_mutex_; } uint32_t process_type() const { return process_type_; } void set_process_type(uint32_t value) { process_type_ = value; } void RegisterModule(XModule* module); void UnregisterModule(XModule* module); + bool IsKernelModule(const char* name); XModule* GetModule(const char* name); XUserModule* GetExecutableModule(); void SetExecutableModule(XUserModule* module); @@ -105,7 +106,7 @@ class KernelState { std::unique_ptr content_manager_; ObjectTable* object_table_; - std::mutex object_mutex_; + std::recursive_mutex object_mutex_; std::unordered_map threads_by_id_; std::vector notify_listeners_; bool has_notified_startup_; diff --git a/src/xenia/kernel/objects/xthread.cc b/src/xenia/kernel/objects/xthread.cc index cc9f2fda4..623787f04 100644 --- a/src/xenia/kernel/objects/xthread.cc +++ b/src/xenia/kernel/objects/xthread.cc @@ -34,6 +34,7 @@ XThread::XThread(KernelState* kernel_state, uint32_t stack_size, : XObject(kernel_state, kTypeThread), thread_id_(++next_xthread_id), thread_handle_(0), + pcr_address_(0), thread_state_address_(0), thread_state_(0), event_(NULL), @@ -76,7 +77,7 @@ XThread::~XThread() { } kernel_state()->memory()->SystemHeapFree(scratch_address_); kernel_state()->memory()->SystemHeapFree(tls_address_); - kernel_state()->memory()->SystemHeapFree(thread_state_address_); + kernel_state()->memory()->SystemHeapFree(pcr_address_); if (thread_handle_) { // TODO(benvanik): platform kill @@ -105,8 +106,8 @@ uint32_t XThread::GetCurrentThreadHandle() { return thread->handle(); } -uint32_t XThread::GetCurrentThreadId(const uint8_t* thread_state_block) { - return xe::load_and_swap(thread_state_block + 0x14C); +uint32_t 
XThread::GetCurrentThreadId(const uint8_t* pcr) { + return xe::load_and_swap(pcr + 0x2D8 + 0x14C); } uint32_t XThread::last_error() { @@ -137,14 +138,15 @@ X_STATUS XThread::Create() { // 0x160: last error // So, at offset 0x100 we have a 4b pointer to offset 200, then have the // structure. - thread_state_address_ = memory()->SystemHeapAlloc(0xAB0); + pcr_address_ = memory()->SystemHeapAlloc(0x2D8 + 0xAB0); + thread_state_address_ = pcr_address_ + 0x2D8; if (!thread_state_address_) { XELOGW("Unable to allocate thread state block"); return X_STATUS_NO_MEMORY; } // Set native info. - SetNativePointer(thread_state_address_); + SetNativePointer(thread_state_address_, true); XUserModule* module = kernel_state()->GetExecutableModule(); @@ -154,8 +156,12 @@ X_STATUS XThread::Create() { scratch_address_ = memory()->SystemHeapAlloc(scratch_size_); // Allocate TLS block. - const xe_xex2_header_t* header = module->xex_header(); - uint32_t tls_size = header->tls_info.slot_count * header->tls_info.data_size; + uint32_t tls_size = 32; // Default 32 (is this OK?) + if (module && module->xex_header()) { + const xe_xex2_header_t* header = module->xex_header(); + tls_size = header->tls_info.slot_count * header->tls_info.data_size; + } + tls_address_ = memory()->SystemHeapAlloc(tls_size); if (!tls_address_) { XELOGW("Unable to allocate thread local storage block"); @@ -163,9 +169,40 @@ X_STATUS XThread::Create() { return X_STATUS_NO_MEMORY; } - // Copy in default TLS info. - // TODO(benvanik): is this correct? - memory()->Copy(tls_address_, header->tls_info.raw_data_address, tls_size); + // Copy in default TLS info (or zero it out) + if (module && module->xex_header()) { + const xe_xex2_header_t* header = module->xex_header(); + + // Copy in default TLS info. + // TODO(benvanik): is this correct? + memory()->Copy(tls_address_, header->tls_info.raw_data_address, tls_size); + } else { + memory()->Fill(tls_address_, tls_size, 0); + } + + if (module) { + module->Release(); + } + + // Allocate processor thread state. + // This is thread safe. + thread_state_ = new ThreadState(kernel_state()->processor(), thread_id_, + ThreadStackType::kUserStack, 0, + creation_params_.stack_size, pcr_address_); + XELOGI("XThread%04X (%X) Stack: %.8X-%.8X", handle(), + thread_state_->thread_id(), thread_state_->stack_limit(), + thread_state_->stack_base()); + + uint8_t* pcr = memory()->TranslateVirtual(pcr_address_); + std::memset(pcr, 0x0, 0x2D8 + 0xAB0); // Zero the PCR + xe::store_and_swap(pcr + 0x000, tls_address_); + xe::store_and_swap(pcr + 0x030, pcr_address_); + xe::store_and_swap(pcr + 0x070, thread_state_->stack_address() + + thread_state_->stack_size()); + xe::store_and_swap(pcr + 0x074, thread_state_->stack_address()); + xe::store_and_swap(pcr + 0x100, thread_state_address_); + xe::store_and_swap (pcr + 0x10C, 1); // Current CPU(?) + xe::store_and_swap(pcr + 0x150, 0); // DPC active bool? // Setup the thread state block (last error/etc). 
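With r13 now pointing at the PCR instead of directly at the thread state block, the thread id read becomes pcr + 0x2D8 (start of the embedded thread state block) + 0x14C, stored big-endian in guest memory. A standalone sketch of writing and reading that field, using the offsets from the XThread::Create() hunk and simple byte-swap helpers standing in for xe::store_and_swap / xe::load_and_swap:

#include <cstdint>
#include <cstdio>
#include <vector>

// Big-endian helpers standing in for xe::store_and_swap / xe::load_and_swap.
static void StoreBE32(uint8_t* p, uint32_t v) {
  p[0] = uint8_t(v >> 24); p[1] = uint8_t(v >> 16);
  p[2] = uint8_t(v >> 8);  p[3] = uint8_t(v);
}
static uint32_t LoadBE32(const uint8_t* p) {
  return (uint32_t(p[0]) << 24) | (uint32_t(p[1]) << 16) |
         (uint32_t(p[2]) << 8) | uint32_t(p[3]);
}

// Offsets taken from the XThread::Create() hunk above.
constexpr uint32_t kPcrSize = 0x2D8;          // PCR proper
constexpr uint32_t kThreadStateSize = 0xAB0;  // thread state block after it
constexpr uint32_t kThreadIdOffset = 0x14C;   // within the thread state block

uint32_t GetCurrentThreadId(const uint8_t* pcr) {
  return LoadBE32(pcr + kPcrSize + kThreadIdOffset);
}

int main() {
  std::vector<uint8_t> block(kPcrSize + kThreadStateSize, 0);
  StoreBE32(block.data() + kPcrSize + kThreadIdOffset, 0x0000002A);
  std::printf("thread id = %u\n", GetCurrentThreadId(block.data()));
}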
uint8_t* p = memory()->TranslateVirtual(thread_state_address_); @@ -180,6 +217,9 @@ X_STATUS XThread::Create() { xe::store_and_swap(p + 0x04C, thread_state_address_ + 0x018); xe::store_and_swap(p + 0x054, 0x102); xe::store_and_swap(p + 0x056, 1); + xe::store_and_swap( + p + 0x05C, thread_state_->stack_address() + thread_state_->stack_size()); + xe::store_and_swap(p + 0x060, thread_state_->stack_address()); xe::store_and_swap(p + 0x068, tls_address_); xe::store_and_swap(p + 0x06C, 0); xe::store_and_swap(p + 0x074, thread_state_address_ + 0x074); @@ -192,7 +232,8 @@ X_STATUS XThread::Create() { // A88 = APC // 18 = timer xe::store_and_swap(p + 0x09C, 0xFDFFD7FF); - xe::store_and_swap(p + 0x100, thread_state_address_); + xe::store_and_swap( + p + 0x0D0, thread_state_->stack_address() + thread_state_->stack_size()); FILETIME t; GetSystemTimeAsFileTime(&t); xe::store_and_swap( @@ -200,32 +241,18 @@ X_STATUS XThread::Create() { xe::store_and_swap(p + 0x144, thread_state_address_ + 0x144); xe::store_and_swap(p + 0x148, thread_state_address_ + 0x144); xe::store_and_swap(p + 0x14C, thread_id_); - // TODO(benvanik): figure out why RtlGetLastError changes on this: - // xe::store_and_swap(p + 0x150, creation_params_.start_address); + xe::store_and_swap(p + 0x150, creation_params_.start_address); xe::store_and_swap(p + 0x154, thread_state_address_ + 0x154); xe::store_and_swap(p + 0x158, thread_state_address_ + 0x154); xe::store_and_swap(p + 0x160, 0); // last error xe::store_and_swap(p + 0x16C, creation_params_.creation_flags); xe::store_and_swap(p + 0x17C, 1); - // Allocate processor thread state. - // This is thread safe. - thread_state_ = - new ThreadState(kernel_state()->processor(), thread_id_, 0, - creation_params_.stack_size, thread_state_address_); - - xe::store_and_swap( - p + 0x05C, thread_state_->stack_address() + thread_state_->stack_size()); - xe::store_and_swap(p + 0x060, thread_state_->stack_address()); - xe::store_and_swap( - p + 0x0D0, thread_state_->stack_address() + thread_state_->stack_size()); - SetNativePointer(thread_state_address_); X_STATUS return_code = PlatformCreate(); if (XFAILED(return_code)) { XELOGW("Unable to create platform thread (%.8X)", return_code); - module->Release(); return return_code; } @@ -238,7 +265,6 @@ X_STATUS XThread::Create() { SetAffinity(proc_mask); } - module->Release(); return X_STATUS_SUCCESS; } @@ -509,7 +535,19 @@ void XThread::RundownAPCs() { int32_t XThread::QueryPriority() { return GetThreadPriority(thread_handle_); } void XThread::SetPriority(int32_t increment) { - SetThreadPriority(thread_handle_, increment); + int target_priority = 0; + if (increment > 0x11) { + target_priority = THREAD_PRIORITY_HIGHEST; + } else if (increment > 0) { + target_priority = THREAD_PRIORITY_ABOVE_NORMAL; + } else if (increment < -0x22) { + target_priority = THREAD_PRIORITY_IDLE; + } else if (increment < -0x11) { + target_priority = THREAD_PRIORITY_LOWEST; + } else { + target_priority = THREAD_PRIORITY_NORMAL; + } + SetThreadPriority(thread_handle_, target_priority); } void XThread::SetAffinity(uint32_t affinity) { @@ -583,5 +621,28 @@ X_STATUS XThread::Delay(uint32_t processor_mode, uint32_t alertable, void* XThread::GetWaitHandle() { return event_->GetWaitHandle(); } +XHostThread::XHostThread(KernelState* kernel_state, uint32_t stack_size, + uint32_t creation_flags, std::function host_fn): + XThread(kernel_state, stack_size, 0, 0, 0, creation_flags), + host_fn_(host_fn) { +} + +void XHostThread::Execute() { + XELOGKERNEL("XThread::Execute thid %d (handle=%.8X, 
'%s', native=%.8X, )", + thread_id_, handle(), name_.c_str(), + xe::threading::current_thread_id()); + + // Let the kernel know we are starting. + kernel_state()->OnThreadExecute(this); + + int ret = host_fn_(); + + // Let the kernel know we are exiting. + kernel_state()->OnThreadExit(this); + + // Exit. + Exit(ret); +} + } // namespace kernel } // namespace xe diff --git a/src/xenia/kernel/objects/xthread.h b/src/xenia/kernel/objects/xthread.h index d210d7edd..481ecb26f 100644 --- a/src/xenia/kernel/objects/xthread.h +++ b/src/xenia/kernel/objects/xthread.h @@ -33,8 +33,9 @@ class XThread : public XObject { static XThread* GetCurrentThread(); static uint32_t GetCurrentThreadHandle(); - static uint32_t GetCurrentThreadId(const uint8_t* thread_state_block); + static uint32_t GetCurrentThreadId(const uint8_t* pcr); + uint32_t pcr_ptr() const { return pcr_address_; } uint32_t thread_state_ptr() const { return thread_state_address_; } cpu::ThreadState* thread_state() const { return thread_state_; } uint32_t thread_id() const { return thread_id_; } @@ -46,7 +47,7 @@ class XThread : public XObject { X_STATUS Create(); X_STATUS Exit(int exit_code); - void Execute(); + virtual void Execute(); static void EnterCriticalRegion(); static void LeaveCriticalRegion(); @@ -68,7 +69,7 @@ class XThread : public XObject { virtual void* GetWaitHandle(); - private: + protected: X_STATUS PlatformCreate(); void PlatformDestroy(); X_STATUS PlatformExit(int exit_code); @@ -89,6 +90,7 @@ class XThread : public XObject { uint32_t scratch_address_; uint32_t scratch_size_; uint32_t tls_address_; + uint32_t pcr_address_; uint32_t thread_state_address_; cpu::ThreadState* thread_state_; @@ -101,6 +103,17 @@ class XThread : public XObject { XEvent* event_; }; +class XHostThread : public XThread { + public: + XHostThread(KernelState* kernel_state, uint32_t stack_size, + uint32_t creation_flags, std::function host_fn); + + virtual void Execute(); + + private: + std::function host_fn_; +}; + } // namespace kernel } // namespace xe diff --git a/src/xenia/kernel/objects/xuser_module.cc b/src/xenia/kernel/objects/xuser_module.cc index 3d83a1b48..230627477 100644 --- a/src/xenia/kernel/objects/xuser_module.cc +++ b/src/xenia/kernel/objects/xuser_module.cc @@ -129,7 +129,7 @@ X_STATUS XUserModule::LoadFromMemory(const void* addr, const size_t length) { // Prepare the module for execution. // Runtime takes ownership. 
- auto xex_module = std::make_unique(processor); + auto xex_module = std::make_unique(processor, kernel_state()); if (!xex_module->Load(name_, path_, xex_)) { return X_STATUS_UNSUCCESSFUL; } @@ -351,18 +351,38 @@ void XUserModule::Dump() { int unimpl_count = 0; for (size_t m = 0; m < import_info_count; m++) { const xe_xex2_import_info_t* info = &import_infos[m]; - KernelExport* kernel_export = + + if (kernel_state_->IsKernelModule(library->name)) { + KernelExport* kernel_export = export_resolver->GetExportByOrdinal(library->name, info->ordinal); - if (kernel_export) { - known_count++; - if (kernel_export->is_implemented) { - impl_count++; + if (kernel_export) { + known_count++; + if (kernel_export->is_implemented) { + impl_count++; + } else { + unimpl_count++; + } } else { + unknown_count++; unimpl_count++; } } else { - unknown_count++; - unimpl_count++; + // User module + XModule* module = kernel_state_->GetModule(library->name); + if (module) { + uint32_t export_addr = + module->GetProcAddressByOrdinal(info->ordinal); + if (export_addr) { + impl_count++; + known_count++; + } else { + unimpl_count++; + unknown_count++; + } + } else { + unimpl_count++; + unknown_count++; + } } } printf(" Total: %4u\n", uint32_t(import_info_count)); @@ -377,13 +397,23 @@ void XUserModule::Dump() { // Listing. for (size_t m = 0; m < import_info_count; m++) { const xe_xex2_import_info_t* info = &import_infos[m]; - KernelExport* kernel_export = - export_resolver->GetExportByOrdinal(library->name, info->ordinal); const char* name = "UNKNOWN"; bool implemented = false; - if (kernel_export) { - name = kernel_export->name; - implemented = kernel_export->is_implemented; + + KernelExport* kernel_export; + if (kernel_state_->IsKernelModule(library->name)) { + kernel_export = + export_resolver->GetExportByOrdinal(library->name, info->ordinal); + if (kernel_export) { + name = kernel_export->name; + implemented = kernel_export->is_implemented; + } + } else { + XModule* module = kernel_state_->GetModule(library->name); + if (module && module->GetProcAddressByOrdinal(info->ordinal)) { + // TODO: Name lookup + implemented = true; + } } if (kernel_export && kernel_export->type == KernelExport::Variable) { printf(" V %.8X %.3X (%3d) %s %s\n", info->value_address, diff --git a/src/xenia/kernel/util/xex2.cc b/src/xenia/kernel/util/xex2.cc index c46687aba..da6409b20 100644 --- a/src/xenia/kernel/util/xex2.cc +++ b/src/xenia/kernel/util/xex2.cc @@ -536,8 +536,12 @@ int xe_xex2_read_image_uncompressed(const xe_xex2_header_t *header, // Allocate in-place the XEX memory. const uint32_t exe_length = xex_length - header->exe_offset; uint32_t uncompressed_size = exe_length; - uint32_t alloc_result = memory->HeapAlloc( - header->exe_address, uncompressed_size, xe::MEMORY_FLAG_ZERO); + bool alloc_result = + memory->LookupHeap(header->exe_address) + ->AllocFixed( + header->exe_address, uncompressed_size, 4096, + xe::kMemoryAllocationReserve | xe::kMemoryAllocationCommit, + xe::kMemoryProtectRead | xe::kMemoryProtectWrite); if (!alloc_result) { XELOGE("Unable to allocate XEX memory at %.8X-%.8X.", header->exe_address, uncompressed_size); @@ -588,22 +592,26 @@ int xe_xex2_read_image_basic_compressed(const xe_xex2_header_t *header, // Calculate the total size of the XEX image from its headers. 
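Import accounting in XUserModule::Dump() now forks on IsKernelModule(): xam.xex/xboxkrnl.exe ordinals go through the kernel export resolver, while everything else is resolved against the loaded user module by ordinal. A reduced sketch of that dispatch with toy resolver types standing in for Xenia's (module and export names below are made up):

#include <cstdint>
#include <cstdio>
#include <cstring>
#include <map>
#include <string>

// Toy stand-ins for the export resolver and loaded-module registry.
struct KernelExports {
  std::map<uint16_t, const char*> by_ordinal;  // implemented kernel exports
};
struct UserModule {
  std::map<uint16_t, uint32_t> proc_by_ordinal;  // guest addresses
};

bool IsKernelModule(const char* name) {
  return std::strcmp(name, "xam.xex") == 0 ||
         std::strcmp(name, "xboxkrnl.exe") == 0;
}

// Returns true if the import can be satisfied.
bool ResolveImport(const char* library, uint16_t ordinal,
                   const KernelExports& kernel,
                   const std::map<std::string, UserModule>& user_modules) {
  if (IsKernelModule(library)) {
    return kernel.by_ordinal.count(ordinal) != 0;
  }
  auto it = user_modules.find(library);
  if (it == user_modules.end()) return false;
  return it->second.proc_by_ordinal.count(ordinal) != 0;
}

int main() {
  KernelExports kernel;
  kernel.by_ordinal[0x0055] = "xam_055";
  std::map<std::string, UserModule> user;
  user["mygame.xex"].proc_by_ordinal[1] = 0x82001000;  // hypothetical module
  std::printf("%d\n", ResolveImport("xam.xex", 0x0055, kernel, user));     // 1
  std::printf("%d\n", ResolveImport("mygame.xex", 1, kernel, user));       // 1
  std::printf("%d\n", ResolveImport("mygame.xex", 2, kernel, user));       // 0
}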
uint32_t total_size = 0; for (uint32_t i = 0; i < header->section_count; i++) { - xe_xex2_section_t& section = header->sections[i]; + xe_xex2_section_t &section = header->sections[i]; total_size += section.info.page_count * section.page_size; } // Allocate in-place the XEX memory. - uint32_t alloc_result = memory->HeapAlloc( - header->exe_address, total_size, xe::MEMORY_FLAG_ZERO); + bool alloc_result = + memory->LookupHeap(header->exe_address) + ->AllocFixed( + header->exe_address, total_size, 4096, + xe::kMemoryAllocationReserve | xe::kMemoryAllocationCommit, + xe::kMemoryProtectRead | xe::kMemoryProtectWrite); if (!alloc_result) { XELOGE("Unable to allocate XEX memory at %.8X-%.8X.", header->exe_address, uncompressed_size); return 1; } uint8_t *buffer = memory->TranslateVirtual(header->exe_address); + std::memset(buffer, 0, total_size); // Quickly zero the contents. uint8_t *d = buffer; - std::memset(buffer, 0, uncompressed_size); uint32_t rk[4 * (MAXNR + 1)]; uint8_t ivec[16] = {0}; @@ -731,8 +739,12 @@ int xe_xex2_read_image_compressed(const xe_xex2_header_t *header, } // Allocate in-place the XEX memory. - uint32_t alloc_result = memory->HeapAlloc( - header->exe_address, uncompressed_size, xe::MEMORY_FLAG_ZERO); + bool alloc_result = + memory->LookupHeap(header->exe_address) + ->AllocFixed( + header->exe_address, uncompressed_size, 4096, + xe::kMemoryAllocationReserve | xe::kMemoryAllocationCommit, + xe::kMemoryProtectRead | xe::kMemoryProtectWrite); if (!alloc_result) { XELOGE("Unable to allocate XEX memory at %.8X-%.8X.", header->exe_address, uncompressed_size); @@ -1084,4 +1096,4 @@ uint32_t xe_xex2_lookup_export(xe_xex2_ref xex, uint16_t ordinal) { // No match return 0; -} \ No newline at end of file +} diff --git a/src/xenia/kernel/xam_info.cc b/src/xenia/kernel/xam_info.cc index 54726765e..58c4f2453 100644 --- a/src/xenia/kernel/xam_info.cc +++ b/src/xenia/kernel/xam_info.cc @@ -41,7 +41,7 @@ SHIM_CALL XGetAVPack_shim(PPCContext* ppc_state, KernelState* state) { SHIM_CALL XGetGameRegion_shim(PPCContext* ppc_state, KernelState* state) { XELOGD("XGetGameRegion()"); - SHIM_SET_RETURN_64(XEX_REGION_ALL); + SHIM_SET_RETURN_64(0xFFFF); } SHIM_CALL XGetLanguage_shim(PPCContext* ppc_state, KernelState* state) { diff --git a/src/xenia/kernel/xam_table.inc b/src/xenia/kernel/xam_table.inc index 37c04437e..b68bb130e 100644 --- a/src/xenia/kernel/xam_table.inc +++ b/src/xenia/kernel/xam_table.inc @@ -81,6 +81,8 @@ XE_EXPORT(xam, 0x00000051, NetDll_XNetReplaceKey, XE_EXPORT(xam, 0x00000052, NetDll_XNetGetXnAddrPlatform, Function, 0), XE_EXPORT(xam, 0x00000053, NetDll_XNetGetSystemLinkPort, Function, 0), XE_EXPORT(xam, 0x00000054, NetDll_XNetSetSystemLinkPort, Function, 0), +XE_EXPORT(xam, 0x00000055, xam_055, Function, 0), +XE_EXPORT(xam, 0x00000056, xam_056, Function, 0), XE_EXPORT(xam, 0x00000065, NetDll_XnpLoadConfigParams, Function, 0), XE_EXPORT(xam, 0x00000066, NetDll_XnpSaveConfigParams, Function, 0), XE_EXPORT(xam, 0x00000067, NetDll_XnpConfigUPnP, Function, 0), @@ -954,6 +956,7 @@ XE_EXPORT(xam, 0x0000048F, XuiSceneEnableTransitionDependency, XE_EXPORT(xam, 0x00000490, XamVoiceGetMicArrayAudioEx, Function, 0), XE_EXPORT(xam, 0x00000491, XamVoiceDisableMicArray, Function, 0), XE_EXPORT(xam, 0x00000497, XamVoiceIsActiveProcess, Function, 0), +XE_EXPORT(xam, 0x0000049E, XGetVideoCapabilities, Function, 0), XE_EXPORT(xam, 0x000004B0, XMPRegisterCodec, Function, 0), XE_EXPORT(xam, 0x00000514, XamIsCurrentTitleIptv, Function, 0), XE_EXPORT(xam, 0x00000515, XamIsIptvEnabled, Function, 0), 
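The basic-compressed XEX path above sums the section sizes (page_count * page_size per section), reserves and commits exactly that range at the image's fixed guest base, and then memsets it, since the new allocator no longer zeroes implicitly. A standalone sketch of the same sizing and fixed-placement pattern, with a trivial flat-buffer allocator standing in for BaseHeap::AllocFixed and an illustrative base address:

#include <cstdint>
#include <cstdio>
#include <cstring>
#include <vector>

struct Section {
  uint32_t page_count;
  uint32_t page_size;
};

// Toy fixed-placement allocator over a flat buffer standing in for guest memory.
struct FlatHeap {
  std::vector<uint8_t> backing;
  explicit FlatHeap(uint32_t size) : backing(size) {}
  uint8_t* AllocFixed(uint32_t address, uint32_t size) {
    if (uint64_t(address) + size > backing.size()) return nullptr;
    return backing.data() + address;  // real code reserves/commits pages here
  }
};

int main() {
  const Section sections[] = {{16, 0x1000}, {4, 0x10000}};
  uint32_t total_size = 0;
  for (const auto& section : sections) {
    total_size += section.page_count * section.page_size;
  }
  FlatHeap heap(8 * 1024 * 1024);
  const uint32_t exe_address = 0x00100000;  // illustrative base, not a real XEX base
  uint8_t* buffer = heap.AllocFixed(exe_address, total_size);
  if (!buffer) {
    std::printf("allocation failed\n");
    return 1;
  }
  std::memset(buffer, 0, total_size);  // explicit zero, as in the hunk above
  std::printf("image size %.8X placed at %.8X\n", total_size, exe_address);
}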
diff --git a/src/xenia/kernel/xam_ui.cc b/src/xenia/kernel/xam_ui.cc index bab247f4b..628ad58d9 100644 --- a/src/xenia/kernel/xam_ui.cc +++ b/src/xenia/kernel/xam_ui.cc @@ -108,10 +108,27 @@ SHIM_CALL XamShowMessageBoxUI_shim(PPCContext* ppc_state, KernelState* state) { SHIM_SET_RETURN_32(X_ERROR_IO_PENDING); } +SHIM_CALL XamShowDirtyDiscErrorUI_shim(PPCContext* ppc_state, + KernelState* state) { + uint32_t user_index = SHIM_GET_ARG_32(0); + + XELOGD("XamShowDirtyDiscErrorUI(%d)", user_index); + + int button_pressed = 0; + TaskDialog(state->emulator()->main_window()->hwnd(), GetModuleHandle(nullptr), + L"Disc Read Error", + L"Game is claiming to be unable to read game data!", nullptr, + TDCBF_CLOSE_BUTTON, TD_ERROR_ICON, &button_pressed); + + // This is death, and should never return. + assert_always(); +} + } // namespace kernel } // namespace xe void xe::kernel::xam::RegisterUIExports( xe::cpu::ExportResolver* export_resolver, KernelState* state) { SHIM_SET_MAPPING("xam.xex", XamShowMessageBoxUI, state); + SHIM_SET_MAPPING("xam.xex", XamShowDirtyDiscErrorUI, state); } diff --git a/src/xenia/kernel/xam_video.cc b/src/xenia/kernel/xam_video.cc index a93555d25..3054f9b0f 100644 --- a/src/xenia/kernel/xam_video.cc +++ b/src/xenia/kernel/xam_video.cc @@ -7,6 +7,7 @@ ****************************************************************************** */ +#include "xenia/base/logging.h" #include "xenia/kernel/kernel_state.h" #include "xenia/kernel/util/shim_utils.h" #include "xenia/kernel/xam_private.h" @@ -20,13 +21,22 @@ void xeVdQueryVideoMode(X_VIDEO_MODE* video_mode); SHIM_CALL XGetVideoMode_shim(PPCContext* ppc_state, KernelState* state) { uint32_t video_mode_ptr = SHIM_GET_ARG_32(0); X_VIDEO_MODE* video_mode = (X_VIDEO_MODE*)SHIM_MEM_ADDR(video_mode_ptr); + + XELOGD("XGetVideoMode(%.8X)", video_mode_ptr); + xeVdQueryVideoMode(video_mode); } +SHIM_CALL XGetVideoCapabilities_shim(PPCContext* ppc_state, KernelState* state) { + XELOGD("XGetVideoCapabilities()"); + SHIM_SET_RETURN_32(0); +} + } // namespace kernel } // namespace xe void xe::kernel::xam::RegisterVideoExports( xe::cpu::ExportResolver* export_resolver, KernelState* state) { + SHIM_SET_MAPPING("xam.xex", XGetVideoCapabilities, state); SHIM_SET_MAPPING("xam.xex", XGetVideoMode, state); } diff --git a/src/xenia/kernel/xboxkrnl_io.cc b/src/xenia/kernel/xboxkrnl_io.cc index f9a747577..4e6723262 100644 --- a/src/xenia/kernel/xboxkrnl_io.cc +++ b/src/xenia/kernel/xboxkrnl_io.cc @@ -60,10 +60,13 @@ struct FileDisposition { }; struct FileAccess { - static const uint32_t X_GENERIC_READ = 1 << 0; - static const uint32_t X_GENERIC_WRITE = 1 << 1; - static const uint32_t X_GENERIC_EXECUTE = 1 << 2; - static const uint32_t X_GENERIC_ALL = 1 << 3; + static const uint32_t X_GENERIC_READ = 0x80000000; + static const uint32_t X_GENERIC_WRITE = 0x40000000; + static const uint32_t X_GENERIC_EXECUTE = 0x20000000; + static const uint32_t X_GENERIC_ALL = 0x10000000; + static const uint32_t X_FILE_READ_DATA = 0x00000001; + static const uint32_t X_FILE_WRITE_DATA = 0x00000002; + static const uint32_t X_FILE_APPEND_DATA = 0x00000004; }; X_STATUS NtCreateFile(PPCContext* ppc_state, KernelState* state, @@ -100,9 +103,11 @@ X_STATUS NtCreateFile(PPCContext* ppc_state, KernelState* state, entry = fs->ResolvePath(object_name); } - if (creation_disposition != FileDisposition::X_FILE_OPEN || - desired_access & FileAccess::X_GENERIC_WRITE || - desired_access & FileAccess::X_GENERIC_ALL) { + bool wants_write = desired_access & FileAccess::X_GENERIC_WRITE || + 
desired_access & FileAccess::X_GENERIC_ALL || + desired_access & FileAccess::X_FILE_WRITE_DATA || + desired_access & FileAccess::X_FILE_APPEND_DATA; + if (wants_write) { if (entry && entry->is_read_only()) { // We don't support any write modes. XELOGW("Attempted to open the file/dir for create/write"); @@ -116,10 +121,15 @@ X_STATUS NtCreateFile(PPCContext* ppc_state, KernelState* state, info = X_FILE_DOES_NOT_EXIST; } else { // Open the file/directory. - result = fs->Open(std::move(entry), state, - desired_access & FileAccess::X_GENERIC_WRITE - ? fs::Mode::READ_WRITE - : fs::Mode::READ, + fs::Mode mode; + if (desired_access & FileAccess::X_FILE_APPEND_DATA) { + mode = fs::Mode::READ_APPEND; + } else if (wants_write) { + mode = fs::Mode::READ_WRITE; + } else { + mode = fs::Mode::READ; + } + result = fs->Open(std::move(entry), state, mode, false, // TODO(benvanik): pick async mode, if needed. &file); } diff --git a/src/xenia/kernel/xboxkrnl_memory.cc b/src/xenia/kernel/xboxkrnl_memory.cc index c9e64a4b6..89ff0f9db 100644 --- a/src/xenia/kernel/xboxkrnl_memory.cc +++ b/src/xenia/kernel/xboxkrnl_memory.cc @@ -17,19 +17,55 @@ namespace xe { namespace kernel { +uint32_t ToXdkProtectFlags(uint32_t protect) { + uint32_t result = 0; + if (!(protect & kMemoryProtectRead) && !(protect & kMemoryProtectWrite)) { + result = X_PAGE_NOACCESS; + } else if ((protect & kMemoryProtectRead) && + !(protect & kMemoryProtectWrite)) { + result = X_PAGE_READONLY; + } else { + result = X_PAGE_READWRITE; + } + if (protect & kMemoryProtectNoCache) { + result = X_PAGE_NOCACHE; + } + if (protect & kMemoryProtectWriteCombine) { + result = X_PAGE_WRITECOMBINE; + } + return result; +} + +uint32_t FromXdkProtectFlags(uint32_t protect) { + uint32_t result = 0; + if ((protect & X_PAGE_READONLY) | (protect & X_PAGE_EXECUTE_READ)) { + result |= kMemoryProtectRead; + } else if ((protect & X_PAGE_READWRITE) | + (protect & X_PAGE_EXECUTE_READWRITE)) { + result |= kMemoryProtectRead | kMemoryProtectWrite; + } + if (protect & X_PAGE_NOCACHE) { + result |= kMemoryProtectNoCache; + } + if (protect & X_PAGE_WRITECOMBINE) { + result |= kMemoryProtectWriteCombine; + } + return result; +} + SHIM_CALL NtAllocateVirtualMemory_shim(PPCContext* ppc_state, KernelState* state) { uint32_t base_addr_ptr = SHIM_GET_ARG_32(0); uint32_t base_addr_value = SHIM_MEM_32(base_addr_ptr); uint32_t region_size_ptr = SHIM_GET_ARG_32(1); uint32_t region_size_value = SHIM_MEM_32(region_size_ptr); - uint32_t allocation_type = SHIM_GET_ARG_32(2); // X_MEM_* bitmask - uint32_t protect_bits = SHIM_GET_ARG_32(3); // X_PAGE_* bitmask + uint32_t alloc_type = SHIM_GET_ARG_32(2); // X_MEM_* bitmask + uint32_t protect_bits = SHIM_GET_ARG_32(3); // X_PAGE_* bitmask uint32_t unknown = SHIM_GET_ARG_32(4); XELOGD("NtAllocateVirtualMemory(%.8X(%.8X), %.8X(%.8X), %.8X, %.8X, %.8X)", base_addr_ptr, base_addr_value, region_size_ptr, region_size_value, - allocation_type, protect_bits, unknown); + alloc_type, protect_bits, unknown); // NTSTATUS // _Inout_ PVOID *BaseAddress, @@ -52,12 +88,12 @@ SHIM_CALL NtAllocateVirtualMemory_shim(PPCContext* ppc_state, return; } // Check allocation type. - if (!(allocation_type & (X_MEM_COMMIT | X_MEM_RESET | X_MEM_RESERVE))) { + if (!(alloc_type & (X_MEM_COMMIT | X_MEM_RESET | X_MEM_RESERVE))) { SHIM_SET_RETURN_32(X_STATUS_INVALID_PARAMETER); return; } // If MEM_RESET is set only MEM_RESET can be set. 
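The FileAccess fix above matters because the guest passes real NT access-mask bit positions: GENERIC_* rights live in the top nibble and the FILE_* rights in the low bits, so write intent must be detected across both groups before choosing READ / READ_WRITE / READ_APPEND. A compact sketch of that decision using the constants from the hunk:

#include <cstdint>
#include <cstdio>

enum class Mode { READ, READ_WRITE, READ_APPEND };

// Values from the FileAccess struct in the hunk above.
constexpr uint32_t X_GENERIC_READ = 0x80000000u;
constexpr uint32_t X_GENERIC_WRITE = 0x40000000u;
constexpr uint32_t X_GENERIC_ALL = 0x10000000u;
constexpr uint32_t X_FILE_WRITE_DATA = 0x00000002u;
constexpr uint32_t X_FILE_APPEND_DATA = 0x00000004u;

Mode ChooseMode(uint32_t desired_access) {
  bool wants_write = (desired_access & X_GENERIC_WRITE) ||
                     (desired_access & X_GENERIC_ALL) ||
                     (desired_access & X_FILE_WRITE_DATA) ||
                     (desired_access & X_FILE_APPEND_DATA);
  if (desired_access & X_FILE_APPEND_DATA) return Mode::READ_APPEND;
  if (wants_write) return Mode::READ_WRITE;
  return Mode::READ;
}

int main() {
  std::printf("%d\n", int(ChooseMode(X_GENERIC_READ)));                       // READ
  std::printf("%d\n", int(ChooseMode(X_GENERIC_READ | X_GENERIC_WRITE)));     // READ_WRITE
  std::printf("%d\n", int(ChooseMode(X_GENERIC_READ | X_FILE_APPEND_DATA)));  // READ_APPEND
}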
- if (allocation_type & X_MEM_RESET && (allocation_type & ~X_MEM_RESET)) { + if (alloc_type & X_MEM_RESET && (alloc_type & ~X_MEM_RESET)) { SHIM_SET_RETURN_32(X_STATUS_INVALID_PARAMETER); return; } @@ -68,37 +104,60 @@ SHIM_CALL NtAllocateVirtualMemory_shim(PPCContext* ppc_state, } // Adjust size. - uint32_t adjusted_size = region_size_value; - // TODO(benvanik): adjust based on page size flags/etc? - - // TODO(benvanik): support different allocation types. - // Right now we treat everything as a commit and ignore allocations that have - // already happened. - if (base_addr_value) { - // Having a pointer already means that this is likely a follow-on COMMIT. - assert_true(!(allocation_type & X_MEM_RESERVE) && - (allocation_type & X_MEM_COMMIT)); - SHIM_SET_MEM_32(base_addr_ptr, base_addr_value); - SHIM_SET_MEM_32(region_size_ptr, adjusted_size); - SHIM_SET_RETURN_32(X_STATUS_SUCCESS); - return; + uint32_t page_size = 4096; + if (alloc_type & X_MEM_LARGE_PAGES) { + page_size = 64 * 1024; } + if (int32_t(region_size_value) < 0) { + // Some games pass in negative sizes. + region_size_value = -int32_t(region_size_value); + } + uint32_t adjusted_size = xe::round_up(region_size_value, page_size); // Allocate. - uint32_t flags = (allocation_type & X_MEM_NOZERO) ? 0 : MEMORY_FLAG_ZERO; - uint32_t addr = (uint32_t)state->memory()->HeapAlloc(base_addr_value, - adjusted_size, flags); - if (!addr) { + uint32_t allocation_type = 0; + if (alloc_type & X_MEM_RESERVE) { + allocation_type |= kMemoryAllocationReserve; + } + if (alloc_type & X_MEM_COMMIT) { + allocation_type |= kMemoryAllocationCommit; + } + if (alloc_type & X_MEM_RESET) { + XELOGE("X_MEM_RESET not implemented"); + assert_always(); + } + uint32_t protect = FromXdkProtectFlags(protect_bits); + uint32_t address = 0; + if (base_addr_value) { + auto heap = state->memory()->LookupHeap(base_addr_value); + if (heap->AllocFixed(base_addr_value, adjusted_size, page_size, + allocation_type, protect)) { + address = base_addr_value; + } + } else { + bool top_down = !!(alloc_type & X_MEM_TOP_DOWN); + auto heap = state->memory()->LookupHeapByType(false, page_size); + heap->Alloc(adjusted_size, page_size, allocation_type, protect, top_down, + &address); + } + if (!address) { // Failed - assume no memory available. SHIM_SET_RETURN_32(X_STATUS_NO_MEMORY); return; } - XELOGD("NtAllocateVirtualMemory = %.8X", addr); + // Zero memory, if needed. + if (address && !(alloc_type & X_MEM_NOZERO)) { + if (alloc_type & X_MEM_COMMIT) { + state->memory()->Zero(address, adjusted_size); + } + } + + XELOGD("NtAllocateVirtualMemory = %.8X", address); // Stash back. // Maybe set X_STATUS_ALREADY_COMMITTED if MEM_COMMIT? - SHIM_SET_MEM_32(base_addr_ptr, addr); + SHIM_SET_MEM_32(base_addr_ptr, address); SHIM_SET_MEM_32(region_size_ptr, adjusted_size); SHIM_SET_RETURN_32(X_STATUS_SUCCESS); } @@ -130,22 +189,24 @@ SHIM_CALL NtFreeVirtualMemory_shim(PPCContext* ppc_state, KernelState* state) { return; } - // TODO(benvanik): ignore decommits for now. + auto heap = state->memory()->LookupHeap(base_addr_value); + bool result = false; if (free_type == X_MEM_DECOMMIT) { - SHIM_SET_RETURN_32(X_STATUS_SUCCESS); - return; - } + // If zero, we may need to query size (free whole region). + assert_not_zero(region_size_value); - // Free. 
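The reworked NtAllocateVirtualMemory picks a page size (64 KiB when X_MEM_LARGE_PAGES is set, 4 KiB otherwise), absolute-values the negative region sizes some titles pass, and rounds the request up to whole pages before allocating. A standalone sketch of just that size normalization; the X_MEM_* values here mirror the standard Win32 MEM_* constants:

#include <cstdint>
#include <cstdio>

// Mirror the Win32 MEM_* values that X_MEM_* follows.
constexpr uint32_t X_MEM_COMMIT = 0x1000;
constexpr uint32_t X_MEM_RESERVE = 0x2000;
constexpr uint32_t X_MEM_LARGE_PAGES = 0x20000000;

uint32_t RoundUp(uint32_t value, uint32_t alignment) {
  return (value + alignment - 1) / alignment * alignment;
}

struct NormalizedRequest {
  uint32_t page_size;
  uint32_t adjusted_size;
};

NormalizedRequest Normalize(uint32_t region_size, uint32_t alloc_type) {
  NormalizedRequest r{};
  r.page_size = (alloc_type & X_MEM_LARGE_PAGES) ? 64 * 1024 : 4096;
  if (int32_t(region_size) < 0) {
    // Some games pass in negative sizes; treat as the absolute value.
    region_size = uint32_t(-int32_t(region_size));
  }
  r.adjusted_size = RoundUp(region_size, r.page_size);
  return r;
}

int main() {
  NormalizedRequest a = Normalize(0x1234, X_MEM_RESERVE | X_MEM_COMMIT);
  NormalizedRequest b = Normalize(uint32_t(-0x2000), X_MEM_RESERVE | X_MEM_LARGE_PAGES);
  std::printf("a: page=%u size=%.8X\n", a.page_size, a.adjusted_size);
  std::printf("b: page=%u size=%.8X\n", b.page_size, b.adjusted_size);
}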
- uint32_t flags = 0; - uint32_t freed_size = state->memory()->HeapFree(base_addr_value, flags); - if (!freed_size) { + region_size_value = xe::round_up(region_size_value, heap->page_size()); + result = heap->Decommit(base_addr_value, region_size_value); + } else { + result = heap->Release(base_addr_value, &region_size_value); + } + if (!result) { SHIM_SET_RETURN_32(X_STATUS_UNSUCCESSFUL); return; } SHIM_SET_MEM_32(base_addr_ptr, base_addr_value); - SHIM_SET_MEM_32(region_size_ptr, freed_size); + SHIM_SET_MEM_32(region_size_ptr, region_size_value); SHIM_SET_RETURN_32(X_STATUS_SUCCESS); } @@ -168,9 +229,9 @@ SHIM_CALL NtQueryVirtualMemory_shim(PPCContext* ppc_state, KernelState* state) { XELOGD("NtQueryVirtualMemory(%.8X, %.8X)", base_address, memory_basic_information_ptr); - AllocationInfo alloc_info; - size_t result = state->memory()->QueryInformation(base_address, &alloc_info); - if (!result) { + auto heap = state->memory()->LookupHeap(base_address); + HeapAllocationInfo alloc_info; + if (!heap->QueryRegionInfo(base_address, &alloc_info)) { SHIM_SET_RETURN_32(X_STATUS_INVALID_PARAMETER); return; } @@ -179,15 +240,21 @@ SHIM_CALL NtQueryVirtualMemory_shim(PPCContext* ppc_state, KernelState* state) { static_cast(alloc_info.base_address); memory_basic_information->allocation_base = static_cast(alloc_info.allocation_base); - memory_basic_information->allocation_protect = alloc_info.allocation_protect; + memory_basic_information->allocation_protect = + ToXdkProtectFlags(alloc_info.allocation_protect); memory_basic_information->region_size = static_cast(alloc_info.region_size); - memory_basic_information->state = alloc_info.state; - memory_basic_information->protect = alloc_info.protect; + uint32_t x_state = 0; + if (alloc_info.state & kMemoryAllocationReserve) { + x_state |= X_MEM_RESERVE; + } + if (alloc_info.state & kMemoryAllocationCommit) { + x_state |= X_MEM_COMMIT; + } + memory_basic_information->state = x_state; + memory_basic_information->protect = ToXdkProtectFlags(alloc_info.protect); memory_basic_information->type = alloc_info.type; - XELOGE("NtQueryVirtualMemory NOT IMPLEMENTED"); - SHIM_SET_RETURN_32(X_STATUS_SUCCESS); } @@ -242,27 +309,20 @@ SHIM_CALL MmAllocatePhysicalMemoryEx_shim(PPCContext* ppc_state, assert_true(min_addr_range == 0); assert_true(max_addr_range == 0xFFFFFFFF); - // Allocate. - uint32_t flags = MEMORY_FLAG_PHYSICAL; - uint32_t base_address = (uint32_t)state->memory()->HeapAlloc( - 0, adjusted_size, flags, adjusted_alignment); - if (!base_address) { + uint32_t allocation_type = kMemoryAllocationReserve | kMemoryAllocationCommit; + uint32_t protect = FromXdkProtectFlags(protect_bits); + bool top_down = true; + auto heap = state->memory()->LookupHeapByType(true, page_size); + uint32_t base_address; + if (!heap->AllocRange(min_addr_range, max_addr_range, adjusted_size, + adjusted_alignment, allocation_type, protect, top_down, + &base_address)) { // Failed - assume no memory available. SHIM_SET_RETURN_32(0); return; } XELOGD("MmAllocatePhysicalMemoryEx = %.8X", base_address); - // Move the address into the right range. - // if (protect_bits & X_MEM_LARGE_PAGES) { - // base_address += 0xA0000000; - //} else if (protect_bits & X_MEM_16MB_PAGES) { - // base_address += 0xC0000000; - //} else { - // base_address += 0xE0000000; - //} - base_address += 0xA0000000; - SHIM_SET_RETURN_64(base_address); } @@ -274,14 +334,10 @@ SHIM_CALL MmFreePhysicalMemory_shim(PPCContext* ppc_state, KernelState* state) { // base_address = result of MmAllocatePhysicalMemory. 
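NtQueryVirtualMemory now converts the heap's internal protection bits back to X_PAGE_* values with ToXdkProtectFlags, the inverse of the FromXdkProtectFlags conversion used on allocation. A compact sketch of that round trip; the X_PAGE_* values match the standard Win32 PAGE_* constants, while the kMemoryProtect* bits are illustrative stand-ins:

#include <cstdint>
#include <cstdio>

// Internal protection bits (illustrative stand-ins).
constexpr uint32_t kMemoryProtectRead = 1 << 0;
constexpr uint32_t kMemoryProtectWrite = 1 << 1;

// X_PAGE_* values mirror the Win32 PAGE_* constants.
constexpr uint32_t X_PAGE_NOACCESS = 0x01;
constexpr uint32_t X_PAGE_READONLY = 0x02;
constexpr uint32_t X_PAGE_READWRITE = 0x04;

uint32_t ToXdkProtect(uint32_t protect) {
  if (!(protect & kMemoryProtectRead) && !(protect & kMemoryProtectWrite)) {
    return X_PAGE_NOACCESS;
  }
  if ((protect & kMemoryProtectRead) && !(protect & kMemoryProtectWrite)) {
    return X_PAGE_READONLY;
  }
  return X_PAGE_READWRITE;
}

uint32_t FromXdkProtect(uint32_t xdk) {
  uint32_t result = 0;
  if (xdk & X_PAGE_READONLY) {
    result |= kMemoryProtectRead;
  } else if (xdk & X_PAGE_READWRITE) {
    result |= kMemoryProtectRead | kMemoryProtectWrite;
  }
  return result;
}

int main() {
  uint32_t internal = kMemoryProtectRead | kMemoryProtectWrite;
  uint32_t xdk = ToXdkProtect(internal);
  std::printf("internal %X -> xdk %X -> internal %X\n", internal, xdk,
              FromXdkProtect(xdk));
}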
- // Strip off physical bits before passing down. - base_address &= ~0xE0000000; + assert_true((base_address & 0x1F) == 0); - // TODO(benvanik): free memory. - XELOGE("xeMmFreePhysicalMemory NOT IMPLEMENTED"); - // uint32_t size = ?; - // xe_memory_heap_free( - // state->memory(), base_address, size); + auto heap = state->memory()->LookupHeap(base_address); + heap->Release(base_address); } SHIM_CALL MmQueryAddressProtect_shim(PPCContext* ppc_state, @@ -290,7 +346,12 @@ SHIM_CALL MmQueryAddressProtect_shim(PPCContext* ppc_state, XELOGD("MmQueryAddressProtect(%.8X)", base_address); - uint32_t access = state->memory()->QueryProtect(base_address); + auto heap = state->memory()->LookupHeap(base_address); + uint32_t access; + if (!heap->QueryProtect(base_address, &access)) { + access = 0; + } + access = ToXdkProtectFlags(access); SHIM_SET_RETURN_32(access); } @@ -301,9 +362,13 @@ SHIM_CALL MmQueryAllocationSize_shim(PPCContext* ppc_state, XELOGD("MmQueryAllocationSize(%.8X)", base_address); - size_t size = state->memory()->QuerySize(base_address); + auto heap = state->memory()->LookupHeap(base_address); + uint32_t size; + if (!heap->QuerySize(base_address, &size)) { + size = 0; + } - SHIM_SET_RETURN_32(static_cast(size)); + SHIM_SET_RETURN_32(size); } SHIM_CALL MmQueryStatistics_shim(PPCContext* ppc_state, KernelState* state) { @@ -372,19 +437,12 @@ SHIM_CALL MmGetPhysicalAddress_shim(PPCContext* ppc_state, KernelState* state) { // ); // base_address = result of MmAllocatePhysicalMemory. - // We are always using virtual addresses, right now, since we don't need - // physical ones. We could munge up the address here to another mapped view - // of memory. + uint32_t physical_address = base_address & 0x1FFFFFFF; + if (base_address >= 0xE0000000) { + physical_address += 0x1000; + } - /*if (protect_bits & X_MEM_LARGE_PAGES) { - base_address |= 0xA0000000; - } else if (protect_bits & X_MEM_16MB_PAGES) { - base_address |= 0xC0000000; - } else { - base_address |= 0xE0000000; - }*/ - - SHIM_SET_RETURN_64(base_address); + SHIM_SET_RETURN_64(physical_address); } SHIM_CALL MmMapIoSpace_shim(PPCContext* ppc_state, KernelState* state) { diff --git a/src/xenia/kernel/xboxkrnl_ob.cc b/src/xenia/kernel/xboxkrnl_ob.cc index 8e8a874c5..71cbcc68c 100644 --- a/src/xenia/kernel/xboxkrnl_ob.cc +++ b/src/xenia/kernel/xboxkrnl_ob.cc @@ -83,6 +83,11 @@ SHIM_CALL ObReferenceObjectByHandle_shim(PPCContext* ppc_state, } break; } } break; + case 0xD017BEEF: { // ExSemaphoreObjectType + // TODO(benvanik): implement. 
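MmGetPhysicalAddress above strips the upper address bits (all physical views alias the same 512 MB of backing memory) and applies a 4 KiB bias for the 0xE0000000 range. A tiny sketch of that translation, taken directly from the hunk:

#include <cstdint>
#include <cstdio>

// Mirrors the MmGetPhysicalAddress hunk: the 0xA/0xC/0xE ranges alias the same
// 512MB of physical memory, and the 0xE range carries a 4KB bias.
uint32_t GuestVirtualToPhysical(uint32_t base_address) {
  uint32_t physical_address = base_address & 0x1FFFFFFF;
  if (base_address >= 0xE0000000) {
    physical_address += 0x1000;
  }
  return physical_address;
}

int main() {
  std::printf("%.8X\n", GuestVirtualToPhysical(0xA0010000));  // 00010000
  std::printf("%.8X\n", GuestVirtualToPhysical(0xC0000000));  // 00000000
  std::printf("%.8X\n", GuestVirtualToPhysical(0xE0001000));  // 00002000
}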
+ assert_unhandled_case(object_type_ptr); + native_ptr = 0xDEADF00D; + } break; case 0xD01BBEEF: { // ExThreadObjectType XThread* thread = (XThread*)object; native_ptr = thread->thread_state_ptr(); diff --git a/src/xenia/kernel/xboxkrnl_rtl.cc b/src/xenia/kernel/xboxkrnl_rtl.cc index a10c51e11..30310634c 100644 --- a/src/xenia/kernel/xboxkrnl_rtl.cc +++ b/src/xenia/kernel/xboxkrnl_rtl.cc @@ -522,8 +522,8 @@ SHIM_CALL RtlEnterCriticalSection_shim(PPCContext* ppc_state, // XELOGD("RtlEnterCriticalSection(%.8X)", cs_ptr); - const uint8_t* thread_state_block = SHIM_MEM_ADDR(ppc_state->r[13]); - uint32_t thread_id = XThread::GetCurrentThreadId(thread_state_block); + const uint8_t* pcr = SHIM_MEM_ADDR(ppc_state->r[13]); + uint32_t thread_id = XThread::GetCurrentThreadId(pcr); auto cs = (X_RTL_CRITICAL_SECTION*)SHIM_MEM_ADDR(cs_ptr); @@ -564,8 +564,8 @@ SHIM_CALL RtlTryEnterCriticalSection_shim(PPCContext* ppc_state, // XELOGD("RtlTryEnterCriticalSection(%.8X)", cs_ptr); - const uint8_t* thread_state_block = SHIM_MEM_ADDR(ppc_state->r[13]); - uint32_t thread_id = XThread::GetCurrentThreadId(thread_state_block); + const uint8_t* pcr = SHIM_MEM_ADDR(ppc_state->r[13]); + uint32_t thread_id = XThread::GetCurrentThreadId(pcr); auto cs = (X_RTL_CRITICAL_SECTION*)SHIM_MEM_ADDR(cs_ptr); diff --git a/src/xenia/kernel/xboxkrnl_threading.cc b/src/xenia/kernel/xboxkrnl_threading.cc index ca8bc6c12..91e575cb5 100644 --- a/src/xenia/kernel/xboxkrnl_threading.cc +++ b/src/xenia/kernel/xboxkrnl_threading.cc @@ -64,6 +64,9 @@ void AssertNoNameCollision(KernelState* state, uint32_t obj_attributes_ptr) { // with a success of NAME_EXISTS. // If the name exists and its type doesn't match, we do NAME_COLLISION. // Otherwise, we add like normal. + if (!obj_attributes_ptr) { + return; + } uint32_t name_str_ptr = xe::load_and_swap( state->memory()->TranslateVirtual(obj_attributes_ptr + 4)); if (name_str_ptr) { @@ -460,9 +463,7 @@ SHIM_CALL NtCreateEvent_shim(PPCContext* ppc_state, KernelState* state) { // TODO(benvanik): check for name collision. May return existing object if // type matches. - if (obj_attributes_ptr) { - AssertNoNameCollision(state, obj_attributes_ptr); - } + AssertNoNameCollision(state, obj_attributes_ptr); XEvent* ev = new XEvent(state); ev->Initialize(!event_type, !!initial_state); @@ -1304,6 +1305,35 @@ SHIM_CALL KeRemoveQueueDpc_shim(PPCContext* ppc_state, KernelState* state) { SHIM_SET_RETURN_64(result ? 1 : 0); } +std::mutex global_list_mutex_; + +// http://www.nirsoft.net/kernel_struct/vista/SLIST_HEADER.html +SHIM_CALL InterlockedPopEntrySList_shim(PPCContext* ppc_state, + KernelState* state) { + uint32_t plist_ptr = SHIM_GET_ARG_32(0); + + XELOGD("InterlockedPopEntrySList(%.8X)", plist_ptr); + + std::lock_guard lock(global_list_mutex_); + + uint8_t* p = state->memory()->TranslateVirtual(plist_ptr); + auto first = xe::load_and_swap(p); + if (first == 0) { + // List empty! 
+ SHIM_SET_RETURN_32(0); + return; + } + + uint8_t* p2 = state->memory()->TranslateVirtual(first); + auto second = xe::load_and_swap(p2); + + // Now drop the first element + xe::store_and_swap(p, second); + + // Return the one we popped + SHIM_SET_RETURN_32(first); +} + } // namespace kernel } // namespace xe @@ -1379,4 +1409,6 @@ void xe::kernel::xboxkrnl::RegisterThreadingExports( SHIM_SET_MAPPING("xboxkrnl.exe", KeInitializeDpc, state); SHIM_SET_MAPPING("xboxkrnl.exe", KeInsertQueueDpc, state); SHIM_SET_MAPPING("xboxkrnl.exe", KeRemoveQueueDpc, state); + + SHIM_SET_MAPPING("xboxkrnl.exe", InterlockedPopEntrySList, state); } diff --git a/src/xenia/kernel/xboxkrnl_video.cc b/src/xenia/kernel/xboxkrnl_video.cc index dabf9741c..d660c16d5 100644 --- a/src/xenia/kernel/xboxkrnl_video.cc +++ b/src/xenia/kernel/xboxkrnl_video.cc @@ -380,6 +380,11 @@ SHIM_CALL VdPersistDisplay_shim(PPCContext* ppc_state, KernelState* state) { // unk1_ptr needs to be populated with a pointer passed to // MmFreePhysicalMemory(1, *unk1_ptr). + auto heap = state->memory()->LookupHeapByType(true, 16 * 1024); + uint32_t unk1_value; + heap->Alloc(64, 32, kMemoryAllocationReserve | kMemoryAllocationCommit, + kMemoryProtectNoAccess, false, &unk1_value); + SHIM_SET_MEM_32(unk1_ptr, unk1_value); // ? SHIM_SET_RETURN_64(1); diff --git a/src/xenia/kernel/xobject.cc b/src/xenia/kernel/xobject.cc index 88474ed87..7dc1ffb8a 100644 --- a/src/xenia/kernel/xobject.cc +++ b/src/xenia/kernel/xobject.cc @@ -113,6 +113,7 @@ X_STATUS XObject::Wait(uint32_t wait_reason, uint32_t processor_mode, // Or X_STATUS_ALERTED? return X_STATUS_USER_APC; case WAIT_TIMEOUT: + YieldProcessor(); return X_STATUS_TIMEOUT; default: case WAIT_FAILED: @@ -151,13 +152,16 @@ X_STATUS XObject::WaitMultiple(uint32_t count, XObject** objects, return result; } -void XObject::SetNativePointer(uint32_t native_ptr) { - std::lock_guard lock(kernel_state_->object_mutex()); +void XObject::SetNativePointer(uint32_t native_ptr, bool uninitialized) { + std::lock_guard lock(kernel_state_->object_mutex()); auto header = kernel_state_->memory()->TranslateVirtual(native_ptr); - assert_true(!(header->wait_list_blink & 0x1)); + // Memory uninitialized, so don't bother with the check. + if (!uninitialized) { + assert_true(!(header->wait_list_blink & 0x1)); + } // Stash pointer in struct. uint64_t object_ptr = reinterpret_cast(this); @@ -177,7 +181,7 @@ XObject* XObject::GetObject(KernelState* kernel_state, void* native_ptr, // We identify this by checking the low bit of wait_list_blink - if it's 1, // we have already put our pointer in there. 
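The InterlockedPopEntrySList shim above treats the list head as a big-endian 32-bit guest pointer to the first entry, whose own first 4 bytes are the next pointer; a host mutex stands in for the real interlocked behavior. A single-threaded host-side sketch of the same pop, plus the matching push such a shim family would eventually need, over a byte array standing in for guest memory (locking omitted here):

#include <cstdint>
#include <cstdio>
#include <vector>

// Guest memory stand-in: big-endian 32-bit loads/stores at guest addresses.
std::vector<uint8_t> g_guest(0x1000);
uint32_t Load32(uint32_t addr) {
  const uint8_t* p = &g_guest[addr];
  return (uint32_t(p[0]) << 24) | (uint32_t(p[1]) << 16) |
         (uint32_t(p[2]) << 8) | uint32_t(p[3]);
}
void Store32(uint32_t addr, uint32_t v) {
  uint8_t* p = &g_guest[addr];
  p[0] = uint8_t(v >> 24); p[1] = uint8_t(v >> 16);
  p[2] = uint8_t(v >> 8);  p[3] = uint8_t(v);
}

// Pop the first entry: head -> first -> second becomes head -> second.
uint32_t PopEntry(uint32_t list_ptr) {
  uint32_t first = Load32(list_ptr);
  if (!first) return 0;             // list empty
  uint32_t second = Load32(first);  // entry's first field is its next pointer
  Store32(list_ptr, second);
  return first;
}

// The matching push (not in the shim yet): new entry points at the old head.
void PushEntry(uint32_t list_ptr, uint32_t entry) {
  Store32(entry, Load32(list_ptr));
  Store32(list_ptr, entry);
}

int main() {
  uint32_t list = 0x100;
  PushEntry(list, 0x200);
  PushEntry(list, 0x300);
  std::printf("pop -> %.8X\n", PopEntry(list));  // 00000300
  std::printf("pop -> %.8X\n", PopEntry(list));  // 00000200
  std::printf("pop -> %.8X\n", PopEntry(list));  // 00000000
}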
- std::lock_guard lock(kernel_state->object_mutex()); + std::lock_guard lock(kernel_state->object_mutex()); auto header = reinterpret_cast(native_ptr); diff --git a/src/xenia/kernel/xobject.h b/src/xenia/kernel/xobject.h index 762acc398..dcfb1f43a 100644 --- a/src/xenia/kernel/xobject.h +++ b/src/xenia/kernel/xobject.h @@ -78,7 +78,7 @@ class XObject { virtual void* GetWaitHandle() { return 0; } protected: - void SetNativePointer(uint32_t native_ptr); + void SetNativePointer(uint32_t native_ptr, bool uninitialized = false); static uint32_t TimeoutTicksToMs(int64_t timeout_ticks); diff --git a/src/xenia/memory.cc b/src/xenia/memory.cc index 928f9af78..45fd00ac4 100644 --- a/src/xenia/memory.cc +++ b/src/xenia/memory.cc @@ -18,8 +18,6 @@ #include "xenia/base/math.h" #include "xenia/cpu/mmio_handler.h" -using namespace xe; - // TODO(benvanik): move xbox.h out #include "xenia/xbox.h" @@ -27,28 +25,15 @@ using namespace xe; #include #endif // WIN32 -#define MSPACES 1 -#define USE_LOCKS 0 -#define USE_DL_PREFIX 1 -#define HAVE_MORECORE 0 -#define HAVE_MREMAP 0 -#define malloc_getpagesize 4096 -#define DEFAULT_GRANULARITY 64 * 1024 -#define DEFAULT_TRIM_THRESHOLD MAX_SIZE_T -#define MALLOC_ALIGNMENT 32 -#define MALLOC_INSPECT_ALL 1 -#if XE_DEBUG -#define FOOTERS 0 -#endif // XE_DEBUG -#include "third_party/dlmalloc/malloc.c.h" - -DEFINE_bool(log_heap, false, "Log heap structure on alloc/free."); -DEFINE_uint64( - heap_guard_pages, 0, - "Allocate the given number of guard pages around all heap chunks."); DEFINE_bool(scribble_heap, false, "Scribble 0xCD into all allocated heap memory."); +namespace xe { + +uint32_t get_page_count(uint32_t value, uint32_t page_size) { + return xe::round_up(value, page_size) / page_size; +} + /** * Memory map: * 0x00000000 - 0x3FFFFFFF (1024mb) - virtual 4k pages @@ -81,40 +66,11 @@ DEFINE_bool(scribble_heap, false, * this. */ -const uint32_t kMemoryPhysicalHeapLow = 0x00010000; -const uint32_t kMemoryPhysicalHeapHigh = 0x20000000; -const uint32_t kMemoryVirtualHeapLow = 0x20000000; -const uint32_t kMemoryVirtualHeapHigh = 0x40000000; +static Memory* active_memory_ = nullptr; -class xe::MemoryHeap { - public: - MemoryHeap(Memory* memory, bool is_physical); - ~MemoryHeap(); - - int Initialize(uint32_t low, uint32_t high); - - uint32_t Alloc(uint32_t base_address, uint32_t size, uint32_t flags, - uint32_t alignment); - uint32_t Free(uint32_t address, uint32_t size); - uint32_t QuerySize(uint32_t base_address); - - void Dump(); - - private: - static uint32_t next_heap_id_; - static void DumpHandler(void* start, void* end, size_t used_bytes, - void* context); - - private: - Memory* memory_; - uint32_t heap_id_; - bool is_physical_; - std::mutex lock_; - uint32_t size_; - uint8_t* ptr_; - mspace space_; -}; -uint32_t MemoryHeap::next_heap_id_ = 1; +void CrashDump() { + active_memory_->DumpMap(); +} Memory::Memory() : virtual_membase_(nullptr), @@ -124,22 +80,26 @@ Memory::Memory() mapping_(0), mapping_base_(nullptr) { system_page_size_ = uint32_t(xe::page_size()); - virtual_heap_ = new MemoryHeap(this, false); - physical_heap_ = new MemoryHeap(this, true); + assert_zero(active_memory_); + active_memory_ = this; } Memory::~Memory() { + assert_true(active_memory_ == this); + active_memory_ = nullptr; + // Uninstall the MMIO handler, as we won't be able to service more // requests. mmio_handler_.reset(); - if (mapping_base_) { - // GPU writeback. 
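SetNativePointer tags the guest dispatch header so a later GetObject can recover the host XObject* straight from guest memory; the low bit of wait_list_blink marks "already stashed", which is why the assert is skipped for freshly allocated, uninitialized memory. A rough sketch of packing a 64-bit host pointer into two 32-bit guest fields with that marker bit; the exact field packing below is an assumption for illustration, not Xenia's verified layout:

#include <cassert>
#include <cstdint>
#include <cstdio>

// Illustrative guest-side header; the packing below is an assumption.
struct GuestDispatchHeader {
  uint32_t wait_list_flink;
  uint32_t wait_list_blink;
};

struct HostObject {
  int type;
};

void StashHostPointer(GuestDispatchHeader* header, HostObject* object) {
  uint64_t object_ptr = reinterpret_cast<uint64_t>(object);
  assert((object_ptr & 0x1) == 0);  // pointers are at least 2-byte aligned
  header->wait_list_flink = uint32_t(object_ptr >> 32);
  header->wait_list_blink = uint32_t(object_ptr) | 0x1;  // low bit = "stashed"
}

HostObject* RecoverHostPointer(const GuestDispatchHeader* header) {
  if (!(header->wait_list_blink & 0x1)) {
    return nullptr;  // nothing stashed yet
  }
  uint64_t object_ptr = (uint64_t(header->wait_list_flink) << 32) |
                        (header->wait_list_blink & ~uint64_t(0x1));
  return reinterpret_cast<HostObject*>(object_ptr);
}

int main() {
  HostObject obj{42};
  GuestDispatchHeader header{};
  StashHostPointer(&header, &obj);
  std::printf("recovered type = %d\n", RecoverHostPointer(&header)->type);
}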
- VirtualFree(TranslateVirtual(0xC0000000), 0x00100000, MEM_DECOMMIT); - } - - delete physical_heap_; - delete virtual_heap_; + heaps_.v00000000.Dispose(); + heaps_.v40000000.Dispose(); + heaps_.v80000000.Dispose(); + heaps_.v90000000.Dispose(); + heaps_.vA0000000.Dispose(); + heaps_.vC0000000.Dispose(); + heaps_.vE0000000.Dispose(); + heaps_.physical.Dispose(); // Unmap all views and close mapping. if (mapping_) { @@ -157,15 +117,15 @@ int Memory::Initialize() { // Create main page file-backed mapping. This is all reserved but // uncommitted (so it shouldn't expand page file). #if XE_PLATFORM_WIN32 - mapping_ = - CreateFileMapping(INVALID_HANDLE_VALUE, NULL, - PAGE_READWRITE | SEC_RESERVE, 1, 0, // entire 4gb space - NULL); + mapping_ = CreateFileMapping(INVALID_HANDLE_VALUE, NULL, + PAGE_READWRITE | SEC_RESERVE, + // entire 4gb space + 512mb physical: + 1, 0x1FFFFFFF, NULL); #else char mapping_path[] = "/xenia/mapping/XXXXXX"; mktemp(mapping_path); mapping_ = shm_open(mapping_path, O_CREAT, 0); - ftruncate(mapping_, 0x100000000); + ftruncate(mapping_, 0x11FFFFFFF); #endif // XE_PLATFORM_WIN32 if (!mapping_) { XELOGE("Unable to reserve the 4gb guest address space."); @@ -189,66 +149,92 @@ int Memory::Initialize() { return 1; } virtual_membase_ = mapping_base_; - physical_membase_ = virtual_membase_; + physical_membase_ = mapping_base_ + 0x100000000ull; - // Prepare heaps. - virtual_heap_->Initialize(kMemoryVirtualHeapLow, kMemoryVirtualHeapHigh); - physical_heap_->Initialize(kMemoryPhysicalHeapLow, - kMemoryPhysicalHeapHigh - 0x1000); + // Prepare virtual heaps. + heaps_.v00000000.Initialize(virtual_membase_, 0x00000000, 0x40000000, 4096); + heaps_.v40000000.Initialize(virtual_membase_, 0x40000000, + 0x40000000 - 0x01000000, 64 * 1024); + heaps_.v80000000.Initialize(virtual_membase_, 0x80000000, 0x10000000, + 64 * 1024); + heaps_.v90000000.Initialize(virtual_membase_, 0x90000000, 0x10000000, 4096); + + // Prepare physical heaps. + heaps_.physical.Initialize(physical_membase_, 0x00000000, 0x20000000, 4096); + // HACK: should be 64k, but with us overlaying A and E it needs to be 4. + /*heaps_.vA0000000.Initialize(virtual_membase_, 0xA0000000, 0x20000000, + 64 * 1024, &heaps_.physical);*/ + heaps_.vA0000000.Initialize(virtual_membase_, 0xA0000000, 0x20000000, + 4 * 1024, &heaps_.physical); + heaps_.vC0000000.Initialize(virtual_membase_, 0xC0000000, 0x20000000, + 16 * 1024 * 1024, &heaps_.physical); + heaps_.vE0000000.Initialize(virtual_membase_, 0xE0000000, 0x1FD00000, 4096, + &heaps_.physical); + + // Take the first page at 0 so we can check for writes. + heaps_.v00000000.AllocFixed( + 0x00000000, 4096, 4096, + kMemoryAllocationReserve | kMemoryAllocationCommit, + // 0u); + kMemoryProtectRead | kMemoryProtectWrite); // GPU writeback. // 0xC... is physical, 0x7F... is virtual. We may need to overlay these. - VirtualAlloc(TranslatePhysical(0x00000000), 0x00100000, MEM_COMMIT, - PAGE_READWRITE); + heaps_.vC0000000.AllocFixed( + 0xC0000000, 0x01000000, 32, + kMemoryAllocationReserve | kMemoryAllocationCommit, + kMemoryProtectRead | kMemoryProtectWrite); // Add handlers for MMIO. - mmio_handler_ = cpu::MMIOHandler::Install(mapping_base_); + mmio_handler_ = + cpu::MMIOHandler::Install(virtual_membase_, physical_membase_); if (!mmio_handler_) { XELOGE("Unable to install MMIO handlers"); assert_always(); return 1; } - // I have no idea what this is, but games try to read/write there. 
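After this change the host reserves one mapping covering the full 32-bit guest virtual space plus a 512 MB physical view placed 4 GB further in, so TranslateVirtual and TranslatePhysical reduce to offsets from two membases. A small sketch of that split using the offsets from the hunk (the 0x1FFFFFFF mask reflects the 512 MB physical range):

#include <cstdint>
#include <cstdio>

// Mirrors the membase split above: the virtual view covers the 32-bit guest
// space and the 512MB physical view starts 4GB into the same host mapping.
constexpr uint64_t kVirtualMembaseOffset = 0x000000000ull;
constexpr uint64_t kPhysicalMembaseOffset = 0x100000000ull;

uint64_t HostOffsetForVirtual(uint32_t guest_address) {
  return kVirtualMembaseOffset + guest_address;
}
uint64_t HostOffsetForPhysical(uint32_t physical_address) {
  // Physical addresses only span 512MB, so mask to that range.
  return kPhysicalMembaseOffset + (physical_address & 0x1FFFFFFF);
}

int main() {
  std::printf("virtual  %.8X -> mapping offset %.9llX\n", 0xC0000000u,
              (unsigned long long)HostOffsetForVirtual(0xC0000000u));
  std::printf("physical %.8X -> mapping offset %.9llX\n", 0x00000000u,
              (unsigned long long)HostOffsetForPhysical(0x00000000u));
}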
- VirtualAlloc(TranslateVirtual(0x40000000), 0x00010000, MEM_COMMIT, - PAGE_READWRITE); - xe::store_and_swap(TranslateVirtual(0x40000000), 0x00C40000); - xe::store_and_swap(TranslateVirtual(0x40000004), 0x00010000); + // ? + uint32_t unk_phys_alloc; + heaps_.vA0000000.Alloc(0x340000, 64 * 1024, kMemoryAllocationReserve, + kMemoryProtectNoAccess, true, &unk_phys_alloc); return 0; } const static struct { - uint32_t virtual_address_start; - uint32_t virtual_address_end; - uint32_t target_address; + uint64_t virtual_address_start; + uint64_t virtual_address_end; + uint64_t target_address; } map_info[] = { - 0x00000000, 0x3FFFFFFF, - 0x00000000, // (1024mb) - virtual 4k pages - 0x40000000, 0x7EFFFFFF, - 0x40000000, // (1024mb) - virtual 64k pages (cont) - 0x7F000000, 0x7F0FFFFF, - 0x00000000, // (1mb) - GPU writeback - 0x7F100000, 0x7FFFFFFF, - 0x00100000, // (15mb) - XPS? - 0x80000000, 0x8FFFFFFF, - 0x80000000, // (256mb) - xex 64k pages - 0x90000000, 0x9FFFFFFF, - 0x80000000, // (256mb) - xex 4k pages - 0xA0000000, 0xBFFFFFFF, - 0x00000000, // (512mb) - physical 64k pages - 0xC0000000, 0xDFFFFFFF, - 0x00000000, // - physical 16mb pages - 0xE0000000, 0xFFFFFFFF, - 0x00000000, // - physical 4k pages + // (1024mb) - virtual 4k pages + 0x00000000, 0x3FFFFFFF, 0x0000000000000000ull, + // (1024mb) - virtual 64k pages (cont) + 0x40000000, 0x7EFFFFFF, 0x0000000040000000ull, + // (16mb) - GPU writeback + 15mb of XPS? + 0x7F000000, 0x7FFFFFFF, 0x0000000100000000ull, + // (256mb) - xex 64k pages + 0x80000000, 0x8FFFFFFF, 0x0000000080000000ull, + // (256mb) - xex 4k pages + 0x90000000, 0x9FFFFFFF, 0x0000000080000000ull, + // (512mb) - physical 64k pages + 0xA0000000, 0xBFFFFFFF, 0x0000000100000000ull, + // - physical 16mb pages + 0xC0000000, 0xDFFFFFFF, 0x0000000100000000ull, + // - physical 4k pages + 0xE0000000, 0xFFFFFFFF, 0x0000000100000000ull, + // - physical raw + 0x100000000, 0x11FFFFFFF, 0x0000000100000000ull, }; int Memory::MapViews(uint8_t* mapping_base) { assert_true(xe::countof(map_info) == xe::countof(views_.all_views)); for (size_t n = 0; n < xe::countof(map_info); n++) { #if XE_PLATFORM_WIN32 + DWORD target_address_low = static_cast(map_info[n].target_address); + DWORD target_address_high = + static_cast(map_info[n].target_address >> 32); views_.all_views[n] = reinterpret_cast(MapViewOfFileEx( - mapping_, FILE_MAP_ALL_ACCESS, 0x00000000, - (DWORD)map_info[n].target_address, + mapping_, FILE_MAP_ALL_ACCESS, target_address_high, target_address_low, map_info[n].virtual_address_end - map_info[n].virtual_address_start + 1, mapping_base + map_info[n].virtual_address_start)); #else @@ -281,6 +267,43 @@ void Memory::UnmapViews() { } } +BaseHeap* Memory::LookupHeap(uint32_t address) { + if (address < 0x40000000) { + return &heaps_.v00000000; + } else if (address < 0x80000000) { + return &heaps_.v40000000; + } else if (address < 0x90000000) { + return &heaps_.v80000000; + } else if (address < 0xA0000000) { + return &heaps_.v90000000; + } else if (address < 0xC0000000) { + return &heaps_.vA0000000; + } else if (address < 0xE0000000) { + return &heaps_.vC0000000; + } else { + return &heaps_.vE0000000; + } +} + +BaseHeap* Memory::LookupHeapByType(bool physical, uint32_t page_size) { + if (physical) { + if (page_size <= 4096) { + // HACK: should be vE0000000 + return &heaps_.vA0000000; + } else if (page_size <= 64 * 1024) { + return &heaps_.vA0000000; + } else { + return &heaps_.vC0000000; + } + } else { + if (page_size <= 4096) { + return &heaps_.v00000000; + } else { + return &heaps_.v40000000; + } + 
} +} + void Memory::Zero(uint32_t address, uint32_t size) { std::memset(TranslateVirtual(address), 0, size); } @@ -319,23 +342,27 @@ uint32_t Memory::SearchAligned(uint32_t start, uint32_t end, return 0; } -bool Memory::AddMappedRange(uint32_t address, uint32_t mask, uint32_t size, - void* context, cpu::MMIOReadCallback read_callback, - cpu::MMIOWriteCallback write_callback) { +bool Memory::AddVirtualMappedRange(uint32_t virtual_address, uint32_t mask, + uint32_t size, void* context, + cpu::MMIOReadCallback read_callback, + cpu::MMIOWriteCallback write_callback) { DWORD protect = PAGE_NOACCESS; - if (!VirtualAlloc(TranslateVirtual(address), size, MEM_COMMIT, protect)) { + if (!VirtualAlloc(TranslateVirtual(virtual_address), size, MEM_COMMIT, + protect)) { XELOGE("Unable to map range; commit/protect failed"); return false; } - return mmio_handler_->RegisterRange(address, mask, size, context, + return mmio_handler_->RegisterRange(virtual_address, mask, size, context, read_callback, write_callback); } -uintptr_t Memory::AddWriteWatch(uint32_t guest_address, uint32_t length, - cpu::WriteWatchCallback callback, - void* callback_context, void* callback_data) { - return mmio_handler_->AddWriteWatch(guest_address, length, callback, - callback_context, callback_data); +uintptr_t Memory::AddPhysicalWriteWatch(uint32_t physical_address, + uint32_t length, + cpu::WriteWatchCallback callback, + void* callback_context, + void* callback_data) { + return mmio_handler_->AddPhysicalWriteWatch( + physical_address, length, callback, callback_context, callback_data); } void Memory::CancelWriteWatch(uintptr_t watch_handle) { @@ -346,11 +373,15 @@ uint32_t Memory::SystemHeapAlloc(uint32_t size, uint32_t alignment, uint32_t system_heap_flags) { // TODO(benvanik): lightweight pool. bool is_physical = !!(system_heap_flags & kSystemHeapPhysical); - uint32_t flags = MEMORY_FLAG_ZERO; - if (is_physical) { - flags |= MEMORY_FLAG_PHYSICAL; + auto heap = LookupHeapByType(is_physical, 4096); + uint32_t address; + if (!heap->Alloc(size, alignment, + kMemoryAllocationReserve | kMemoryAllocationCommit, + kMemoryProtectRead | kMemoryProtectWrite, false, &address)) { + return 0; } - return HeapAlloc(0, size, flags, alignment); + Zero(address, size); + return address; } void Memory::SystemHeapFree(uint32_t address) { @@ -358,315 +389,760 @@ void Memory::SystemHeapFree(uint32_t address) { return; } // TODO(benvanik): lightweight pool. - HeapFree(address, 0); + auto heap = LookupHeapByType(false, 4096); + heap->Release(address); } -uint32_t Memory::HeapAlloc(uint32_t base_address, uint32_t size, uint32_t flags, - uint32_t alignment) { - // If we were given a base address we are outside of the normal heap and - // will place wherever asked (so long as it doesn't overlap the heap). - if (!base_address) { - // Normal allocation from the managed heap. 
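LookupHeap and LookupHeapByType above are plain range tests over the fixed guest address map: the low 2 GB is split into 4 KiB and 64 KiB virtual heaps, 0x80000000/0x90000000 hold the xex views, and the 0xA/0xC/0xE ranges are the physical views with different page sizes. A compact sketch of that range-to-heap dispatch, returning labels instead of BaseHeap pointers so it stays standalone:

#include <cstdint>
#include <cstdio>

// Mirrors Memory::LookupHeap's boundary checks; returns a label instead of a
// BaseHeap* so the sketch compiles on its own.
const char* LookupHeapName(uint32_t address) {
  if (address < 0x40000000) return "v00000000 (virtual 4k)";
  if (address < 0x80000000) return "v40000000 (virtual 64k)";
  if (address < 0x90000000) return "v80000000 (xex 64k)";
  if (address < 0xA0000000) return "v90000000 (xex 4k)";
  if (address < 0xC0000000) return "vA0000000 (physical 64k view)";
  if (address < 0xE0000000) return "vC0000000 (physical 16mb view)";
  return "vE0000000 (physical 4k view)";
}

int main() {
  for (uint32_t addr : {0x00010000u, 0x40000000u, 0x82000000u, 0x9FFF0000u,
                        0xA0340000u, 0xC0000000u, 0xE0001000u}) {
    std::printf("%.8X -> %s\n", addr, LookupHeapName(addr));
  }
}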
- uint32_t result; - if (flags & MEMORY_FLAG_PHYSICAL) { - result = physical_heap_->Alloc(base_address, size, flags, alignment); - } else { - result = virtual_heap_->Alloc(base_address, size, flags, alignment); +void Memory::DumpMap() { + XELOGE("=================================================================="); + XELOGE("Memory Dump"); + XELOGE("=================================================================="); + XELOGE(" System Page Size: %d (%.8X)", system_page_size_, system_page_size_); + XELOGE(" Virtual Membase: %.16llX", virtual_membase_); + XELOGE(" Physical Membase: %.16llX", physical_membase_); + XELOGE(""); + XELOGE("------------------------------------------------------------------"); + XELOGE("Virtual Heaps"); + XELOGE("------------------------------------------------------------------"); + XELOGE(""); + heaps_.v00000000.DumpMap(); + heaps_.v40000000.DumpMap(); + heaps_.v80000000.DumpMap(); + heaps_.v90000000.DumpMap(); + XELOGE(""); + XELOGE("------------------------------------------------------------------"); + XELOGE("Physical Heaps"); + XELOGE("------------------------------------------------------------------"); + XELOGE(""); + heaps_.physical.DumpMap(); + heaps_.vA0000000.DumpMap(); + heaps_.vC0000000.DumpMap(); + heaps_.vE0000000.DumpMap(); + XELOGE(""); +} + +DWORD ToWin32ProtectFlags(uint32_t protect) { + DWORD result = 0; + if ((protect & kMemoryProtectRead) && !(protect & kMemoryProtectWrite)) { + result |= PAGE_READONLY; + } else if ((protect & kMemoryProtectRead) && + (protect & kMemoryProtectWrite)) { + result |= PAGE_READWRITE; + } else { + result |= PAGE_NOACCESS; + } + // if (protect & kMemoryProtectNoCache) { + // result |= PAGE_NOCACHE; + //} + // if (protect & kMemoryProtectWriteCombine) { + // result |= PAGE_WRITECOMBINE; + //} + return result; +} + +uint32_t FromWin32ProtectFlags(DWORD protect) { + uint32_t result = 0; + if (protect & PAGE_READONLY) { + result |= kMemoryProtectRead; + } else if (protect & PAGE_READWRITE) { + result |= kMemoryProtectRead | kMemoryProtectWrite; + } + if (protect & PAGE_NOCACHE) { + result |= kMemoryProtectNoCache; + } + if (protect & PAGE_WRITECOMBINE) { + result |= kMemoryProtectWriteCombine; + } + return result; +} + +BaseHeap::BaseHeap() + : membase_(nullptr), heap_base_(0), heap_size_(0), page_size_(0) {} + +BaseHeap::~BaseHeap() = default; + +void BaseHeap::Initialize(uint8_t* membase, uint32_t heap_base, + uint32_t heap_size, uint32_t page_size) { + membase_ = membase; + heap_base_ = heap_base; + heap_size_ = heap_size - 1; + page_size_ = page_size; + page_table_.resize(heap_size / page_size); +} + +void BaseHeap::Dispose() { + // Walk table and release all regions. 
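As a quick sanity check on the protection translation helpers defined above, a standalone sketch (assert_true is the project's assertion macro; the expected values follow from the branches in ToWin32ProtectFlags/FromWin32ProtectFlags):

// Guest read/write maps to PAGE_READWRITE and survives the round trip.
uint32_t guest = kMemoryProtectRead | kMemoryProtectWrite;
DWORD host = ToWin32ProtectFlags(guest);            // PAGE_READWRITE
assert_true(FromWin32ProtectFlags(host) == guest);
// Anything without read access collapses to PAGE_NOACCESS, so the mapping is
// intentionally lossy for write-only and no-access requests.
assert_true(ToWin32ProtectFlags(kMemoryProtectWrite) == PAGE_NOACCESS);
assert_true(ToWin32ProtectFlags(kMemoryProtectNoAccess) == PAGE_NOACCESS);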
+ for (uint32_t page_number = 0; page_number < page_table_.size(); + ++page_number) { + auto& page_entry = page_table_[page_number]; + if (page_entry.state) { + VirtualFree(membase_ + heap_base_ + page_number * page_size_, 0, + MEM_RELEASE); + page_number += page_entry.region_page_count; } - if (result) { - if (flags & MEMORY_FLAG_ZERO) { - memset(TranslateVirtual(result), 0, size); + } +} + +void BaseHeap::DumpMap() { + std::lock_guard lock(heap_mutex_); + XELOGE("------------------------------------------------------------------"); + XELOGE("Heap: %.8X-%.8X", heap_base_, heap_base_ + heap_size_); + XELOGE("------------------------------------------------------------------"); + XELOGE(" Heap Base: %.8X", heap_base_); + XELOGE(" Heap Size: %d (%.8X)", heap_size_, heap_size_); + XELOGE(" Page Size: %d (%.8X)", page_size_, page_size_); + XELOGE(" Page Count: %lld", page_table_.size()); + bool is_empty_span = false; + uint32_t empty_span_start = 0; + for (uint32_t i = 0; i < uint32_t(page_table_.size()); ++i) { + auto& page = page_table_[i]; + if (!page.state) { + if (!is_empty_span) { + is_empty_span = true; + empty_span_start = i; } + continue; } - return result; - } else { - if (base_address >= kMemoryVirtualHeapLow && - base_address < kMemoryVirtualHeapHigh) { - // Overlapping managed heap. - assert_always(); - return 0; + if (is_empty_span) { + XELOGE(" %.8X-%.8X %6dp %10db unreserved", + heap_base_ + empty_span_start * page_size_, + heap_base_ + i * page_size_, i - empty_span_start, + (i - empty_span_start) * page_size_); + is_empty_span = false; } - if (base_address >= kMemoryPhysicalHeapLow && - base_address < kMemoryPhysicalHeapHigh) { - // Overlapping managed heap. - assert_always(); - return 0; + const char* state_name = " "; + if (page.state & kMemoryAllocationCommit) { + state_name = "COM"; + } else if (page.state & kMemoryAllocationReserve) { + state_name = "RES"; } - - uint8_t* p = TranslateVirtual(base_address); - // TODO(benvanik): check if address range is in use with a query. - - void* pv = VirtualAlloc(p, size, MEM_COMMIT, PAGE_READWRITE); - if (!pv) { - // Failed. - assert_always(); - return 0; - } - - if (flags & MEMORY_FLAG_ZERO) { - memset(pv, 0, size); - } - - return base_address; + char access_r = (page.current_protect & kMemoryProtectRead) ? 'R' : ' '; + char access_w = (page.current_protect & kMemoryProtectWrite) ? 'W' : ' '; + XELOGE(" %.8X-%.8X %6dp %10db %s %c%c", heap_base_ + i * page_size_, + heap_base_ + (i + page.region_page_count) * page_size_, + page.region_page_count, page.region_page_count * page_size_, + state_name, access_r, access_w); + i += page.region_page_count - 1; + } + if (is_empty_span) { + XELOGE(" %.8X-%.8X - %d unreserved pages)", + heap_base_ + empty_span_start * page_size_, heap_base_ + heap_size_, + page_table_.size() - empty_span_start); } } -int Memory::HeapFree(uint32_t address, uint32_t size) { - if (address >= kMemoryVirtualHeapLow && address < kMemoryVirtualHeapHigh) { - return virtual_heap_->Free(address, size) ? 0 : 1; - } else if (address >= kMemoryPhysicalHeapLow && - address < kMemoryPhysicalHeapHigh) { - return physical_heap_->Free(address, size) ? 0 : 1; - } else { - // A placed address. Decommit. - uint8_t* p = TranslateVirtual(address); - return VirtualFree(p, size, MEM_DECOMMIT) ? 
0 : 1; - } +bool BaseHeap::Alloc(uint32_t size, uint32_t alignment, + uint32_t allocation_type, uint32_t protect, bool top_down, + uint32_t* out_address) { + *out_address = 0; + size = xe::round_up(size, page_size_); + alignment = xe::round_up(alignment, page_size_); + uint32_t low_address = heap_base_; + uint32_t high_address = heap_base_ + heap_size_; + return AllocRange(low_address, high_address, size, alignment, allocation_type, + protect, top_down, out_address); } -bool Memory::QueryInformation(uint32_t base_address, AllocationInfo* mem_info) { - uint8_t* p = TranslateVirtual(base_address); - MEMORY_BASIC_INFORMATION mbi; - if (!VirtualQuery(p, &mbi, sizeof(mbi))) { +bool BaseHeap::AllocFixed(uint32_t base_address, uint32_t size, + uint32_t alignment, uint32_t allocation_type, + uint32_t protect) { + alignment = xe::round_up(alignment, page_size_); + size = xe::align(size, alignment); + assert_true(base_address % alignment == 0); + uint32_t page_count = get_page_count(size, page_size_); + uint32_t start_page_number = (base_address - heap_base_) / page_size_; + uint32_t end_page_number = start_page_number + page_count - 1; + if (start_page_number >= page_table_.size() || + end_page_number > page_table_.size()) { + XELOGE("BaseHeap::Alloc passed out of range address range"); return false; } - mem_info->base_address = base_address; - mem_info->allocation_base = static_cast( - reinterpret_cast(mbi.AllocationBase) - virtual_membase_); - mem_info->allocation_protect = mbi.AllocationProtect; - mem_info->region_size = mbi.RegionSize; - mem_info->state = mbi.State; - mem_info->protect = mbi.Protect; - mem_info->type = mbi.Type; + + std::lock_guard lock(heap_mutex_); + + // - If we are reserving the entire range requested must not be already + // reserved. + // - If we are committing it's ok for pages within the range to already be + // committed. + for (uint32_t page_number = start_page_number; page_number <= end_page_number; + ++page_number) { + uint32_t state = page_table_[page_number].state; + if ((allocation_type == kMemoryAllocationReserve) && state) { + // Already reserved. + XELOGE("BaseHeap::Alloc attempting to reserve an already reserved range"); + return false; + } + if ((allocation_type == kMemoryAllocationCommit) && + !(state & kMemoryAllocationReserve)) { + // Attempting a commit-only op on an unreserved page. + XELOGE("BaseHeap::Alloc attempting commit on unreserved page"); + return false; + } + } + + // Allocate from host. + if (allocation_type == kMemoryAllocationReserve) { + // Reserve is not needed, as we are mapped already. + } else { + DWORD flAllocationType = 0; + if (allocation_type & kMemoryAllocationCommit) { + flAllocationType |= MEM_COMMIT; + } + LPVOID result = + VirtualAlloc(membase_ + heap_base_ + start_page_number * page_size_, + page_count * page_size_, flAllocationType, + ToWin32ProtectFlags(protect)); + if (!result) { + XELOGE("BaseHeap::Alloc failed to alloc range from host"); + return false; + } + + if (FLAGS_scribble_heap && protect & kMemoryProtectWrite) { + memset(result, 0xCD, page_count * page_size_); + } + } + + // Set page state. + for (uint32_t page_number = start_page_number; page_number <= end_page_number; + ++page_number) { + auto& page_entry = page_table_[page_number]; + if (allocation_type & kMemoryAllocationReserve) { + // Region is based on reservation. 
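A small sketch of the reserve/commit split that AllocFixed enforces, assuming a 64k-page heap and an address range that is currently unreserved (the base address is illustrative):

// Reserve 128 KiB first; committing pages inside the reservation later is
// allowed, but reserving the same range twice is rejected.
uint32_t base = 0x40100000;  // hypothetical guest address inside the heap
assert_true(heap->AllocFixed(base, 128 * 1024, 64 * 1024,
                             kMemoryAllocationReserve,
                             kMemoryProtectNoAccess));
// Commit just the first 64 KiB of the reservation with read/write access.
assert_true(heap->AllocFixed(base, 64 * 1024, 64 * 1024,
                             kMemoryAllocationCommit,
                             kMemoryProtectRead | kMemoryProtectWrite));
// A second reserve of an already-reserved page fails.
assert_true(!heap->AllocFixed(base, 64 * 1024, 64 * 1024,
                              kMemoryAllocationReserve,
                              kMemoryProtectNoAccess));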
+ page_entry.base_address = start_page_number; + page_entry.region_page_count = page_count; + } + page_entry.allocation_protect = protect; + page_entry.current_protect = protect; + page_entry.state = kMemoryAllocationReserve | allocation_type; + } + return true; } -uint32_t Memory::QuerySize(uint32_t base_address) { - if (base_address >= kMemoryVirtualHeapLow && - base_address < kMemoryVirtualHeapHigh) { - return virtual_heap_->QuerySize(base_address); - } else if (base_address >= kMemoryPhysicalHeapLow && - base_address < kMemoryPhysicalHeapHigh) { - return physical_heap_->QuerySize(base_address); +bool BaseHeap::AllocRange(uint32_t low_address, uint32_t high_address, + uint32_t size, uint32_t alignment, + uint32_t allocation_type, uint32_t protect, + bool top_down, uint32_t* out_address) { + *out_address = 0; + + alignment = xe::round_up(alignment, page_size_); + uint32_t page_count = get_page_count(size, page_size_); + low_address = std::max(heap_base_, xe::align(low_address, alignment)); + high_address = + std::min(heap_base_ + heap_size_, xe::align(high_address, alignment)); + uint32_t low_page_number = (low_address - heap_base_) / page_size_; + uint32_t high_page_number = (high_address - heap_base_) / page_size_; + low_page_number = std::min(uint32_t(page_table_.size()) - 1, low_page_number); + high_page_number = + std::min(uint32_t(page_table_.size()) - 1, high_page_number); + + std::lock_guard lock(heap_mutex_); + + // Find a free page range. + // The base page must match the requested alignment, so we first scan for + // a free aligned page and only then check for continuous free pages. + // TODO(benvanik): optimized searching (free list buckets, bitmap, etc). + uint32_t start_page_number = UINT_MAX; + uint32_t end_page_number = UINT_MAX; + uint32_t page_scan_stride = alignment / page_size_; + high_page_number = high_page_number - (high_page_number % page_scan_stride); + if (top_down) { + for (int64_t base_page_number = high_page_number - page_count; + base_page_number >= low_page_number; + base_page_number -= page_scan_stride) { + bool is_free = page_table_[base_page_number].state == 0; + if (page_table_[base_page_number].state != 0) { + // Base page not free, skip to next usable page. + continue; + } + // Check requested range to ensure free. + start_page_number = uint32_t(base_page_number); + end_page_number = uint32_t(base_page_number) + page_count - 1; + assert_true(end_page_number < page_table_.size()); + bool any_taken = false; + for (uint32_t page_number = uint32_t(base_page_number); + !any_taken && page_number <= end_page_number; ++page_number) { + bool is_free = page_table_[page_number].state == 0; + if (!is_free) { + // At least one page in the range is used, skip to next. + any_taken = true; + break; + } + } + if (!any_taken) { + // Found our place. + break; + } + // Retry. + start_page_number = end_page_number = UINT_MAX; + } } else { - // A placed address. - uint8_t* p = TranslateVirtual(base_address); - MEMORY_BASIC_INFORMATION mem_info; - if (VirtualQuery(p, &mem_info, sizeof(mem_info))) { - return uint32_t(mem_info.RegionSize); - } else { - // Error. - return 0; + for (uint32_t base_page_number = low_page_number; + base_page_number <= high_page_number; + base_page_number += page_scan_stride) { + bool is_free = page_table_[base_page_number].state == 0; + if (page_table_[base_page_number].state != 0) { + // Base page not free, skip to next usable page. + continue; + } + // Check requested range to ensure free. 
+ start_page_number = base_page_number; + end_page_number = base_page_number + page_count - 1; + bool any_taken = false; + for (uint32_t page_number = base_page_number; + !any_taken && page_number <= end_page_number; ++page_number) { + bool is_free = page_table_[page_number].state == 0; + if (!is_free) { + // At least one page in the range is used, skip to next. + any_taken = true; + break; + } + } + if (!any_taken) { + // Found our place. + break; + } + // Retry. + start_page_number = end_page_number = UINT_MAX; } } -} - -int Memory::Protect(uint32_t address, uint32_t size, uint32_t access) { - uint8_t* p = TranslateVirtual(address); - - size_t heap_guard_size = FLAGS_heap_guard_pages * 4096; - p += heap_guard_size; - - DWORD new_protect = access; - new_protect = - new_protect & - (X_PAGE_NOACCESS | X_PAGE_READONLY | X_PAGE_READWRITE | X_PAGE_WRITECOPY | - X_PAGE_GUARD | X_PAGE_NOCACHE | X_PAGE_WRITECOMBINE); - - DWORD old_protect; - return VirtualProtect(p, size, new_protect, &old_protect) == TRUE ? 0 : 1; -} - -uint32_t Memory::QueryProtect(uint32_t address) { - uint8_t* p = TranslateVirtual(address); - MEMORY_BASIC_INFORMATION info; - size_t info_size = VirtualQuery((void*)p, &info, sizeof(info)); - if (!info_size) { - return 0; - } - return info.Protect; -} - -MemoryHeap::MemoryHeap(Memory* memory, bool is_physical) - : memory_(memory), is_physical_(is_physical) { - heap_id_ = next_heap_id_++; -} - -MemoryHeap::~MemoryHeap() { - if (space_) { - std::lock_guard guard(lock_); - destroy_mspace(space_); - space_ = NULL; + if (start_page_number == UINT_MAX || end_page_number == UINT_MAX) { + // Out of memory. + XELOGE("BaseHeap::Alloc failed to find contiguous range"); + assert_always("Heap exhausted!"); + return false; } - if (ptr_) { - VirtualFree(ptr_, 0, MEM_RELEASE); - } -} - -int MemoryHeap::Initialize(uint32_t low, uint32_t high) { - // Commit the memory where our heap will live and allocate it. - // TODO(benvanik): replace dlmalloc with an implementation that can commit - // as it goes. - size_ = high - low; - ptr_ = memory_->views_.v00000000 + low; - void* heap_result = VirtualAlloc(ptr_, size_, MEM_COMMIT, PAGE_READWRITE); - if (!heap_result) { - return 1; - } - space_ = create_mspace_with_base(ptr_, size_, 0); - - return 0; -} - -uint32_t MemoryHeap::Alloc(uint32_t base_address, uint32_t size, uint32_t flags, - uint32_t alignment) { - size_t alloc_size = size; - if (int32_t(alloc_size) < 0) { - alloc_size = uint32_t(-alloc_size); - } - size_t heap_guard_size = FLAGS_heap_guard_pages * 4096; - if (heap_guard_size) { - alignment = std::max(alignment, static_cast(heap_guard_size)); - alloc_size = - static_cast(xe::round_up(alloc_size, heap_guard_size)); - } - - lock_.lock(); - uint8_t* p = (uint8_t*)mspace_memalign(space_, alignment, - alloc_size + heap_guard_size * 2); - assert_true(reinterpret_cast(p) <= 0xFFFFFFFFFull); - if (FLAGS_heap_guard_pages) { - size_t real_size = mspace_usable_size(p); - DWORD old_protect; - VirtualProtect(p, heap_guard_size, PAGE_NOACCESS, &old_protect); - p += heap_guard_size; - VirtualProtect(p + alloc_size, heap_guard_size, PAGE_NOACCESS, - &old_protect); - } - if (FLAGS_log_heap) { - Dump(); - } - lock_.unlock(); - if (!p) { - return 0; - } - - if (is_physical_) { - // If physical, we need to commit the memory in the physical address ranges - // so that it can be accessed. 
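The scan in AllocRange walks candidate base pages at an alignment-derived stride and only then verifies that the whole run is free. A simplified standalone model of the bottom-up case (not the emulator's code, just the shape of the search):

#include <cstdint>
#include <vector>

// Returns the first base page (stepping by `stride`) whose next `count` pages
// are all free, or UINT32_MAX if no such run exists.
uint32_t FindFreeRun(const std::vector<uint8_t>& page_states, uint32_t count,
                     uint32_t stride) {
  for (uint32_t base = 0; base + count <= page_states.size(); base += stride) {
    bool all_free = true;
    for (uint32_t page = base; page < base + count; ++page) {
      if (page_states[page] != 0) {
        all_free = false;
        break;
      }
    }
    if (all_free) {
      return base;
    }
  }
  return UINT32_MAX;
}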
- VirtualAlloc(memory_->views_.vA0000000 + (p - memory_->views_.v00000000), - alloc_size, MEM_COMMIT, PAGE_READWRITE); - VirtualAlloc(memory_->views_.vC0000000 + (p - memory_->views_.v00000000), - alloc_size, MEM_COMMIT, PAGE_READWRITE); - VirtualAlloc(memory_->views_.vE0000000 + (p - memory_->views_.v00000000), - alloc_size, MEM_COMMIT, PAGE_READWRITE); - } - - if (flags & MEMORY_FLAG_ZERO) { - memset(p, 0, alloc_size); - } else if (FLAGS_scribble_heap) { - // Trash the memory so that we can see bad read-before-write bugs easier. - memset(p, 0xCD, alloc_size); - } - - uint32_t address = - (uint32_t)((uintptr_t)p - (uintptr_t)memory_->mapping_base_); - - return address; -} - -uint32_t MemoryHeap::Free(uint32_t address, uint32_t size) { - uint8_t* p = memory_->TranslateVirtual(address); - - // Heap allocated address. - size_t heap_guard_size = FLAGS_heap_guard_pages * 4096; - p -= heap_guard_size; - size_t real_size = mspace_usable_size(p); - real_size -= heap_guard_size * 2; - if (!real_size) { - return 0; - } - - if (FLAGS_scribble_heap) { - // Trash the memory so that we can see bad read-before-write bugs easier. - memset(p + heap_guard_size, 0xDC, size); - } - - lock_.lock(); - if (FLAGS_heap_guard_pages) { - DWORD old_protect; - VirtualProtect(p, heap_guard_size, PAGE_READWRITE, &old_protect); - VirtualProtect(p + heap_guard_size + real_size, heap_guard_size, - PAGE_READWRITE, &old_protect); - } - mspace_free(space_, p); - if (FLAGS_log_heap) { - Dump(); - } - lock_.unlock(); - - if (is_physical_) { - // If physical, decommit from physical ranges too. - VirtualFree(memory_->views_.vA0000000 + (p - memory_->views_.v00000000), - size, MEM_DECOMMIT); - VirtualFree(memory_->views_.vC0000000 + (p - memory_->views_.v00000000), - size, MEM_DECOMMIT); - VirtualFree(memory_->views_.vE0000000 + (p - memory_->views_.v00000000), - size, MEM_DECOMMIT); - } - - return static_cast(real_size); -} - -uint32_t MemoryHeap::QuerySize(uint32_t base_address) { - uint8_t* p = memory_->TranslateVirtual(base_address); - - // Heap allocated address. - uint32_t heap_guard_size = uint32_t(FLAGS_heap_guard_pages * 4096); - p -= heap_guard_size; - uint32_t real_size = uint32_t(mspace_usable_size(p)); - real_size -= heap_guard_size * 2; - if (!real_size) { - return 0; - } - - return real_size; -} - -void MemoryHeap::Dump() { - XELOGI("MemoryHeap::Dump - %s", is_physical_ ? 
"physical" : "virtual"); - if (FLAGS_heap_guard_pages) { - XELOGI(" (heap guard pages enabled, stats will be wrong)"); - } - struct mallinfo info = mspace_mallinfo(space_); - XELOGI(" arena: %lld", info.arena); - XELOGI(" ordblks: %lld", info.ordblks); - XELOGI(" hblks: %lld", info.hblks); - XELOGI(" hblkhd: %lld", info.hblkhd); - XELOGI(" usmblks: %lld", info.usmblks); - XELOGI(" uordblks: %lld", info.uordblks); - XELOGI(" fordblks: %lld", info.fordblks); - XELOGI(" keepcost: %lld", info.keepcost); - mspace_inspect_all(space_, DumpHandler, this); -} - -void MemoryHeap::DumpHandler(void* start, void* end, size_t used_bytes, - void* context) { - MemoryHeap* heap = (MemoryHeap*)context; - Memory* memory = heap->memory_; - size_t heap_guard_size = FLAGS_heap_guard_pages * 4096; - uint64_t start_addr = (uint64_t)start + heap_guard_size; - uint64_t end_addr = (uint64_t)end - heap_guard_size; - uint32_t guest_start = - (uint32_t)(start_addr - (uintptr_t)memory->mapping_base_); - uint32_t guest_end = (uint32_t)(end_addr - (uintptr_t)memory->mapping_base_); - if (int32_t(end_addr - start_addr) > 0) { - XELOGI(" - %.8X-%.8X (%10db) %.16llX-%.16llX - %9db used", guest_start, - guest_end, (guest_end - guest_start), start_addr, end_addr, - used_bytes); + // Allocate from host. + if (allocation_type == kMemoryAllocationReserve) { + // Reserve is not needed, as we are mapped already. } else { - XELOGI(" - %.16llX-%.16llX - %9db used", - start, end, used_bytes); + DWORD flAllocationType = 0; + if (allocation_type & kMemoryAllocationCommit) { + flAllocationType |= MEM_COMMIT; + } + LPVOID result = + VirtualAlloc(membase_ + heap_base_ + start_page_number * page_size_, + page_count * page_size_, flAllocationType, + ToWin32ProtectFlags(protect)); + if (!result) { + XELOGE("BaseHeap::Alloc failed to alloc range from host"); + return false; + } + + if (FLAGS_scribble_heap && protect & kMemoryProtectWrite) { + std::memset(result, 0xCD, page_count * page_size_); + } } + + // Set page state. + for (uint32_t page_number = start_page_number; page_number <= end_page_number; + ++page_number) { + auto& page_entry = page_table_[page_number]; + page_entry.base_address = start_page_number; + page_entry.region_page_count = page_count; + page_entry.allocation_protect = protect; + page_entry.current_protect = protect; + page_entry.state = kMemoryAllocationReserve | allocation_type; + } + + *out_address = heap_base_ + start_page_number * page_size_; + return true; } + +bool BaseHeap::Decommit(uint32_t address, uint32_t size) { + uint32_t page_count = get_page_count(size, page_size_); + uint32_t start_page_number = (address - heap_base_) / page_size_; + uint32_t end_page_number = start_page_number + page_count - 1; + start_page_number = + std::min(uint32_t(page_table_.size()) - 1, start_page_number); + end_page_number = std::min(uint32_t(page_table_.size()) - 1, end_page_number); + + std::lock_guard lock(heap_mutex_); + + // Release from host. + // TODO(benvanik): find a way to actually decommit memory; + // mapped memory cannot be decommitted. + /*BOOL result = + VirtualFree(membase_ + heap_base_ + start_page_number * page_size_, + page_count * page_size_, MEM_DECOMMIT); + if (!result) { + PLOGW("BaseHeap::Decommit failed due to host VirtualFree failure"); + return false; + }*/ + + // Perform table change. 
+ for (uint32_t page_number = start_page_number; page_number <= end_page_number; + ++page_number) { + auto& page_entry = page_table_[page_number]; + page_entry.state &= ~kMemoryAllocationCommit; + } + + return true; +} + +bool BaseHeap::Release(uint32_t base_address, uint32_t* out_region_size) { + std::lock_guard lock(heap_mutex_); + + // Given address must be a region base address. + uint32_t base_page_number = (base_address - heap_base_) / page_size_; + auto base_page_entry = page_table_[base_page_number]; + if (base_page_entry.base_address != base_page_number) { + XELOGE("BaseHeap::Release failed because address is not a region start"); + // return false; + } + + if (out_region_size) { + *out_region_size = base_page_entry.region_page_count * page_size_; + } + + // Release from host not needed as mapping reserves the range for us. + // TODO(benvanik): protect with NOACCESS? + /*BOOL result = VirtualFree( + membase_ + heap_base_ + base_page_number * page_size_, 0, MEM_RELEASE); + if (!result) { + PLOGE("BaseHeap::Release failed due to host VirtualFree failure"); + return false; + }*/ + // Instead, we just protect it, if we can. + if (page_size_ == xe::page_size() || + ((base_page_entry.region_page_count * page_size_) % xe::page_size() == + 0) && + ((base_page_number * page_size_) % xe::page_size() == 0)) { + DWORD old_protect; + if (!VirtualProtect(membase_ + heap_base_ + base_page_number * page_size_, + base_page_entry.region_page_count * page_size_, + PAGE_NOACCESS, &old_protect)) { + XELOGW("BaseHeap::Release failed due to host VirtualProtect failure"); + } + } + + // Perform table change. + uint32_t end_page_number = + base_page_number + base_page_entry.region_page_count - 1; + for (uint32_t page_number = base_page_number; page_number <= end_page_number; + ++page_number) { + auto& page_entry = page_table_[page_number]; + page_entry.qword = 0; + } + + return true; +} + +bool BaseHeap::Protect(uint32_t address, uint32_t size, uint32_t protect) { + uint32_t page_count = xe::round_up(size, page_size_) / page_size_; + uint32_t start_page_number = (address - heap_base_) / page_size_; + uint32_t end_page_number = start_page_number + page_count - 1; + start_page_number = + std::min(uint32_t(page_table_.size()) - 1, start_page_number); + end_page_number = std::min(uint32_t(page_table_.size()) - 1, end_page_number); + + std::lock_guard lock(heap_mutex_); + + // Ensure all pages are in the same reserved region and all are committed. + uint32_t first_base_address = UINT_MAX; + for (uint32_t page_number = start_page_number; page_number <= end_page_number; + ++page_number) { + auto page_entry = page_table_[page_number]; + if (first_base_address == UINT_MAX) { + first_base_address = page_entry.base_address; + } else if (first_base_address != page_entry.base_address) { + XELOGE("BaseHeap::Protect failed due to request spanning regions"); + return false; + } + if (!(page_entry.state & kMemoryAllocationCommit)) { + XELOGE("BaseHeap::Protect failed due to uncommitted page"); + return false; + } + } + + // Attempt host change (hopefully won't fail). + // We can only do this if our size matches system page granularity. 
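To make the Decommit/Release split concrete, a hypothetical sequence against one of the virtual heaps (the heap pointer and sizes are illustrative):

// Reserve and commit 64 KiB, drop the physical backing, then drop the
// reservation. Decommit only clears kMemoryAllocationCommit in the page
// table; Release wipes the whole region entry.
uint32_t addr = 0;
assert_true(heap->Alloc(64 * 1024, 64 * 1024,
                        kMemoryAllocationReserve | kMemoryAllocationCommit,
                        kMemoryProtectRead | kMemoryProtectWrite,
                        /*top_down=*/false, &addr));
heap->Decommit(addr, 64 * 1024);    // pages stay reserved, no longer committed
uint32_t region_size = 0;
heap->Release(addr, &region_size);  // region cleared, its size reported back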
+ if (page_size_ == xe::page_size() || + ((page_count * page_size_) % xe::page_size() == 0) && + ((start_page_number * page_size_) % xe::page_size() == 0)) { + DWORD new_protect = ToWin32ProtectFlags(protect); + DWORD old_protect; + if (!VirtualProtect(membase_ + heap_base_ + start_page_number * page_size_, + page_count * page_size_, new_protect, &old_protect)) { + XELOGE("BaseHeap::Protect failed due to host VirtualProtect failure"); + return false; + } + } else { + XELOGW("BaseHeap::Protect: ignoring request as not 64k page aligned"); + } + + // Perform table change. + for (uint32_t page_number = start_page_number; page_number <= end_page_number; + ++page_number) { + auto& page_entry = page_table_[page_number]; + page_entry.current_protect = protect; + } + + return true; +} + +bool BaseHeap::QueryRegionInfo(uint32_t base_address, + HeapAllocationInfo* out_info) { + uint32_t start_page_number = (base_address - heap_base_) / page_size_; + if (start_page_number > page_table_.size()) { + XELOGE("BaseHeap::QueryRegionInfo base page out of range"); + return false; + } + + std::lock_guard lock(heap_mutex_); + + auto start_page_entry = page_table_[start_page_number]; + out_info->base_address = base_address; + out_info->allocation_base = 0; + out_info->allocation_protect = 0; + out_info->region_size = 0; + out_info->state = 0; + out_info->protect = 0; + out_info->type = 0; + if (start_page_entry.state) { + // Committed/reserved region. + out_info->allocation_base = start_page_entry.base_address * page_size_; + out_info->allocation_protect = start_page_entry.allocation_protect; + out_info->state = start_page_entry.state; + out_info->protect = start_page_entry.current_protect; + out_info->type = 0x20000; + for (uint32_t page_number = start_page_number; + page_number < start_page_number + start_page_entry.region_page_count; + ++page_number) { + auto page_entry = page_table_[page_number]; + if (page_entry.base_address != start_page_entry.base_address || + page_entry.state != start_page_entry.state || + page_entry.current_protect != start_page_entry.current_protect) { + // Different region or different properties within the region; done. + break; + } + out_info->region_size += page_size_; + } + } else { + // Free region. + for (uint32_t page_number = start_page_number; + page_number < page_table_.size(); ++page_number) { + auto page_entry = page_table_[page_number]; + if (page_entry.state) { + // First non-free page; done with region. + break; + } + out_info->region_size += page_size_; + } + } + return true; +} + +bool BaseHeap::QuerySize(uint32_t address, uint32_t* out_size) { + uint32_t page_number = (address - heap_base_) / page_size_; + if (page_number > page_table_.size()) { + XELOGE("BaseHeap::QuerySize base page out of range"); + *out_size = 0; + return false; + } + std::lock_guard lock(heap_mutex_); + auto page_entry = page_table_[page_number]; + *out_size = page_entry.region_page_count * page_size_; + return true; +} + +bool BaseHeap::QueryProtect(uint32_t address, uint32_t* out_protect) { + uint32_t page_number = (address - heap_base_) / page_size_; + if (page_number > page_table_.size()) { + XELOGE("BaseHeap::QueryProtect base page out of range"); + *out_protect = 0; + return false; + } + std::lock_guard lock(heap_mutex_); + auto page_entry = page_table_[page_number]; + *out_protect = page_entry.current_protect; + return true; +} + +uint32_t BaseHeap::GetPhysicalAddress(uint32_t address) { + // Only valid for memory in this range - will be bogus if the origin was + // outside of it. 
+ uint32_t physical_address = address & 0x1FFFFFFF; + if (address >= 0xE0000000) { + physical_address += 0x1000; + } + return physical_address; +} + +VirtualHeap::VirtualHeap() = default; + +VirtualHeap::~VirtualHeap() = default; + +void VirtualHeap::Initialize(uint8_t* membase, uint32_t heap_base, + uint32_t heap_size, uint32_t page_size) { + BaseHeap::Initialize(membase, heap_base, heap_size, page_size); +} + +PhysicalHeap::PhysicalHeap() : parent_heap_(nullptr) {} + +PhysicalHeap::~PhysicalHeap() = default; + +void PhysicalHeap::Initialize(uint8_t* membase, uint32_t heap_base, + uint32_t heap_size, uint32_t page_size, + VirtualHeap* parent_heap) { + BaseHeap::Initialize(membase, heap_base, heap_size, page_size); + parent_heap_ = parent_heap; +} + +bool PhysicalHeap::Alloc(uint32_t size, uint32_t alignment, + uint32_t allocation_type, uint32_t protect, + bool top_down, uint32_t* out_address) { + *out_address = 0; + + // Default top-down. Since parent heap is bottom-up this prevents collisions. + top_down = true; + + // Adjust alignment size our page size differs from the parent. + size = xe::round_up(size, page_size_); + alignment = xe::round_up(alignment, page_size_); + + std::lock_guard lock(heap_mutex_); + + // Allocate from parent heap (gets our physical address in 0-512mb). + uint32_t parent_low_address = GetPhysicalAddress(heap_base_); + uint32_t parent_high_address = GetPhysicalAddress(heap_base_ + heap_size_); + uint32_t parent_address; + if (!parent_heap_->AllocRange(parent_low_address, parent_high_address, size, + alignment, allocation_type, protect, top_down, + &parent_address)) { + XELOGE( + "PhysicalHeap::Alloc unable to alloc physical memory in parent heap"); + return false; + } + if (heap_base_ >= 0xE0000000) { + parent_address -= 0x1000; + } + + // Given the address we've reserved in the parent heap, pin that here. + // Shouldn't be possible for it to be allocated already. + uint32_t address = heap_base_ + parent_address; + if (!BaseHeap::AllocFixed(address, size, alignment, allocation_type, + protect)) { + XELOGE( + "PhysicalHeap::Alloc unable to pin physical memory in physical heap"); + // TODO(benvanik): don't leak parent memory. + return false; + } + *out_address = address; + return true; +} + +bool PhysicalHeap::AllocFixed(uint32_t base_address, uint32_t size, + uint32_t alignment, uint32_t allocation_type, + uint32_t protect) { + // Adjust alignment size our page size differs from the parent. + size = xe::round_up(size, page_size_); + alignment = xe::round_up(alignment, page_size_); + + std::lock_guard lock(heap_mutex_); + + // Allocate from parent heap (gets our physical address in 0-512mb). + // NOTE: this can potentially overwrite heap contents if there are already + // committed pages in the requested physical range. + // TODO(benvanik): flag for ensure-not-committed? + uint32_t parent_base_address = GetPhysicalAddress(base_address); + if (!parent_heap_->AllocFixed(parent_base_address, size, alignment, + allocation_type, protect)) { + XELOGE( + "PhysicalHeap::Alloc unable to alloc physical memory in parent heap"); + return false; + } + if (heap_base_ >= 0xE0000000) { + parent_base_address -= 0x1000; + } + + // Given the address we've reserved in the parent heap, pin that here. + // Shouldn't be possible for it to be allocated already. 
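Worked examples of GetPhysicalAddress as defined above (the values follow directly from the 0x1FFFFFFF mask and the one-page bias applied to the 0xE0000000 range; `heap` can be any of the physical-view heaps):

assert_true(heap->GetPhysicalAddress(0xA0010000) == 0x00010000);
assert_true(heap->GetPhysicalAddress(0xC2000000) == 0x02000000);
// The 0xE0000000 view carries an extra 4 KiB bias:
assert_true(heap->GetPhysicalAddress(0xE0010000) == 0x00011000);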
+ uint32_t address = heap_base_ + parent_base_address; + if (!BaseHeap::AllocFixed(address, size, alignment, allocation_type, + protect)) { + XELOGE( + "PhysicalHeap::Alloc unable to pin physical memory in physical heap"); + // TODO(benvanik): don't leak parent memory. + return false; + } + + return true; +} + +bool PhysicalHeap::AllocRange(uint32_t low_address, uint32_t high_address, + uint32_t size, uint32_t alignment, + uint32_t allocation_type, uint32_t protect, + bool top_down, uint32_t* out_address) { + *out_address = 0; + + // Adjust alignment size our page size differs from the parent. + size = xe::round_up(size, page_size_); + alignment = xe::round_up(alignment, page_size_); + + std::lock_guard lock(heap_mutex_); + + // Allocate from parent heap (gets our physical address in 0-512mb). + low_address = std::max(heap_base_, low_address); + high_address = std::min(heap_base_ + heap_size_, high_address); + uint32_t parent_low_address = GetPhysicalAddress(low_address); + uint32_t parent_high_address = GetPhysicalAddress(high_address); + uint32_t parent_address; + if (!parent_heap_->AllocRange(parent_low_address, parent_high_address, size, + alignment, allocation_type, protect, top_down, + &parent_address)) { + XELOGE( + "PhysicalHeap::Alloc unable to alloc physical memory in parent heap"); + return false; + } + if (heap_base_ >= 0xE0000000) { + parent_address -= 0x1000; + } + + // Given the address we've reserved in the parent heap, pin that here. + // Shouldn't be possible for it to be allocated already. + uint32_t address = heap_base_ + parent_address; + if (!BaseHeap::AllocFixed(address, size, alignment, allocation_type, + protect)) { + XELOGE( + "PhysicalHeap::Alloc unable to pin physical memory in physical heap"); + // TODO(benvanik): don't leak parent memory. + return false; + } + *out_address = address; + return true; +} + +bool PhysicalHeap::Decommit(uint32_t address, uint32_t size) { + std::lock_guard lock(heap_mutex_); + uint32_t parent_address = GetPhysicalAddress(address); + if (!parent_heap_->Decommit(parent_address, size)) { + XELOGE("PhysicalHeap::Decommit failed due to parent heap failure"); + return false; + } + return BaseHeap::Decommit(address, size); +} + +bool PhysicalHeap::Release(uint32_t base_address, uint32_t* out_region_size) { + std::lock_guard lock(heap_mutex_); + uint32_t parent_base_address = GetPhysicalAddress(base_address); + if (!parent_heap_->Release(parent_base_address, out_region_size)) { + XELOGE("PhysicalHeap::Release failed due to parent heap failure"); + return false; + } + return BaseHeap::Release(base_address, out_region_size); +} + +bool PhysicalHeap::Protect(uint32_t address, uint32_t size, uint32_t protect) { + std::lock_guard lock(heap_mutex_); + uint32_t parent_address = GetPhysicalAddress(address); + bool parent_result = parent_heap_->Protect(parent_address, size, protect); + if (!parent_result) { + XELOGE("PhysicalHeap::Protect failed due to parent heap failure"); + return false; + } + return BaseHeap::Protect(address, size, protect); +} + +} // namespace xe diff --git a/src/xenia/memory.h b/src/xenia/memory.h index c8c164939..14bb09095 100644 --- a/src/xenia/memory.h +++ b/src/xenia/memory.h @@ -12,6 +12,7 @@ #include #include +#include #include #include "xenia/base/platform.h" @@ -25,25 +26,129 @@ enum SystemHeapFlag : uint32_t { kSystemHeapDefault = kSystemHeapVirtual, }; -class MemoryHeap; -// TODO(benvanik): move to heap. 
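A sketch of the two-step physical allocation implemented above, from a hypothetical caller's point of view (`memory` is assumed to be an initialized Memory instance; the heap names match the new heaps_ members):

// Ask the 64k physical-view heap (vA0000000) for one 64 KiB page.
auto* heap = memory->LookupHeapByType(/*physical=*/true, 64 * 1024);
uint32_t guest_va = 0;
heap->Alloc(64 * 1024, 64 * 1024,
            kMemoryAllocationReserve | kMemoryAllocationCommit,
            kMemoryProtectRead | kMemoryProtectWrite,
            /*top_down=*/true, &guest_va);
// The same backing pages are also reserved in heaps_.physical at the raw
// physical offset, so the other physical views cannot hand them out again.
uint32_t physical = heap->GetPhysicalAddress(guest_va);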
-enum { - MEMORY_FLAG_64KB_PAGES = (1 << 1), - MEMORY_FLAG_ZERO = (1 << 2), - MEMORY_FLAG_PHYSICAL = (1 << 3), +enum MemoryAllocationFlag : uint32_t { + kMemoryAllocationReserve = 1 << 0, + kMemoryAllocationCommit = 1 << 1, +}; + +enum MemoryProtectFlag : uint32_t { + kMemoryProtectRead = 1 << 0, + kMemoryProtectWrite = 1 << 1, + kMemoryProtectNoCache = 1 << 2, + kMemoryProtectWriteCombine = 1 << 3, + + kMemoryProtectNoAccess = 0, }; -// TODO(benvanik): move to heap. // Equivalent to the Win32 MEMORY_BASIC_INFORMATION struct. -struct AllocationInfo { +struct HeapAllocationInfo { + // A pointer to the base address of the region of pages. uint32_t base_address; + // A pointer to the base address of a range of pages allocated by the + // VirtualAlloc function. The page pointed to by the BaseAddress member is + // contained within this allocation range. uint32_t allocation_base; - uint32_t allocation_protect; // TBD - size_t region_size; - uint32_t state; // TBD - uint32_t protect; // TBD - uint32_t type; // TBD + // The memory protection option when the region was initially allocated. + uint32_t allocation_protect; + // The size of the region beginning at the base address in which all pages + // have identical attributes, in bytes. + uint32_t region_size; + // The state of the pages in the region (commit/free/reserve). + uint32_t state; + // The access protection of the pages in the region. + uint32_t protect; + // The type of pages in the region (private). + uint32_t type; +}; + +union PageEntry { + struct { + uint32_t base_address : 20; // in 4k pages + uint32_t region_page_count : 20; // in 4k pages + uint32_t allocation_protect : 4; + uint32_t current_protect : 4; + uint32_t state : 2; + uint32_t reserved : 14; + }; + uint64_t qword; +}; + +class BaseHeap { + public: + virtual ~BaseHeap(); + + uint32_t page_size() const { return page_size_; } + + virtual void Dispose(); + + void DumpMap(); + + virtual bool Alloc(uint32_t size, uint32_t alignment, + uint32_t allocation_type, uint32_t protect, bool top_down, + uint32_t* out_address); + virtual bool AllocFixed(uint32_t base_address, uint32_t size, + uint32_t alignment, uint32_t allocation_type, + uint32_t protect); + virtual bool AllocRange(uint32_t low_address, uint32_t high_address, + uint32_t size, uint32_t alignment, + uint32_t allocation_type, uint32_t protect, + bool top_down, uint32_t* out_address); + virtual bool Decommit(uint32_t address, uint32_t size); + virtual bool Release(uint32_t address, uint32_t* out_region_size = nullptr); + virtual bool Protect(uint32_t address, uint32_t size, uint32_t protect); + + bool QueryRegionInfo(uint32_t base_address, HeapAllocationInfo* out_info); + bool QuerySize(uint32_t address, uint32_t* out_size); + bool QueryProtect(uint32_t address, uint32_t* out_protect); + uint32_t GetPhysicalAddress(uint32_t address); + + protected: + BaseHeap(); + + void Initialize(uint8_t* membase, uint32_t heap_base, uint32_t heap_size, + uint32_t page_size); + + uint8_t* membase_; + uint32_t heap_base_; + uint32_t heap_size_; + uint32_t page_size_; + std::vector page_table_; + std::recursive_mutex heap_mutex_; +}; + +class VirtualHeap : public BaseHeap { + public: + VirtualHeap(); + ~VirtualHeap() override; + + void Initialize(uint8_t* membase, uint32_t heap_base, uint32_t heap_size, + uint32_t page_size); +}; + +class PhysicalHeap : public BaseHeap { + public: + PhysicalHeap(); + ~PhysicalHeap() override; + + void Initialize(uint8_t* membase, uint32_t heap_base, uint32_t heap_size, + uint32_t page_size, VirtualHeap* 
parent_heap); + + bool Alloc(uint32_t size, uint32_t alignment, uint32_t allocation_type, + uint32_t protect, bool top_down, uint32_t* out_address) override; + bool AllocFixed(uint32_t base_address, uint32_t size, uint32_t alignment, + uint32_t allocation_type, uint32_t protect) override; + bool AllocRange(uint32_t low_address, uint32_t high_address, uint32_t size, + uint32_t alignment, uint32_t allocation_type, + uint32_t protect, bool top_down, + uint32_t* out_address) override; + bool Decommit(uint32_t address, uint32_t size) override; + bool Release(uint32_t base_address, + uint32_t* out_region_size = nullptr) override; + bool Protect(uint32_t address, uint32_t size, uint32_t protect) override; + + protected: + VirtualHeap* parent_heap_; }; class Memory { @@ -82,27 +187,24 @@ class Memory { uint32_t SearchAligned(uint32_t start, uint32_t end, const uint32_t* values, size_t value_count); - bool AddMappedRange(uint32_t address, uint32_t mask, uint32_t size, - void* context, cpu::MMIOReadCallback read_callback, - cpu::MMIOWriteCallback write_callback); + bool AddVirtualMappedRange(uint32_t virtual_address, uint32_t mask, + uint32_t size, void* context, + cpu::MMIOReadCallback read_callback, + cpu::MMIOWriteCallback write_callback); - uintptr_t AddWriteWatch(uint32_t guest_address, uint32_t length, - cpu::WriteWatchCallback callback, - void* callback_context, void* callback_data); + uintptr_t AddPhysicalWriteWatch(uint32_t physical_address, uint32_t length, + cpu::WriteWatchCallback callback, + void* callback_context, void* callback_data); void CancelWriteWatch(uintptr_t watch_handle); uint32_t SystemHeapAlloc(uint32_t size, uint32_t alignment = 0x20, uint32_t system_heap_flags = kSystemHeapDefault); void SystemHeapFree(uint32_t address); - uint32_t HeapAlloc(uint32_t base_address, uint32_t size, uint32_t flags, - uint32_t alignment = 0x20); - int HeapFree(uint32_t address, uint32_t size); - bool QueryInformation(uint32_t base_address, AllocationInfo* mem_info); - uint32_t QuerySize(uint32_t base_address); + BaseHeap* LookupHeap(uint32_t address); + BaseHeap* LookupHeapByType(bool physical, uint32_t page_size); - int Protect(uint32_t address, uint32_t size, uint32_t access); - uint32_t QueryProtect(uint32_t address); + void DumpMap(); private: int MapViews(uint8_t* mapping_base); @@ -122,22 +224,31 @@ class Memory { uint8_t* v00000000; uint8_t* v40000000; uint8_t* v7F000000; - uint8_t* v7F100000; uint8_t* v80000000; uint8_t* v90000000; uint8_t* vA0000000; uint8_t* vC0000000; uint8_t* vE0000000; + uint8_t* physical; }; uint8_t* all_views[9]; } views_; std::unique_ptr mmio_handler_; - MemoryHeap* virtual_heap_; - MemoryHeap* physical_heap_; + struct { + VirtualHeap v00000000; + VirtualHeap v40000000; + VirtualHeap v80000000; + VirtualHeap v90000000; - friend class MemoryHeap; + VirtualHeap physical; + PhysicalHeap vA0000000; + PhysicalHeap vC0000000; + PhysicalHeap vE0000000; + } heaps_; + + friend class BaseHeap; }; } // namespace xe
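For reference, a minimal sketch of how the PageEntry bitfields pack into a single qword; the static_assert and the field values are illustrative, and zeroing qword mirrors what Release does to drop a region:

static_assert(sizeof(xe::PageEntry) == 8, "page entries are one qword each");

// One reserved+committed, read/write region of 16 x 4k pages at page 32.
xe::PageEntry entry = {};
entry.base_address = 32;        // region base, in 4k pages
entry.region_page_count = 16;   // region length, in 4k pages
entry.allocation_protect = xe::kMemoryProtectRead | xe::kMemoryProtectWrite;
entry.current_protect = xe::kMemoryProtectRead | xe::kMemoryProtectWrite;
entry.state = xe::kMemoryAllocationReserve | xe::kMemoryAllocationCommit;
entry.qword = 0;                // back to a free page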