diff --git a/.appveyor.yml b/.appveyor.yml index 483b57e61..47e28b5ef 100644 --- a/.appveyor.yml +++ b/.appveyor.yml @@ -13,6 +13,10 @@ skip_commits: - docs/*/* - .github/* - .github/*/* + - src/**/*_posix.* + - src/**/*_linux.* + - src/**/*_x11.* + - src/**/*_gtk.* - LICENSE - README.md - .azure-pipelines.yml diff --git a/.clang-format b/.clang-format index 9f8b391e3..f9aa6536d 100644 --- a/.clang-format +++ b/.clang-format @@ -3,3 +3,6 @@ BasedOnStyle: Google DerivePointerAlignment: false PointerAlignment: Left SortIncludes: true + +# Regroup causes unnecessary noise due to clang-format bug. +IncludeBlocks: Preserve diff --git a/.gitmodules b/.gitmodules index 9da0b4148..b7fb5ee6e 100644 --- a/.gitmodules +++ b/.gitmodules @@ -25,9 +25,6 @@ [submodule "third_party/premake-export-compile-commands"] path = third_party/premake-export-compile-commands url = https://github.com/xenia-project/premake-export-compile-commands.git -[submodule "third_party/yaml-cpp"] - path = third_party/yaml-cpp - url = https://github.com/jbeder/yaml-cpp.git [submodule "third_party/spirv-headers"] path = third_party/spirv-headers url = https://github.com/KhronosGroup/SPIRV-Headers.git diff --git a/.travis.yml b/.travis.yml index 96ebf723e..0135e71c4 100644 --- a/.travis.yml +++ b/.travis.yml @@ -10,10 +10,11 @@ addons: apt: sources: - ubuntu-toolchain-r-test - - llvm-toolchain-bionic-7 + - llvm-toolchain-9 packages: - - clang-7 - - llvm-7-dev + - clang-9 + - clang-format-9 + - llvm-9-dev - g++-8 - python3 - libc++-dev @@ -28,10 +29,10 @@ addons: matrix: include: - - env: C_COMPILER=clang-7 CXX_COMPILER=clang++-7 LINT=true + - env: C_COMPILER=clang-9 CXX_COMPILER=clang++-9 LINT=true sudo: false - - env: C_COMPILER=clang-7 CXX_COMPILER=clang++-7 BUILD=true CONFIG=Debug - - env: C_COMPILER=clang-7 CXX_COMPILER=clang++-7 BUILD=true CONFIG=Release + - env: C_COMPILER=clang-9 CXX_COMPILER=clang++-9 BUILD=true CONFIG=Debug + - env: C_COMPILER=clang-9 CXX_COMPILER=clang++-9 BUILD=true CONFIG=Release git: # We handle submodules ourselves in xenia-build setup. @@ -44,6 +45,8 @@ before_script: # Dump useful info. - $CXX --version - python3 --version + - clang-format-9 --version + - clang-format-9 -style=file -dump-config # Add Vulkan dependencies. - travis_retry wget http://mirrors.kernel.org/ubuntu/pool/universe/v/vulkan/libvulkan1_$LIBVULKAN_VERSION+dfsg1-1_amd64.deb - travis_retry wget http://mirrors.kernel.org/ubuntu/pool/universe/v/vulkan/libvulkan-dev_$LIBVULKAN_VERSION+dfsg1-1_amd64.deb diff --git a/premake5.lua b/premake5.lua index 20a5cd454..e4a858601 100644 --- a/premake5.lua +++ b/premake5.lua @@ -229,7 +229,6 @@ solution("xenia") include("third_party/spirv-tools.lua") include("third_party/volk.lua") include("third_party/xxhash.lua") - include("third_party/yaml-cpp.lua") include("src/xenia") include("src/xenia/app") diff --git a/src/xenia/app/emulator_window.cc b/src/xenia/app/emulator_window.cc index 7a7bc52f8..c1bc733fb 100644 --- a/src/xenia/app/emulator_window.cc +++ b/src/xenia/app/emulator_window.cc @@ -2,7 +2,7 @@ ****************************************************************************** * Xenia : Xbox 360 Emulator Research Project * ****************************************************************************** - * Copyright 2015 Ben Vanik. All rights reserved. * + * Copyright 2020 Ben Vanik. All rights reserved. * * Released under the BSD license - see LICENSE in the root for more details. * ****************************************************************************** */ @@ -172,7 +172,9 @@ bool EmulatorWindow::Initialize() { ShowCommitID(); } break; - default: { handled = false; } break; + default: { + handled = false; + } break; } e->set_handled(handled); }); diff --git a/src/xenia/app/xenia_main.cc b/src/xenia/app/xenia_main.cc index 06c47fefc..25f2e1f64 100644 --- a/src/xenia/app/xenia_main.cc +++ b/src/xenia/app/xenia_main.cc @@ -89,7 +89,8 @@ class Factory { void Add(const std::string& name, std::function(Args...)> instantiate) { - Add(name, []() { return true; }, instantiate); + auto always_available = []() { return true; }; + Add(name, always_available, instantiate); } template diff --git a/src/xenia/cpu/backend/x64/x64_sequences.cc b/src/xenia/cpu/backend/x64/x64_sequences.cc index cdd1a6fe5..515862659 100644 --- a/src/xenia/cpu/backend/x64/x64_sequences.cc +++ b/src/xenia/cpu/backend/x64/x64_sequences.cc @@ -2,7 +2,7 @@ ****************************************************************************** * Xenia : Xbox 360 Emulator Research Project * ****************************************************************************** - * Copyright 2019 Ben Vanik. All rights reserved. * + * Copyright 2020 Ben Vanik. All rights reserved. * * Released under the BSD license - see LICENSE in the root for more details. * ****************************************************************************** */ @@ -2668,6 +2668,48 @@ struct LOG2_V128 : Sequence> { }; EMITTER_OPCODE_TABLE(OPCODE_LOG2, LOG2_F32, LOG2_F64, LOG2_V128); +struct DOT_PRODUCT_V128 { + static void Emit(X64Emitter& e, Xmm dest, Xmm src1, Xmm src2, uint8_t imm) { + // TODO(benvanik): apparently this is very slow + // - find alternative? + Xbyak::Label end; + e.inLocalLabel(); + + // Grab space to put MXCSR. + // TODO(gibbed): stick this in TLS or + // something? + e.sub(e.rsp, 8); + + // Grab MXCSR and mask off the overflow flag, + // because it's sticky. + e.vstmxcsr(e.dword[e.rsp]); + e.mov(e.eax, e.dword[e.rsp]); + e.and_(e.eax, uint32_t(~8)); + e.mov(e.dword[e.rsp], e.eax); + e.vldmxcsr(e.dword[e.rsp]); + + // Hey we can do the dot product now. + e.vdpps(dest, src1, src2, imm); + + // Load MXCSR... + e.vstmxcsr(e.dword[e.rsp]); + + // ..free our temporary space and get MXCSR at + // the same time + e.pop(e.rax); + + // Did we overflow? + e.test(e.al, 8); + e.jz(end); + + // Infinity? HA! Give NAN. + e.vmovdqa(dest, e.GetXmmConstPtr(XMMQNaN)); + + e.L(end); + e.outLocalLabel(); + } +}; + // ============================================================================ // OPCODE_DOT_PRODUCT_3 // ============================================================================ @@ -2676,12 +2718,10 @@ struct DOT_PRODUCT_3_V128 I> { static void Emit(X64Emitter& e, const EmitArgType& i) { // https://msdn.microsoft.com/en-us/library/bb514054(v=vs.90).aspx - EmitCommutativeBinaryXmmOp(e, i, - [](X64Emitter& e, Xmm dest, Xmm src1, Xmm src2) { - // TODO(benvanik): apparently this is very slow - // - find alternative? - e.vdpps(dest, src1, src2, 0b01110001); - }); + EmitCommutativeBinaryXmmOp( + e, i, [](X64Emitter& e, Xmm dest, Xmm src1, Xmm src2) { + DOT_PRODUCT_V128::Emit(e, dest, src1, src2, 0b01110001); + }); } }; EMITTER_OPCODE_TABLE(OPCODE_DOT_PRODUCT_3, DOT_PRODUCT_3_V128); @@ -2694,12 +2734,10 @@ struct DOT_PRODUCT_4_V128 I> { static void Emit(X64Emitter& e, const EmitArgType& i) { // https://msdn.microsoft.com/en-us/library/bb514054(v=vs.90).aspx - EmitCommutativeBinaryXmmOp(e, i, - [](X64Emitter& e, Xmm dest, Xmm src1, Xmm src2) { - // TODO(benvanik): apparently this is very slow - // - find alternative? - e.vdpps(dest, src1, src2, 0b11110001); - }); + EmitCommutativeBinaryXmmOp( + e, i, [](X64Emitter& e, Xmm dest, Xmm src1, Xmm src2) { + DOT_PRODUCT_V128::Emit(e, dest, src1, src2, 0b11110001); + }); } }; EMITTER_OPCODE_TABLE(OPCODE_DOT_PRODUCT_4, DOT_PRODUCT_4_V128); diff --git a/src/xenia/cpu/compiler/passes/register_allocation_pass.cc b/src/xenia/cpu/compiler/passes/register_allocation_pass.cc index 1b1f4acd3..bd7380184 100644 --- a/src/xenia/cpu/compiler/passes/register_allocation_pass.cc +++ b/src/xenia/cpu/compiler/passes/register_allocation_pass.cc @@ -2,7 +2,7 @@ ****************************************************************************** * Xenia : Xbox 360 Emulator Research Project * ****************************************************************************** - * Copyright 2014 Ben Vanik. All rights reserved. * + * Copyright 2020 Ben Vanik. All rights reserved. * * Released under the BSD license - see LICENSE in the root for more details. * ****************************************************************************** */ @@ -347,9 +347,9 @@ bool RegisterAllocationPass::SpillOneRegister(HIRBuilder* builder, Block* block, DumpUsage("SpillOneRegister (pre)"); // Pick the one with the furthest next use. assert_true(!usage_set->upcoming_uses.empty()); - auto furthest_usage = std::max_element(usage_set->upcoming_uses.begin(), - usage_set->upcoming_uses.end(), - RegisterUsage::Comparer()); + auto furthest_usage = + std::max_element(usage_set->upcoming_uses.begin(), + usage_set->upcoming_uses.end(), &RegisterUsage::Compare); assert_true(furthest_usage->value->def->block == block); assert_true(furthest_usage->use->instr->block == block); auto spill_value = furthest_usage->value; diff --git a/src/xenia/cpu/compiler/passes/register_allocation_pass.h b/src/xenia/cpu/compiler/passes/register_allocation_pass.h index f14c41dc6..27f7e6560 100644 --- a/src/xenia/cpu/compiler/passes/register_allocation_pass.h +++ b/src/xenia/cpu/compiler/passes/register_allocation_pass.h @@ -2,7 +2,7 @@ ****************************************************************************** * Xenia : Xbox 360 Emulator Research Project * ****************************************************************************** - * Copyright 2014 Ben Vanik. All rights reserved. * + * Copyright 2020 Ben Vanik. All rights reserved. * * Released under the BSD license - see LICENSE in the root for more details. * ****************************************************************************** */ @@ -39,11 +39,9 @@ class RegisterAllocationPass : public CompilerPass { RegisterUsage() : value(nullptr), use(nullptr) {} RegisterUsage(hir::Value* value_, hir::Value::Use* use_) : value(value_), use(use_) {} - struct Comparer : std::binary_function { - bool operator()(const RegisterUsage& a, const RegisterUsage& b) const { - return a.use->instr->ordinal < b.use->instr->ordinal; - } - }; + static bool Compare(const RegisterUsage& a, const RegisterUsage& b) { + return a.use->instr->ordinal < b.use->instr->ordinal; + } }; struct RegisterSetUsage { const backend::MachineInfo::RegisterSet* set = nullptr; diff --git a/src/xenia/debug/ui/debug_window.cc b/src/xenia/debug/ui/debug_window.cc index 5d9472359..2fd758d24 100644 --- a/src/xenia/debug/ui/debug_window.cc +++ b/src/xenia/debug/ui/debug_window.cc @@ -2,7 +2,7 @@ ****************************************************************************** * Xenia : Xbox 360 Emulator Research Project * ****************************************************************************** - * Copyright 2015 Ben Vanik. All rights reserved. * + * Copyright 2020 Ben Vanik. All rights reserved. * * Released under the BSD license - see LICENSE in the root for more details. * ****************************************************************************** */ @@ -17,7 +17,6 @@ #include "third_party/capstone/include/capstone/x86.h" #include "third_party/imgui/imgui.h" #include "third_party/imgui/imgui_internal.h" -#include "third_party/yaml-cpp/include/yaml-cpp/yaml.h" #include "xenia/base/clock.h" #include "xenia/base/logging.h" #include "xenia/base/math.h" diff --git a/src/xenia/debug/ui/premake5.lua b/src/xenia/debug/ui/premake5.lua index 84ca9bbd8..7503528b3 100644 --- a/src/xenia/debug/ui/premake5.lua +++ b/src/xenia/debug/ui/premake5.lua @@ -11,11 +11,9 @@ project("xenia-debug-ui") "xenia-base", "xenia-cpu", "xenia-ui", - "yaml-cpp", }) defines({ }) includedirs({ - project_root.."/third_party/yaml-cpp/include/", }) local_platform_files() diff --git a/src/xenia/gpu/command_processor.cc b/src/xenia/gpu/command_processor.cc index 98566a35a..658e7f707 100644 --- a/src/xenia/gpu/command_processor.cc +++ b/src/xenia/gpu/command_processor.cc @@ -1157,44 +1157,51 @@ bool CommandProcessor::ExecutePacketType3_DRAW_INDX(RingBuffer* reader, // ID = dword0 & 0x3F; // use = dword0 & 0x40; uint32_t dword0 = reader->ReadAndSwap(); // viz query info - uint32_t dword1 = reader->ReadAndSwap(); - uint32_t index_count = dword1 >> 16; - auto prim_type = static_cast(dword1 & 0x3F); + reg::VGT_DRAW_INITIATOR vgt_draw_initiator; + vgt_draw_initiator.value = reader->ReadAndSwap(); + WriteRegister(XE_GPU_REG_VGT_DRAW_INITIATOR, vgt_draw_initiator.value); + bool is_indexed = false; IndexBufferInfo index_buffer_info; - uint32_t src_sel = (dword1 >> 6) & 0x3; - if (src_sel == 0x0) { - // DI_SRC_SEL_DMA - // Indexed draw. - is_indexed = true; - index_buffer_info.guest_base = reader->ReadAndSwap(); - uint32_t index_size = reader->ReadAndSwap(); - index_buffer_info.endianness = static_cast(index_size >> 30); - index_size &= 0x00FFFFFF; - bool index_32bit = (dword1 >> 11) & 0x1; - index_buffer_info.format = - index_32bit ? IndexFormat::kInt32 : IndexFormat::kInt16; - index_size *= index_32bit ? 4 : 2; - index_buffer_info.length = index_size; - index_buffer_info.count = index_count; - } else if (src_sel == 0x1) { - // DI_SRC_SEL_IMMEDIATE - assert_always(); - } else if (src_sel == 0x2) { - // DI_SRC_SEL_AUTO_INDEX - // Auto draw. - index_buffer_info.guest_base = 0; - index_buffer_info.length = 0; - } else { - // Invalid source select. - assert_always(); + switch (vgt_draw_initiator.source_select) { + case xenos::SourceSelect::kDMA: { + // Indexed draw. + is_indexed = true; + index_buffer_info.guest_base = reader->ReadAndSwap(); + uint32_t index_size = reader->ReadAndSwap(); + index_buffer_info.endianness = static_cast(index_size >> 30); + index_size &= 0x00FFFFFF; + index_buffer_info.format = vgt_draw_initiator.index_size; + index_size *= + (vgt_draw_initiator.index_size == IndexFormat::kInt32) ? 4 : 2; + index_buffer_info.length = index_size; + index_buffer_info.count = vgt_draw_initiator.num_indices; + } break; + case xenos::SourceSelect::kImmediate: { + // TODO(Triang3l): VGT_IMMED_DATA. + assert_always(); + } break; + case xenos::SourceSelect::kAutoIndex: { + // Auto draw. + index_buffer_info.guest_base = 0; + index_buffer_info.length = 0; + } break; + default: { + // Invalid source select. + assert_always(); + } break; } - bool success = IssueDraw(prim_type, index_count, - is_indexed ? &index_buffer_info : nullptr); + bool success = + IssueDraw(vgt_draw_initiator.prim_type, vgt_draw_initiator.num_indices, + is_indexed ? &index_buffer_info : nullptr, + xenos::IsMajorModeExplicit(vgt_draw_initiator.major_mode, + vgt_draw_initiator.prim_type)); if (!success) { - XELOGE("PM4_DRAW_INDX(%d, %d, %d): Failed in backend", index_count, - prim_type, src_sel); + XELOGE("PM4_DRAW_INDX(%d, %d, %d): Failed in backend", + vgt_draw_initiator.num_indices, + uint32_t(vgt_draw_initiator.prim_type), + uint32_t(vgt_draw_initiator.source_select)); } return true; @@ -1204,21 +1211,27 @@ bool CommandProcessor::ExecutePacketType3_DRAW_INDX_2(RingBuffer* reader, uint32_t packet, uint32_t count) { // draw using supplied indices in packet - uint32_t dword0 = reader->ReadAndSwap(); - uint32_t index_count = dword0 >> 16; - auto prim_type = static_cast(dword0 & 0x3F); - uint32_t src_sel = (dword0 >> 6) & 0x3; - assert_true(src_sel == 0x2); // 'SrcSel=AutoIndex' + reg::VGT_DRAW_INITIATOR vgt_draw_initiator; + vgt_draw_initiator.value = reader->ReadAndSwap(); + WriteRegister(XE_GPU_REG_VGT_DRAW_INITIATOR, vgt_draw_initiator.value); + assert_true(vgt_draw_initiator.source_select == + xenos::SourceSelect::kAutoIndex); // Index buffer unused as automatic. - // bool index_32bit = (dword0 >> 11) & 0x1; - // uint32_t indices_size = index_count * (index_32bit ? 4 : 2); + // uint32_t indices_size = + // vgt_draw_initiator.num_indices * + // (vgt_draw_initiator.index_size == IndexFormat::kInt32 ? 4 : 2); // uint32_t index_ptr = reader->ptr(); + // TODO(Triang3l): VGT_IMMED_DATA. reader->AdvanceRead((count - 1) * sizeof(uint32_t)); - bool success = IssueDraw(prim_type, index_count, nullptr); + bool success = IssueDraw( + vgt_draw_initiator.prim_type, vgt_draw_initiator.num_indices, nullptr, + xenos::IsMajorModeExplicit(vgt_draw_initiator.major_mode, + vgt_draw_initiator.prim_type)); if (!success) { - XELOGE("PM4_DRAW_INDX_IMM(%d, %d): Failed in backend", index_count, - prim_type); + XELOGE("PM4_DRAW_INDX_IMM(%d, %d): Failed in backend", + vgt_draw_initiator.num_indices, + uint32_t(vgt_draw_initiator.prim_type)); } return true; diff --git a/src/xenia/gpu/command_processor.h b/src/xenia/gpu/command_processor.h index c224e4504..caa53b33a 100644 --- a/src/xenia/gpu/command_processor.h +++ b/src/xenia/gpu/command_processor.h @@ -239,7 +239,8 @@ class CommandProcessor { uint32_t dword_count) = 0; virtual bool IssueDraw(PrimitiveType prim_type, uint32_t index_count, - IndexBufferInfo* index_buffer_info) = 0; + IndexBufferInfo* index_buffer_info, + bool major_mode_explicit) = 0; virtual bool IssueCopy() = 0; virtual void InitializeTrace() = 0; diff --git a/src/xenia/gpu/d3d12/d3d12_command_processor.cc b/src/xenia/gpu/d3d12/d3d12_command_processor.cc index a1088f427..ae05946f6 100644 --- a/src/xenia/gpu/d3d12/d3d12_command_processor.cc +++ b/src/xenia/gpu/d3d12/d3d12_command_processor.cc @@ -1245,7 +1245,8 @@ Shader* D3D12CommandProcessor::LoadShader(ShaderType shader_type, bool D3D12CommandProcessor::IssueDraw(PrimitiveType primitive_type, uint32_t index_count, - IndexBufferInfo* index_buffer_info) { + IndexBufferInfo* index_buffer_info, + bool major_mode_explicit) { auto device = GetD3D12Context()->GetD3D12Provider()->GetDevice(); auto& regs = *register_file_; @@ -1272,8 +1273,7 @@ bool D3D12CommandProcessor::IssueDraw(PrimitiveType primitive_type, // Check if using tessellation to get the correct primitive type. bool tessellated; - if (uint32_t(primitive_type) >= - uint32_t(PrimitiveType::kExplicitMajorModeForceStart)) { + if (major_mode_explicit) { tessellated = regs.Get().path_select == xenos::VGTOutputPath::kTessellationEnable; } else { @@ -2115,8 +2115,7 @@ bool D3D12CommandProcessor::EndSubmission(bool is_swap) { } root_signatures_.clear(); - // TODO(Triang3l): Shared memory cache clear. - // shared_memory_->ClearCache(); + shared_memory_->ClearCache(); } } diff --git a/src/xenia/gpu/d3d12/d3d12_command_processor.h b/src/xenia/gpu/d3d12/d3d12_command_processor.h index b0ed394cc..c6bfa8c2e 100644 --- a/src/xenia/gpu/d3d12/d3d12_command_processor.h +++ b/src/xenia/gpu/d3d12/d3d12_command_processor.h @@ -175,7 +175,8 @@ class D3D12CommandProcessor : public CommandProcessor { uint32_t dword_count) override; bool IssueDraw(PrimitiveType primitive_type, uint32_t index_count, - IndexBufferInfo* index_buffer_info) override; + IndexBufferInfo* index_buffer_info, + bool major_mode_explicit) override; bool IssueCopy() override; void InitializeTrace() override; diff --git a/src/xenia/gpu/d3d12/shared_memory.cc b/src/xenia/gpu/d3d12/shared_memory.cc index a88537672..e87567d01 100644 --- a/src/xenia/gpu/d3d12/shared_memory.cc +++ b/src/xenia/gpu/d3d12/shared_memory.cc @@ -140,7 +140,22 @@ bool SharedMemory::Initialize() { void SharedMemory::Shutdown() { ResetTraceGPUWrittenBuffer(); - // TODO(Triang3l): Do something in case any watches are still registered. + FireWatches(0, (kBufferSize - 1) >> page_size_log2_, false); + assert_true(global_watches_.empty()); + // No watches now, so no references to the pools accessible by guest threads - + // safe not to enter the global critical region. + watch_node_first_free_ = nullptr; + watch_node_current_pool_allocated_ = 0; + for (WatchNode* pool : watch_node_pools_) { + delete[] pool; + } + watch_node_pools_.clear(); + watch_range_first_free_ = nullptr; + watch_range_current_pool_allocated_ = 0; + for (WatchRange* pool : watch_range_pools_) { + delete[] pool; + } + watch_range_pools_.clear(); if (memory_invalidation_callback_handle_ != nullptr) { memory_->UnregisterPhysicalMemoryInvalidationCallback( @@ -164,6 +179,36 @@ void SharedMemory::Shutdown() { } } +void SharedMemory::ClearCache() { + upload_buffer_pool_->ClearCache(); + + // Keeping GPU-written data, so "invalidated by GPU". + FireWatches(0, (kBufferSize - 1) >> page_size_log2_, true); + // No watches now, so no references to the pools accessible by guest threads - + // safe not to enter the global critical region. + watch_node_first_free_ = nullptr; + watch_node_current_pool_allocated_ = 0; + for (WatchNode* pool : watch_node_pools_) { + delete[] pool; + } + watch_node_pools_.clear(); + watch_range_first_free_ = nullptr; + watch_range_current_pool_allocated_ = 0; + for (WatchRange* pool : watch_range_pools_) { + delete[] pool; + } + watch_range_pools_.clear(); + + { + auto global_lock = global_critical_region_.Acquire(); + for (SystemPageFlagsBlock& block : system_page_flags_) { + block.valid = block.valid_and_gpu_written; + } + } + + // TODO(Triang3l): Unmap and destroy heaps. +} + void SharedMemory::CompletedSubmissionUpdated() { upload_buffer_pool_->Reclaim(command_processor_->GetCompletedSubmission()); } diff --git a/src/xenia/gpu/d3d12/shared_memory.h b/src/xenia/gpu/d3d12/shared_memory.h index af99fa15b..d0eec5093 100644 --- a/src/xenia/gpu/d3d12/shared_memory.h +++ b/src/xenia/gpu/d3d12/shared_memory.h @@ -37,6 +37,7 @@ class SharedMemory { bool Initialize(); void Shutdown(); + void ClearCache(); ID3D12Resource* GetBuffer() const { return buffer_; } D3D12_GPU_VIRTUAL_ADDRESS GetGPUAddress() const { diff --git a/src/xenia/gpu/null/null_command_processor.cc b/src/xenia/gpu/null/null_command_processor.cc index ba5500acd..b19e7fcb0 100644 --- a/src/xenia/gpu/null/null_command_processor.cc +++ b/src/xenia/gpu/null/null_command_processor.cc @@ -44,7 +44,8 @@ Shader* NullCommandProcessor::LoadShader(ShaderType shader_type, bool NullCommandProcessor::IssueDraw(PrimitiveType prim_type, uint32_t index_count, - IndexBufferInfo* index_buffer_info) { + IndexBufferInfo* index_buffer_info, + bool major_mode_explicit) { return true; } diff --git a/src/xenia/gpu/null/null_command_processor.h b/src/xenia/gpu/null/null_command_processor.h index 916668269..f33c1b126 100644 --- a/src/xenia/gpu/null/null_command_processor.h +++ b/src/xenia/gpu/null/null_command_processor.h @@ -41,7 +41,8 @@ class NullCommandProcessor : public CommandProcessor { uint32_t dword_count) override; bool IssueDraw(PrimitiveType prim_type, uint32_t index_count, - IndexBufferInfo* index_buffer_info) override; + IndexBufferInfo* index_buffer_info, + bool major_mode_explicit) override; bool IssueCopy() override; void InitializeTrace() override; diff --git a/src/xenia/gpu/register_table.inc b/src/xenia/gpu/register_table.inc index c305b5a03..8ac7ec5d2 100644 --- a/src/xenia/gpu/register_table.inc +++ b/src/xenia/gpu/register_table.inc @@ -131,7 +131,10 @@ XE_GPU_REGISTER(0x2182, kDword, SQ_INTERPOLATOR_CNTL) XE_GPU_REGISTER(0x2183, kDword, SQ_WRAPPING_0) XE_GPU_REGISTER(0x2184, kDword, SQ_WRAPPING_1) +// These three registers are set by the command processor. XE_GPU_REGISTER(0x21F9, kDword, VGT_EVENT_INITIATOR) +XE_GPU_REGISTER(0x21FC, kDword, VGT_DRAW_INITIATOR) +XE_GPU_REGISTER(0x21FD, kDword, VGT_IMMED_DATA) XE_GPU_REGISTER(0x2200, kDword, RB_DEPTHCONTROL) XE_GPU_REGISTER(0x2201, kDword, RB_BLENDCONTROL0) diff --git a/src/xenia/gpu/registers.h b/src/xenia/gpu/registers.h index 5b6fdc54b..af5ee74d9 100644 --- a/src/xenia/gpu/registers.h +++ b/src/xenia/gpu/registers.h @@ -145,6 +145,22 @@ union SQ_CONTEXT_MISC { *******************************************************************************/ +union VGT_DRAW_INITIATOR { + // Different than on A2xx and R6xx/R7xx. + struct { + PrimitiveType prim_type : 6; // +0 + xenos::SourceSelect source_select : 2; // +6 + xenos::MajorMode major_mode : 2; // +8 + uint32_t : 1; // +10 + IndexFormat index_size : 1; // +11 + uint32_t not_eop : 1; // +12 + uint32_t : 3; // +13 + uint32_t num_indices : 16; // +16 + }; + uint32_t value; + static constexpr Register register_index = XE_GPU_REG_VGT_DRAW_INITIATOR; +}; + union VGT_OUTPUT_PATH_CNTL { struct { xenos::VGTOutputPath path_select : 2; // +0 diff --git a/src/xenia/gpu/trace_viewer.cc b/src/xenia/gpu/trace_viewer.cc index 77ab7c022..d11faaecc 100644 --- a/src/xenia/gpu/trace_viewer.cc +++ b/src/xenia/gpu/trace_viewer.cc @@ -2,7 +2,7 @@ ****************************************************************************** * Xenia : Xbox 360 Emulator Research Project * ****************************************************************************** - * Copyright 2015 Ben Vanik. All rights reserved. * + * Copyright 2020 Ben Vanik. All rights reserved. * * Released under the BSD license - see LICENSE in the root for more details. * ****************************************************************************** */ @@ -123,9 +123,9 @@ bool TraceViewer::Setup() { // Create the emulator but don't initialize so we can setup the window. emulator_ = std::make_unique(L"", L""); - X_STATUS result = - emulator_->Setup(window_.get(), nullptr, - [this]() { return CreateGraphicsSystem(); }, nullptr); + X_STATUS result = emulator_->Setup( + window_.get(), nullptr, [this]() { return CreateGraphicsSystem(); }, + nullptr); if (XFAILED(result)) { XELOGE("Failed to setup emulator: %.8X", result); return false; diff --git a/src/xenia/gpu/vk/vulkan_command_processor.cc b/src/xenia/gpu/vk/vulkan_command_processor.cc index 62bdb5677..d7c46090f 100644 --- a/src/xenia/gpu/vk/vulkan_command_processor.cc +++ b/src/xenia/gpu/vk/vulkan_command_processor.cc @@ -40,7 +40,8 @@ Shader* VulkanCommandProcessor::LoadShader(ShaderType shader_type, bool VulkanCommandProcessor::IssueDraw(PrimitiveType primitive_type, uint32_t index_count, - IndexBufferInfo* index_buffer_info) { + IndexBufferInfo* index_buffer_info, + bool major_mode_explicit) { return true; } diff --git a/src/xenia/gpu/vk/vulkan_command_processor.h b/src/xenia/gpu/vk/vulkan_command_processor.h index 8157c3590..43e621814 100644 --- a/src/xenia/gpu/vk/vulkan_command_processor.h +++ b/src/xenia/gpu/vk/vulkan_command_processor.h @@ -40,7 +40,8 @@ class VulkanCommandProcessor : public CommandProcessor { uint32_t dword_count) override; bool IssueDraw(PrimitiveType primitive_type, uint32_t index_count, - IndexBufferInfo* index_buffer_info) override; + IndexBufferInfo* index_buffer_info, + bool major_mode_explicit) override; bool IssueCopy() override; void InitializeTrace() override; diff --git a/src/xenia/gpu/vulkan/vulkan_command_processor.cc b/src/xenia/gpu/vulkan/vulkan_command_processor.cc index 46244fbb4..43f734a62 100644 --- a/src/xenia/gpu/vulkan/vulkan_command_processor.cc +++ b/src/xenia/gpu/vulkan/vulkan_command_processor.cc @@ -598,7 +598,8 @@ Shader* VulkanCommandProcessor::LoadShader(ShaderType shader_type, bool VulkanCommandProcessor::IssueDraw(PrimitiveType primitive_type, uint32_t index_count, - IndexBufferInfo* index_buffer_info) { + IndexBufferInfo* index_buffer_info, + bool major_mode_explicit) { auto& regs = *register_file_; #if FINE_GRAINED_DRAW_SCOPES diff --git a/src/xenia/gpu/vulkan/vulkan_command_processor.h b/src/xenia/gpu/vulkan/vulkan_command_processor.h index 94f7ae401..c4959fd0a 100644 --- a/src/xenia/gpu/vulkan/vulkan_command_processor.h +++ b/src/xenia/gpu/vulkan/vulkan_command_processor.h @@ -81,7 +81,8 @@ class VulkanCommandProcessor : public CommandProcessor { uint32_t dword_count) override; bool IssueDraw(PrimitiveType primitive_type, uint32_t index_count, - IndexBufferInfo* index_buffer_info) override; + IndexBufferInfo* index_buffer_info, + bool major_mode_explicit) override; bool PopulateConstants(VkCommandBuffer command_buffer, VulkanShader* vertex_shader, VulkanShader* pixel_shader); diff --git a/src/xenia/gpu/xenos.h b/src/xenia/gpu/xenos.h index 0cda866ba..aa8174ecc 100644 --- a/src/xenia/gpu/xenos.h +++ b/src/xenia/gpu/xenos.h @@ -37,12 +37,11 @@ enum class PrimitiveType : uint32_t { kQuadStrip = 0x0E, kPolygon = 0x0F, - // Starting with this primitive mode, registers like VGT_OUTPUT_PATH_CNTL have - // effect (deduced from R6xx/R7xx registers, and Halo 3 also doesn't reset - // VGT_OUTPUT_PATH_CNTL after the first draw with tessellation). - // TODO(Triang3l): Find out if VGT_DRAW_INITIATOR (0x21FC on Adreno 2xx, but - // not seen being used in games) specifies the major mode (or if it's set - // somewhere else). + // Starting with this primitive type, explicit major mode is assumed (in the + // R6xx/R7xx registers, k2DCopyRectListV0 is 22, and implicit major mode is + // only used for primitive types 0 through 21) - and tessellation patches use + // the range that starts from k2DCopyRectListV0. + // TODO(Triang3l): Verify if this is also true for the Xenos. kExplicitMajorModeForceStart = 0x10, k2DCopyRectListV0 = 0x10, @@ -460,6 +459,25 @@ typedef enum { XE_GPU_INVALIDATE_MASK_ALL = 0x7FFF, } XE_GPU_INVALIDATE_MASK; +// VGT_DRAW_INITIATOR::DI_SRC_SEL_* +enum class SourceSelect : uint32_t { + kDMA, + kImmediate, + kAutoIndex, +}; + +// VGT_DRAW_INITIATOR::DI_MAJOR_MODE_* +enum class MajorMode : uint32_t { + kImplicit, + kExplicit, +}; + +inline bool IsMajorModeExplicit(MajorMode major_mode, + PrimitiveType primitive_type) { + return major_mode != MajorMode::kImplicit || + primitive_type >= PrimitiveType::kExplicitMajorModeForceStart; +} + // instr_arbitrary_filter_t enum class ArbitraryFilter : uint32_t { k2x4Sym = 0, diff --git a/src/xenia/kernel/xboxkrnl/xboxkrnl_io.cc b/src/xenia/kernel/xboxkrnl/xboxkrnl_io.cc index 26b67ff94..fcc2bb588 100644 --- a/src/xenia/kernel/xboxkrnl/xboxkrnl_io.cc +++ b/src/xenia/kernel/xboxkrnl/xboxkrnl_io.cc @@ -170,23 +170,15 @@ dword_result_t NtReadFile(dword_t file_handle, dword_t event_handle, if (XSUCCEEDED(result)) { if (true || file->is_synchronous()) { - // some games NtReadFile() directly into texture memory - auto heap = kernel_memory()->LookupHeap(buffer.guest_address()); - if (heap && heap->IsGuestPhysicalHeap()) { - kernel_memory()->TriggerPhysicalMemoryCallbacks( - xe::global_critical_region::AcquireDirect(), buffer.guest_address(), - buffer_length, true, true); - } - // Synchronous. - size_t bytes_read = 0; + uint32_t bytes_read = 0; result = file->Read( - buffer, buffer_length, + buffer.guest_address(), buffer_length, byte_offset_ptr ? static_cast(*byte_offset_ptr) : -1, &bytes_read, apc_context); if (io_status_block) { io_status_block->status = result; - io_status_block->information = static_cast(bytes_read); + io_status_block->information = bytes_read; } // Queue the APC callback. It must be delivered via the APC mechanism even @@ -218,7 +210,8 @@ dword_result_t NtReadFile(dword_t file_handle, dword_t event_handle, state, file, (XAsyncRequest::CompletionCallback)xeNtReadFileCompleted, call_state);*/ - // result = file->Read(buffer, buffer_length, byte_offset, request); + // result = file->Read(buffer.guest_address(), buffer_length, byte_offset, + // request); if (io_status_block) { io_status_block->status = X_STATUS_PENDING; io_status_block->information = 0; @@ -266,9 +259,9 @@ dword_result_t NtWriteFile(dword_t file_handle, dword_t event_handle, // TODO(benvanik): async path. if (true || file->is_synchronous()) { // Synchronous request. - size_t bytes_written = 0; + uint32_t bytes_written = 0; result = file->Write( - buffer, buffer_length, + buffer.guest_address(), buffer_length, byte_offset_ptr ? static_cast(*byte_offset_ptr) : -1, &bytes_written, apc_context); if (io_status_block) { @@ -517,10 +510,12 @@ dword_result_t NtQueryInformationFile( // XctdDecompression. /* uint32_t magic; - size_t bytes_read; - size_t cur_pos = file->position(); + uint32_t bytes_read; + uint64_t cur_pos = file->position(); file->set_position(0); + // FIXME(Triang3l): For now, XFile can be read only to guest buffers - + // this line won't work, implement reading to host buffers if needed. result = file->Read(&magic, sizeof(magic), 0, &bytes_read); if (XSUCCEEDED(result)) { if (bytes_read == sizeof(magic)) { diff --git a/src/xenia/kernel/xboxkrnl/xboxkrnl_memory.cc b/src/xenia/kernel/xboxkrnl/xboxkrnl_memory.cc index 7a384bcc2..62179f6ed 100644 --- a/src/xenia/kernel/xboxkrnl/xboxkrnl_memory.cc +++ b/src/xenia/kernel/xboxkrnl/xboxkrnl_memory.cc @@ -347,8 +347,8 @@ dword_result_t MmAllocatePhysicalMemoryEx(dword_t flags, dword_t region_size, uint32_t heap_max_addr = xe::sat_sub(max_addr_range.value(), heap_physical_address_offset); uint32_t heap_size = heap->heap_size(); - heap_min_addr = heap_base + std::min(heap_min_addr, heap_size); - heap_max_addr = heap_base + std::min(heap_max_addr, heap_size); + heap_min_addr = heap_base + std::min(heap_min_addr, heap_size - 1); + heap_max_addr = heap_base + std::min(heap_max_addr, heap_size - 1); uint32_t base_address; if (!heap->AllocRange(heap_min_addr, heap_max_addr, adjusted_size, adjusted_alignment, allocation_type, protect, top_down, diff --git a/src/xenia/kernel/xboxkrnl/xboxkrnl_strings.cc b/src/xenia/kernel/xboxkrnl/xboxkrnl_strings.cc index aa1bbf245..88b59c74e 100644 --- a/src/xenia/kernel/xboxkrnl/xboxkrnl_strings.cc +++ b/src/xenia/kernel/xboxkrnl/xboxkrnl_strings.cc @@ -2,7 +2,7 @@ ****************************************************************************** * Xenia : Xbox 360 Emulator Research Project * ****************************************************************************** - * Copyright 2013 Ben Vanik. All rights reserved. * + * Copyright 2020 Ben Vanik. All rights reserved. * * Released under the BSD license - see LICENSE in the root for more details. * ****************************************************************************** */ @@ -49,6 +49,7 @@ enum FormatFlags { FF_IsWide = 1 << 9, FF_IsSigned = 1 << 10, FF_ForceLeadingZero = 1 << 11, + FF_InvertWide = 1 << 12, }; enum ArgumentSize { @@ -174,7 +175,9 @@ int32_t format_core(PPCContext* ppc_context, FormatData& data, ArgList& args, case FS_Invalid: case FS_Unknown: case FS_End: - default: { assert_always(); } + default: { + assert_always(); + } case FS_Start: { if (c == '%') { @@ -220,7 +223,6 @@ int32_t format_core(PPCContext* ppc_context, FormatData& data, ArgList& args, flags |= FF_AddPrefix; continue; } - state = FS_Width; // fall through, don't need to goto restart } @@ -240,7 +242,6 @@ int32_t format_core(PPCContext* ppc_context, FormatData& data, ArgList& args, width += c - '0'; continue; } - state = FS_PrecisionStart; // fall through, don't need to goto restart } @@ -252,7 +253,6 @@ int32_t format_core(PPCContext* ppc_context, FormatData& data, ArgList& args, precision = 0; continue; } - state = FS_Size; goto restart; } @@ -271,7 +271,6 @@ int32_t format_core(PPCContext* ppc_context, FormatData& data, ArgList& args, precision += c - '0'; continue; } - state = FS_Size; // fall through } @@ -310,7 +309,6 @@ int32_t format_core(PPCContext* ppc_context, FormatData& data, ArgList& args, continue; } } - // fall through } @@ -319,16 +317,14 @@ int32_t format_core(PPCContext* ppc_context, FormatData& data, ArgList& args, // wide character switch (c) { case 'C': { - if (!(flags & (FF_IsShort | FF_IsLong | FF_IsWide))) { - flags |= FF_IsWide; - } + flags |= FF_InvertWide; // fall through } // character case 'c': { bool is_wide; - if (flags & FF_IsLong) { + if (flags & (FF_IsLong | FF_IsWide)) { // "An lc, lC, wc or wC type specifier is synonymous with C in // printf functions and with c in wprintf functions." is_wide = true; @@ -337,7 +333,7 @@ int32_t format_core(PPCContext* ppc_context, FormatData& data, ArgList& args, // functions and with C in wprintf functions." is_wide = false; } else { - is_wide = ((flags & FF_IsWide) != 0) ^ wide; + is_wide = ((flags & FF_InvertWide) != 0) ^ wide; } auto value = args.get32(); @@ -524,9 +520,7 @@ int32_t format_core(PPCContext* ppc_context, FormatData& data, ArgList& args, // wide string case 'S': { - if (!(flags & (FF_IsShort | FF_IsLong | FF_IsWide))) { - flags |= FF_IsWide; - } + flags |= FF_InvertWide; // fall through } @@ -543,7 +537,7 @@ int32_t format_core(PPCContext* ppc_context, FormatData& data, ArgList& args, } else { void* str = SHIM_MEM_ADDR(pointer); bool is_wide; - if (flags & FF_IsLong) { + if (flags & (FF_IsLong | FF_IsWide)) { // "An ls, lS, ws or wS type specifier is synonymous with S in // printf functions and with s in wprintf functions." is_wide = true; @@ -552,7 +546,7 @@ int32_t format_core(PPCContext* ppc_context, FormatData& data, ArgList& args, // functions and with S in wprintf functions." is_wide = false; } else { - is_wide = ((flags & (FF_IsWide)) != 0) ^ wide; + is_wide = ((flags & FF_InvertWide) != 0) ^ wide; } int32_t length; @@ -581,7 +575,9 @@ int32_t format_core(PPCContext* ppc_context, FormatData& data, ArgList& args, break; } - default: { assert_always(); } + default: { + assert_always(); + } } } } @@ -728,7 +724,7 @@ class StringFormatData : public FormatData { void skip(int32_t count) { while (count-- > 0) { - if (!*input_) { + if (!get()) { break; } } @@ -761,11 +757,11 @@ class WideStringFormatData : public FormatData { return xe::byte_swap(result); } - uint16_t peek(int32_t offset) { return input_[offset]; } + uint16_t peek(int32_t offset) { return xe::byte_swap(input_[offset]); } void skip(int32_t count) { while (count-- > 0) { - if (!*input_) { + if (!get()) { break; } } @@ -795,11 +791,11 @@ class WideCountFormatData : public FormatData { return xe::byte_swap(result); } - uint16_t peek(int32_t offset) { return input_[offset]; } + uint16_t peek(int32_t offset) { return xe::byte_swap(input_[offset]); } void skip(int32_t count) { while (count-- > 0) { - if (!*input_) { + if (!get()) { break; } } diff --git a/src/xenia/kernel/xfile.cc b/src/xenia/kernel/xfile.cc index 8a38b207a..857b7444a 100644 --- a/src/xenia/kernel/xfile.cc +++ b/src/xenia/kernel/xfile.cc @@ -13,8 +13,10 @@ #include "xenia/base/byte_stream.h" #include "xenia/base/logging.h" #include "xenia/base/math.h" +#include "xenia/base/mutex.h" #include "xenia/kernel/kernel_state.h" #include "xenia/kernel/xevent.h" +#include "xenia/memory.h" namespace xe { namespace kernel { @@ -87,18 +89,73 @@ X_STATUS XFile::QueryDirectory(X_FILE_DIRECTORY_INFORMATION* out_info, return X_STATUS_SUCCESS; } -X_STATUS XFile::Read(void* buffer, size_t buffer_length, size_t byte_offset, - size_t* out_bytes_read, uint32_t apc_context) { - if (byte_offset == -1) { +X_STATUS XFile::Read(uint32_t buffer_guest_address, uint32_t buffer_length, + uint64_t byte_offset, uint32_t* out_bytes_read, + uint32_t apc_context) { + if (byte_offset == uint64_t(-1)) { // Read from current position. byte_offset = position_; } size_t bytes_read = 0; - X_STATUS result = - file_->ReadSync(buffer, buffer_length, byte_offset, &bytes_read); - if (XSUCCEEDED(result)) { - position_ += bytes_read; + X_STATUS result = X_STATUS_SUCCESS; + // Zero length means success for a valid file object according to Windows + // tests. + if (buffer_length) { + if (UINT32_MAX - buffer_guest_address < buffer_length) { + result = X_STATUS_ACCESS_VIOLATION; + } else { + // Games often read directly to texture/vertex buffer memory - in this + // case, invalidation notifications must be sent. However, having any + // memory callbacks in the range will result in STATUS_ACCESS_VIOLATION at + // least on Windows, without anything being read or any callbacks being + // triggered. So for physical memory, host protection must be bypassed, + // and invalidation callbacks must be triggered manually (it's also wrong + // to trigger invalidation callbacks before reading in this case, because + // during the read, the guest may still access the data around the buffer + // that is located in the same host pages as the buffer's start and end, + // on the GPU - and that must not trigger a race condition). + uint32_t buffer_guest_high_address = + buffer_guest_address + buffer_length - 1; + xe::BaseHeap* buffer_start_heap = + memory()->LookupHeap(buffer_guest_address); + const xe::BaseHeap* buffer_end_heap = + memory()->LookupHeap(buffer_guest_high_address); + if (!buffer_start_heap || !buffer_end_heap || + buffer_start_heap->IsGuestPhysicalHeap() != + buffer_end_heap->IsGuestPhysicalHeap() || + (buffer_start_heap->IsGuestPhysicalHeap() && + buffer_start_heap != buffer_end_heap)) { + result = X_STATUS_ACCESS_VIOLATION; + } else { + xe::PhysicalHeap* buffer_physical_heap = + buffer_start_heap->IsGuestPhysicalHeap() + ? static_cast(buffer_start_heap) + : nullptr; + if (buffer_physical_heap && + buffer_physical_heap->QueryRangeAccess(buffer_guest_address, + buffer_guest_high_address) != + memory::PageAccess::kReadWrite) { + result = X_STATUS_ACCESS_VIOLATION; + } else { + result = file_->ReadSync( + buffer_physical_heap + ? memory()->TranslatePhysical( + buffer_physical_heap->GetPhysicalAddress( + buffer_guest_address)) + : memory()->TranslateVirtual(buffer_guest_address), + buffer_length, size_t(byte_offset), &bytes_read); + if (XSUCCEEDED(result)) { + if (buffer_physical_heap) { + buffer_physical_heap->TriggerCallbacks( + xe::global_critical_region::AcquireDirect(), + buffer_guest_address, buffer_length, true, true); + } + position_ += bytes_read; + } + } + } + } } XIOCompletion::IONotification notify; @@ -109,24 +166,25 @@ X_STATUS XFile::Read(void* buffer, size_t buffer_length, size_t byte_offset, NotifyIOCompletionPorts(notify); if (out_bytes_read) { - *out_bytes_read = bytes_read; + *out_bytes_read = uint32_t(bytes_read); } async_event_->Set(); return result; } -X_STATUS XFile::Write(const void* buffer, size_t buffer_length, - size_t byte_offset, size_t* out_bytes_written, +X_STATUS XFile::Write(uint32_t buffer_guest_address, uint32_t buffer_length, + uint64_t byte_offset, uint32_t* out_bytes_written, uint32_t apc_context) { - if (byte_offset == -1) { + if (byte_offset == uint64_t(-1)) { // Write from current position. byte_offset = position_; } size_t bytes_written = 0; X_STATUS result = - file_->WriteSync(buffer, buffer_length, byte_offset, &bytes_written); + file_->WriteSync(memory()->TranslateVirtual(buffer_guest_address), + buffer_length, size_t(byte_offset), &bytes_written); if (XSUCCEEDED(result)) { position_ += bytes_written; } @@ -139,7 +197,7 @@ X_STATUS XFile::Write(const void* buffer, size_t buffer_length, NotifyIOCompletionPorts(notify); if (out_bytes_written) { - *out_bytes_written = bytes_written; + *out_bytes_written = uint32_t(bytes_written); } async_event_->Set(); diff --git a/src/xenia/kernel/xfile.h b/src/xenia/kernel/xfile.h index fda7abb47..2e4feb87a 100644 --- a/src/xenia/kernel/xfile.h +++ b/src/xenia/kernel/xfile.h @@ -92,17 +92,22 @@ class XFile : public XObject { const std::string& path() const { return file_->entry()->path(); } const std::string& name() const { return file_->entry()->name(); } - size_t position() const { return position_; } - void set_position(size_t value) { position_ = value; } + uint64_t position() const { return position_; } + void set_position(uint64_t value) { position_ = value; } X_STATUS QueryDirectory(X_FILE_DIRECTORY_INFORMATION* out_info, size_t length, const char* file_name, bool restart); - X_STATUS Read(void* buffer, size_t buffer_length, size_t byte_offset, - size_t* out_bytes_read, uint32_t apc_context); + // Don't do within the global critical region because invalidation callbacks + // may be triggered (as per the usual rule of not doing I/O within the global + // critical region). + X_STATUS Read(uint32_t buffer_guess_address, uint32_t buffer_length, + uint64_t byte_offset, uint32_t* out_bytes_read, + uint32_t apc_context); - X_STATUS Write(const void* buffer, size_t buffer_length, size_t byte_offset, - size_t* out_bytes_written, uint32_t apc_context); + X_STATUS Write(uint32_t buffer_guess_address, uint32_t buffer_length, + uint64_t byte_offset, uint32_t* out_bytes_written, + uint32_t apc_context); X_STATUS SetLength(size_t length); @@ -133,7 +138,7 @@ class XFile : public XObject { // TODO(benvanik): create flags, open state, etc. - size_t position_ = 0; + uint64_t position_ = 0; xe::filesystem::WildcardEngine find_engine_; size_t find_index_ = 0; diff --git a/src/xenia/memory.cc b/src/xenia/memory.cc index 026cd21ba..3083554cc 100644 --- a/src/xenia/memory.cc +++ b/src/xenia/memory.cc @@ -365,7 +365,7 @@ uint32_t Memory::HostToGuestVirtual(const void* host_address) const { size_t(heaps_.vE0000000.heap_base()) + vE0000000_host_offset; if (virtual_address >= vE0000000_host_base && virtual_address <= - (vE0000000_host_base + heaps_.vE0000000.heap_size() - 1)) { + (vE0000000_host_base + (heaps_.vE0000000.heap_size() - 1))) { virtual_address -= vE0000000_host_offset; } return uint32_t(virtual_address); @@ -646,7 +646,7 @@ void BaseHeap::Initialize(Memory* memory, uint8_t* membase, uint32_t heap_base, memory_ = memory; membase_ = membase; heap_base_ = heap_base; - heap_size_ = heap_size - 1; + heap_size_ = heap_size; page_size_ = page_size; host_address_offset_ = host_address_offset; page_table_.resize(heap_size / page_size); @@ -668,7 +668,7 @@ void BaseHeap::Dispose() { void BaseHeap::DumpMap() { auto global_lock = global_critical_region_.Acquire(); XELOGE("------------------------------------------------------------------"); - XELOGE("Heap: %.8X-%.8X", heap_base_, heap_base_ + heap_size_); + XELOGE("Heap: %.8X-%.8X", heap_base_, heap_base_ + (heap_size_ - 1)); XELOGE("------------------------------------------------------------------"); XELOGE(" Heap Base: %.8X", heap_base_); XELOGE(" Heap Size: %d (%.8X)", heap_size_, heap_size_); @@ -710,7 +710,8 @@ void BaseHeap::DumpMap() { } if (is_empty_span) { XELOGE(" %.8X-%.8X - %d unreserved pages)", - heap_base_ + empty_span_start * page_size_, heap_base_ + heap_size_, + heap_base_ + empty_span_start * page_size_, + heap_base_ + (heap_size_ - 1), page_table_.size() - empty_span_start); } } @@ -745,7 +746,7 @@ uint32_t BaseHeap::GetUnreservedPageCount() { } bool BaseHeap::Save(ByteStream* stream) { - XELOGD("Heap %.8X-%.8X", heap_base_, heap_base_ + heap_size_); + XELOGD("Heap %.8X-%.8X", heap_base_, heap_base_ + (heap_size_ - 1)); for (size_t i = 0; i < page_table_.size(); i++) { auto& page = page_table_[i]; @@ -773,7 +774,7 @@ bool BaseHeap::Save(ByteStream* stream) { } bool BaseHeap::Restore(ByteStream* stream) { - XELOGD("Heap %.8X-%.8X", heap_base_, heap_base_ + heap_size_); + XELOGD("Heap %.8X-%.8X", heap_base_, heap_base_ + (heap_size_ - 1)); for (size_t i = 0; i < page_table_.size(); i++) { auto& page = page_table_[i]; @@ -830,7 +831,7 @@ bool BaseHeap::Alloc(uint32_t size, uint32_t alignment, size = xe::round_up(size, page_size_); alignment = xe::round_up(alignment, page_size_); uint32_t low_address = heap_base_; - uint32_t high_address = heap_base_ + heap_size_; + uint32_t high_address = heap_base_ + (heap_size_ - 1); return AllocRange(low_address, high_address, size, alignment, allocation_type, protect, top_down, out_address); } @@ -922,8 +923,8 @@ bool BaseHeap::AllocRange(uint32_t low_address, uint32_t high_address, alignment = xe::round_up(alignment, page_size_); uint32_t page_count = get_page_count(size, page_size_); low_address = std::max(heap_base_, xe::align(low_address, alignment)); - high_address = - std::min(heap_base_ + heap_size_, xe::align(high_address, alignment)); + high_address = std::min(heap_base_ + (heap_size_ - 1), + xe::align(high_address, alignment)); uint32_t low_page_number = (low_address - heap_base_) / page_size_; uint32_t high_page_number = (high_address - heap_base_) / page_size_; low_page_number = std::min(uint32_t(page_table_.size()) - 1, low_page_number); @@ -1302,6 +1303,24 @@ bool BaseHeap::QueryProtect(uint32_t address, uint32_t* out_protect) { return true; } +xe::memory::PageAccess BaseHeap::QueryRangeAccess(uint32_t low_address, + uint32_t high_address) { + if (low_address > high_address || low_address < heap_base_ || + (high_address - heap_base_) >= heap_size_) { + return xe::memory::PageAccess::kNoAccess; + } + uint32_t low_page_number = (low_address - heap_base_) / page_size_; + uint32_t high_page_number = (high_address - heap_base_) / page_size_; + uint32_t protect = kMemoryProtectRead | kMemoryProtectWrite; + { + auto global_lock = global_critical_region_.Acquire(); + for (uint32_t i = low_page_number; protect && i <= high_page_number; ++i) { + protect &= page_table_[i].current_protect; + } + } + return ToPageAccess(protect); +} + VirtualHeap::VirtualHeap() = default; VirtualHeap::~VirtualHeap() = default; @@ -1333,7 +1352,7 @@ void PhysicalHeap::Initialize(Memory* memory, uint8_t* membase, system_page_size_ = uint32_t(xe::memory::page_size()); system_page_count_ = - (heap_size_ /* already - 1 */ + host_address_offset + system_page_size_) / + (size_t(heap_size_) + host_address_offset + (system_page_size_ - 1)) / system_page_size_; system_page_flags_.resize((system_page_count_ + 63) / 64); } @@ -1355,7 +1374,7 @@ bool PhysicalHeap::Alloc(uint32_t size, uint32_t alignment, // Allocate from parent heap (gets our physical address in 0-512mb). uint32_t parent_heap_start = GetPhysicalAddress(heap_base_); - uint32_t parent_heap_end = GetPhysicalAddress(heap_base_ + heap_size_); + uint32_t parent_heap_end = GetPhysicalAddress(heap_base_ + (heap_size_ - 1)); uint32_t parent_address; if (!parent_heap_->AllocRange(parent_heap_start, parent_heap_end, size, alignment, allocation_type, protect, top_down, @@ -1375,11 +1394,6 @@ bool PhysicalHeap::Alloc(uint32_t size, uint32_t alignment, // TODO(benvanik): don't leak parent memory. return false; } - - if (protect & kMemoryProtectWrite) { - TriggerCallbacks(std::move(global_lock), address, size, true, true, false); - } - *out_address = address; return true; } @@ -1417,10 +1431,6 @@ bool PhysicalHeap::AllocFixed(uint32_t base_address, uint32_t size, return false; } - if (protect & kMemoryProtectWrite) { - TriggerCallbacks(std::move(global_lock), address, size, true, true, false); - } - return true; } @@ -1438,7 +1448,7 @@ bool PhysicalHeap::AllocRange(uint32_t low_address, uint32_t high_address, // Allocate from parent heap (gets our physical address in 0-512mb). low_address = std::max(heap_base_, low_address); - high_address = std::min(heap_base_ + heap_size_, high_address); + high_address = std::min(heap_base_ + (heap_size_ - 1), high_address); uint32_t parent_low_address = GetPhysicalAddress(low_address); uint32_t parent_high_address = GetPhysicalAddress(high_address); uint32_t parent_address; @@ -1461,32 +1471,49 @@ bool PhysicalHeap::AllocRange(uint32_t low_address, uint32_t high_address, // TODO(benvanik): don't leak parent memory. return false; } - - if (protect & kMemoryProtectWrite) { - TriggerCallbacks(std::move(global_lock), address, size, true, true, false); - } - *out_address = address; return true; } bool PhysicalHeap::Decommit(uint32_t address, uint32_t size) { auto global_lock = global_critical_region_.Acquire(); + uint32_t parent_address = GetPhysicalAddress(address); if (!parent_heap_->Decommit(parent_address, size)) { XELOGE("PhysicalHeap::Decommit failed due to parent heap failure"); return false; } + + // Not caring about the contents anymore. + TriggerCallbacks(std::move(global_lock), address, size, true, true); + return BaseHeap::Decommit(address, size); } bool PhysicalHeap::Release(uint32_t base_address, uint32_t* out_region_size) { auto global_lock = global_critical_region_.Acquire(); + uint32_t parent_base_address = GetPhysicalAddress(base_address); if (!parent_heap_->Release(parent_base_address, out_region_size)) { XELOGE("PhysicalHeap::Release failed due to parent heap failure"); return false; } + + // Must invalidate here because the range being released may be reused in + // another mapping of physical memory - but callback flags are set in each + // heap separately (https://github.com/xenia-project/xenia/issues/1559 - + // dynamic vertices in Viva Pinata start screen and menu allocated in + // 0xA0000000 at addresses that overlap intro video textures in 0xE0000000, + // with the state of the allocator as of February 24th, 2020). If memory is + // invalidated in Alloc instead, Alloc won't be aware of callbacks enabled in + // other heaps, thus callback handlers will keep considering this range valid + // forever. + uint32_t region_size; + if (QuerySize(base_address, ®ion_size)) { + TriggerCallbacks(std::move(global_lock), base_address, region_size, true, + true); + } + return BaseHeap::Release(base_address, out_region_size); } @@ -1527,10 +1554,10 @@ void PhysicalHeap::EnableAccessCallbacks(uint32_t physical_address, physical_address = physical_address_offset; } uint32_t heap_relative_address = physical_address - physical_address_offset; - if (heap_relative_address >= heap_size_ + 1) { + if (heap_relative_address >= heap_size_) { return; } - length = std::min(length, heap_size_ + 1 - heap_relative_address); + length = std::min(length, heap_size_ - heap_relative_address); if (length == 0) { return; } @@ -1637,10 +1664,10 @@ bool PhysicalHeap::TriggerCallbacks( virtual_address = heap_base_; } uint32_t heap_relative_address = virtual_address - heap_base_; - if (heap_relative_address >= heap_size_ + 1) { + if (heap_relative_address >= heap_size_) { return false; } - length = std::min(length, heap_size_ + 1 - heap_relative_address); + length = std::min(length, heap_size_ - heap_relative_address); if (length == 0) { return false; } @@ -1689,7 +1716,7 @@ bool PhysicalHeap::TriggerCallbacks( xe::sat_sub(system_page_last * system_page_size_ + system_page_size_, host_address_offset()) + physical_address_offset - physical_address_start, - heap_size_ + 1 - (physical_address_start - physical_address_offset)); + heap_size_ - (physical_address_start - physical_address_offset)); uint32_t unwatch_first = 0; uint32_t unwatch_last = UINT32_MAX; for (auto invalidation_callback : @@ -1724,8 +1751,8 @@ bool PhysicalHeap::TriggerCallbacks( unwatch_first = xe::sat_sub(unwatch_first, physical_address_offset); unwatch_last = xe::sat_sub(unwatch_last, physical_address_offset); // Clamp to the heap upper bound. - unwatch_first = std::min(unwatch_first, heap_size_); - unwatch_last = std::min(unwatch_last, heap_size_); + unwatch_first = std::min(unwatch_first, heap_size_ - 1); + unwatch_last = std::min(unwatch_last, heap_size_ - 1); // Convert to system pages and update the range. unwatch_first += host_address_offset(); unwatch_last += host_address_offset(); @@ -1794,7 +1821,7 @@ bool PhysicalHeap::TriggerCallbacks( uint32_t PhysicalHeap::GetPhysicalAddress(uint32_t address) const { assert_true(address >= heap_base_); address -= heap_base_; - assert_true(address <= heap_size_); + assert_true(address < heap_size_); if (heap_base_ >= 0xE0000000) { address += 0x1000; } diff --git a/src/xenia/memory.h b/src/xenia/memory.h index 3c7d0c93b..266b8acd8 100644 --- a/src/xenia/memory.h +++ b/src/xenia/memory.h @@ -172,6 +172,11 @@ class BaseHeap { // address. bool QueryProtect(uint32_t address, uint32_t* out_protect); + // Queries the currently strictest readability and writability for the entire + // range. + xe::memory::PageAccess QueryRangeAccess(uint32_t low_address, + uint32_t high_address); + // Whether the heap is a guest virtual memory mapping of the physical memory. virtual bool IsGuestPhysicalHeap() const { return false; } @@ -387,7 +392,9 @@ class Memory { // // May be triggered for a single page (in case of a write access violation or // when need to synchronize data given by data providers) or for multiple - // pages (like when memory is allocated). + // pages (like when memory is released, or explicitly to trigger callbacks + // when host-side code can't rely on regular access violations, like when + // accessing a file). // // Since granularity of callbacks is one single page, an invalidation // notification handler must invalidate the all the data stored in the touched diff --git a/third_party/yaml-cpp b/third_party/yaml-cpp deleted file mode 160000 index e92321aee..000000000 --- a/third_party/yaml-cpp +++ /dev/null @@ -1 +0,0 @@ -Subproject commit e92321aee52fd27566601f9cca53ba90b41e15c1 diff --git a/third_party/yaml-cpp.lua b/third_party/yaml-cpp.lua deleted file mode 100644 index 138e99a46..000000000 --- a/third_party/yaml-cpp.lua +++ /dev/null @@ -1,17 +0,0 @@ -group("third_party") -project("yaml-cpp") - uuid("47bfe853-a3f8-4902-921d-d564608ff355") - kind("StaticLib") - language("C++") - - defines({ - "_LIB", - }) - includedirs({ - "yaml-cpp/include/", - }) - recursive_platform_files("yaml-cpp/include/yaml-cpp") - recursive_platform_files("yaml-cpp/src") - - filter("platforms:Windows") - warnings("Off") -- Too many warnings. diff --git a/tools/diff.py b/tools/diff.py index 29bc1ff33..1ec660e68 100644 --- a/tools/diff.py +++ b/tools/diff.py @@ -6,7 +6,7 @@ import difflib import sys diff = difflib.unified_diff( - open(sys.argv[1]).readlines(), - open(sys.argv[2]).readlines()) -with open(sys.argv[3], 'w') as f: + open(sys.argv[1], encoding='utf-8').readlines(), + open(sys.argv[2], encoding='utf-8').readlines()) +with open(sys.argv[3], 'w', encoding='utf-8') as f: f.write(''.join(diff)) diff --git a/xenia-build b/xenia-build index cff6dfe5c..aa6859317 100755 --- a/xenia-build +++ b/xenia-build @@ -96,7 +96,7 @@ def import_vs_environment(): install_path = None env_tool_args = None - vswhere = subprocess.check_output('third_party/vswhere/vswhere.exe -version "[15,)" -latest -prerelease -format json -utf8', shell=False, universal_newlines=True) + vswhere = subprocess.check_output('third_party/vswhere/vswhere.exe -version "[15,)" -latest -prerelease -format json -utf8', shell=False, universal_newlines=True, encoding="utf-8") if vswhere: vswhere = json.loads(vswhere) if vswhere and len(vswhere) > 0: @@ -185,7 +185,7 @@ def get_bin(binary): return None -def shell_call(command, throw_on_error=True, stdout_path=None): +def shell_call(command, throw_on_error=True, stdout_path=None, shell=False): """Executes a shell command. Args: @@ -203,10 +203,10 @@ def shell_call(command, throw_on_error=True, stdout_path=None): try: if throw_on_error: result = 1 - subprocess.check_call(command, shell=False, stdout=stdout_file) + subprocess.check_call(command, shell=shell, stdout=stdout_file) result = 0 else: - result = subprocess.call(command, shell=False, stdout=stdout_file) + result = subprocess.call(command, shell=shell, stdout=stdout_file) finally: if stdout_file: stdout_file.close() @@ -319,7 +319,7 @@ def get_clang_format_binary(): attempts = [ 'C:\\Program Files\\LLVM\\bin\\clang-format.exe', 'C:\\Program Files (x86)\\LLVM\\bin\\clang-format.exe', - 'clang-format-3.8', + 'clang-format-9', 'clang-format', ] for binary in attempts: @@ -327,7 +327,7 @@ def get_clang_format_binary(): return binary print('ERROR: clang-format is not on PATH') print('LLVM is available from https://llvm.org/releases/download.html') - print('At least version 3.8 is required.') + print('At least version 9 is required.') print('See docs/style_guide.md for instructions on how to get it.') sys.exit(1) @@ -1215,6 +1215,7 @@ class LintCommand(Command): if args['all']: all_files = find_all_source_files() + all_files.sort() print('- linting %d files' % (len(all_files))) any_errors = False for file_path in all_files: @@ -1247,7 +1248,7 @@ class LintCommand(Command): shell_call([ 'type' if sys.platform == 'win32' else 'cat', difftemp, - ]) + ], shell=True if sys.platform == 'win32' else False) if os.path.exists(difftemp): os.remove(difftemp) print('') print('') @@ -1265,6 +1266,7 @@ class LintCommand(Command): 'third_party/clang-format/git-clang-format', '--binary=%s' % (clang_format_binary), '--commit=%s' % ('origin/master' if args['origin'] else 'HEAD'), + '--style=file', '--diff', ], throw_on_error=False, stdout_path=difftemp) with open(difftemp) as f: @@ -1281,6 +1283,7 @@ class LintCommand(Command): 'third_party/clang-format/git-clang-format', '--binary=%s' % (clang_format_binary), '--commit=%s' % ('origin/master' if args['origin'] else 'HEAD'), + '--style=file', '--diff', ]) print('ERROR: 1+ diffs. Stage changes and run \'xb format\' to fix.') @@ -1311,6 +1314,7 @@ class FormatCommand(Command): if args['all']: all_files = find_all_source_files() + all_files.sort() print('- clang-format [%d files]' % (len(all_files))) any_errors = False for file_path in all_files: