diff --git a/.drone.star b/.drone.star index 7a1af7633..60278044c 100644 --- a/.drone.star +++ b/.drone.star @@ -38,51 +38,40 @@ def targets_android(platform): 'imgui', 'mspack', 'snappy', - 'spirv-tools', 'xxhash', - # 'xenia-core', + 'xenia-core', # 'xenia-app-discord', - # 'xenia-apu', - # 'xenia-apu-nop', + 'xenia-apu', + 'xenia-apu-nop', 'xenia-base', 'xenia-base-tests', - # 'xenia-cpu', + 'xenia-cpu', # 'xenia-cpu-tests', # 'xenia-cpu-ppc-tests', # 'xenia-cpu-backend-x64', # 'xenia-debug-ui', - # 'xenia-gpu', - # 'xenia-gpu-shader-compiler', - # 'xenia-gpu-null', - # 'xenia-gpu-vulkan', + 'xenia-gpu', + 'xenia-gpu-shader-compiler', + 'xenia-gpu-null', + 'xenia-gpu-vulkan', # 'xenia-gpu-vulkan-trace-viewer', - # 'xenia-gpu-vulkan-trace-dump', + 'xenia-gpu-vulkan-trace-dump', 'xenia-hid', # 'xenia-hid-demo', 'xenia-hid-nop', - # 'xenia-kernel', + 'xenia-kernel', 'xenia-ui', - 'xenia-ui-spirv', - # 'xenia-ui-vulkan', + 'xenia-ui-vulkan', # 'xenia-ui-window-vulkan-demo', 'xenia-vfs', 'xenia-vfs-dump', ] if platform == 'Android-x86_64': targets.extend([ - 'xenia-core', - 'xenia-apu', - 'xenia-apu-nop', - 'xenia-cpu', 'xenia-cpu-tests', 'xenia-cpu-ppc-tests', 'xenia-cpu-backend-x64', 'xenia-debug-ui', - 'xenia-gpu', - 'xenia-gpu-null', - 'xenia-gpu-vulkan', - 'xenia-gpu-shader-compiler', - 'xenia-kernel', ]) return targets diff --git a/src/xenia/app/premake5.lua b/src/xenia/app/premake5.lua index 09c2d2a50..a321f2cc7 100644 --- a/src/xenia/app/premake5.lua +++ b/src/xenia/app/premake5.lua @@ -15,7 +15,6 @@ project("xenia-app") "xenia-base", "xenia-core", "xenia-cpu", - "xenia-cpu-backend-x64", "xenia-debug-ui", "xenia-gpu", "xenia-gpu-null", @@ -60,6 +59,11 @@ project("xenia-app") project_root, }) + filter("architecture:x86_64") + links({ + "xenia-cpu-backend-x64", + }) + filter("platforms:Windows") files({ "main_resources.rc", diff --git a/src/xenia/app/xenia_main.cc b/src/xenia/app/xenia_main.cc index 3e2729993..71595d5f0 100644 --- a/src/xenia/app/xenia_main.cc +++ b/src/xenia/app/xenia_main.cc @@ -477,7 +477,7 @@ void EmulatorApp::EmulatorThread() { // Setup and initialize all subsystems. If we can't do something // (unsupported system, memory issues, etc) this will fail early. X_STATUS result = emulator_->Setup( - emulator_window_->window(), emulator_window_->imgui_drawer(), + emulator_window_->window(), emulator_window_->imgui_drawer(), true, CreateAudioSystem, CreateGraphicsSystem, CreateInputDrivers); if (XFAILED(result)) { XELOGE("Failed to setup emulator: {:08X}", result); diff --git a/src/xenia/base/exception_handler.cc b/src/xenia/base/exception_handler.cc new file mode 100644 index 000000000..1b6cbd4b3 --- /dev/null +++ b/src/xenia/base/exception_handler.cc @@ -0,0 +1,88 @@ +/** + ****************************************************************************** + * Xenia : Xbox 360 Emulator Research Project * + ****************************************************************************** + * Copyright 2022 Ben Vanik. All rights reserved. * + * Released under the BSD license - see LICENSE in the root for more details. * + ****************************************************************************** + */ + +#include "xenia/base/exception_handler.h" + +namespace xe { + +// Based on VIXL Instruction::IsLoad and IsStore. +// https://github.com/Linaro/vixl/blob/d48909dd0ac62197edb75d26ed50927e4384a199/src/aarch64/instructions-aarch64.cc#L484 +// +// Copyright 2015, VIXL authors +// All rights reserved. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// * Neither the name of ARM Limited nor the names of its contributors may be +// used to endorse or promote products derived from this software without +// specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND +// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE +// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +bool IsArm64LoadPrefetchStore(uint32_t instruction, bool& is_store_out) { + if ((instruction & kArm64LoadLiteralFMask) == kArm64LoadLiteralFixed) { + return true; + } + if ((instruction & kArm64LoadStoreAnyFMask) != kArm64LoadStoreAnyFixed) { + return false; + } + if ((instruction & kArm64LoadStorePairAnyFMask) == + kArm64LoadStorePairAnyFixed) { + is_store_out = !(instruction & kArm64LoadStorePairLoadBit); + return true; + } + switch (Arm64LoadStoreOp(instruction & kArm64LoadStoreMask)) { + case Arm64LoadStoreOp::kLDRB_w: + case Arm64LoadStoreOp::kLDRH_w: + case Arm64LoadStoreOp::kLDR_w: + case Arm64LoadStoreOp::kLDR_x: + case Arm64LoadStoreOp::kLDRSB_x: + case Arm64LoadStoreOp::kLDRSH_x: + case Arm64LoadStoreOp::kLDRSW_x: + case Arm64LoadStoreOp::kLDRSB_w: + case Arm64LoadStoreOp::kLDRSH_w: + case Arm64LoadStoreOp::kLDR_b: + case Arm64LoadStoreOp::kLDR_h: + case Arm64LoadStoreOp::kLDR_s: + case Arm64LoadStoreOp::kLDR_d: + case Arm64LoadStoreOp::kLDR_q: + case Arm64LoadStoreOp::kPRFM: + is_store_out = false; + return true; + case Arm64LoadStoreOp::kSTRB_w: + case Arm64LoadStoreOp::kSTRH_w: + case Arm64LoadStoreOp::kSTR_w: + case Arm64LoadStoreOp::kSTR_x: + case Arm64LoadStoreOp::kSTR_b: + case Arm64LoadStoreOp::kSTR_h: + case Arm64LoadStoreOp::kSTR_s: + case Arm64LoadStoreOp::kSTR_d: + case Arm64LoadStoreOp::kSTR_q: + is_store_out = true; + return true; + default: + return false; + } +} + +} // namespace xe diff --git a/src/xenia/base/exception_handler.h b/src/xenia/base/exception_handler.h index cff15ab1b..218a2e4bc 100644 --- a/src/xenia/base/exception_handler.h +++ b/src/xenia/base/exception_handler.h @@ -2,7 +2,7 @@ ****************************************************************************** * Xenia : Xbox 360 Emulator Research Project * ****************************************************************************** - * Copyright 2015 Ben Vanik. All rights reserved. * + * Copyright 2022 Ben Vanik. All rights reserved. * * Released under the BSD license - see LICENSE in the root for more details. 
* ****************************************************************************** */ @@ -10,14 +10,97 @@ #ifndef XENIA_BASE_EXCEPTION_HANDLER_H_ #define XENIA_BASE_EXCEPTION_HANDLER_H_ +#include #include #include #include "xenia/base/assert.h" -#include "xenia/base/x64_context.h" +#include "xenia/base/host_thread_context.h" namespace xe { +// AArch64 load and store decoding based on VIXL. +// https://github.com/Linaro/vixl/blob/ae5957cd66517b3f31dbf37e9bf39db6594abfe3/src/aarch64/constants-aarch64.h +// +// Copyright 2015, VIXL authors +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// * Neither the name of ARM Limited nor the names of its contributors may be +// used to endorse or promote products derived from this software without +// specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND +// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE +// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// `Instruction address + literal offset` loads. +// This includes PRFM_lit. 
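The constants that follow use VIXL's convention of pairing an "FMask" (the opcode bits that identify an instruction class) with a "Fixed" pattern (the value those bits must take). A rough standalone illustration of how such a pair classifies a 32-bit instruction word; the mirrored constants and the sample encoding are assumptions made for the example only:

#include <cstdint>

// Mirrors the load/store "any" class pair declared below (illustrative copy).
constexpr uint32_t kLoadStoreAnyFMask = UINT32_C(0x0A000000);
constexpr uint32_t kLoadStoreAnyFixed = UINT32_C(0x08000000);

// An instruction belongs to the class when its masked opcode bits equal the
// fixed pattern.
bool IsAnyLoadStore(uint32_t instruction) {
  return (instruction & kLoadStoreAnyFMask) == kLoadStoreAnyFixed;
}

// Example (assumed encoding): 0xB9400020 is LDR W0, [X1]; bit 27 is set and
// bit 25 is clear, so the masked value equals kLoadStoreAnyFixed and the
// check returns true.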
+constexpr uint32_t kArm64LoadLiteralFMask = UINT32_C(0x3B000000); +constexpr uint32_t kArm64LoadLiteralFixed = UINT32_C(0x18000000); + +constexpr uint32_t kArm64LoadStoreAnyFMask = UINT32_C(0x0A000000); +constexpr uint32_t kArm64LoadStoreAnyFixed = UINT32_C(0x08000000); + +constexpr uint32_t kArm64LoadStorePairAnyFMask = UINT32_C(0x3A000000); +constexpr uint32_t kArm64LoadStorePairAnyFixed = UINT32_C(0x28000000); +constexpr uint32_t kArm64LoadStorePairLoadBit = UINT32_C(1) << 22; + +constexpr uint32_t kArm64LoadStoreMask = UINT32_C(0xC4C00000); +enum class Arm64LoadStoreOp : uint32_t { + kSTRB_w = UINT32_C(0x00000000), + kSTRH_w = UINT32_C(0x40000000), + kSTR_w = UINT32_C(0x80000000), + kSTR_x = UINT32_C(0xC0000000), + kLDRB_w = UINT32_C(0x00400000), + kLDRH_w = UINT32_C(0x40400000), + kLDR_w = UINT32_C(0x80400000), + kLDR_x = UINT32_C(0xC0400000), + kLDRSB_x = UINT32_C(0x00800000), + kLDRSH_x = UINT32_C(0x40800000), + kLDRSW_x = UINT32_C(0x80800000), + kLDRSB_w = UINT32_C(0x00C00000), + kLDRSH_w = UINT32_C(0x40C00000), + kSTR_b = UINT32_C(0x04000000), + kSTR_h = UINT32_C(0x44000000), + kSTR_s = UINT32_C(0x84000000), + kSTR_d = UINT32_C(0xC4000000), + kSTR_q = UINT32_C(0x04800000), + kLDR_b = UINT32_C(0x04400000), + kLDR_h = UINT32_C(0x44400000), + kLDR_s = UINT32_C(0x84400000), + kLDR_d = UINT32_C(0xC4400000), + kLDR_q = UINT32_C(0x04C00000), + kPRFM = UINT32_C(0xC0800000), +}; + +constexpr uint32_t kArm64LoadStoreOffsetFMask = UINT32_C(0x3B200C00); +enum class Arm64LoadStoreOffsetFixed : uint32_t { + kUnscaledOffset = UINT32_C(0x38000000), + kPostIndex = UINT32_C(0x38000400), + kPreIndex = UINT32_C(0x38000C00), + kRegisterOffset = UINT32_C(0x38200800), +}; + +constexpr uint32_t kArm64LoadStoreUnsignedOffsetFMask = UINT32_C(0x3B000000); +constexpr uint32_t kArm64LoadStoreUnsignedOffsetFixed = UINT32_C(0x39000000); + +bool IsArm64LoadPrefetchStore(uint32_t instruction, bool& is_store_out); + class Exception { public: enum class Code { @@ -32,7 +115,7 @@ class Exception { kWrite, }; - void InitializeAccessViolation(X64Context* thread_context, + void InitializeAccessViolation(HostThreadContext* thread_context, uint64_t fault_address, AccessViolationOperation operation) { code_ = Code::kAccessViolation; @@ -40,7 +123,7 @@ class Exception { fault_address_ = fault_address; access_violation_operation_ = operation; } - void InitializeIllegalInstruction(X64Context* thread_context) { + void InitializeIllegalInstruction(HostThreadContext* thread_context) { code_ = Code::kIllegalInstruction; thread_context_ = thread_context; } @@ -48,24 +131,67 @@ class Exception { Code code() const { return code_; } // Returns the platform-specific thread context info. - X64Context* thread_context() const { return thread_context_; } + // Note that certain registers must be modified through Modify* proxy + // functions rather than directly: + // x86-64: + // - General-purpose registers (r##, r8-r15). + // - XMM registers. + // AArch64: + // - General-purpose registers (Xn), including FP and LR. + // - SIMD and floating-point registers (Vn). + HostThreadContext* thread_context() const { return thread_context_; } -#if XE_ARCH_AMD64 // Returns the program counter where the exception occurred. - // RIP on x64. - uint64_t pc() const { return thread_context_->rip; } - // Sets the program counter where execution will resume. - void set_resume_pc(uint64_t pc) { thread_context_->rip = pc; } -#else - // Returns the program counter where the exception occurred. - // RIP on x64. 
uint64_t pc() const { +#if XE_ARCH_AMD64 + return thread_context_->rip; +#elif XE_ARCH_ARM64 + return thread_context_->pc; +#else assert_always(); return 0; +#endif // XE_ARCH } + // Sets the program counter where execution will resume. - void set_resume_pc(uint64_t pc) { assert_always(); } -#endif + void set_resume_pc(uint64_t pc) { +#if XE_ARCH_AMD64 + thread_context_->rip = pc; +#elif XE_ARCH_ARM64 + thread_context_->pc = pc; +#else + assert_always(); +#endif // XE_ARCH + } + +#if XE_ARCH_AMD64 + // The index is relative to X64Register::kIntRegisterFirst. + uint64_t& ModifyIntRegister(uint32_t index) { + assert_true(index <= 15); + modified_int_registers_ |= UINT16_C(1) << index; + return thread_context_->int_registers[index]; + } + uint16_t modified_int_registers() const { return modified_int_registers_; } + vec128_t& ModifyXmmRegister(uint32_t index) { + assert_true(index <= 15); + modified_xmm_registers_ |= UINT16_C(1) << index; + return thread_context_->xmm_registers[index]; + } + uint16_t modified_xmm_registers() const { return modified_xmm_registers_; } +#elif XE_ARCH_ARM64 + uint64_t& ModifyXRegister(uint32_t index) { + assert_true(index <= 30); + modified_x_registers_ |= UINT32_C(1) << index; + return thread_context_->x[index]; + } + uint32_t modified_x_registers() const { return modified_x_registers_; } + vec128_t& ModifyVRegister(uint32_t index) { + assert_true(index <= 31); + modified_v_registers_ |= UINT32_C(1) << index; + return thread_context_->v[index]; + } + uint32_t modified_v_registers() const { return modified_v_registers_; } +#endif // XE_ARCH // In case of AV, address that was read from/written to. uint64_t fault_address() const { return fault_address_; } @@ -77,7 +203,14 @@ class Exception { private: Code code_ = Code::kInvalidException; - X64Context* thread_context_ = nullptr; + HostThreadContext* thread_context_ = nullptr; +#if XE_ARCH_AMD64 + uint16_t modified_int_registers_ = 0; + uint16_t modified_xmm_registers_ = 0; +#elif XE_ARCH_ARM64 + uint32_t modified_x_registers_ = 0; + uint32_t modified_v_registers_ = 0; +#endif // XE_ARCH uint64_t fault_address_ = 0; AccessViolationOperation access_violation_operation_ = AccessViolationOperation::kUnknown; diff --git a/src/xenia/base/exception_handler_linux.cc b/src/xenia/base/exception_handler_linux.cc deleted file mode 100644 index bc656a15d..000000000 --- a/src/xenia/base/exception_handler_linux.cc +++ /dev/null @@ -1,35 +0,0 @@ -/** - ****************************************************************************** - * Xenia : Xbox 360 Emulator Research Project * - ****************************************************************************** - * Copyright 2015 Ben Vanik. All rights reserved. * - * Released under the BSD license - see LICENSE in the root for more details. * - ****************************************************************************** - */ - -#include "xenia/base/exception_handler.h" - -#include "xenia/base/assert.h" -#include "xenia/base/math.h" -#include "xenia/base/platform_linux.h" - -namespace xe { - -// This can be as large as needed, but isn't often needed. -// As we will be sometimes firing many exceptions we want to avoid having to -// scan the table too much or invoke many custom handlers. -constexpr size_t kMaxHandlerCount = 8; - -// All custom handlers, left-aligned and null terminated. -// Executed in order. 
-std::pair handlers_[kMaxHandlerCount]; - -void ExceptionHandler::Install(Handler fn, void* data) { - // TODO(dougvj) stub -} - -void ExceptionHandler::Uninstall(Handler fn, void* data) { - // TODO(dougvj) stub -} - -} // namespace xe diff --git a/src/xenia/base/exception_handler_posix.cc b/src/xenia/base/exception_handler_posix.cc index 5c3aa81f0..0b11003ff 100644 --- a/src/xenia/base/exception_handler_posix.cc +++ b/src/xenia/base/exception_handler_posix.cc @@ -2,17 +2,285 @@ ****************************************************************************** * Xenia : Xbox 360 Emulator Research Project * ****************************************************************************** - * Copyright 2017 Ben Vanik. All rights reserved. * + * Copyright 2022 Ben Vanik. All rights reserved. * * Released under the BSD license - see LICENSE in the root for more details. * ****************************************************************************** */ #include "xenia/base/exception_handler.h" +#include +#include +#include + +#include "xenia/base/assert.h" +#include "xenia/base/host_thread_context.h" +#include "xenia/base/logging.h" +#include "xenia/base/math.h" +#include "xenia/base/platform.h" + namespace xe { -// TODO(DrChat): Exception handling on linux. -void ExceptionHandler::Install(Handler fn, void* data) {} -void ExceptionHandler::Uninstall(Handler fn, void* data) {} +bool signal_handlers_installed_ = false; +struct sigaction original_sigill_handler_; +struct sigaction original_sigsegv_handler_; -} // namespace xe \ No newline at end of file +// This can be as large as needed, but isn't often needed. +// As we will be sometimes firing many exceptions we want to avoid having to +// scan the table too much or invoke many custom handlers. +constexpr size_t kMaxHandlerCount = 8; + +// All custom handlers, left-aligned and null terminated. +// Executed in order. +std::pair handlers_[kMaxHandlerCount]; + +static void ExceptionHandlerCallback(int signal_number, siginfo_t* signal_info, + void* signal_context) { + mcontext_t& mcontext = + reinterpret_cast(signal_context)->uc_mcontext; + + HostThreadContext thread_context; + +#if XE_ARCH_AMD64 + thread_context.rip = uint64_t(mcontext.gregs[REG_RIP]); + thread_context.eflags = uint32_t(mcontext.gregs[REG_EFL]); + // The REG_ order may be different than the register indices in the + // instruction encoding. 
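One detail worth spelling out around the context capture that follows: glibc's REG_* ordering does not necessarily match the hardware register numbering used by X64Register (and by the Windows CONTEXT layout), so registers are copied field by field on capture and remapped through an explicit table on write-back. A toy sketch of that remap on a Linux x86-64 host, with an abbreviated table invented for the example:

#include <cstdint>
#include <ucontext.h>

// Element i is the glibc gregs index for X64Register::kIntRegisterFirst + i
// (only the first four registers shown for brevity).
static const int kIntRegisterMapSample[] = {REG_RAX, REG_RCX, REG_RDX, REG_RBX};

void WriteBackSample(mcontext_t& mcontext, const uint64_t int_registers[4]) {
  for (int i = 0; i < 4; ++i) {
    mcontext.gregs[kIntRegisterMapSample[i]] = greg_t(int_registers[i]);
  }
}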
+ thread_context.rax = uint64_t(mcontext.gregs[REG_RAX]); + thread_context.rcx = uint64_t(mcontext.gregs[REG_RCX]); + thread_context.rdx = uint64_t(mcontext.gregs[REG_RDX]); + thread_context.rbx = uint64_t(mcontext.gregs[REG_RBX]); + thread_context.rsp = uint64_t(mcontext.gregs[REG_RSP]); + thread_context.rbp = uint64_t(mcontext.gregs[REG_RBP]); + thread_context.rsi = uint64_t(mcontext.gregs[REG_RSI]); + thread_context.rdi = uint64_t(mcontext.gregs[REG_RDI]); + thread_context.r8 = uint64_t(mcontext.gregs[REG_R8]); + thread_context.r9 = uint64_t(mcontext.gregs[REG_R9]); + thread_context.r10 = uint64_t(mcontext.gregs[REG_R10]); + thread_context.r11 = uint64_t(mcontext.gregs[REG_R11]); + thread_context.r12 = uint64_t(mcontext.gregs[REG_R12]); + thread_context.r13 = uint64_t(mcontext.gregs[REG_R13]); + thread_context.r14 = uint64_t(mcontext.gregs[REG_R14]); + thread_context.r15 = uint64_t(mcontext.gregs[REG_R15]); + std::memcpy(thread_context.xmm_registers, mcontext.fpregs->_xmm, + sizeof(thread_context.xmm_registers)); +#elif XE_ARCH_ARM64 + std::memcpy(thread_context.x, mcontext.regs, sizeof(thread_context.x)); + thread_context.sp = mcontext.sp; + thread_context.pc = mcontext.pc; + thread_context.pstate = mcontext.pstate; + struct fpsimd_context* mcontext_fpsimd = nullptr; + struct esr_context* mcontext_esr = nullptr; + for (struct _aarch64_ctx* mcontext_extension = + reinterpret_cast(mcontext.__reserved); + mcontext_extension->magic; + mcontext_extension = reinterpret_cast( + reinterpret_cast(mcontext_extension) + + mcontext_extension->size)) { + switch (mcontext_extension->magic) { + case FPSIMD_MAGIC: + mcontext_fpsimd = + reinterpret_cast(mcontext_extension); + break; + case ESR_MAGIC: + mcontext_esr = + reinterpret_cast(mcontext_extension); + break; + default: + break; + } + } + assert_not_null(mcontext_fpsimd); + if (mcontext_fpsimd) { + thread_context.fpsr = mcontext_fpsimd->fpsr; + thread_context.fpcr = mcontext_fpsimd->fpcr; + std::memcpy(thread_context.v, mcontext_fpsimd->vregs, + sizeof(thread_context.v)); + } +#endif // XE_ARCH + + Exception ex; + switch (signal_number) { + case SIGILL: + ex.InitializeIllegalInstruction(&thread_context); + break; + case SIGSEGV: { + Exception::AccessViolationOperation access_violation_operation; +#if XE_ARCH_AMD64 + // x86_pf_error_code::X86_PF_WRITE + constexpr uint64_t kX86PageFaultErrorCodeWrite = UINT64_C(1) << 1; + access_violation_operation = + (uint64_t(mcontext.gregs[REG_ERR]) & kX86PageFaultErrorCodeWrite) + ? Exception::AccessViolationOperation::kWrite + : Exception::AccessViolationOperation::kRead; +#elif XE_ARCH_ARM64 + // For a Data Abort (EC - ESR_EL1 bits 31:26 - 0b100100 from a lower + // Exception Level, 0b100101 without a change in the Exception Level), + // bit 6 is 0 for reading from a memory location, 1 for writing to a + // memory location. + if (mcontext_esr && ((mcontext_esr->esr >> 26) & 0b111110) == 0b100100) { + access_violation_operation = + (mcontext_esr->esr & (UINT64_C(1) << 6)) + ? Exception::AccessViolationOperation::kWrite + : Exception::AccessViolationOperation::kRead; + } else { + // Determine the memory access direction based on which instruction has + // requested it. + // esr_context may be unavailable on certain hosts (for instance, on + // Android, it was added only in NDK r16 - which is the first NDK + // version to support the Android API level 27, while NDK r15 doesn't + // have esr_context in its API 26 sigcontext.h). 
+ // On AArch64 (unlike on AArch32), the program counter is the address of + // the currently executing instruction. + bool instruction_is_store; + if (IsArm64LoadPrefetchStore( + *reinterpret_cast(mcontext.pc), + instruction_is_store)) { + access_violation_operation = + instruction_is_store ? Exception::AccessViolationOperation::kWrite + : Exception::AccessViolationOperation::kRead; + } else { + assert_always( + "No ESR in the exception thread context, or it's not a Data " + "Abort, and the faulting instruction is not a known load, " + "prefetch or store instruction"); + access_violation_operation = + Exception::AccessViolationOperation::kUnknown; + } + } +#else + access_violation_operation = + Exception::AccessViolationOperation::kUnknown; +#endif // XE_ARCH + ex.InitializeAccessViolation( + &thread_context, reinterpret_cast(signal_info->si_addr), + access_violation_operation); + } break; + default: + assert_unhandled_case(signal_number); + } + + for (size_t i = 0; i < xe::countof(handlers_) && handlers_[i].first; ++i) { + if (handlers_[i].first(&ex, handlers_[i].second)) { + // Exception handled. +#if XE_ARCH_AMD64 + mcontext.gregs[REG_RIP] = greg_t(thread_context.rip); + mcontext.gregs[REG_EFL] = greg_t(thread_context.eflags); + uint32_t modified_register_index; + // The order must match the order in X64Register. + static const size_t kIntRegisterMap[] = { + REG_RAX, REG_RCX, REG_RDX, REG_RBX, REG_RSP, REG_RBP, + REG_RSI, REG_RDI, REG_R8, REG_R9, REG_R10, REG_R11, + REG_R12, REG_R13, REG_R14, REG_R15, + }; + uint16_t modified_int_registers_remaining = ex.modified_int_registers(); + while (xe::bit_scan_forward(modified_int_registers_remaining, + &modified_register_index)) { + modified_int_registers_remaining &= + ~(UINT16_C(1) << modified_register_index); + mcontext.gregs[kIntRegisterMap[modified_register_index]] = + thread_context.int_registers[modified_register_index]; + } + uint16_t modified_xmm_registers_remaining = ex.modified_xmm_registers(); + while (xe::bit_scan_forward(modified_xmm_registers_remaining, + &modified_register_index)) { + modified_xmm_registers_remaining &= + ~(UINT16_C(1) << modified_register_index); + std::memcpy(&mcontext.fpregs->_xmm[modified_register_index], + &thread_context.xmm_registers[modified_register_index], + sizeof(vec128_t)); + } +#elif XE_ARCH_ARM64 + uint32_t modified_register_index; + uint32_t modified_x_registers_remaining = ex.modified_x_registers(); + while (xe::bit_scan_forward(modified_x_registers_remaining, + &modified_register_index)) { + modified_x_registers_remaining &= + ~(UINT32_C(1) << modified_register_index); + mcontext.regs[modified_register_index] = + thread_context.x[modified_register_index]; + } + mcontext.sp = thread_context.sp; + mcontext.pc = thread_context.pc; + mcontext.pstate = thread_context.pstate; + if (mcontext_fpsimd) { + mcontext_fpsimd->fpsr = thread_context.fpsr; + mcontext_fpsimd->fpcr = thread_context.fpcr; + uint32_t modified_v_registers_remaining = ex.modified_v_registers(); + while (xe::bit_scan_forward(modified_v_registers_remaining, + &modified_register_index)) { + modified_v_registers_remaining &= + ~(UINT32_C(1) << modified_register_index); + std::memcpy(&mcontext_fpsimd->vregs[modified_register_index], + &thread_context.v[modified_register_index], + sizeof(vec128_t)); + mcontext.regs[modified_register_index] = + thread_context.x[modified_register_index]; + } + } +#endif // XE_ARCH + return; + } + } +} + +void ExceptionHandler::Install(Handler fn, void* data) { + if (!signal_handlers_installed_) { + struct 
sigaction signal_handler; + + std::memset(&signal_handler, 0, sizeof(signal_handler)); + signal_handler.sa_sigaction = ExceptionHandlerCallback; + signal_handler.sa_flags = SA_SIGINFO; + + if (sigaction(SIGILL, &signal_handler, &original_sigill_handler_) != 0) { + assert_always("Failed to install new SIGILL handler"); + } + if (sigaction(SIGSEGV, &signal_handler, &original_sigsegv_handler_) != 0) { + assert_always("Failed to install new SIGSEGV handler"); + } + signal_handlers_installed_ = true; + } + + for (size_t i = 0; i < xe::countof(handlers_); ++i) { + if (!handlers_[i].first) { + handlers_[i].first = fn; + handlers_[i].second = data; + return; + } + } + assert_always("Too many exception handlers installed"); +} + +void ExceptionHandler::Uninstall(Handler fn, void* data) { + for (size_t i = 0; i < xe::countof(handlers_); ++i) { + if (handlers_[i].first == fn && handlers_[i].second == data) { + for (; i < xe::countof(handlers_) - 1; ++i) { + handlers_[i] = handlers_[i + 1]; + } + handlers_[i].first = nullptr; + handlers_[i].second = nullptr; + break; + } + } + + bool has_any = false; + for (size_t i = 0; i < xe::countof(handlers_); ++i) { + if (handlers_[i].first) { + has_any = true; + break; + } + } + if (!has_any) { + if (signal_handlers_installed_) { + if (sigaction(SIGILL, &original_sigill_handler_, NULL) != 0) { + assert_always("Failed to restore original SIGILL handler"); + } + if (sigaction(SIGSEGV, &original_sigsegv_handler_, NULL) != 0) { + assert_always("Failed to restore original SIGSEGV handler"); + } + signal_handlers_installed_ = false; + } + } +} + +} // namespace xe diff --git a/src/xenia/base/exception_handler_win.cc b/src/xenia/base/exception_handler_win.cc index 6f2ae3216..786a129a5 100644 --- a/src/xenia/base/exception_handler_win.cc +++ b/src/xenia/base/exception_handler_win.cc @@ -35,8 +35,7 @@ LONG CALLBACK ExceptionHandlerCallback(PEXCEPTION_POINTERS ex_info) { return EXCEPTION_CONTINUE_SEARCH; } - // TODO(benvanik): avoid this by mapping X64Context virtual? - X64Context thread_context; + HostThreadContext thread_context; thread_context.rip = ex_info->ContextRecord->Rip; thread_context.eflags = ex_info->ContextRecord->EFlags; std::memcpy(thread_context.int_registers, &ex_info->ContextRecord->Rax, @@ -79,8 +78,26 @@ LONG CALLBACK ExceptionHandlerCallback(PEXCEPTION_POINTERS ex_info) { for (size_t i = 0; i < xe::countof(handlers_) && handlers_[i].first; ++i) { if (handlers_[i].first(&ex, handlers_[i].second)) { // Exception handled. - // TODO(benvanik): update all thread state? Dirty flags? 
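For reference, a consumer registers a callback through ExceptionHandler::Install and returns true once it has consumed the exception; register changes must go through the Modify* proxies declared in exception_handler.h so the platform handlers know which values to write back. A hedged usage sketch assuming an x86-64 host - the callback body and the skipped instruction length are hypothetical:

#include "xenia/base/exception_handler.h"

// Hypothetical handler: consume access violations by zeroing rax and skipping
// past the faulting instruction. Purely illustrative behavior.
bool MyExceptionCallback(xe::Exception* ex, void* data) {
  if (ex->code() != xe::Exception::Code::kAccessViolation) {
    return false;  // Not ours - the next installed handler gets a chance.
  }
  // Index 0 is rax (relative to X64Register::kIntRegisterFirst); going through
  // ModifyIntRegister marks the register dirty so it gets written back.
  ex->ModifyIntRegister(0) = 0;
  ex->set_resume_pc(ex->pc() + 2);  // Assumed 2-byte faulting instruction.
  return true;  // Handled - execution resumes at the new program counter.
}

void RegisterMyHandler() {
  xe::ExceptionHandler::Install(MyExceptionCallback, nullptr);
}

The write-back code in the platform handlers then copies exactly the registers such a callback marked as modified back into the OS-provided context.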
ex_info->ContextRecord->Rip = thread_context.rip; + ex_info->ContextRecord->EFlags = thread_context.eflags; + uint32_t modified_register_index; + uint16_t modified_int_registers_remaining = ex.modified_int_registers(); + while (xe::bit_scan_forward(modified_int_registers_remaining, + &modified_register_index)) { + modified_int_registers_remaining &= + ~(UINT16_C(1) << modified_register_index); + (&ex_info->ContextRecord->Rax)[modified_register_index] = + thread_context.int_registers[modified_register_index]; + } + uint16_t modified_xmm_registers_remaining = ex.modified_xmm_registers(); + while (xe::bit_scan_forward(modified_xmm_registers_remaining, + &modified_register_index)) { + modified_xmm_registers_remaining &= + ~(UINT16_C(1) << modified_register_index); + std::memcpy(&ex_info->ContextRecord->Xmm0 + modified_register_index, + &thread_context.xmm_registers[modified_register_index], + sizeof(vec128_t)); + } return EXCEPTION_CONTINUE_EXECUTION; } } diff --git a/src/xenia/base/host_thread_context.cc b/src/xenia/base/host_thread_context.cc new file mode 100644 index 000000000..bf668bdd3 --- /dev/null +++ b/src/xenia/base/host_thread_context.cc @@ -0,0 +1,95 @@ +/** + ****************************************************************************** + * Xenia : Xbox 360 Emulator Research Project * + ****************************************************************************** + * Copyright 2022 Ben Vanik. All rights reserved. * + * Released under the BSD license - see LICENSE in the root for more details. * + ****************************************************************************** + */ + +#include "xenia/base/host_thread_context.h" + +#include "xenia/base/assert.h" +#include "xenia/base/platform.h" +#include "xenia/base/string_util.h" + +namespace xe { + +// NOTE: this order matches 1:1 with the HostRegister enums. +static const char* kRegisterNames[] = { +#if XE_ARCH_AMD64 + "rip", "eflags", "rax", "rcx", "rdx", "rbx", "rsp", + "rbp", "rsi", "rdi", "r8", "r9", "r10", "r11", + "r12", "r13", "r14", "r15", "xmm0", "xmm1", "xmm2", + "xmm3", "xmm4", "xmm5", "xmm6", "xmm7", "xmm8", "xmm9", + "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15", +#elif XE_ARCH_ARM64 + "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", + "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x18", "x19", + "x20", "x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28", "x29", + "x30", "sp", "pc", "pstate", "fpsr", "fpcr", "v0", "v1", "v2", "v3", + "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13", + "v14", "v15", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", + "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31", +#endif // XE_ARCH +}; + +const char* HostThreadContext::GetRegisterName(HostRegister reg) { + return kRegisterNames[int(reg)]; +} + +std::string HostThreadContext::GetStringFromValue(HostRegister reg, + bool hex) const { +#if XE_ARCH_AMD64 + switch (reg) { + case X64Register::kRip: + return hex ? string_util::to_hex_string(rip) : std::to_string(rip); + case X64Register::kEflags: + return hex ? string_util::to_hex_string(eflags) : std::to_string(eflags); + default: + if (reg >= X64Register::kIntRegisterFirst && + reg <= X64Register::kIntRegisterLast) { + auto value = + int_registers[int(reg) - int(X64Register::kIntRegisterFirst)]; + return hex ? string_util::to_hex_string(value) : std::to_string(value); + } else if (reg >= X64Register::kXmm0 && reg <= X64Register::kXmm15) { + auto value = xmm_registers[int(reg) - int(X64Register::kXmm0)]; + return hex ? 
string_util::to_hex_string(value) : xe::to_string(value); + } else { + assert_unhandled_case(reg); + return std::string(); + } + } +#elif XE_ARCH_ARM64 + switch (reg) { + case Arm64Register::kSp: + return hex ? string_util::to_hex_string(sp) : std::to_string(sp); + case Arm64Register::kPc: + return hex ? string_util::to_hex_string(pc) : std::to_string(pc); + case Arm64Register::kPstate: + return hex ? string_util::to_hex_string(pstate) : std::to_string(pstate); + case Arm64Register::kFpsr: + return hex ? string_util::to_hex_string(fpsr) : std::to_string(fpsr); + case Arm64Register::kFpcr: + return hex ? string_util::to_hex_string(fpcr) : std::to_string(fpcr); + default: + if (reg >= Arm64Register::kX0 && reg <= Arm64Register::kX30) { + auto value = x[int(reg) - int(Arm64Register::kX0)]; + return hex ? string_util::to_hex_string(value) : std::to_string(value); + } else if (reg >= Arm64Register::kV0 && reg <= Arm64Register::kV31) { + auto value = v[int(reg) - int(Arm64Register::kV0)]; + return hex ? string_util::to_hex_string(value) : xe::to_string(value); + } else { + assert_unhandled_case(reg); + return std::string(); + } + } +#else + assert_always( + "HostThreadContext::GetStringFromValue not implemented for the target " + "CPU architecture"); + return std::string(); +#endif // XE_ARCH +} + +} // namespace xe diff --git a/src/xenia/base/x64_context.h b/src/xenia/base/host_thread_context.h similarity index 52% rename from src/xenia/base/x64_context.h rename to src/xenia/base/host_thread_context.h index c868e9ed8..554d09f44 100644 --- a/src/xenia/base/x64_context.h +++ b/src/xenia/base/host_thread_context.h @@ -2,13 +2,13 @@ ****************************************************************************** * Xenia : Xbox 360 Emulator Research Project * ****************************************************************************** - * Copyright 2015 Ben Vanik. All rights reserved. * + * Copyright 2022 Ben Vanik. All rights reserved. * * Released under the BSD license - see LICENSE in the root for more details. * ****************************************************************************** */ -#ifndef XENIA_BASE_X64_CONTEXT_H_ -#define XENIA_BASE_X64_CONTEXT_H_ +#ifndef XENIA_BASE_HOST_THREAD_CONTEXT_H_ +#define XENIA_BASE_HOST_THREAD_CONTEXT_H_ #include #include @@ -22,15 +22,18 @@ namespace xe { -class X64Context; +// NOTE: The order of the registers in the enumerations must match the order in +// the string table in host_thread_context.cc, as well as remapping tables in +// exception handler implementations. -#if XE_ARCH_AMD64 enum class X64Register { - // NOTE: this order matches 1:1 with the order in the X64Context. - // NOTE: this order matches 1:1 with a string table in the x64_context.cc. kRip, kEflags, - kRax, + + kIntRegisterFirst, + // The order matches the indices in the instruction encoding, as well as the + // Windows CONTEXT structure. + kRax = kIntRegisterFirst, kRcx, kRdx, kRbx, @@ -46,6 +49,8 @@ enum class X64Register { kR13, kR14, kR15, + kIntRegisterLast = kR15, + kXmm0, kXmm1, kXmm2, @@ -64,8 +69,91 @@ enum class X64Register { kXmm15, }; -class X64Context { +enum class Arm64Register { + kX0, + kX1, + kX2, + kX3, + kX4, + kX5, + kX6, + kX7, + kX8, + kX9, + kX10, + kX11, + kX12, + kX13, + kX14, + kX15, + kX16, + kX17, + kX18, + kX19, + kX20, + kX21, + kX22, + kX23, + kX24, + kX25, + kX26, + kX27, + kX28, + // FP (frame pointer). + kX29, + // LR (link register). 
+ kX30, + kSp, + kPc, + kPstate, + kFpsr, + kFpcr, + // The whole 128 bits of a Vn register are also known as Qn (quadword). + kV0, + kV1, + kV2, + kV3, + kV4, + kV5, + kV6, + kV7, + kV8, + kV9, + kV10, + kV11, + kV12, + kV13, + kV14, + kV15, + kV16, + kV17, + kV18, + kV19, + kV20, + kV21, + kV22, + kV23, + kV24, + kV25, + kV26, + kV27, + kV28, + kV29, + kV30, + kV31, +}; + +#if XE_ARCH_AMD64 +using HostRegister = X64Register; +#elif XE_ARCH_ARM64 +using HostRegister = Arm64Register; +#else +enum class HostRegister {}; +#endif // XE_ARCH + +class HostThreadContext { public: +#if XE_ARCH_AMD64 uint64_t rip; uint32_t eflags; union { @@ -89,7 +177,6 @@ class X64Context { }; uint64_t int_registers[16]; }; - union { struct { vec128_t xmm0; @@ -111,12 +198,19 @@ class X64Context { }; vec128_t xmm_registers[16]; }; +#elif XE_ARCH_ARM64 + uint64_t x[31]; + uint64_t sp; + uint64_t pc; + uint64_t pstate; + uint32_t fpsr; + uint32_t fpcr; + vec128_t v[32]; +#endif // XE_ARCH - static const char* GetRegisterName(X64Register reg); - std::string GetStringFromValue(X64Register reg, bool hex) const; - void SetValueFromString(X64Register reg, std::string value, bool hex); + static const char* GetRegisterName(HostRegister reg); + std::string GetStringFromValue(HostRegister reg, bool hex) const; }; -#endif // XE_ARCH_AMD64 } // namespace xe diff --git a/src/xenia/base/x64_context.cc b/src/xenia/base/x64_context.cc deleted file mode 100644 index fc6027aeb..000000000 --- a/src/xenia/base/x64_context.cc +++ /dev/null @@ -1,67 +0,0 @@ -/** - ****************************************************************************** - * Xenia : Xbox 360 Emulator Research Project * - ****************************************************************************** - * Copyright 2015 Ben Vanik. All rights reserved. * - * Released under the BSD license - see LICENSE in the root for more details. * - ****************************************************************************** - */ - -#include "xenia/base/x64_context.h" - -#include "xenia/base/assert.h" -#include "xenia/base/platform.h" -#include "xenia/base/string_util.h" - -namespace xe { - -#if XE_ARCH_AMD64 - -// NOTE: this order matches 1:1 with the X64Register enum. -static const char* kRegisterNames[] = { - "rip", "eflags", "rax", "rcx", "rdx", "rbx", "rsp", - "rbp", "rsi", "rdi", "r8", "r9", "r10", "r11", - "r12", "r13", "r14", "r15", "xmm0", "xmm1", "xmm2", - "xmm3", "xmm4", "xmm5", "xmm6", "xmm7", "xmm8", "xmm9", - "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15", -}; - -const char* X64Context::GetRegisterName(X64Register reg) { - return kRegisterNames[static_cast(reg)]; -} - -std::string X64Context::GetStringFromValue(X64Register reg, bool hex) const { - switch (reg) { - case X64Register::kRip: - return hex ? string_util::to_hex_string(rip) : std::to_string(rip); - case X64Register::kEflags: - return hex ? string_util::to_hex_string(eflags) : std::to_string(eflags); - default: - if (static_cast(reg) >= static_cast(X64Register::kRax) && - static_cast(reg) <= static_cast(X64Register::kR15)) { - auto value = int_registers[static_cast(reg) - - static_cast(X64Register::kRax)]; - return hex ? string_util::to_hex_string(value) : std::to_string(value); - } else if (static_cast(reg) >= - static_cast(X64Register::kXmm0) && - static_cast(reg) <= - static_cast(X64Register::kXmm15)) { - auto value = xmm_registers[static_cast(reg) - - static_cast(X64Register::kXmm0)]; - return hex ? 
string_util::to_hex_string(value) : xe::to_string(value); - } else { - assert_unhandled_case(reg); - return ""; - } - } -} - -void X64Context::SetValueFromString(X64Register reg, std::string value, - bool hex) { - // TODO(benvanik): set value from string. - assert_always(false); -} - -#endif // XE_ARCH_AMD64 - -} // namespace xe diff --git a/src/xenia/cpu/backend/null_backend.cc b/src/xenia/cpu/backend/null_backend.cc new file mode 100644 index 000000000..e401b5f41 --- /dev/null +++ b/src/xenia/cpu/backend/null_backend.cc @@ -0,0 +1,36 @@ +/** + ****************************************************************************** + * Xenia : Xbox 360 Emulator Research Project * + ****************************************************************************** + * Copyright 2022 Ben Vanik. All rights reserved. * + * Released under the BSD license - see LICENSE in the root for more details. * + ****************************************************************************** + */ + +#include "xenia/cpu/backend/null_backend.h" + +#include "xenia/cpu/backend/assembler.h" +#include "xenia/cpu/function.h" + +namespace xe { +namespace cpu { +namespace backend { + +void NullBackend::CommitExecutableRange(uint32_t guest_low, + uint32_t guest_high) {} + +std::unique_ptr NullBackend::CreateAssembler() { return nullptr; } + +std::unique_ptr NullBackend::CreateGuestFunction( + Module* module, uint32_t address) { + return nullptr; +} + +uint64_t NullBackend::CalculateNextHostInstruction(ThreadDebugInfo* thread_info, + uint64_t current_pc) { + return current_pc; +} + +} // namespace backend +} // namespace cpu +} // namespace xe diff --git a/src/xenia/cpu/backend/null_backend.h b/src/xenia/cpu/backend/null_backend.h new file mode 100644 index 000000000..957132269 --- /dev/null +++ b/src/xenia/cpu/backend/null_backend.h @@ -0,0 +1,36 @@ +/** + ****************************************************************************** + * Xenia : Xbox 360 Emulator Research Project * + ****************************************************************************** + * Copyright 2022 Ben Vanik. All rights reserved. * + * Released under the BSD license - see LICENSE in the root for more details. 
* + ****************************************************************************** + */ + +#ifndef XENIA_CPU_BACKEND_NULL_BACKEND_H_ +#define XENIA_CPU_BACKEND_NULL_BACKEND_H_ + +#include "xenia/cpu/backend/backend.h" + +namespace xe { +namespace cpu { +namespace backend { + +class NullBackend : public Backend { + public: + void CommitExecutableRange(uint32_t guest_low, uint32_t guest_high) override; + + std::unique_ptr CreateAssembler() override; + + std::unique_ptr CreateGuestFunction(Module* module, + uint32_t address) override; + + uint64_t CalculateNextHostInstruction(ThreadDebugInfo* thread_info, + uint64_t current_pc) override; +}; + +} // namespace backend +} // namespace cpu +} // namespace xe + +#endif // XENIA_CPU_BACKEND_NULL_BACKEND_H_ diff --git a/src/xenia/cpu/backend/x64/x64_backend.cc b/src/xenia/cpu/backend/x64/x64_backend.cc index 1da4ba9f3..31e1dc9fd 100644 --- a/src/xenia/cpu/backend/x64/x64_backend.cc +++ b/src/xenia/cpu/backend/x64/x64_backend.cc @@ -163,7 +163,7 @@ std::unique_ptr X64Backend::CreateGuestFunction( return std::make_unique(module, address); } -uint64_t ReadCapstoneReg(X64Context* context, x86_reg reg) { +uint64_t ReadCapstoneReg(HostThreadContext* context, x86_reg reg) { switch (reg) { case X86_REG_RAX: return context->rax; diff --git a/src/xenia/cpu/backend/x64/x64_backend.h b/src/xenia/cpu/backend/x64/x64_backend.h index 4cb69e040..470988806 100644 --- a/src/xenia/cpu/backend/x64/x64_backend.h +++ b/src/xenia/cpu/backend/x64/x64_backend.h @@ -27,8 +27,6 @@ namespace x64 { class X64CodeCache; -#define XENIA_HAS_X64_BACKEND 1 - typedef void* (*HostToGuestThunk)(void* target, void* arg0, void* arg1); typedef void* (*GuestToHostThunk)(void* target, void* arg0, void* arg1); typedef void (*ResolveFunctionThunk)(); diff --git a/src/xenia/cpu/hir/value.cc b/src/xenia/cpu/hir/value.cc index 4358687c4..999182e35 100644 --- a/src/xenia/cpu/hir/value.cc +++ b/src/xenia/cpu/hir/value.cc @@ -1414,14 +1414,17 @@ void Value::DotProduct3(Value* other) { assert_true(this->type == VEC128_TYPE && other->type == VEC128_TYPE); switch (type) { case VEC128_TYPE: { - alignas(16) float result[4]; - __m128 src1 = _mm_load_ps(constant.v128.f32); - __m128 src2 = _mm_load_ps(other->constant.v128.f32); - __m128 dest = _mm_dp_ps(src1, src2, 0b01110001); - _mm_store_ps(result, dest); // TODO(rick): is this sane? type = FLOAT32_TYPE; - constant.f32 = result[0]; + // Using x86 DPPS ordering for consistency with x86-64 code generation: + // (X1 * X2 + Y1 * Y2) + (Z1 * Z2 + 0.0f) + // (+ 0.0f for zero sign, as zero imm8[4:7] bits result in zero terms, + // not in complete exclusion of them) + // TODO(Triang3l): NaN on overflow. + constant.f32 = + (constant.v128.f32[0] * other->constant.v128.f32[0] + + constant.v128.f32[1] * other->constant.v128.f32[1]) + + (constant.v128.f32[2] * other->constant.v128.f32[2] + 0.0f); } break; default: assert_unhandled_case(type); @@ -1433,14 +1436,15 @@ void Value::DotProduct4(Value* other) { assert_true(this->type == VEC128_TYPE && other->type == VEC128_TYPE); switch (type) { case VEC128_TYPE: { - alignas(16) float result[4]; - __m128 src1 = _mm_load_ps(constant.v128.f32); - __m128 src2 = _mm_load_ps(other->constant.v128.f32); - __m128 dest = _mm_dp_ps(src1, src2, 0b11110001); - _mm_store_ps(result, dest); // TODO(rick): is this sane? type = FLOAT32_TYPE; - constant.f32 = result[0]; + // Using x86 DPPS ordering for consistency with x86-64 code generation: + // (X1 * X2 + Y1 * Y2) + (Z1 * Z2 + W1 * W2) + // TODO(Triang3l): NaN on overflow. 
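Because float addition is not associative, the constant-folded value computed below groups its terms exactly the way the x86 DPPS instruction accumulates them, so folding stays bit-identical to what the x64 backend produces at run time. A standalone sketch of that grouping (plain C++ written for illustration, not the HIR code itself):

// Grouping matters: (a + b) + (c + d) can round differently from
// ((a + b) + c) + d in float arithmetic.
float DotProduct4DppsOrder(const float a[4], const float b[4]) {
  return (a[0] * b[0] + a[1] * b[1]) + (a[2] * b[2] + a[3] * b[3]);
}

float DotProduct3DppsOrder(const float a[4], const float b[4]) {
  // The deselected W lane still contributes a literal +0.0f term - DPPS zeroes
  // the term rather than dropping it, which matters for the sign of zero.
  return (a[0] * b[0] + a[1] * b[1]) + (a[2] * b[2] + 0.0f);
}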
+ constant.f32 = (constant.v128.f32[0] * other->constant.v128.f32[0] + + constant.v128.f32[1] * other->constant.v128.f32[1]) + + (constant.v128.f32[2] * other->constant.v128.f32[2] + + constant.v128.f32[3] * other->constant.v128.f32[3]); } break; default: assert_unhandled_case(type); diff --git a/src/xenia/cpu/mmio_handler.cc b/src/xenia/cpu/mmio_handler.cc index 3bcefb6e3..eb28703d1 100644 --- a/src/xenia/cpu/mmio_handler.cc +++ b/src/xenia/cpu/mmio_handler.cc @@ -18,6 +18,7 @@ #include "xenia/base/exception_handler.h" #include "xenia/base/logging.h" #include "xenia/base/memory.h" +#include "xenia/base/platform.h" namespace xe { namespace cpu { @@ -114,28 +115,10 @@ bool MMIOHandler::CheckStore(uint32_t virtual_address, uint32_t value) { return false; } -struct DecodedMov { - size_t length; - // Inidicates this is a load (or conversely a store). - bool is_load; - // Indicates the memory must be swapped. - bool byte_swap; - // Source (for store) or target (for load) register. - // AX CX DX BX SP BP SI DI // REX.R=0 - // R8 R9 R10 R11 R12 R13 R14 R15 // REX.R=1 - uint32_t value_reg; - // [base + (index * scale) + displacement] - bool mem_has_base; - uint8_t mem_base_reg; - bool mem_has_index; - uint8_t mem_index_reg; - uint8_t mem_scale; - int32_t mem_displacement; - bool is_constant; - int32_t constant; -}; - -bool TryDecodeMov(const uint8_t* p, DecodedMov* mov) { +bool MMIOHandler::TryDecodeLoadStore(const uint8_t* p, + DecodedLoadStore& decoded_out) { + std::memset(&decoded_out, 0, sizeof(decoded_out)); +#if XE_ARCH_AMD64 uint8_t i = 0; // Current byte decode index. uint8_t rex = 0; if ((p[i] & 0xF0) == 0x40) { @@ -148,8 +131,8 @@ bool TryDecodeMov(const uint8_t* p, DecodedMov* mov) { // 44 0f 38 f1 a4 02 00 movbe DWORD PTR [rdx+rax*1+0x0],r12d // 42 0f 38 f1 8c 22 00 movbe DWORD PTR [rdx+r12*1+0x0],ecx // 0f 38 f1 8c 02 00 00 movbe DWORD PTR [rdx + rax * 1 + 0x0], ecx - mov->is_load = false; - mov->byte_swap = true; + decoded_out.is_load = false; + decoded_out.byte_swap = true; i += 3; } else if (p[i] == 0x0F && p[i + 1] == 0x38 && p[i + 2] == 0xF0) { // MOVBE r32, m32 (load) @@ -159,8 +142,8 @@ bool TryDecodeMov(const uint8_t* p, DecodedMov* mov) { // 46 0f 38 f0 a4 22 00 movbe r12d,DWORD PTR [rdx+r12*1+0x0] // 0f 38 f0 8c 02 00 00 movbe ecx,DWORD PTR [rdx+rax*1+0x0] // 0F 38 F0 1C 02 movbe ebx,dword ptr [rdx+rax] - mov->is_load = true; - mov->byte_swap = true; + decoded_out.is_load = true; + decoded_out.byte_swap = true; i += 3; } else if (p[i] == 0x89) { // MOV m32, r32 (store) @@ -168,8 +151,8 @@ bool TryDecodeMov(const uint8_t* p, DecodedMov* mov) { // 44 89 24 02 mov DWORD PTR[rdx + rax * 1], r12d // 42 89 0c 22 mov DWORD PTR[rdx + r12 * 1], ecx // 89 0c 02 mov DWORD PTR[rdx + rax * 1], ecx - mov->is_load = false; - mov->byte_swap = false; + decoded_out.is_load = false; + decoded_out.byte_swap = false; ++i; } else if (p[i] == 0x8B) { // MOV r32, m32 (load) @@ -178,16 +161,16 @@ bool TryDecodeMov(const uint8_t* p, DecodedMov* mov) { // 42 8b 0c 22 mov ecx, DWORD PTR[rdx + r12 * 1] // 46 8b 24 22 mov r12d, DWORD PTR[rdx + r12 * 1] // 8b 0c 02 mov ecx, DWORD PTR[rdx + rax * 1] - mov->is_load = true; - mov->byte_swap = false; + decoded_out.is_load = true; + decoded_out.byte_swap = false; ++i; } else if (p[i] == 0xC7) { // MOV m32, simm32 // https://web.archive.org/web/20161017042413/https://www.asmpedia.org/index.php?title=MOV // C7 04 02 02 00 00 00 mov dword ptr [rdx+rax],2 - mov->is_load = false; - mov->byte_swap = false; - mov->is_constant = true; + decoded_out.is_load = false; + 
decoded_out.byte_swap = false; + decoded_out.is_constant = true; ++i; } else { return false; @@ -204,13 +187,13 @@ bool TryDecodeMov(const uint8_t* p, DecodedMov* mov) { uint8_t mod = (modrm & 0b11000000) >> 6; uint8_t reg = (modrm & 0b00111000) >> 3; uint8_t rm = (modrm & 0b00000111); - mov->value_reg = reg + (rex_r ? 8 : 0); - mov->mem_has_base = false; - mov->mem_base_reg = 0; - mov->mem_has_index = false; - mov->mem_index_reg = 0; - mov->mem_scale = 1; - mov->mem_displacement = 0; + decoded_out.value_reg = reg + (rex_r ? 8 : 0); + decoded_out.mem_has_base = false; + decoded_out.mem_base_reg = 0; + decoded_out.mem_has_index = false; + decoded_out.mem_index_reg = 0; + decoded_out.mem_scale = 1; + decoded_out.mem_displacement = 0; bool has_sib = false; switch (rm) { case 0b100: // SIB @@ -221,17 +204,17 @@ bool TryDecodeMov(const uint8_t* p, DecodedMov* mov) { // RIP-relative not supported. return false; } - mov->mem_has_base = true; - mov->mem_base_reg = rm + (rex_b ? 8 : 0); + decoded_out.mem_has_base = true; + decoded_out.mem_base_reg = rm + (rex_b ? 8 : 0); break; default: - mov->mem_has_base = true; - mov->mem_base_reg = rm + (rex_b ? 8 : 0); + decoded_out.mem_has_base = true; + decoded_out.mem_base_reg = rm + (rex_b ? 8 : 0); break; } if (has_sib) { uint8_t sib = p[i++]; - mov->mem_scale = 1 << ((sib & 0b11000000) >> 8); + decoded_out.mem_scale = 1 << ((sib & 0b11000000) >> 8); uint8_t sib_index = (sib & 0b00111000) >> 3; uint8_t sib_base = (sib & 0b00000111); switch (sib_index) { @@ -239,8 +222,9 @@ bool TryDecodeMov(const uint8_t* p, DecodedMov* mov) { // No index. break; default: - mov->mem_has_index = true; - mov->mem_index_reg = sib_index + (rex_x ? 8 : 0); + decoded_out.mem_has_index = true; + decoded_out.mem_index_reg = sib_index + (rex_x ? 8 : 0); + decoded_out.mem_index_size = sizeof(uint64_t); break; } switch (sib_base) { @@ -249,29 +233,162 @@ bool TryDecodeMov(const uint8_t* p, DecodedMov* mov) { assert_zero(mod); return false; default: - mov->mem_has_base = true; - mov->mem_base_reg = sib_base + (rex_b ? 8 : 0); + decoded_out.mem_has_base = true; + decoded_out.mem_base_reg = sib_base + (rex_b ? 8 : 0); break; } } switch (mod) { case 0b00: { - mov->mem_displacement += 0; + decoded_out.mem_displacement += 0; } break; case 0b01: { - mov->mem_displacement += int8_t(p[i++]); + decoded_out.mem_displacement += int8_t(p[i++]); } break; case 0b10: { - mov->mem_displacement += xe::load(p + i); + decoded_out.mem_displacement += xe::load(p + i); i += 4; } break; } - if (mov->is_constant) { - mov->constant = xe::load(p + i); + if (decoded_out.is_constant) { + decoded_out.constant = xe::load(p + i); i += 4; } - mov->length = i; + decoded_out.length = i; return true; + +#elif XE_ARCH_ARM64 + decoded_out.length = sizeof(uint32_t); + uint32_t instruction = *reinterpret_cast(p); + + // Literal loading (PC-relative) is not handled. + + if ((instruction & kArm64LoadStoreAnyFMask) != kArm64LoadStoreAnyFixed) { + // Not a load or a store instruction. + return false; + } + + if ((instruction & kArm64LoadStorePairAnyFMask) == + kArm64LoadStorePairAnyFixed) { + // Handling MMIO only for single 32-bit values, not for pairs. 
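The immediate-offset decoding further below extracts the 9-bit signed ImmLS field (bits 20:12) with the usual shift-up, arithmetic-shift-down idiom so that bit 20 is replicated as the sign. A minimal standalone sketch of that idiom, matching the expression used in the decoder:

#include <cstdint>

// Sign-extends the 9-bit ImmLS field at bits [20:12] of an AArch64 load/store
// instruction word: shift the field to the top of a 32-bit value, then shift
// it back arithmetically so the sign bit is replicated.
int32_t SignExtendImmLS(uint32_t instruction) {
  return int32_t(instruction << (32 - (9 + 12))) >> (32 - 9);
}

// An all-ones field (0x1FF in bits 20:12) decodes to -1, for example.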
+ return false; + } + + uint8_t value_reg_base; + switch (Arm64LoadStoreOp(instruction & kArm64LoadStoreMask)) { + case Arm64LoadStoreOp::kSTR_w: + decoded_out.is_load = false; + value_reg_base = DecodedLoadStore::kArm64ValueRegX0; + break; + case Arm64LoadStoreOp::kLDR_w: + decoded_out.is_load = true; + value_reg_base = DecodedLoadStore::kArm64ValueRegX0; + break; + case Arm64LoadStoreOp::kSTR_s: + decoded_out.is_load = false; + value_reg_base = DecodedLoadStore::kArm64ValueRegV0; + break; + case Arm64LoadStoreOp::kLDR_s: + decoded_out.is_load = true; + value_reg_base = DecodedLoadStore::kArm64ValueRegV0; + break; + default: + return false; + } + + // `Rt` field (load / store register). + decoded_out.value_reg = value_reg_base + (instruction & 31); + if (decoded_out.is_load && + decoded_out.value_reg == DecodedLoadStore::kArm64ValueRegZero) { + // Zero constant rather than a register read. + decoded_out.is_constant = true; + decoded_out.constant = 0; + } + + decoded_out.mem_has_base = true; + // The base is Xn (for 0...30) or SP (for 31). + // `Rn` field (first source register). + decoded_out.mem_base_reg = (instruction >> 5) & 31; + + bool is_unsigned_offset = + (instruction & kArm64LoadStoreUnsignedOffsetFMask) == + kArm64LoadStoreUnsignedOffsetFixed; + if (is_unsigned_offset) { + // LDR|STR Wt|St, [Xn|SP{, #pimm}] + // pimm (positive immediate) is scaled by the size of the data (4 for + // words). + // `ImmLSUnsigned` field. + uint32_t unsigned_offset = (instruction >> 10) & 4095; + decoded_out.mem_displacement = + ptrdiff_t(sizeof(uint32_t) * unsigned_offset); + } else { + Arm64LoadStoreOffsetFixed offset = + Arm64LoadStoreOffsetFixed(instruction & kArm64LoadStoreOffsetFMask); + // simm (signed immediate) is not scaled. + // Only applicable to kUnscaledOffset, kPostIndex and kPreIndex. + // `ImmLS` field. + int32_t signed_offset = int32_t(instruction << (32 - (9 + 12))) >> (32 - 9); + // For both post- and pre-indexing, the new address is written to the + // register after the data register write, thus if Xt and Xn are the same, + // the final value in the register will be the new address. + // https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/LDR--immediate---Load-Register--immediate-- + switch (offset) { + case Arm64LoadStoreOffsetFixed::kUnscaledOffset: { + // LDUR|STUR Wt|St, [Xn|SP{, #simm}] + decoded_out.mem_displacement = signed_offset; + } break; + case Arm64LoadStoreOffsetFixed::kPostIndex: { + // LDR|STR Wt|St, [Xn|SP], #simm + decoded_out.mem_base_writeback = true; + decoded_out.mem_base_writeback_offset = signed_offset; + } break; + case Arm64LoadStoreOffsetFixed::kPreIndex: { + // LDR|STR Wt|St, [Xn|SP, #simm]! + decoded_out.mem_base_writeback = true; + decoded_out.mem_base_writeback_offset = signed_offset; + decoded_out.mem_displacement = signed_offset; + } break; + case Arm64LoadStoreOffsetFixed::kRegisterOffset: { + // LDR|STR Wt|St, [Xn|SP, (Wm|Xm){, extend {amount}}] + // `Rm` field. + decoded_out.mem_index_reg = (instruction >> 16) & 31; + if (decoded_out.mem_index_reg != DecodedLoadStore::kArm64RegZero) { + decoded_out.mem_has_index = true; + // Allowed extend types in the `option` field are UXTW (0b010), LSL + // (0b011 - identical to UXTX), SXTW (0b110), SXTX (0b111). + // The shift (0 or 2 for 32-bit LDR/STR) can be applied regardless of + // the extend type ("LSL" is just a term for assembly readability, + // internally it's treated simply as UXTX). 
+ // If bit 0 of the `option` field is 0 (UXTW, SXTW), the index + // register is treated as 32-bit (Wm) extended to 64-bit. If it's 1 + // (LSL aka UXTX, SXTX), the index register is treated as 64-bit (Xm). + // `ExtendMode` (`option`) field. + uint32_t extend_mode = (instruction >> 13) & 0b111; + if (!(extend_mode & 0b010)) { + // Sub-word index - undefined. + return false; + } + decoded_out.mem_index_size = + (extend_mode & 0b001) ? sizeof(uint64_t) : sizeof(uint32_t); + decoded_out.mem_index_sign_extend = (extend_mode & 0b100) != 0; + // Shift is either 0 or log2(sizeof(load or store size)). + // Supporting MMIO only for 4-byte words. + // `ImmShiftLS` field. + decoded_out.mem_scale = + (instruction & (UINT32_C(1) << 12)) ? sizeof(uint32_t) : 1; + } + } break; + default: + return false; + } + } + + return true; + +#else +#error TryDecodeLoadStore not implemented for the target CPU architecture. + return false; +#endif // XE_ARCH } bool MMIOHandler::ExceptionCallbackThunk(Exception* ex, void* data) { @@ -300,11 +417,13 @@ bool MMIOHandler::ExceptionCallback(Exception* ex) { // Access violations are pretty rare, so we can do a linear search here. // Only check if in the virtual range, as we only support virtual ranges. const MMIORange* range = nullptr; + uint32_t fault_guest_virtual_address = 0; if (ex->fault_address() < uint64_t(physical_membase_)) { - uint32_t fault_virtual_address = host_to_guest_virtual_( + fault_guest_virtual_address = host_to_guest_virtual_( host_to_guest_virtual_context_, fault_host_address); for (const auto& test_range : mapped_ranges_) { - if ((fault_virtual_address & test_range.mask) == test_range.address) { + if ((fault_guest_virtual_address & test_range.mask) == + test_range.address) { // Address is within the range of this mapping. range = &test_range; break; @@ -336,44 +455,114 @@ bool MMIOHandler::ExceptionCallback(Exception* ex) { auto rip = ex->pc(); auto p = reinterpret_cast(rip); - DecodedMov mov = {0}; - bool decoded = TryDecodeMov(p, &mov); - if (!decoded) { - XELOGE("Unable to decode MMIO mov at {}", p); + DecodedLoadStore decoded_load_store; + if (!TryDecodeLoadStore(p, decoded_load_store)) { + XELOGE("Unable to decode MMIO load or store instruction at {}", p); assert_always("Unknown MMIO instruction type"); return false; } - if (mov.is_load) { + HostThreadContext& thread_context = *ex->thread_context(); + +#if XE_ARCH_ARM64 + // Preserve the base address with the pre- or the post-index offset to write + // it after writing the result (since the base address register and the + // register to load to may be the same, in which case it should receive the + // original base address with the offset). + uintptr_t mem_base_writeback_address = 0; + if (decoded_load_store.mem_has_base && + decoded_load_store.mem_base_writeback) { + if (decoded_load_store.mem_base_reg == + DecodedLoadStore::kArm64MemBaseRegSp) { + mem_base_writeback_address = thread_context.sp; + } else { + assert_true(decoded_load_store.mem_base_reg <= 30); + mem_base_writeback_address = + thread_context.x[decoded_load_store.mem_base_reg]; + } + mem_base_writeback_address += decoded_load_store.mem_base_writeback_offset; + } +#endif // XE_ARCH_ARM64 + + uint8_t value_reg = decoded_load_store.value_reg; + if (decoded_load_store.is_load) { // Load of a memory value - read from range, swap, and store in the // register. 
uint32_t value = range->read(nullptr, range->callback_context, - static_cast(ex->fault_address())); - uint64_t* reg_ptr = &ex->thread_context()->int_registers[mov.value_reg]; - if (!mov.byte_swap) { + fault_guest_virtual_address); + if (!decoded_load_store.byte_swap) { // We swap only if it's not a movbe, as otherwise we are swapping twice. value = xe::byte_swap(value); } - *reg_ptr = value; +#if XE_ARCH_AMD64 + ex->ModifyIntRegister(value_reg) = value; +#elif XE_ARCH_ARM64 + if (value_reg >= DecodedLoadStore::kArm64ValueRegX0 && + value_reg <= (DecodedLoadStore::kArm64ValueRegX0 + 30)) { + ex->ModifyXRegister(value_reg - DecodedLoadStore::kArm64ValueRegX0) = + value; + } else if (value_reg >= DecodedLoadStore::kArm64ValueRegV0 && + value_reg <= (DecodedLoadStore::kArm64ValueRegV0 + 31)) { + ex->ModifyVRegister(value_reg - DecodedLoadStore::kArm64ValueRegV0) + .u32[0] = value; + } else { + assert_true(value_reg == DecodedLoadStore::kArm64ValueRegZero); + // Register write is ignored for X31. + } +#else +#error Register value writing not implemented for the target CPU architecture. +#endif // XE_ARCH } else { // Store of a register value - read register, swap, write to range. - int32_t value; - if (mov.is_constant) { - value = uint32_t(mov.constant); + uint32_t value; + if (decoded_load_store.is_constant) { + value = uint32_t(decoded_load_store.constant); } else { - uint64_t* reg_ptr = &ex->thread_context()->int_registers[mov.value_reg]; - value = static_cast(*reg_ptr); - if (!mov.byte_swap) { +#if XE_ARCH_AMD64 + value = uint32_t(thread_context.int_registers[value_reg]); +#elif XE_ARCH_ARM64 + if (value_reg >= DecodedLoadStore::kArm64ValueRegX0 && + value_reg <= (DecodedLoadStore::kArm64ValueRegX0 + 30)) { + value = uint32_t( + thread_context.x[value_reg - DecodedLoadStore::kArm64ValueRegX0]); + } else if (value_reg >= DecodedLoadStore::kArm64ValueRegV0 && + value_reg <= (DecodedLoadStore::kArm64ValueRegV0 + 31)) { + value = thread_context.v[value_reg - DecodedLoadStore::kArm64ValueRegV0] + .u32[0]; + } else { + assert_true(value_reg == DecodedLoadStore::kArm64ValueRegZero); + value = 0; + } +#else +#error Register value reading not implemented for the target CPU architecture. +#endif // XE_ARCH + if (!decoded_load_store.byte_swap) { // We swap only if it's not a movbe, as otherwise we are swapping twice. - value = xe::byte_swap(static_cast(value)); + value = xe::byte_swap(value); } } - range->write(nullptr, range->callback_context, - static_cast(ex->fault_address()), value); + range->write(nullptr, range->callback_context, fault_guest_virtual_address, + value); } +#if XE_ARCH_ARM64 + // Write the base address with the pre- or the post-index offset, overwriting + // the register to load to if it's the same. + if (decoded_load_store.mem_has_base && + decoded_load_store.mem_base_writeback) { + if (decoded_load_store.mem_base_reg == + DecodedLoadStore::kArm64MemBaseRegSp) { + thread_context.sp = mem_base_writeback_address; + } else { + assert_true(decoded_load_store.mem_base_reg <= 30); + ex->ModifyXRegister(decoded_load_store.mem_base_reg) = + mem_base_writeback_address; + } + } +#endif // XE_ARCH_ARM64 + // Advance RIP to the next instruction so that we resume properly. 
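
The ordering the comments above call out for pre- and post-indexed accesses - the loaded data is written to the destination register first and the base-register writeback is applied afterwards, so the writeback wins when Rt and Rn are the same register - can be modeled with plain variables. This is an illustrative toy under that assumption only, with made-up addresses and values; it is not code from the patch.

#include <cstdint>
#include <cstdio>

int main() {
  uint64_t x[31] = {};               // stand-in for X0...X30
  x[0] = 0x1000;                     // base address held in X0
  const int64_t post_index = 8;      // e.g. LDR W0, [X0], #8 - Rt == Rn
  // The handler captures the written-back address before touching Rt.
  const uint64_t writeback_address = x[0] + post_index;
  const uint32_t loaded_value = 0xDEADBEEF;  // value returned by the MMIO read
  x[0] = loaded_value;               // data register write (Wt zero-extends)
  x[0] = writeback_address;          // base writeback overwrites it: Rt == Rn
  std::printf("X0 = 0x%llX\n", (unsigned long long)x[0]);  // prints 0x1008
  return 0;
}
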
-  ex->set_resume_pc(rip + mov.length);
+  ex->set_resume_pc(rip + decoded_load_store.length);
 
   return true;
 }
diff --git a/src/xenia/cpu/mmio_handler.h b/src/xenia/cpu/mmio_handler.h
index fdf202e1c..6240544e0 100644
--- a/src/xenia/cpu/mmio_handler.h
+++ b/src/xenia/cpu/mmio_handler.h
@@ -15,10 +15,11 @@
 #include
 
 #include "xenia/base/mutex.h"
+#include "xenia/base/platform.h"
 
 namespace xe {
 class Exception;
-class X64Context;
+class HostThreadContext;
 }  // namespace xe
 
 namespace xe {
@@ -93,6 +94,61 @@ class MMIOHandler {
   static MMIOHandler* global_handler_;
 
   xe::global_critical_region global_critical_region_;
+
+ private:
+  struct DecodedLoadStore {
+    // Matches the Xn/Wn register number for 0 reads and ignored writes in many
+    // usage cases.
+    static constexpr uint8_t kArm64RegZero = 31;
+
+    // Matches the actual register number encoding for an SP base in AArch64
+    // load and store instructions.
+    static constexpr uint8_t kArm64MemBaseRegSp = kArm64RegZero;
+
+    static constexpr uint8_t kArm64ValueRegX0 = 0;
+    static constexpr uint8_t kArm64ValueRegZero =
+        kArm64ValueRegX0 + kArm64RegZero;
+    static constexpr uint8_t kArm64ValueRegV0 = 32;
+
+    size_t length;
+    // Indicates this is a load (or conversely a store).
+    bool is_load;
+    // Indicates the memory must be swapped.
+    bool byte_swap;
+    // Source (for store) or target (for load) register.
+    // For x86-64:
+    //   AX CX DX BX SP BP SI DI      // REX.R=0
+    //   R8 R9 R10 R11 R12 R13 R14 R15  // REX.R=1
+    // For AArch64:
+    // - kArm64ValueRegX0 + [0...30]: Xn (Wn for 32 bits - upper 32 bits of Xn
+    //   are zeroed on Wn write).
+    // - kArm64ValueRegZero: Zero constant for register read, ignored register
+    //   write (though memory must still be accessed - an MMIO load may have
+    //   side effects even if the result is discarded).
+    // - kArm64ValueRegV0 + [0...31]: Vn (Sn for 32 bits).
+    uint8_t value_reg;
+    // [base + (index * scale) + displacement]
+    bool mem_has_base;
+    // On AArch64, if mem_base_reg is kArm64MemBaseRegSp, the base register is
+    // SP, not Xn.
+    uint8_t mem_base_reg;
+    // For AArch64 pre- and post-indexing. In case of a load, the base register
+    // is written back after the loaded data is written to the register,
+    // overwriting the value register if it's the same.
+ bool mem_base_writeback; + int32_t mem_base_writeback_offset; + bool mem_has_index; + uint8_t mem_index_reg; + uint8_t mem_index_size; + bool mem_index_sign_extend; + uint8_t mem_scale; + ptrdiff_t mem_displacement; + bool is_constant; + int32_t constant; + }; + + static bool TryDecodeLoadStore(const uint8_t* p, + DecodedLoadStore& decoded_out); }; } // namespace cpu diff --git a/src/xenia/cpu/ppc/testing/ppc_testing_main.cc b/src/xenia/cpu/ppc/testing/ppc_testing_main.cc index a39c41bd1..0d54261c4 100644 --- a/src/xenia/cpu/ppc/testing/ppc_testing_main.cc +++ b/src/xenia/cpu/ppc/testing/ppc_testing_main.cc @@ -15,13 +15,16 @@ #include "xenia/base/math.h" #include "xenia/base/platform.h" #include "xenia/base/string_buffer.h" -#include "xenia/cpu/backend/x64/x64_backend.h" #include "xenia/cpu/cpu_flags.h" #include "xenia/cpu/ppc/ppc_context.h" #include "xenia/cpu/ppc/ppc_frontend.h" #include "xenia/cpu/processor.h" #include "xenia/cpu/raw_module.h" +#if XE_ARCH_AMD64 +#include "xenia/cpu/backend/x64/x64_backend.h" +#endif // XE_ARCH + #if XE_COMPILER_MSVC #include "xenia/base/platform_win.h" #endif // XE_COMPILER_MSVC @@ -196,17 +199,17 @@ class TestRunner { std::unique_ptr backend; if (!backend) { -#if defined(XENIA_HAS_X64_BACKEND) && XENIA_HAS_X64_BACKEND +#if XE_ARCH_AMD64 if (cvars::cpu == "x64") { backend.reset(new xe::cpu::backend::x64::X64Backend()); } -#endif // XENIA_HAS_X64_BACKEND +#endif // XE_ARCH if (cvars::cpu == "any") { -#if defined(XENIA_HAS_X64_BACKEND) && XENIA_HAS_X64_BACKEND if (!backend) { +#if XE_ARCH_AMD64 backend.reset(new xe::cpu::backend::x64::X64Backend()); +#endif // XE_ARCH } -#endif // XENIA_HAS_X64_BACKEND } } diff --git a/src/xenia/cpu/ppc/testing/premake5.lua b/src/xenia/cpu/ppc/testing/premake5.lua index d91256460..bca2bb81e 100644 --- a/src/xenia/cpu/ppc/testing/premake5.lua +++ b/src/xenia/cpu/ppc/testing/premake5.lua @@ -11,7 +11,6 @@ project("xenia-cpu-ppc-tests") "fmt", "mspack", "xenia-core", - "xenia-cpu-backend-x64", "xenia-cpu", "xenia-base", }) @@ -24,6 +23,10 @@ project("xenia-cpu-ppc-tests") }) filter("files:*.s") flags({"ExcludeFromBuild"}) + filter("architecture:x86_64") + links({ + "xenia-cpu-backend-x64", + }) filter("platforms:Windows") debugdir(project_root) debugargs({ diff --git a/src/xenia/cpu/processor.cc b/src/xenia/cpu/processor.cc index ce625879d..6bd57b4f7 100644 --- a/src/xenia/cpu/processor.cc +++ b/src/xenia/cpu/processor.cc @@ -19,6 +19,7 @@ #include "xenia/base/literals.h" #include "xenia/base/logging.h" #include "xenia/base/memory.h" +#include "xenia/base/platform.h" #include "xenia/base/profiling.h" #include "xenia/base/threading.h" #include "xenia/cpu/breakpoint.h" @@ -133,7 +134,11 @@ bool Processor::Setup(std::unique_ptr backend) { // Stack walker is used when profiling, debugging, and dumping. // Note that creation may fail, in which case we'll have to disable those // features. - stack_walker_ = StackWalker::Create(backend_->code_cache()); + // The code cache may be unavailable in case of a "null" backend. + cpu::backend::CodeCache* code_cache = backend_->code_cache(); + if (code_cache) { + stack_walker_ = StackWalker::Create(code_cache); + } if (!stack_walker_) { // TODO(benvanik): disable features. if (cvars::debug) { @@ -698,7 +703,13 @@ bool Processor::OnThreadBreakpointHit(Exception* ex) { // Apply thread context changes. // TODO(benvanik): apply to all threads? 
+#if XE_ARCH_AMD64 ex->set_resume_pc(thread_info->host_context.rip); +#elif XE_ARCH_ARM64 + ex->set_resume_pc(thread_info->host_context.pc); +#else +#error Instruction pointer not specified for the target CPU architecture. +#endif // XE_ARCH // Resume execution. return true; @@ -828,8 +839,8 @@ bool Processor::ResumeAllThreads() { return true; } -void Processor::UpdateThreadExecutionStates(uint32_t override_thread_id, - X64Context* override_context) { +void Processor::UpdateThreadExecutionStates( + uint32_t override_thread_id, HostThreadContext* override_context) { auto global_lock = global_critical_region_.Acquire(); uint64_t frame_host_pcs[64]; xe::cpu::StackFrame cpu_frames[64]; @@ -851,7 +862,7 @@ void Processor::UpdateThreadExecutionStates(uint32_t override_thread_id, // Grab stack trace and X64 context then resolve all symbols. uint64_t hash; - X64Context* in_host_context = nullptr; + HostThreadContext* in_host_context = nullptr; if (override_thread_id == thread_info->thread_id) { // If we were passed an override context we use that. Otherwise, ask the // stack walker for a new context. diff --git a/src/xenia/cpu/processor.h b/src/xenia/cpu/processor.h index c0b956572..eaa958d3d 100644 --- a/src/xenia/cpu/processor.h +++ b/src/xenia/cpu/processor.h @@ -215,8 +215,9 @@ class Processor { // Updates all cached thread execution info (state, call stacks, etc). // The given override thread handle and context will be used in place of // sampled values for that thread. - void UpdateThreadExecutionStates(uint32_t override_handle = 0, - X64Context* override_context = nullptr); + void UpdateThreadExecutionStates( + uint32_t override_handle = 0, + HostThreadContext* override_context = nullptr); // Suspends all breakpoints, uninstalling them as required. // No breakpoints will be triggered until they are resumed. diff --git a/src/xenia/cpu/stack_walker.h b/src/xenia/cpu/stack_walker.h index 4dd4f44e9..3006c2887 100644 --- a/src/xenia/cpu/stack_walker.h +++ b/src/xenia/cpu/stack_walker.h @@ -13,7 +13,7 @@ #include #include -#include "xenia/base/x64_context.h" +#include "xenia/base/host_thread_context.h" #include "xenia/cpu/function.h" namespace xe { @@ -83,8 +83,8 @@ class StackWalker { virtual size_t CaptureStackTrace(void* thread_handle, uint64_t* frame_host_pcs, size_t frame_offset, size_t frame_count, - const X64Context* in_host_context, - X64Context* out_host_context, + const HostThreadContext* in_host_context, + HostThreadContext* out_host_context, uint64_t* out_stack_hash = nullptr) = 0; // Resolves symbol information for the given stack frames. diff --git a/src/xenia/cpu/stack_walker_win.cc b/src/xenia/cpu/stack_walker_win.cc index cbfa96023..aaaab140a 100644 --- a/src/xenia/cpu/stack_walker_win.cc +++ b/src/xenia/cpu/stack_walker_win.cc @@ -153,8 +153,8 @@ class Win32StackWalker : public StackWalker { size_t CaptureStackTrace(void* thread_handle, uint64_t* frame_host_pcs, size_t frame_offset, size_t frame_count, - const X64Context* in_host_context, - X64Context* out_host_context, + const HostThreadContext* in_host_context, + HostThreadContext* out_host_context, uint64_t* out_stack_hash) override { // TODO(benvanik): use xstate? 
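
The X64Context to HostThreadContext migration in these hunks touches only a handful of members; the header itself is not part of this diff, so the shape below is a reader's reconstruction from the accesses visible here (rip and int_registers on x86-64; pc, sp, x and v on AArch64). MockHostThreadContext and Vec128 are made-up names for illustration and should not be mistaken for the real declaration in xenia/base/host_thread_context.h.

#include <cstdint>

struct Vec128 {
  uint32_t u32[4];
};

struct MockHostThreadContext {
#if defined(__x86_64__) || defined(_M_X64)
  uint64_t rip;                // used for breakpoint resume on x86-64
  uint64_t int_registers[16];  // general-purpose registers, rax...r15
#elif defined(__aarch64__) || defined(_M_ARM64)
  uint64_t pc;    // used for breakpoint resume on AArch64
  uint64_t sp;    // stack pointer, base register 31 in the MMIO fixups
  uint64_t x[31]; // X0...X30
  Vec128 v[32];   // V0...V31, u32[0] holds 32-bit S-register data
#endif
};

int main() { return 0; }
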
// https://msdn.microsoft.com/en-us/library/windows/desktop/hh134240(v=vs.85).aspx diff --git a/src/xenia/cpu/thread_debug_info.h b/src/xenia/cpu/thread_debug_info.h index ffce6822c..5803880da 100644 --- a/src/xenia/cpu/thread_debug_info.h +++ b/src/xenia/cpu/thread_debug_info.h @@ -12,7 +12,7 @@ #include -#include "xenia/base/x64_context.h" +#include "xenia/base/host_thread_context.h" #include "xenia/cpu/thread.h" #include "xenia/cpu/thread_state.h" @@ -70,10 +70,10 @@ struct ThreadDebugInfo { // Last-sampled PPC context. // This is updated whenever the debugger stops. ppc::PPCContext guest_context; - // Last-sampled host x64 context. + // Last-sampled host context. // This is updated whenever the debugger stops and must be used instead of any // value taken from the StackWalker as it properly respects exception stacks. - X64Context host_context; + HostThreadContext host_context; // A single frame in a call stack. struct Frame { diff --git a/src/xenia/debug/ui/debug_window.cc b/src/xenia/debug/ui/debug_window.cc index bcbf7e042..0c03b7ebb 100644 --- a/src/xenia/debug/ui/debug_window.cc +++ b/src/xenia/debug/ui/debug_window.cc @@ -960,7 +960,7 @@ void DebugWindow::DrawRegistersPane() { auto reg = static_cast(i); ImGui::BeginGroup(); ImGui::AlignTextToFramePadding(); - ImGui::Text("%3s", X64Context::GetRegisterName(reg)); + ImGui::Text("%3s", HostThreadContext::GetRegisterName(reg)); ImGui::SameLine(); ImGui::Dummy(ImVec2(4, 0)); ImGui::SameLine(); @@ -985,7 +985,7 @@ void DebugWindow::DrawRegistersPane() { static_cast(static_cast(X64Register::kXmm0) + i); ImGui::BeginGroup(); ImGui::AlignTextToFramePadding(); - ImGui::Text("%5s", X64Context::GetRegisterName(reg)); + ImGui::Text("%5s", HostThreadContext::GetRegisterName(reg)); ImGui::SameLine(); ImGui::Dummy(ImVec2(4, 0)); ImGui::SameLine(); diff --git a/src/xenia/debug/ui/debug_window.h b/src/xenia/debug/ui/debug_window.h index be7294940..e3c01c54d 100644 --- a/src/xenia/debug/ui/debug_window.h +++ b/src/xenia/debug/ui/debug_window.h @@ -13,7 +13,7 @@ #include #include -#include "xenia/base/x64_context.h" +#include "xenia/base/host_thread_context.h" #include "xenia/cpu/breakpoint.h" #include "xenia/cpu/debug_listener.h" #include "xenia/cpu/processor.h" diff --git a/src/xenia/emulator.cc b/src/xenia/emulator.cc index c939e1133..e970a626a 100644 --- a/src/xenia/emulator.cc +++ b/src/xenia/emulator.cc @@ -24,9 +24,10 @@ #include "xenia/base/literals.h" #include "xenia/base/logging.h" #include "xenia/base/mapped_memory.h" +#include "xenia/base/platform.h" #include "xenia/base/string.h" #include "xenia/cpu/backend/code_cache.h" -#include "xenia/cpu/backend/x64/x64_backend.h" +#include "xenia/cpu/backend/null_backend.h" #include "xenia/cpu/cpu_flags.h" #include "xenia/cpu/thread_state.h" #include "xenia/gpu/graphics_system.h" @@ -50,6 +51,10 @@ #include "xenia/vfs/devices/null_device.h" #include "xenia/vfs/devices/stfs_container_device.h" +#if XE_ARCH_AMD64 +#include "xenia/cpu/backend/x64/x64_backend.h" +#endif // XE_ARCH + DEFINE_double(time_scalar, 1.0, "Scalar used to speed or slow time (1x, 2x, 1/2x, etc).", "General"); @@ -127,6 +132,7 @@ Emulator::~Emulator() { X_STATUS Emulator::Setup( ui::Window* display_window, ui::ImGuiDrawer* imgui_drawer, + bool require_cpu_backend, std::function(cpu::Processor*)> audio_system_factory, std::function()> @@ -160,19 +166,20 @@ X_STATUS Emulator::Setup( export_resolver_ = std::make_unique(); std::unique_ptr backend; - if (!backend) { -#if defined(XENIA_HAS_X64_BACKEND) && XENIA_HAS_X64_BACKEND - if 
(cvars::cpu == "x64") { +#if XE_ARCH_AMD64 + if (cvars::cpu == "x64") { + backend.reset(new xe::cpu::backend::x64::X64Backend()); + } +#endif // XE_ARCH + if (cvars::cpu == "any") { + if (!backend) { +#if XE_ARCH_AMD64 backend.reset(new xe::cpu::backend::x64::X64Backend()); +#endif // XE_ARCH } -#endif // XENIA_HAS_X64_BACKEND - if (cvars::cpu == "any") { -#if defined(XENIA_HAS_X64_BACKEND) && XENIA_HAS_X64_BACKEND - if (!backend) { - backend.reset(new xe::cpu::backend::x64::X64Backend()); - } -#endif // XENIA_HAS_X64_BACKEND - } + } + if (!backend && !require_cpu_backend) { + backend.reset(new xe::cpu::backend::NullBackend()); } // Initialize the CPU. diff --git a/src/xenia/emulator.h b/src/xenia/emulator.h index 3ce6ad7e6..6fad1d28b 100644 --- a/src/xenia/emulator.h +++ b/src/xenia/emulator.h @@ -165,6 +165,7 @@ class Emulator { // functions. X_STATUS Setup( ui::Window* display_window, ui::ImGuiDrawer* imgui_drawer, + bool require_cpu_backend, std::function(cpu::Processor*)> audio_system_factory, std::function()> diff --git a/src/xenia/gpu/d3d12/d3d12_render_target_cache.h b/src/xenia/gpu/d3d12/d3d12_render_target_cache.h index 6f823d34e..7e111480b 100644 --- a/src/xenia/gpu/d3d12/d3d12_render_target_cache.h +++ b/src/xenia/gpu/d3d12/d3d12_render_target_cache.h @@ -497,7 +497,7 @@ class D3D12RenderTargetCache final : public RenderTargetCache { TransferInvocation(const Transfer& transfer, const TransferShaderKey& shader_key) : transfer(transfer), shader_key(shader_key) {} - bool operator<(const TransferInvocation& other_invocation) { + bool operator<(const TransferInvocation& other_invocation) const { // TODO(Triang3l): See if it may be better to sort by the source in the // first place, especially when reading the same data multiple times (like // to write the stencil bits after depth) for better read locality. @@ -639,7 +639,7 @@ class D3D12RenderTargetCache final : public RenderTargetCache { DumpInvocation(const ResolveCopyDumpRectangle& rectangle, const DumpPipelineKey& pipeline_key) : rectangle(rectangle), pipeline_key(pipeline_key) {} - bool operator<(const DumpInvocation& other_invocation) { + bool operator<(const DumpInvocation& other_invocation) const { // Sort by the pipeline key primarily to reduce pipeline state (context) // switches. 
if (pipeline_key != other_invocation.pipeline_key) { diff --git a/src/xenia/gpu/d3d12/premake5.lua b/src/xenia/gpu/d3d12/premake5.lua index aa09f8b75..5bcf0efa2 100644 --- a/src/xenia/gpu/d3d12/premake5.lua +++ b/src/xenia/gpu/d3d12/premake5.lua @@ -30,7 +30,6 @@ project("xenia-gpu-d3d12-trace-viewer") "xenia-base", "xenia-core", "xenia-cpu", - "xenia-cpu-backend-x64", "xenia-gpu", "xenia-gpu-d3d12", "xenia-hid", @@ -68,6 +67,11 @@ project("xenia-gpu-d3d12-trace-viewer") }) end + filter("architecture:x86_64") + links({ + "xenia-cpu-backend-x64", + }) + group("src") project("xenia-gpu-d3d12-trace-dump") uuid("686b859c-0046-44c4-a02c-41fc3fb75698") @@ -79,7 +83,6 @@ project("xenia-gpu-d3d12-trace-dump") "xenia-base", "xenia-core", "xenia-cpu", - "xenia-cpu-backend-x64", "xenia-gpu", "xenia-gpu-d3d12", "xenia-hid", @@ -115,3 +118,8 @@ project("xenia-gpu-d3d12-trace-dump") "1>scratch/stdout-trace-dump.txt", }) end + + filter("architecture:x86_64") + links({ + "xenia-cpu-backend-x64", + }) diff --git a/src/xenia/gpu/primitive_processor.cc b/src/xenia/gpu/primitive_processor.cc index 5c91abaa8..3e32afa6d 100644 --- a/src/xenia/gpu/primitive_processor.cc +++ b/src/xenia/gpu/primitive_processor.cc @@ -942,7 +942,7 @@ void PrimitiveProcessor::Get16BitResetIndexUsage( is_ffff_simd = _mm_or_si128(is_ffff_simd, _mm_cmpeq_epi16(source_simd, ffff_simd)); #elif XE_ARCH_ARM64 - is_reset_simd = vcorrq_u16( + is_reset_simd = vorrq_u16( is_reset_simd, vceqq_u16(source_simd, reset_index_guest_endian_simd)); is_ffff_simd = vmaxq_u16(is_ffff_simd, source_simd); #else diff --git a/src/xenia/gpu/render_target_cache.cc b/src/xenia/gpu/render_target_cache.cc index b3bca4290..0b6c2d134 100644 --- a/src/xenia/gpu/render_target_cache.cc +++ b/src/xenia/gpu/render_target_cache.cc @@ -374,8 +374,14 @@ void RenderTargetCache::InitializeCommon() { RenderTargetKey(), RenderTargetKey())); } -void RenderTargetCache::ShutdownCommon() { +void RenderTargetCache::DestroyAllRenderTargets(bool shutting_down) { ownership_ranges_.clear(); + if (!shutting_down) { + ownership_ranges_.emplace( + std::piecewise_construct, std::forward_as_tuple(uint32_t(0)), + std::forward_as_tuple(xenos::kEdramTileCount, RenderTargetKey(), + RenderTargetKey(), RenderTargetKey())); + } for (const auto& render_target_pair : render_targets_) { if (render_target_pair.second) { @@ -385,6 +391,8 @@ void RenderTargetCache::ShutdownCommon() { render_targets_.clear(); } +void RenderTargetCache::ShutdownCommon() { DestroyAllRenderTargets(true); } + void RenderTargetCache::ClearCache() { // Keep only render targets currently owning any EDRAM data. if (!render_targets_.empty()) { diff --git a/src/xenia/gpu/render_target_cache.h b/src/xenia/gpu/render_target_cache.h index d794f66e7..48cfecd59 100644 --- a/src/xenia/gpu/render_target_cache.h +++ b/src/xenia/gpu/render_target_cache.h @@ -193,6 +193,10 @@ class RenderTargetCache { // Call last in implementation-specific initialization (when things like path // are initialized by the implementation). void InitializeCommon(); + // May be called from the destructor, or from the implementation shutdown to + // destroy all render targets before destroying what they depend on in the + // implementation. + void DestroyAllRenderTargets(bool shutting_down); // Call last in implementation-specific shutdown, also callable from the // destructor. 
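
The one-line primitive_processor.cc change above swaps in vorrq_u16, the actual NEON bitwise-OR intrinsic (vcorrq_u16 does not exist), to OR together the per-lane "equals reset index" masks while vmaxq_u16 tracks the maximum lane to detect 0xFFFF indices. Below is a scalar model of that accumulation, with made-up index data and reset value, only to show what the two accumulators compute; it is not the SIMD code itself.

#include <cstdint>
#include <cstdio>

int main() {
  const uint16_t reset_index_guest_endian = 0xFFFE;  // example value only
  const uint16_t indices[] = {0, 1, 0xFFFE, 3, 0xFFFF, 4};
  uint16_t is_reset_accumulator = 0;  // vorrq_u16 of vceqq_u16 results
  uint16_t max_accumulator = 0;       // vmaxq_u16 accumulation
  for (uint16_t index : indices) {
    is_reset_accumulator |= (index == reset_index_guest_endian) ? 0xFFFF : 0;
    max_accumulator = index > max_accumulator ? index : max_accumulator;
  }
  std::printf("reset index used: %s, any 0xFFFF index: %s\n",
              is_reset_accumulator ? "yes" : "no",
              max_accumulator == 0xFFFF ? "yes" : "no");
  return 0;
}
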
void ShutdownCommon(); diff --git a/src/xenia/gpu/spirv_shader_translator.cc b/src/xenia/gpu/spirv_shader_translator.cc index cfbbd28e4..22c6c8a0a 100644 --- a/src/xenia/gpu/spirv_shader_translator.cc +++ b/src/xenia/gpu/spirv_shader_translator.cc @@ -75,9 +75,6 @@ SpirvShaderTranslator::Features::Features( } } -const std::string SpirvShaderTranslator::kInterpolatorNamePrefix = - "xe_interpolator_"; - SpirvShaderTranslator::SpirvShaderTranslator(const Features& features) : features_(features) {} @@ -164,6 +161,8 @@ void SpirvShaderTranslator::StartTranslation() { type_float2_ = builder_->makeVectorType(type_float_, 2); type_float3_ = builder_->makeVectorType(type_float_, 3); type_float4_ = builder_->makeVectorType(type_float_, 4); + type_interpolators_ = builder_->makeArrayType( + type_float4_, builder_->makeUintConstant(xenos::kMaxInterpolators), 0); const_int_0_ = builder_->makeIntConstant(0); id_vector_temp_.clear(); @@ -257,8 +256,9 @@ void SpirvShaderTranslator::StartTranslation() { "xe_uniform_system_constants"); builder_->addDecoration(uniform_system_constants_, spv::DecorationDescriptorSet, - kDescriptorSetSystemConstants); - builder_->addDecoration(uniform_system_constants_, spv::DecorationBinding, 0); + int(kDescriptorSetConstants)); + builder_->addDecoration(uniform_system_constants_, spv::DecorationBinding, + int(kConstantBufferSystem)); if (features_.spirv_version >= spv::Spv_1_4) { main_interface_.push_back(uniform_system_constants_); } @@ -285,12 +285,13 @@ void SpirvShaderTranslator::StartTranslation() { uniform_float_constants_ = builder_->createVariable( spv::NoPrecision, spv::StorageClassUniform, type_float_constants, "xe_uniform_float_constants"); + builder_->addDecoration(uniform_float_constants_, + spv::DecorationDescriptorSet, + int(kDescriptorSetConstants)); builder_->addDecoration( - uniform_float_constants_, spv::DecorationDescriptorSet, - int(is_pixel_shader() ? kDescriptorSetFloatConstantsPixel - : kDescriptorSetFloatConstantsVertex)); - builder_->addDecoration(uniform_float_constants_, spv::DecorationBinding, - 0); + uniform_float_constants_, spv::DecorationBinding, + int(is_pixel_shader() ? 
kConstantBufferFloatPixel + : kConstantBufferFloatVertex)); if (features_.spirv_version >= spv::Spv_1_4) { main_interface_.push_back(uniform_float_constants_); } @@ -326,9 +327,9 @@ void SpirvShaderTranslator::StartTranslation() { "xe_uniform_bool_loop_constants"); builder_->addDecoration(uniform_bool_loop_constants_, spv::DecorationDescriptorSet, - int(kDescriptorSetBoolLoopConstants)); + int(kDescriptorSetConstants)); builder_->addDecoration(uniform_bool_loop_constants_, spv::DecorationBinding, - 0); + int(kConstantBufferBoolLoop)); if (features_.spirv_version >= spv::Spv_1_4) { main_interface_.push_back(uniform_bool_loop_constants_); } @@ -352,8 +353,9 @@ void SpirvShaderTranslator::StartTranslation() { "xe_uniform_fetch_constants"); builder_->addDecoration(uniform_fetch_constants_, spv::DecorationDescriptorSet, - int(kDescriptorSetFetchConstants)); - builder_->addDecoration(uniform_fetch_constants_, spv::DecorationBinding, 0); + int(kDescriptorSetConstants)); + builder_->addDecoration(uniform_fetch_constants_, spv::DecorationBinding, + int(kConstantBufferFetch)); if (features_.spirv_version >= spv::Spv_1_4) { main_interface_.push_back(uniform_fetch_constants_); } @@ -639,6 +641,16 @@ std::vector SpirvShaderTranslator::CompleteTranslation() { entry_point->addIdOperand(interface_id); } + // Specify the binding indices for samplers when the number of textures is + // known, as samplers are located after images in the texture descriptor set. + size_t texture_binding_count = texture_bindings_.size(); + size_t sampler_binding_count = sampler_bindings_.size(); + for (size_t i = 0; i < sampler_binding_count; ++i) { + builder_->addDecoration(sampler_bindings_[i].variable, + spv::DecorationBinding, + int(texture_binding_count + i)); + } + // TODO(Triang3l): Avoid copy? std::vector module_uints; builder_->dump(module_uints); @@ -1056,17 +1068,15 @@ void SpirvShaderTranslator::StartVertexOrTessEvalShaderBeforeMain() { main_interface_.push_back(input_vertex_index_); } - // Create the Xenia-specific outputs. - // TODO(Triang3l): Change to an interpolator array. - for (uint32_t i = 0; i < xenos::kMaxInterpolators; ++i) { - spv::Id interpolator = builder_->createVariable( - spv::NoPrecision, spv::StorageClassOutput, type_float4_, - (kInterpolatorNamePrefix + std::to_string(i)).c_str()); - input_output_interpolators_[i] = interpolator; - builder_->addDecoration(interpolator, spv::DecorationLocation, int(i)); - builder_->addDecoration(interpolator, spv::DecorationInvariant); - main_interface_.push_back(interpolator); - } + // Create the interpolator output. + input_output_interpolators_ = + builder_->createVariable(spv::NoPrecision, spv::StorageClassOutput, + type_interpolators_, "xe_out_interpolators"); + builder_->addDecoration(input_output_interpolators_, spv::DecorationLocation, + 0); + builder_->addDecoration(input_output_interpolators_, + spv::DecorationInvariant); + main_interface_.push_back(input_output_interpolators_); // Create the gl_PerVertex output for used system outputs. std::vector struct_per_vertex_members; @@ -1095,7 +1105,12 @@ void SpirvShaderTranslator::StartVertexOrTessEvalShaderInMain() { // Zero the interpolators. 
for (uint32_t i = 0; i < xenos::kMaxInterpolators; ++i) { - builder_->createStore(const_float4_0_, input_output_interpolators_[i]); + id_vector_temp_.clear(); + id_vector_temp_.push_back(builder_->makeIntConstant(int(i))); + builder_->createStore(const_float4_0_, + builder_->createAccessChain( + spv::StorageClassOutput, + input_output_interpolators_, id_vector_temp_)); } // Load the vertex index or the tessellation parameters. @@ -1269,17 +1284,13 @@ void SpirvShaderTranslator::CompleteVertexOrTessEvalShaderInMain() { } void SpirvShaderTranslator::StartFragmentShaderBeforeMain() { - // Interpolator inputs. - uint32_t interpolator_count = - std::min(xenos::kMaxInterpolators, register_count()); - for (uint32_t i = 0; i < interpolator_count; ++i) { - spv::Id interpolator = builder_->createVariable( - spv::NoPrecision, spv::StorageClassInput, type_float4_, - (kInterpolatorNamePrefix + std::to_string(i)).c_str()); - input_output_interpolators_[i] = interpolator; - builder_->addDecoration(interpolator, spv::DecorationLocation, int(i)); - main_interface_.push_back(interpolator); - } + // Interpolator input. + input_output_interpolators_ = + builder_->createVariable(spv::NoPrecision, spv::StorageClassInput, + type_interpolators_, "xe_in_interpolators"); + builder_->addDecoration(input_output_interpolators_, spv::DecorationLocation, + 0); + main_interface_.push_back(input_output_interpolators_); bool param_gen_needed = GetPsParamGenInterpolator() != UINT32_MAX; @@ -1347,7 +1358,10 @@ void SpirvShaderTranslator::StartFragmentShaderInMain() { // Register array element. id_vector_temp_.push_back(builder_->makeIntConstant(int(i))); builder_->createStore( - builder_->createLoad(input_output_interpolators_[i], spv::NoPrecision), + builder_->createLoad(builder_->createAccessChain( + spv::StorageClassInput, + input_output_interpolators_, id_vector_temp_), + spv::NoPrecision), builder_->createAccessChain(spv::StorageClassFunction, var_main_registers_, id_vector_temp_)); } @@ -1824,7 +1838,12 @@ void SpirvShaderTranslator::StoreResult(const InstructionResult& result, } break; case InstructionStorageTarget::kInterpolator: assert_true(is_vertex_shader()); - target_pointer = input_output_interpolators_[result.storage_index]; + id_vector_temp_util_.clear(); + id_vector_temp_util_.push_back( + builder_->makeIntConstant(int(result.storage_index))); + target_pointer = builder_->createAccessChain(spv::StorageClassOutput, + input_output_interpolators_, + id_vector_temp_util_); break; case InstructionStorageTarget::kPosition: assert_true(is_vertex_shader()); diff --git a/src/xenia/gpu/spirv_shader_translator.h b/src/xenia/gpu/spirv_shader_translator.h index aca23efe5..18afaff79 100644 --- a/src/xenia/gpu/spirv_shader_translator.h +++ b/src/xenia/gpu/spirv_shader_translator.h @@ -131,6 +131,16 @@ class SpirvShaderTranslator : public ShaderTranslator { float color_exp_bias[4]; }; + enum ConstantBuffer : uint32_t { + kConstantBufferSystem, + kConstantBufferFloatVertex, + kConstantBufferFloatPixel, + kConstantBufferBoolLoop, + kConstantBufferFetch, + + kConstantBufferCount, + }; + // The minimum limit for maxPerStageDescriptorStorageBuffers is 4, and for // maxStorageBufferRange it's 128 MB. These are the values of those limits on // Arm Mali as of November 2020. Xenia needs 512 MB shared memory to be bound, @@ -159,31 +169,28 @@ class SpirvShaderTranslator : public ShaderTranslator { // Never changed. kDescriptorSetSharedMemoryAndEdram, - // Pretty rarely used and rarely changed - flow control constants. 
- kDescriptorSetBoolLoopConstants, - // May stay the same across many draws. - kDescriptorSetSystemConstants, - // Less frequently changed (per-material). - kDescriptorSetFloatConstantsPixel, - // Quite frequently changed (for one object drawn multiple times, for - // instance - may contain projection matrices). - kDescriptorSetFloatConstantsVertex, - // Very frequently changed, especially for UI draws, and for models drawn in - // multiple parts - contains vertex and texture fetch constants. - kDescriptorSetFetchConstants, + // Changed in case of changes in the data. + kDescriptorSetConstants, // Mutable part of the pipeline layout: kDescriptorSetMutableLayoutsStart, // Rarely used at all, but may be changed at an unpredictable rate when - // vertex textures are used. - kDescriptorSetSamplersVertex = kDescriptorSetMutableLayoutsStart, - kDescriptorSetTexturesVertex, + // vertex textures are used (for example, for bones of an object, which may + // consist of multiple draw commands with different materials). + kDescriptorSetTexturesVertex = kDescriptorSetMutableLayoutsStart, // Per-material textures. - kDescriptorSetSamplersPixel, kDescriptorSetTexturesPixel, + kDescriptorSetCount, }; + static_assert( + kDescriptorSetCount <= 4, + "The number of descriptor sets used by translated shaders must be within " + "the minimum Vulkan maxBoundDescriptorSets requirement of 4, which is " + "the limit on most GPUs used in Android devices - Arm Mali, Imagination " + "PowerVR, Qualcomm Adreno 6xx and older, as well as on old PC Nvidia " + "drivers"); // "Xenia Emulator Microcode Translator". // https://github.com/KhronosGroup/SPIRV-Headers/blob/c43a43c7cc3af55910b9bec2a71e3e8a622443cf/include/spirv/spir-v.xml#L79 @@ -522,6 +529,8 @@ class SpirvShaderTranslator : public ShaderTranslator { spv::Id type_float_vectors_[4]; }; + spv::Id type_interpolators_; + spv::Id const_int_0_; spv::Id const_int4_0_; spv::Id const_uint_0_; @@ -582,11 +591,12 @@ class SpirvShaderTranslator : public ShaderTranslator { // PS, only when needed - bool. spv::Id input_front_facing_; - // In vertex or tessellation evaluation shaders - outputs, always - // xenos::kMaxInterpolators. - // In pixel shaders - inputs, min(xenos::kMaxInterpolators, register_count()). - spv::Id input_output_interpolators_[xenos::kMaxInterpolators]; - static const std::string kInterpolatorNamePrefix; + // VS output or PS input, only when needed - type_interpolators_. + // The Qualcomm Adreno driver has strict requirements for stage linkage - if + // this is an array in one stage, it must be an array in the other (in case of + // Xenia, including geometry shaders); it must not be an array in one and just + // elements in consecutive locations in another. + spv::Id input_output_interpolators_; enum OutputPerVertexMember : unsigned int { kOutputPerVertexMemberPosition, diff --git a/src/xenia/gpu/spirv_shader_translator_fetch.cc b/src/xenia/gpu/spirv_shader_translator_fetch.cc index f9bf7c564..7be662460 100644 --- a/src/xenia/gpu/spirv_shader_translator_fetch.cc +++ b/src/xenia/gpu/spirv_shader_translator_fetch.cc @@ -2573,10 +2573,10 @@ size_t SpirvShaderTranslator::FindOrAddSamplerBinding( builder_->makeSamplerType(), name.str().c_str()); builder_->addDecoration( new_sampler_binding.variable, spv::DecorationDescriptorSet, - int(is_vertex_shader() ? kDescriptorSetSamplersVertex - : kDescriptorSetSamplersPixel)); - builder_->addDecoration(new_sampler_binding.variable, spv::DecorationBinding, - int(new_sampler_binding_index)); + int(is_vertex_shader() ? 
kDescriptorSetTexturesVertex + : kDescriptorSetTexturesPixel)); + // The binding indices will be specified later after all textures are added as + // samplers are located after images in the descriptor set. if (features_.spirv_version >= spv::Spv_1_4) { main_interface_.push_back(new_sampler_binding.variable); } diff --git a/src/xenia/gpu/trace_dump.cc b/src/xenia/gpu/trace_dump.cc index 2932a4110..ec4e85cea 100644 --- a/src/xenia/gpu/trace_dump.cc +++ b/src/xenia/gpu/trace_dump.cc @@ -95,8 +95,8 @@ bool TraceDump::Setup() { // Create the emulator but don't initialize so we can setup the window. emulator_ = std::make_unique("", "", "", ""); X_STATUS result = emulator_->Setup( - nullptr, nullptr, nullptr, [this]() { return CreateGraphicsSystem(); }, - nullptr); + nullptr, nullptr, false, nullptr, + [this]() { return CreateGraphicsSystem(); }, nullptr); if (XFAILED(result)) { XELOGE("Failed to setup emulator: {:08X}", result); return false; diff --git a/src/xenia/gpu/trace_viewer.cc b/src/xenia/gpu/trace_viewer.cc index 70f68c184..7485dd6b0 100644 --- a/src/xenia/gpu/trace_viewer.cc +++ b/src/xenia/gpu/trace_viewer.cc @@ -125,7 +125,7 @@ bool TraceViewer::Setup() { // Create the emulator but don't initialize so we can setup the window. emulator_ = std::make_unique("", "", "", ""); X_STATUS result = emulator_->Setup( - window_.get(), nullptr, nullptr, + window_.get(), nullptr, false, nullptr, [this]() { return CreateGraphicsSystem(); }, nullptr); if (XFAILED(result)) { XELOGE("Failed to setup emulator: {:08X}", result); diff --git a/src/xenia/gpu/vulkan/premake5.lua b/src/xenia/gpu/vulkan/premake5.lua index 9c3c83c84..e12080994 100644 --- a/src/xenia/gpu/vulkan/premake5.lua +++ b/src/xenia/gpu/vulkan/premake5.lua @@ -34,7 +34,6 @@ project("xenia-gpu-vulkan-trace-viewer") "xenia-base", "xenia-core", "xenia-cpu", - "xenia-cpu-backend-x64", "xenia-gpu", "xenia-gpu-vulkan", "xenia-hid", @@ -66,6 +65,11 @@ project("xenia-gpu-vulkan-trace-viewer") "../../ui/windowed_app_main_"..platform_suffix..".cc", }) + filter("architecture:x86_64") + links({ + "xenia-cpu-backend-x64", + }) + filter("platforms:Linux") links({ "X11", @@ -95,7 +99,6 @@ project("xenia-gpu-vulkan-trace-dump") "xenia-base", "xenia-core", "xenia-cpu", - "xenia-cpu-backend-x64", "xenia-gpu", "xenia-gpu-vulkan", "xenia-hid", @@ -126,6 +129,11 @@ project("xenia-gpu-vulkan-trace-dump") "../../base/console_app_main_"..platform_suffix..".cc", }) + filter("architecture:x86_64") + links({ + "xenia-cpu-backend-x64", + }) + filter("platforms:Linux") links({ "X11", diff --git a/src/xenia/gpu/vulkan/vulkan_command_processor.cc b/src/xenia/gpu/vulkan/vulkan_command_processor.cc index aa9f2e4ee..89e43479a 100644 --- a/src/xenia/gpu/vulkan/vulkan_command_processor.cc +++ b/src/xenia/gpu/vulkan/vulkan_command_processor.cc @@ -49,6 +49,24 @@ namespace shaders { #include "xenia/gpu/shaders/bytecode/vulkan_spirv/fullscreen_cw_vs.h" } // namespace shaders +const VkDescriptorPoolSize + VulkanCommandProcessor::kDescriptorPoolSizeUniformBuffer = { + VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, + SpirvShaderTranslator::kConstantBufferCount* + kLinkedTypeDescriptorPoolSetCount}; + +const VkDescriptorPoolSize + VulkanCommandProcessor::kDescriptorPoolSizeStorageBuffer = { + VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, kLinkedTypeDescriptorPoolSetCount}; + +// 2x descriptors for texture images because of unsigned and signed bindings. 
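
Putting the shader-side changes together: all guest constant buffers now live in a single kDescriptorSetConstants set with one uniform-buffer binding per ConstantBuffer enum entry, and within each texture descriptor set the sampler bindings start right after the sampled-image bindings. The sketch below only prints that mapping; the enum is a local copy of the one declared in spirv_shader_translator.h, and the texture/sampler counts are arbitrary example numbers.

#include <cstdio>

enum ConstantBuffer {
  kConstantBufferSystem,
  kConstantBufferFloatVertex,
  kConstantBufferFloatPixel,
  kConstantBufferBoolLoop,
  kConstantBufferFetch,
  kConstantBufferCount,
};

int main() {
  const char* constant_buffer_names[kConstantBufferCount] = {
      "system", "float_vertex", "float_pixel", "bool_loop", "fetch"};
  // One binding per constant buffer inside the single constants set.
  for (int i = 0; i < kConstantBufferCount; ++i) {
    std::printf("kDescriptorSetConstants binding %d -> %s\n", i,
                constant_buffer_names[i]);
  }
  // In a texture set with T sampled images and S samplers, images take
  // bindings [0, T) and samplers take [T, T + S).
  const int texture_count = 3, sampler_count = 2;
  for (int i = 0; i < sampler_count; ++i) {
    std::printf("sampler %d -> binding %d\n", i, texture_count + i);
  }
  return 0;
}
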
+const VkDescriptorPoolSize + VulkanCommandProcessor::kDescriptorPoolSizeTextures[2] = { + {VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, + 2 * kLinkedTypeDescriptorPoolSetCount}, + {VK_DESCRIPTOR_TYPE_SAMPLER, kLinkedTypeDescriptorPoolSetCount}, +}; + // No specific reason for 32768 descriptors, just the "too much" amount from // Direct3D 12 PIX warnings. 2x descriptors for textures because of unsigned and // signed bindings. @@ -59,19 +77,19 @@ VulkanCommandProcessor::VulkanCommandProcessor( transient_descriptor_allocator_uniform_buffer_( *static_cast( graphics_system->provider()), - VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, 32768, 32768), + &kDescriptorPoolSizeUniformBuffer, 1, + kLinkedTypeDescriptorPoolSetCount), transient_descriptor_allocator_storage_buffer_( *static_cast( graphics_system->provider()), - VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 32768, 32768), - transient_descriptor_allocator_sampled_image_( + &kDescriptorPoolSizeStorageBuffer, 1, + kLinkedTypeDescriptorPoolSetCount), + transient_descriptor_allocator_textures_( *static_cast( graphics_system->provider()), - VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, 2 * 32768, 32768), - transient_descriptor_allocator_sampler_( - *static_cast( - graphics_system->provider()), - VK_DESCRIPTOR_TYPE_SAMPLER, 32768, 32768) {} + kDescriptorPoolSizeTextures, + uint32_t(xe::countof(kDescriptorPoolSizeTextures)), + kLinkedTypeDescriptorPoolSetCount) {} VulkanCommandProcessor::~VulkanCommandProcessor() = default; @@ -176,84 +194,61 @@ bool VulkanCommandProcessor::SetupContext() { "and the EDRAM"); return false; } - // Transient: uniform buffer for the guest vertex shader stages. + // Guest draw constants. + VkDescriptorSetLayoutBinding descriptor_set_layout_bindings_constants + [SpirvShaderTranslator::kConstantBufferCount] = {}; + for (uint32_t i = 0; i < SpirvShaderTranslator::kConstantBufferCount; ++i) { + VkDescriptorSetLayoutBinding& constants_binding = + descriptor_set_layout_bindings_constants[i]; + constants_binding.binding = i; + constants_binding.descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; + constants_binding.descriptorCount = 1; + constants_binding.pImmutableSamplers = nullptr; + } + descriptor_set_layout_bindings_constants + [SpirvShaderTranslator::kConstantBufferSystem] + .stageFlags = + guest_shader_stages | + (device_features.tessellationShader + ? VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT + : 0) | + (device_features.geometryShader ? VK_SHADER_STAGE_GEOMETRY_BIT : 0); + descriptor_set_layout_bindings_constants + [SpirvShaderTranslator::kConstantBufferFloatVertex] + .stageFlags = guest_shader_vertex_stages_; + descriptor_set_layout_bindings_constants + [SpirvShaderTranslator::kConstantBufferFloatPixel] + .stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT; + descriptor_set_layout_bindings_constants + [SpirvShaderTranslator::kConstantBufferBoolLoop] + .stageFlags = guest_shader_stages; + descriptor_set_layout_bindings_constants + [SpirvShaderTranslator::kConstantBufferFetch] + .stageFlags = guest_shader_stages; + descriptor_set_layout_create_info.bindingCount = + uint32_t(xe::countof(descriptor_set_layout_bindings_constants)); + descriptor_set_layout_create_info.pBindings = + descriptor_set_layout_bindings_constants; + if (dfn.vkCreateDescriptorSetLayout( + device, &descriptor_set_layout_create_info, nullptr, + &descriptor_set_layout_constants_) != VK_SUCCESS) { + XELOGE( + "Failed to create a Vulkan descriptor set layout for guest draw " + "constant buffers"); + return false; + } + // Transient: uniform buffer for compute shaders. 
VkDescriptorSetLayoutBinding descriptor_set_layout_binding_transient; descriptor_set_layout_binding_transient.binding = 0; descriptor_set_layout_binding_transient.descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; descriptor_set_layout_binding_transient.descriptorCount = 1; descriptor_set_layout_binding_transient.stageFlags = - guest_shader_vertex_stages_; + VK_SHADER_STAGE_COMPUTE_BIT; descriptor_set_layout_binding_transient.pImmutableSamplers = nullptr; descriptor_set_layout_create_info.bindingCount = 1; descriptor_set_layout_create_info.pBindings = &descriptor_set_layout_binding_transient; - if (dfn.vkCreateDescriptorSetLayout( - device, &descriptor_set_layout_create_info, nullptr, - &descriptor_set_layouts_single_transient_[size_t( - SingleTransientDescriptorLayout::kUniformBufferGuestVertex)]) != - VK_SUCCESS) { - XELOGE( - "Failed to create a Vulkan descriptor set layout for a uniform buffer " - "bound to the guest vertex shader stages"); - return false; - } - // Transient: uniform buffer for fragment shaders. - descriptor_set_layout_binding_transient.descriptorType = - VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; - descriptor_set_layout_binding_transient.stageFlags = - VK_SHADER_STAGE_FRAGMENT_BIT; - if (dfn.vkCreateDescriptorSetLayout( - device, &descriptor_set_layout_create_info, nullptr, - &descriptor_set_layouts_single_transient_[size_t( - SingleTransientDescriptorLayout::kUniformBufferFragment)]) != - VK_SUCCESS) { - XELOGE( - "Failed to create a Vulkan descriptor set layout for a uniform buffer " - "bound to the fragment shader"); - return false; - } - // Transient: uniform buffer for the guest shader stages. - descriptor_set_layout_binding_transient.descriptorType = - VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; - descriptor_set_layout_binding_transient.stageFlags = guest_shader_stages; - if (dfn.vkCreateDescriptorSetLayout( - device, &descriptor_set_layout_create_info, nullptr, - &descriptor_set_layouts_single_transient_[size_t( - SingleTransientDescriptorLayout::kUniformBufferGuestShader)]) != - VK_SUCCESS) { - XELOGE( - "Failed to create a Vulkan descriptor set layout for a uniform buffer " - "bound to the guest shader stages"); - return false; - } - // Transient: system constants. - descriptor_set_layout_binding_transient.descriptorType = - VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; - descriptor_set_layout_binding_transient.stageFlags = guest_shader_stages; - if (device_features.tessellationShader) { - descriptor_set_layout_binding_transient.stageFlags |= - VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT; - } - if (device_features.geometryShader) { - descriptor_set_layout_binding_transient.stageFlags |= - VK_SHADER_STAGE_GEOMETRY_BIT; - } - if (dfn.vkCreateDescriptorSetLayout( - device, &descriptor_set_layout_create_info, nullptr, - &descriptor_set_layouts_single_transient_[size_t( - SingleTransientDescriptorLayout :: - kUniformBufferSystemConstants)]) != VK_SUCCESS) { - XELOGE( - "Failed to create a Vulkan descriptor set layout for the system " - "constants uniform buffer"); - return false; - } - // Transient: uniform buffer for compute shaders. 
- descriptor_set_layout_binding_transient.descriptorType = - VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; - descriptor_set_layout_binding_transient.stageFlags = - VK_SHADER_STAGE_COMPUTE_BIT; if (dfn.vkCreateDescriptorSetLayout( device, &descriptor_set_layout_create_info, nullptr, &descriptor_set_layouts_single_transient_[size_t( @@ -1052,6 +1047,9 @@ void VulkanCommandProcessor::ShutdownContext() { dfn.vkDestroyDescriptorSetLayout, device, descriptor_set_layout_single_transient); } + ui::vulkan::util::DestroyAndNullHandle(dfn.vkDestroyDescriptorSetLayout, + device, + descriptor_set_layout_constants_); ui::vulkan::util::DestroyAndNullHandle( dfn.vkDestroyDescriptorSetLayout, device, descriptor_set_layout_shared_memory_and_edram_); @@ -1134,27 +1132,25 @@ void VulkanCommandProcessor::WriteRegister(uint32_t index, uint32_t value) { float_constant_index -= 256; if (current_float_constant_map_pixel_[float_constant_index >> 6] & (1ull << (float_constant_index & 63))) { - current_graphics_descriptor_set_values_up_to_date_ &= - ~(UINT32_C(1) - << SpirvShaderTranslator::kDescriptorSetFloatConstantsPixel); + current_constant_buffers_up_to_date_ &= ~( + UINT32_C(1) << SpirvShaderTranslator::kConstantBufferFloatPixel); } } else { if (current_float_constant_map_vertex_[float_constant_index >> 6] & (1ull << (float_constant_index & 63))) { - current_graphics_descriptor_set_values_up_to_date_ &= - ~(UINT32_C(1) - << SpirvShaderTranslator::kDescriptorSetFloatConstantsVertex); + current_constant_buffers_up_to_date_ &= ~( + UINT32_C(1) << SpirvShaderTranslator::kConstantBufferFloatVertex); } } } } else if (index >= XE_GPU_REG_SHADER_CONSTANT_BOOL_000_031 && index <= XE_GPU_REG_SHADER_CONSTANT_LOOP_31) { - current_graphics_descriptor_set_values_up_to_date_ &= ~( - UINT32_C(1) << SpirvShaderTranslator::kDescriptorSetBoolLoopConstants); + current_constant_buffers_up_to_date_ &= + ~(UINT32_C(1) << SpirvShaderTranslator::kConstantBufferBoolLoop); } else if (index >= XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0 && index <= XE_GPU_REG_SHADER_CONSTANT_FETCH_31_5) { - current_graphics_descriptor_set_values_up_to_date_ &= - ~(UINT32_C(1) << SpirvShaderTranslator::kDescriptorSetFetchConstants); + current_constant_buffers_up_to_date_ &= + ~(UINT32_C(1) << SpirvShaderTranslator::kConstantBufferFetch); if (texture_cache_) { texture_cache_->TextureFetchConstantWritten( (index - XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0) / 6); @@ -1756,14 +1752,21 @@ VkDescriptorSet VulkanCommandProcessor::AllocateSingleTransientDescriptor( const ui::vulkan::VulkanProvider& provider = GetVulkanProvider(); const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider.dfn(); VkDevice device = provider.device(); - ui::vulkan::SingleTypeDescriptorSetAllocator& - transfer_descriptor_allocator = - transient_descriptor_layout == - SingleTransientDescriptorLayout::kStorageBufferCompute - ? transient_descriptor_allocator_storage_buffer_ - : transient_descriptor_allocator_uniform_buffer_; - descriptor_set = transfer_descriptor_allocator.Allocate( - GetSingleTransientDescriptorLayout(transient_descriptor_layout), 1); + bool is_storage_buffer = + transient_descriptor_layout == + SingleTransientDescriptorLayout::kStorageBufferCompute; + ui::vulkan::LinkedTypeDescriptorSetAllocator& + transient_descriptor_allocator = + is_storage_buffer ? transient_descriptor_allocator_storage_buffer_ + : transient_descriptor_allocator_uniform_buffer_; + VkDescriptorPoolSize descriptor_count; + descriptor_count.type = is_storage_buffer + ? 
VK_DESCRIPTOR_TYPE_STORAGE_BUFFER + : VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; + descriptor_count.descriptorCount = 1; + descriptor_set = transient_descriptor_allocator.Allocate( + GetSingleTransientDescriptorLayout(transient_descriptor_layout), + &descriptor_count, 1); if (descriptor_set == VK_NULL_HANDLE) { return VK_NULL_HANDLE; } @@ -1777,15 +1780,16 @@ VkDescriptorSet VulkanCommandProcessor::AllocateSingleTransientDescriptor( } VkDescriptorSetLayout VulkanCommandProcessor::GetTextureDescriptorSetLayout( - bool is_samplers, bool is_vertex, size_t binding_count) { + bool is_vertex, size_t texture_count, size_t sampler_count) { + size_t binding_count = texture_count + sampler_count; if (!binding_count) { return descriptor_set_layout_empty_; } TextureDescriptorSetLayoutKey texture_descriptor_set_layout_key; - texture_descriptor_set_layout_key.is_samplers = uint32_t(is_samplers); + texture_descriptor_set_layout_key.texture_count = uint32_t(texture_count); + texture_descriptor_set_layout_key.sampler_count = uint32_t(sampler_count); texture_descriptor_set_layout_key.is_vertex = uint32_t(is_vertex); - texture_descriptor_set_layout_key.binding_count = uint32_t(binding_count); auto it_existing = descriptor_set_layouts_textures_.find(texture_descriptor_set_layout_key); if (it_existing != descriptor_set_layouts_textures_.end()) { @@ -1798,16 +1802,22 @@ VkDescriptorSetLayout VulkanCommandProcessor::GetTextureDescriptorSetLayout( descriptor_set_layout_bindings_.clear(); descriptor_set_layout_bindings_.reserve(binding_count); - VkDescriptorType descriptor_type = is_samplers - ? VK_DESCRIPTOR_TYPE_SAMPLER - : VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE; VkShaderStageFlags stage_flags = is_vertex ? guest_shader_vertex_stages_ : VK_SHADER_STAGE_FRAGMENT_BIT; - for (size_t i = 0; i < binding_count; ++i) { + for (size_t i = 0; i < texture_count; ++i) { VkDescriptorSetLayoutBinding& descriptor_set_layout_binding = descriptor_set_layout_bindings_.emplace_back(); descriptor_set_layout_binding.binding = uint32_t(i); - descriptor_set_layout_binding.descriptorType = descriptor_type; + descriptor_set_layout_binding.descriptorType = + VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE; + descriptor_set_layout_binding.descriptorCount = 1; + descriptor_set_layout_binding.stageFlags = stage_flags; + } + for (size_t i = 0; i < sampler_count; ++i) { + VkDescriptorSetLayoutBinding& descriptor_set_layout_binding = + descriptor_set_layout_bindings_.emplace_back(); + descriptor_set_layout_binding.binding = uint32_t(texture_count + i); + descriptor_set_layout_binding.descriptorType = VK_DESCRIPTOR_TYPE_SAMPLER; descriptor_set_layout_binding.descriptorCount = 1; descriptor_set_layout_binding.stageFlags = stage_flags; } @@ -1847,40 +1857,24 @@ VulkanCommandProcessor::GetPipelineLayout(size_t texture_count_pixel, } } - VkDescriptorSetLayout descriptor_set_layout_textures_pixel = - GetTextureDescriptorSetLayout(false, false, texture_count_pixel); - if (descriptor_set_layout_textures_pixel == VK_NULL_HANDLE) { - XELOGE( - "Failed to obtain a Vulkan descriptor set layout for {} sampled images " - "for guest pixel shaders", - texture_count_pixel); - return nullptr; - } - VkDescriptorSetLayout descriptor_set_layout_samplers_pixel = - GetTextureDescriptorSetLayout(true, false, sampler_count_pixel); - if (descriptor_set_layout_samplers_pixel == VK_NULL_HANDLE) { - XELOGE( - "Failed to obtain a Vulkan descriptor set layout for {} samplers for " - "guest pixel shaders", - sampler_count_pixel); - return nullptr; - } VkDescriptorSetLayout 
descriptor_set_layout_textures_vertex = - GetTextureDescriptorSetLayout(false, true, texture_count_vertex); + GetTextureDescriptorSetLayout(true, texture_count_vertex, + sampler_count_vertex); if (descriptor_set_layout_textures_vertex == VK_NULL_HANDLE) { XELOGE( "Failed to obtain a Vulkan descriptor set layout for {} sampled images " - "for guest vertex shaders", - texture_count_vertex); + "and {} samplers for guest vertex shaders", + texture_count_vertex, sampler_count_vertex); return nullptr; } - VkDescriptorSetLayout descriptor_set_layout_samplers_vertex = - GetTextureDescriptorSetLayout(true, true, sampler_count_vertex); - if (descriptor_set_layout_samplers_vertex == VK_NULL_HANDLE) { + VkDescriptorSetLayout descriptor_set_layout_textures_pixel = + GetTextureDescriptorSetLayout(false, texture_count_pixel, + sampler_count_pixel); + if (descriptor_set_layout_textures_pixel == VK_NULL_HANDLE) { XELOGE( - "Failed to obtain a Vulkan descriptor set layout for {} samplers for " - "guest vertex shaders", - sampler_count_vertex); + "Failed to obtain a Vulkan descriptor set layout for {} sampled images " + "and {} samplers for guest pixel shaders", + texture_count_pixel, sampler_count_pixel); return nullptr; } @@ -1890,31 +1884,11 @@ VulkanCommandProcessor::GetPipelineLayout(size_t texture_count_pixel, descriptor_set_layouts [SpirvShaderTranslator::kDescriptorSetSharedMemoryAndEdram] = descriptor_set_layout_shared_memory_and_edram_; - descriptor_set_layouts - [SpirvShaderTranslator::kDescriptorSetBoolLoopConstants] = - GetSingleTransientDescriptorLayout( - SingleTransientDescriptorLayout::kUniformBufferGuestShader); - descriptor_set_layouts[SpirvShaderTranslator::kDescriptorSetSystemConstants] = - GetSingleTransientDescriptorLayout( - SingleTransientDescriptorLayout::kUniformBufferSystemConstants); - descriptor_set_layouts - [SpirvShaderTranslator::kDescriptorSetFloatConstantsPixel] = - GetSingleTransientDescriptorLayout( - SingleTransientDescriptorLayout::kUniformBufferFragment); - descriptor_set_layouts - [SpirvShaderTranslator::kDescriptorSetFloatConstantsVertex] = - GetSingleTransientDescriptorLayout( - SingleTransientDescriptorLayout::kUniformBufferGuestVertex); - descriptor_set_layouts[SpirvShaderTranslator::kDescriptorSetFetchConstants] = - GetSingleTransientDescriptorLayout( - SingleTransientDescriptorLayout::kUniformBufferGuestShader); + descriptor_set_layouts[SpirvShaderTranslator::kDescriptorSetConstants] = + descriptor_set_layout_constants_; // Mutable layouts. - descriptor_set_layouts[SpirvShaderTranslator::kDescriptorSetSamplersVertex] = - descriptor_set_layout_samplers_vertex; descriptor_set_layouts[SpirvShaderTranslator::kDescriptorSetTexturesVertex] = descriptor_set_layout_textures_vertex; - descriptor_set_layouts[SpirvShaderTranslator::kDescriptorSetSamplersPixel] = - descriptor_set_layout_samplers_pixel; descriptor_set_layouts[SpirvShaderTranslator::kDescriptorSetTexturesPixel] = descriptor_set_layout_textures_pixel; @@ -1945,9 +1919,7 @@ VulkanCommandProcessor::GetPipelineLayout(size_t texture_count_pixel, std::piecewise_construct, std::forward_as_tuple(pipeline_layout_key), std::forward_as_tuple(pipeline_layout, descriptor_set_layout_textures_vertex, - descriptor_set_layout_samplers_vertex, - descriptor_set_layout_textures_pixel, - descriptor_set_layout_samplers_pixel)); + descriptor_set_layout_textures_pixel)); // unordered_map insertion doesn't invalidate element references. 
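
The "unordered_map insertion doesn't invalidate element references" comment above is the standard-library guarantee the pipeline-layout cache leans on: rehashing may invalidate iterators, but pointers and references to existing elements stay valid, so handing out a pointer into the map is safe. A standalone check of that property:

#include <cstdio>
#include <unordered_map>

int main() {
  std::unordered_map<int, int> map;
  map[1] = 100;
  int* first_value = &map[1];
  // Force several rehashes by inserting many more elements.
  for (int i = 2; i < 10000; ++i) {
    map[i] = i;
  }
  std::printf("value %d, address unchanged: %s\n", *first_value,
              first_value == &map[1] ? "yes" : "no");
  return 0;
}
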
return &emplaced_pair.first->second; } @@ -2346,13 +2318,6 @@ bool VulkanCommandProcessor::IssueDraw(xenos::PrimitiveType prim_type, // set N if set layouts 0 through N are compatible). uint32_t descriptor_sets_kept = uint32_t(SpirvShaderTranslator::kDescriptorSetCount); - if (current_guest_graphics_pipeline_layout_ - ->descriptor_set_layout_samplers_vertex_ref() != - pipeline_layout->descriptor_set_layout_samplers_vertex_ref()) { - descriptor_sets_kept = std::min( - descriptor_sets_kept, - uint32_t(SpirvShaderTranslator::kDescriptorSetSamplersVertex)); - } if (current_guest_graphics_pipeline_layout_ ->descriptor_set_layout_textures_vertex_ref() != pipeline_layout->descriptor_set_layout_textures_vertex_ref()) { @@ -2360,13 +2325,6 @@ bool VulkanCommandProcessor::IssueDraw(xenos::PrimitiveType prim_type, descriptor_sets_kept, uint32_t(SpirvShaderTranslator::kDescriptorSetTexturesVertex)); } - if (current_guest_graphics_pipeline_layout_ - ->descriptor_set_layout_samplers_pixel_ref() != - pipeline_layout->descriptor_set_layout_samplers_pixel_ref()) { - descriptor_sets_kept = std::min( - descriptor_sets_kept, - uint32_t(SpirvShaderTranslator::kDescriptorSetSamplersPixel)); - } if (current_guest_graphics_pipeline_layout_ ->descriptor_set_layout_textures_pixel_ref() != pipeline_layout->descriptor_set_layout_textures_pixel_ref()) { @@ -2774,6 +2732,7 @@ bool VulkanCommandProcessor::BeginSubmission(bool is_guest_command) { sizeof(current_float_constant_map_pixel_)); std::memset(current_graphics_descriptor_sets_, 0, sizeof(current_graphics_descriptor_sets_)); + current_constant_buffers_up_to_date_ = 0; current_graphics_descriptor_sets_ [SpirvShaderTranslator::kDescriptorSetSharedMemoryAndEdram] = shared_memory_and_edram_descriptor_set_; @@ -2797,6 +2756,16 @@ bool VulkanCommandProcessor::BeginSubmission(bool is_guest_command) { .push_back(used_transient_descriptor.set); single_transient_descriptors_used_.pop_front(); } + while (!constants_transient_descriptors_used_.empty()) { + const std::pair& used_transient_descriptor = + constants_transient_descriptors_used_.front(); + if (used_transient_descriptor.first > frame_completed_) { + break; + } + constants_transient_descriptors_free_.push_back( + used_transient_descriptor.second); + constants_transient_descriptors_used_.pop_front(); + } while (!texture_transient_descriptor_sets_used_.empty()) { const UsedTextureTransientDescriptorSet& used_transient_descriptor_set = texture_transient_descriptor_sets_used_.front(); @@ -3089,9 +3058,10 @@ bool VulkanCommandProcessor::EndSubmission(bool is_swap) { void VulkanCommandProcessor::ClearTransientDescriptorPools() { texture_transient_descriptor_sets_free_.clear(); texture_transient_descriptor_sets_used_.clear(); - transient_descriptor_allocator_sampler_.Reset(); - transient_descriptor_allocator_sampled_image_.Reset(); + transient_descriptor_allocator_textures_.Reset(); + constants_transient_descriptors_free_.clear(); + constants_transient_descriptors_used_.clear(); for (std::vector& transient_descriptors_free : single_transient_descriptors_free_) { transient_descriptors_free.clear(); @@ -3520,8 +3490,8 @@ void VulkanCommandProcessor::UpdateSystemConstantValues( } if (dirty) { - current_graphics_descriptor_set_values_up_to_date_ &= - ~(UINT32_C(1) << SpirvShaderTranslator::kDescriptorSetSystemConstants); + current_constant_buffers_up_to_date_ &= + ~(UINT32_C(1) << SpirvShaderTranslator::kConstantBufferSystem); } } @@ -3537,7 +3507,7 @@ bool VulkanCommandProcessor::UpdateBindings(const VulkanShader* 
vertex_shader, const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider.dfn(); VkDevice device = provider.device(); - // Invalidate descriptors for changed data. + // Invalidate constant buffers and descriptors for changed data. // Float constants. // These are the constant base addresses/ranges for shaders. @@ -3559,10 +3529,8 @@ bool VulkanCommandProcessor::UpdateBindings(const VulkanShader* vertex_shader, // If no float constants at all, any buffer can be reused for them, so not // invalidating. if (float_constant_count_vertex) { - current_graphics_descriptor_set_values_up_to_date_ &= - ~( - UINT32_C(1) - << SpirvShaderTranslator::kDescriptorSetFloatConstantsVertex); + current_constant_buffers_up_to_date_ &= + ~(UINT32_C(1) << SpirvShaderTranslator::kConstantBufferFloatVertex); } } } @@ -3577,9 +3545,8 @@ bool VulkanCommandProcessor::UpdateBindings(const VulkanShader* vertex_shader, current_float_constant_map_pixel_[i] = float_constant_map_pixel.float_bitmap[i]; if (float_constant_count_pixel) { - current_graphics_descriptor_set_values_up_to_date_ &= - ~(UINT32_C(1) - << SpirvShaderTranslator::kDescriptorSetFloatConstantsPixel); + current_constant_buffers_up_to_date_ &= ~( + UINT32_C(1) << SpirvShaderTranslator::kConstantBufferFloatPixel); } } } @@ -3588,6 +3555,141 @@ bool VulkanCommandProcessor::UpdateBindings(const VulkanShader* vertex_shader, sizeof(current_float_constant_map_pixel_)); } + // Write the new constant buffers. + constexpr uint32_t kAllConstantBuffersMask = + (UINT32_C(1) << SpirvShaderTranslator::kConstantBufferCount) - 1; + assert_zero(current_constant_buffers_up_to_date_ & ~kAllConstantBuffersMask); + if ((current_constant_buffers_up_to_date_ & kAllConstantBuffersMask) != + kAllConstantBuffersMask) { + current_graphics_descriptor_set_values_up_to_date_ &= + ~(UINT32_C(1) << SpirvShaderTranslator::kDescriptorSetConstants); + size_t uniform_buffer_alignment = size_t( + provider.device_properties().limits.minUniformBufferOffsetAlignment); + // System constants. + if (!(current_constant_buffers_up_to_date_ & + (UINT32_C(1) << SpirvShaderTranslator::kConstantBufferSystem))) { + VkDescriptorBufferInfo& buffer_info = current_constant_buffer_infos_ + [SpirvShaderTranslator::kConstantBufferSystem]; + uint8_t* mapping = uniform_buffer_pool_->Request( + frame_current_, sizeof(SpirvShaderTranslator::SystemConstants), + uniform_buffer_alignment, buffer_info.buffer, buffer_info.offset); + if (!mapping) { + return false; + } + buffer_info.range = sizeof(SpirvShaderTranslator::SystemConstants); + std::memcpy(mapping, &system_constants_, + sizeof(SpirvShaderTranslator::SystemConstants)); + current_constant_buffers_up_to_date_ |= + UINT32_C(1) << SpirvShaderTranslator::kConstantBufferSystem; + } + // Vertex shader float constants. + if (!(current_constant_buffers_up_to_date_ & + (UINT32_C(1) << SpirvShaderTranslator::kConstantBufferFloatVertex))) { + VkDescriptorBufferInfo& buffer_info = current_constant_buffer_infos_ + [SpirvShaderTranslator::kConstantBufferFloatVertex]; + // Even if the shader doesn't need any float constants, a valid binding + // must still be provided (the pipeline layout always has float constants, + // for both the vertex shader and the pixel shader), so if the first draw + // in the frame doesn't have float constants at all, still allocate a + // dummy buffer. 
+ size_t float_constants_size = + sizeof(float) * 4 * + std::max(float_constant_count_vertex, UINT32_C(1)); + uint8_t* mapping = uniform_buffer_pool_->Request( + frame_current_, float_constants_size, uniform_buffer_alignment, + buffer_info.buffer, buffer_info.offset); + if (!mapping) { + return false; + } + buffer_info.range = VkDeviceSize(float_constants_size); + for (uint32_t i = 0; i < 4; ++i) { + uint64_t float_constant_map_entry = + current_float_constant_map_vertex_[i]; + uint32_t float_constant_index; + while (xe::bit_scan_forward(float_constant_map_entry, + &float_constant_index)) { + float_constant_map_entry &= ~(1ull << float_constant_index); + std::memcpy(mapping, + ®s[XE_GPU_REG_SHADER_CONSTANT_000_X + (i << 8) + + (float_constant_index << 2)] + .f32, + sizeof(float) * 4); + mapping += sizeof(float) * 4; + } + } + current_constant_buffers_up_to_date_ |= + UINT32_C(1) << SpirvShaderTranslator::kConstantBufferFloatVertex; + } + // Pixel shader float constants. + if (!(current_constant_buffers_up_to_date_ & + (UINT32_C(1) << SpirvShaderTranslator::kConstantBufferFloatPixel))) { + VkDescriptorBufferInfo& buffer_info = current_constant_buffer_infos_ + [SpirvShaderTranslator::kConstantBufferFloatPixel]; + size_t float_constants_size = + sizeof(float) * 4 * std::max(float_constant_count_pixel, UINT32_C(1)); + uint8_t* mapping = uniform_buffer_pool_->Request( + frame_current_, float_constants_size, uniform_buffer_alignment, + buffer_info.buffer, buffer_info.offset); + if (!mapping) { + return false; + } + buffer_info.range = VkDeviceSize(float_constants_size); + for (uint32_t i = 0; i < 4; ++i) { + uint64_t float_constant_map_entry = + current_float_constant_map_pixel_[i]; + uint32_t float_constant_index; + while (xe::bit_scan_forward(float_constant_map_entry, + &float_constant_index)) { + float_constant_map_entry &= ~(1ull << float_constant_index); + std::memcpy(mapping, + ®s[XE_GPU_REG_SHADER_CONSTANT_256_X + (i << 8) + + (float_constant_index << 2)] + .f32, + sizeof(float) * 4); + mapping += sizeof(float) * 4; + } + } + current_constant_buffers_up_to_date_ |= + UINT32_C(1) << SpirvShaderTranslator::kConstantBufferFloatPixel; + } + // Bool and loop constants. + if (!(current_constant_buffers_up_to_date_ & + (UINT32_C(1) << SpirvShaderTranslator::kConstantBufferBoolLoop))) { + VkDescriptorBufferInfo& buffer_info = current_constant_buffer_infos_ + [SpirvShaderTranslator::kConstantBufferBoolLoop]; + constexpr size_t kBoolLoopConstantsSize = sizeof(uint32_t) * (8 + 32); + uint8_t* mapping = uniform_buffer_pool_->Request( + frame_current_, kBoolLoopConstantsSize, uniform_buffer_alignment, + buffer_info.buffer, buffer_info.offset); + if (!mapping) { + return false; + } + buffer_info.range = VkDeviceSize(kBoolLoopConstantsSize); + std::memcpy(mapping, ®s[XE_GPU_REG_SHADER_CONSTANT_BOOL_000_031].u32, + kBoolLoopConstantsSize); + current_constant_buffers_up_to_date_ |= + UINT32_C(1) << SpirvShaderTranslator::kConstantBufferBoolLoop; + } + // Fetch constants. 
+ if (!(current_constant_buffers_up_to_date_ & + (UINT32_C(1) << SpirvShaderTranslator::kConstantBufferFetch))) { + VkDescriptorBufferInfo& buffer_info = current_constant_buffer_infos_ + [SpirvShaderTranslator::kConstantBufferFetch]; + constexpr size_t kFetchConstantsSize = sizeof(uint32_t) * 6 * 32; + uint8_t* mapping = uniform_buffer_pool_->Request( + frame_current_, kFetchConstantsSize, uniform_buffer_alignment, + buffer_info.buffer, buffer_info.offset); + if (!mapping) { + return false; + } + buffer_info.range = VkDeviceSize(kFetchConstantsSize); + std::memcpy(mapping, ®s[XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0].u32, + kFetchConstantsSize); + current_constant_buffers_up_to_date_ |= + UINT32_C(1) << SpirvShaderTranslator::kConstantBufferFetch; + } + } + // Textures and samplers. const std::vector& samplers_vertex = vertex_shader->GetSamplerBindingsAfterTranslation(); @@ -3611,9 +3713,7 @@ bool VulkanCommandProcessor::UpdateBindings(const VulkanShader* vertex_shader, } // TODO(Triang3l): Reuse texture and sampler bindings if not changed. current_graphics_descriptor_set_values_up_to_date_ &= - ~((UINT32_C(1) << SpirvShaderTranslator::kDescriptorSetSamplersVertex) | - (UINT32_C(1) << SpirvShaderTranslator::kDescriptorSetTexturesVertex) | - (UINT32_C(1) << SpirvShaderTranslator::kDescriptorSetSamplersPixel) | + ~((UINT32_C(1) << SpirvShaderTranslator::kDescriptorSetTexturesVertex) | (UINT32_C(1) << SpirvShaderTranslator::kDescriptorSetTexturesPixel)); // Make sure new descriptor sets are bound to the command buffer. @@ -3623,39 +3723,21 @@ bool VulkanCommandProcessor::UpdateBindings(const VulkanShader* vertex_shader, // Fill the texture and sampler write image infos. - bool write_vertex_samplers = - sampler_count_vertex && - !(current_graphics_descriptor_set_values_up_to_date_ & - (UINT32_C(1) << SpirvShaderTranslator::kDescriptorSetSamplersVertex)); bool write_vertex_textures = - texture_count_vertex && + (texture_count_vertex || sampler_count_vertex) && !(current_graphics_descriptor_set_values_up_to_date_ & (UINT32_C(1) << SpirvShaderTranslator::kDescriptorSetTexturesVertex)); - bool write_pixel_samplers = - sampler_count_pixel && - !(current_graphics_descriptor_set_values_up_to_date_ & - (UINT32_C(1) << SpirvShaderTranslator::kDescriptorSetSamplersPixel)); bool write_pixel_textures = - texture_count_pixel && + (texture_count_pixel || sampler_count_pixel) && !(current_graphics_descriptor_set_values_up_to_date_ & (UINT32_C(1) << SpirvShaderTranslator::kDescriptorSetTexturesPixel)); descriptor_write_image_info_.clear(); descriptor_write_image_info_.reserve( - (write_vertex_samplers ? sampler_count_vertex : 0) + - (write_vertex_textures ? texture_count_vertex : 0) + - (write_pixel_samplers ? sampler_count_pixel : 0) + - (write_pixel_textures ? texture_count_pixel : 0)); - size_t vertex_sampler_image_info_offset = descriptor_write_image_info_.size(); - if (write_vertex_samplers) { - for (const std::pair& - sampler_pair : current_samplers_vertex_) { - VkDescriptorImageInfo& descriptor_image_info = - descriptor_write_image_info_.emplace_back(); - descriptor_image_info.sampler = sampler_pair.second; - } - } + (write_vertex_textures ? texture_count_vertex + sampler_count_vertex + : 0) + + (write_pixel_textures ? 
texture_count_pixel + sampler_count_pixel : 0)); size_t vertex_texture_image_info_offset = descriptor_write_image_info_.size(); - if (write_vertex_textures) { + if (write_vertex_textures && texture_count_vertex) { for (const VulkanShader::TextureBinding& texture_binding : textures_vertex) { VkDescriptorImageInfo& descriptor_image_info = @@ -3668,17 +3750,17 @@ bool VulkanCommandProcessor::UpdateBindings(const VulkanShader* vertex_shader, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; } } - size_t pixel_sampler_image_info_offset = descriptor_write_image_info_.size(); - if (write_pixel_samplers) { + size_t vertex_sampler_image_info_offset = descriptor_write_image_info_.size(); + if (write_vertex_textures && sampler_count_vertex) { for (const std::pair& - sampler_pair : current_samplers_pixel_) { + sampler_pair : current_samplers_vertex_) { VkDescriptorImageInfo& descriptor_image_info = descriptor_write_image_info_.emplace_back(); descriptor_image_info.sampler = sampler_pair.second; } } size_t pixel_texture_image_info_offset = descriptor_write_image_info_.size(); - if (write_pixel_textures) { + if (write_pixel_textures && texture_count_pixel) { for (const VulkanShader::TextureBinding& texture_binding : *textures_pixel) { VkDescriptorImageInfo& descriptor_image_info = @@ -3691,242 +3773,119 @@ bool VulkanCommandProcessor::UpdateBindings(const VulkanShader* vertex_shader, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; } } + size_t pixel_sampler_image_info_offset = descriptor_write_image_info_.size(); + if (write_pixel_textures && sampler_count_pixel) { + for (const std::pair& + sampler_pair : current_samplers_pixel_) { + VkDescriptorImageInfo& descriptor_image_info = + descriptor_write_image_info_.emplace_back(); + descriptor_image_info.sampler = sampler_pair.second; + } + } // Write the new descriptor sets. - VkWriteDescriptorSet - write_descriptor_sets[SpirvShaderTranslator::kDescriptorSetCount]; + // Consecutive bindings updated via a single VkWriteDescriptorSet must have + // identical stage flags, but for the constants they vary. Plus vertex and + // pixel texture images and samplers. + std::array + write_descriptor_sets; uint32_t write_descriptor_set_count = 0; uint32_t write_descriptor_set_bits = 0; assert_not_zero( current_graphics_descriptor_set_values_up_to_date_ & (UINT32_C(1) << SpirvShaderTranslator::kDescriptorSetSharedMemoryAndEdram)); - // Bool and loop constants. - VkDescriptorBufferInfo buffer_info_bool_loop_constants; + // Constant buffers. if (!(current_graphics_descriptor_set_values_up_to_date_ & - (UINT32_C(1) - << SpirvShaderTranslator::kDescriptorSetBoolLoopConstants))) { - VkWriteDescriptorSet& write_bool_loop_constants = - write_descriptor_sets[write_descriptor_set_count++]; - constexpr size_t kBoolLoopConstantsSize = sizeof(uint32_t) * (8 + 32); - uint8_t* mapping_bool_loop_constants = WriteTransientUniformBufferBinding( - kBoolLoopConstantsSize, - SingleTransientDescriptorLayout::kUniformBufferGuestShader, - buffer_info_bool_loop_constants, write_bool_loop_constants); - if (!mapping_bool_loop_constants) { - return false; - } - std::memcpy(mapping_bool_loop_constants, - ®s[XE_GPU_REG_SHADER_CONSTANT_BOOL_000_031].u32, - kBoolLoopConstantsSize); - write_descriptor_set_bits |= - UINT32_C(1) << SpirvShaderTranslator::kDescriptorSetBoolLoopConstants; - current_graphics_descriptor_sets_ - [SpirvShaderTranslator::kDescriptorSetBoolLoopConstants] = - write_bool_loop_constants.dstSet; - } - // System constants. 
- VkDescriptorBufferInfo buffer_info_system_constants; - if (!(current_graphics_descriptor_set_values_up_to_date_ & - (UINT32_C(1) - << SpirvShaderTranslator::kDescriptorSetSystemConstants))) { - VkWriteDescriptorSet& write_system_constants = - write_descriptor_sets[write_descriptor_set_count++]; - uint8_t* mapping_system_constants = WriteTransientUniformBufferBinding( - sizeof(SpirvShaderTranslator::SystemConstants), - SingleTransientDescriptorLayout::kUniformBufferSystemConstants, - buffer_info_system_constants, write_system_constants); - if (!mapping_system_constants) { - return false; - } - std::memcpy(mapping_system_constants, &system_constants_, - sizeof(SpirvShaderTranslator::SystemConstants)); - write_descriptor_set_bits |= - UINT32_C(1) << SpirvShaderTranslator::kDescriptorSetSystemConstants; - current_graphics_descriptor_sets_ - [SpirvShaderTranslator::kDescriptorSetSystemConstants] = - write_system_constants.dstSet; - } - // Pixel shader float constants. - VkDescriptorBufferInfo buffer_info_float_constant_pixel; - if (!(current_graphics_descriptor_set_values_up_to_date_ & - (UINT32_C(1) - << SpirvShaderTranslator::kDescriptorSetFloatConstantsPixel))) { - // Even if the shader doesn't need any float constants, a valid binding must - // still be provided (the pipeline layout always has float constants, for - // both the vertex shader and the pixel shader), so if the first draw in the - // frame doesn't have float constants at all, still allocate an empty - // buffer. - VkWriteDescriptorSet& write_float_constants_pixel = - write_descriptor_sets[write_descriptor_set_count++]; - uint8_t* mapping_float_constants_pixel = WriteTransientUniformBufferBinding( - sizeof(float) * 4 * std::max(float_constant_count_pixel, UINT32_C(1)), - SingleTransientDescriptorLayout::kUniformBufferFragment, - buffer_info_float_constant_pixel, write_float_constants_pixel); - if (!mapping_float_constants_pixel) { - return false; - } - for (uint32_t i = 0; i < 4; ++i) { - uint64_t float_constant_map_entry = current_float_constant_map_pixel_[i]; - uint32_t float_constant_index; - while (xe::bit_scan_forward(float_constant_map_entry, - &float_constant_index)) { - float_constant_map_entry &= ~(1ull << float_constant_index); - std::memcpy(mapping_float_constants_pixel, - ®s[XE_GPU_REG_SHADER_CONSTANT_256_X + (i << 8) + - (float_constant_index << 2)] - .f32, - sizeof(float) * 4); - mapping_float_constants_pixel += sizeof(float) * 4; + (UINT32_C(1) << SpirvShaderTranslator::kDescriptorSetConstants))) { + VkDescriptorSet constants_descriptor_set; + if (!constants_transient_descriptors_free_.empty()) { + constants_descriptor_set = constants_transient_descriptors_free_.back(); + constants_transient_descriptors_free_.pop_back(); + } else { + VkDescriptorPoolSize constants_descriptor_count; + constants_descriptor_count.type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; + constants_descriptor_count.descriptorCount = + SpirvShaderTranslator::kConstantBufferCount; + constants_descriptor_set = + transient_descriptor_allocator_uniform_buffer_.Allocate( + descriptor_set_layout_constants_, &constants_descriptor_count, 1); + if (constants_descriptor_set == VK_NULL_HANDLE) { + return false; } } - write_descriptor_set_bits |= - UINT32_C(1) << SpirvShaderTranslator::kDescriptorSetFloatConstantsPixel; - current_graphics_descriptor_sets_ - [SpirvShaderTranslator::kDescriptorSetFloatConstantsPixel] = - write_float_constants_pixel.dstSet; - } - // Vertex shader float constants. 
- VkDescriptorBufferInfo buffer_info_float_constant_vertex; - if (!(current_graphics_descriptor_set_values_up_to_date_ & - (UINT32_C(1) - << SpirvShaderTranslator::kDescriptorSetFloatConstantsVertex))) { - VkWriteDescriptorSet& write_float_constants_vertex = - write_descriptor_sets[write_descriptor_set_count++]; - uint8_t* mapping_float_constants_vertex = - WriteTransientUniformBufferBinding( - sizeof(float) * 4 * - std::max(float_constant_count_vertex, UINT32_C(1)), - SingleTransientDescriptorLayout::kUniformBufferGuestVertex, - buffer_info_float_constant_vertex, write_float_constants_vertex); - if (!mapping_float_constants_vertex) { - return false; - } - for (uint32_t i = 0; i < 4; ++i) { - uint64_t float_constant_map_entry = current_float_constant_map_vertex_[i]; - uint32_t float_constant_index; - while (xe::bit_scan_forward(float_constant_map_entry, - &float_constant_index)) { - float_constant_map_entry &= ~(1ull << float_constant_index); - std::memcpy(mapping_float_constants_vertex, - ®s[XE_GPU_REG_SHADER_CONSTANT_000_X + (i << 8) + - (float_constant_index << 2)] - .f32, - sizeof(float) * 4); - mapping_float_constants_vertex += sizeof(float) * 4; - } + constants_transient_descriptors_used_.emplace_back( + frame_current_, constants_descriptor_set); + // Consecutive bindings updated via a single VkWriteDescriptorSet must have + // identical stage flags, but for the constants they vary. + for (uint32_t i = 0; i < SpirvShaderTranslator::kConstantBufferCount; ++i) { + VkWriteDescriptorSet& write_constants = + write_descriptor_sets[write_descriptor_set_count++]; + write_constants.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; + write_constants.pNext = nullptr; + write_constants.dstSet = constants_descriptor_set; + write_constants.dstBinding = i; + write_constants.dstArrayElement = 0; + write_constants.descriptorCount = 1; + write_constants.descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; + write_constants.pImageInfo = nullptr; + write_constants.pBufferInfo = ¤t_constant_buffer_infos_[i]; + write_constants.pTexelBufferView = nullptr; } write_descriptor_set_bits |= - UINT32_C(1) - << SpirvShaderTranslator::kDescriptorSetFloatConstantsVertex; + UINT32_C(1) << SpirvShaderTranslator::kDescriptorSetConstants; current_graphics_descriptor_sets_ - [SpirvShaderTranslator::kDescriptorSetFloatConstantsVertex] = - write_float_constants_vertex.dstSet; + [SpirvShaderTranslator::kDescriptorSetConstants] = + constants_descriptor_set; } - // Fetch constants. - VkDescriptorBufferInfo buffer_info_fetch_constants; - if (!(current_graphics_descriptor_set_values_up_to_date_ & - (UINT32_C(1) << SpirvShaderTranslator::kDescriptorSetFetchConstants))) { - VkWriteDescriptorSet& write_fetch_constants = - write_descriptor_sets[write_descriptor_set_count++]; - constexpr size_t kFetchConstantsSize = sizeof(uint32_t) * 6 * 32; - uint8_t* mapping_fetch_constants = WriteTransientUniformBufferBinding( - kFetchConstantsSize, - SingleTransientDescriptorLayout::kUniformBufferGuestShader, - buffer_info_fetch_constants, write_fetch_constants); - if (!mapping_fetch_constants) { - return false; - } - std::memcpy(mapping_fetch_constants, - ®s[XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0].u32, - kFetchConstantsSize); - write_descriptor_set_bits |= - UINT32_C(1) << SpirvShaderTranslator::kDescriptorSetFetchConstants; - current_graphics_descriptor_sets_ - [SpirvShaderTranslator::kDescriptorSetFetchConstants] = - write_fetch_constants.dstSet; - } - // Vertex shader samplers. 
- if (write_vertex_samplers) { - VkWriteDescriptorSet& write_samplers = - write_descriptor_sets[write_descriptor_set_count++]; - if (!WriteTransientTextureBindings( - true, true, sampler_count_vertex, - current_guest_graphics_pipeline_layout_ - ->descriptor_set_layout_samplers_vertex_ref(), - descriptor_write_image_info_.data() + - vertex_sampler_image_info_offset, - write_samplers)) { - return false; - } - write_descriptor_set_bits |= - UINT32_C(1) << SpirvShaderTranslator::kDescriptorSetSamplersVertex; - current_graphics_descriptor_sets_ - [SpirvShaderTranslator::kDescriptorSetSamplersVertex] = - write_samplers.dstSet; - } - // Vertex shader textures. + // Vertex shader textures and samplers. if (write_vertex_textures) { - VkWriteDescriptorSet& write_textures = - write_descriptor_sets[write_descriptor_set_count++]; - if (!WriteTransientTextureBindings( - false, true, texture_count_vertex, - current_guest_graphics_pipeline_layout_ - ->descriptor_set_layout_textures_vertex_ref(), - descriptor_write_image_info_.data() + - vertex_texture_image_info_offset, - write_textures)) { + VkWriteDescriptorSet* write_textures = + write_descriptor_sets.data() + write_descriptor_set_count; + uint32_t texture_descriptor_set_write_count = WriteTransientTextureBindings( + true, texture_count_vertex, sampler_count_vertex, + current_guest_graphics_pipeline_layout_ + ->descriptor_set_layout_textures_vertex_ref(), + descriptor_write_image_info_.data() + vertex_texture_image_info_offset, + descriptor_write_image_info_.data() + vertex_sampler_image_info_offset, + write_textures); + if (!texture_descriptor_set_write_count) { return false; } + write_descriptor_set_count += texture_descriptor_set_write_count; write_descriptor_set_bits |= UINT32_C(1) << SpirvShaderTranslator::kDescriptorSetTexturesVertex; current_graphics_descriptor_sets_ [SpirvShaderTranslator::kDescriptorSetTexturesVertex] = - write_textures.dstSet; + write_textures[0].dstSet; } - // Pixel shader samplers. - if (write_pixel_samplers) { - VkWriteDescriptorSet& write_samplers = - write_descriptor_sets[write_descriptor_set_count++]; - if (!WriteTransientTextureBindings( - true, false, sampler_count_pixel, - current_guest_graphics_pipeline_layout_ - ->descriptor_set_layout_samplers_pixel_ref(), - descriptor_write_image_info_.data() + - pixel_sampler_image_info_offset, - write_samplers)) { - return false; - } - write_descriptor_set_bits |= - UINT32_C(1) << SpirvShaderTranslator::kDescriptorSetSamplersPixel; - current_graphics_descriptor_sets_ - [SpirvShaderTranslator::kDescriptorSetSamplersPixel] = - write_samplers.dstSet; - } - // Pixel shader textures. + // Pixel shader textures and samplers. 
if (write_pixel_textures) { - VkWriteDescriptorSet& write_textures = - write_descriptor_sets[write_descriptor_set_count++]; - if (!WriteTransientTextureBindings( - false, false, texture_count_pixel, - current_guest_graphics_pipeline_layout_ - ->descriptor_set_layout_textures_pixel_ref(), - descriptor_write_image_info_.data() + - pixel_texture_image_info_offset, - write_textures)) { + VkWriteDescriptorSet* write_textures = + write_descriptor_sets.data() + write_descriptor_set_count; + uint32_t texture_descriptor_set_write_count = WriteTransientTextureBindings( + false, texture_count_pixel, sampler_count_pixel, + current_guest_graphics_pipeline_layout_ + ->descriptor_set_layout_textures_pixel_ref(), + descriptor_write_image_info_.data() + pixel_texture_image_info_offset, + descriptor_write_image_info_.data() + pixel_sampler_image_info_offset, + write_textures); + if (!texture_descriptor_set_write_count) { return false; } + write_descriptor_set_count += texture_descriptor_set_write_count; write_descriptor_set_bits |= UINT32_C(1) << SpirvShaderTranslator::kDescriptorSetTexturesPixel; current_graphics_descriptor_sets_ [SpirvShaderTranslator::kDescriptorSetTexturesPixel] = - write_textures.dstSet; + write_textures[0].dstSet; } // Write. if (write_descriptor_set_count) { dfn.vkUpdateDescriptorSets(device, write_descriptor_set_count, - write_descriptor_sets, 0, nullptr); + write_descriptor_sets.data(), 0, nullptr); } // Only make valid if all descriptor sets have been allocated and written // successfully. @@ -3936,19 +3895,11 @@ bool VulkanCommandProcessor::UpdateBindings(const VulkanShader* vertex_shader, // Bind the new descriptor sets. uint32_t descriptor_sets_needed = (UINT32_C(1) << SpirvShaderTranslator::kDescriptorSetCount) - 1; - if (!sampler_count_vertex) { - descriptor_sets_needed &= - ~(UINT32_C(1) << SpirvShaderTranslator::kDescriptorSetSamplersVertex); - } - if (!texture_count_vertex) { + if (!texture_count_vertex && !sampler_count_vertex) { descriptor_sets_needed &= ~(UINT32_C(1) << SpirvShaderTranslator::kDescriptorSetTexturesVertex); } - if (!sampler_count_pixel) { - descriptor_sets_needed &= - ~(UINT32_C(1) << SpirvShaderTranslator::kDescriptorSetSamplersPixel); - } - if (!texture_count_pixel) { + if (!texture_count_pixel && !sampler_count_pixel) { descriptor_sets_needed &= ~(UINT32_C(1) << SpirvShaderTranslator::kDescriptorSetTexturesPixel); } @@ -4031,17 +3982,20 @@ uint8_t* VulkanCommandProcessor::WriteTransientUniformBufferBinding( return mapping; } -bool VulkanCommandProcessor::WriteTransientTextureBindings( - bool is_samplers, bool is_vertex, uint32_t binding_count, +uint32_t VulkanCommandProcessor::WriteTransientTextureBindings( + bool is_vertex, uint32_t texture_count, uint32_t sampler_count, VkDescriptorSetLayout descriptor_set_layout, - const VkDescriptorImageInfo* image_info, - VkWriteDescriptorSet& write_descriptor_set_out) { - assert_not_zero(binding_count); + const VkDescriptorImageInfo* texture_image_info, + const VkDescriptorImageInfo* sampler_image_info, + VkWriteDescriptorSet* descriptor_set_writes_out) { assert_true(frame_open_); + if (!texture_count && !sampler_count) { + return 0; + } TextureDescriptorSetLayoutKey texture_descriptor_set_layout_key; - texture_descriptor_set_layout_key.is_samplers = uint32_t(is_samplers); + texture_descriptor_set_layout_key.texture_count = texture_count; + texture_descriptor_set_layout_key.sampler_count = sampler_count; texture_descriptor_set_layout_key.is_vertex = uint32_t(is_vertex); - 
texture_descriptor_set_layout_key.binding_count = binding_count; VkDescriptorSet texture_descriptor_set; auto textures_free_it = texture_transient_descriptor_sets_free_.find( texture_descriptor_set_layout_key); @@ -4050,12 +4004,26 @@ bool VulkanCommandProcessor::WriteTransientTextureBindings( texture_descriptor_set = textures_free_it->second.back(); textures_free_it->second.pop_back(); } else { - texture_descriptor_set = - (is_samplers ? transient_descriptor_allocator_sampler_ - : transient_descriptor_allocator_sampled_image_) - .Allocate(descriptor_set_layout, binding_count); + std::array texture_descriptor_counts; + uint32_t texture_descriptor_counts_count = 0; + if (texture_count) { + VkDescriptorPoolSize& texture_descriptor_count = + texture_descriptor_counts[texture_descriptor_counts_count++]; + texture_descriptor_count.type = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE; + texture_descriptor_count.descriptorCount = texture_count; + } + if (sampler_count) { + VkDescriptorPoolSize& texture_descriptor_count = + texture_descriptor_counts[texture_descriptor_counts_count++]; + texture_descriptor_count.type = VK_DESCRIPTOR_TYPE_SAMPLER; + texture_descriptor_count.descriptorCount = sampler_count; + } + assert_not_zero(texture_descriptor_counts_count); + texture_descriptor_set = transient_descriptor_allocator_textures_.Allocate( + descriptor_set_layout, texture_descriptor_counts.data(), + texture_descriptor_counts_count); if (texture_descriptor_set == VK_NULL_HANDLE) { - return false; + return 0; } } UsedTextureTransientDescriptorSet& used_texture_descriptor_set = @@ -4063,19 +4031,37 @@ bool VulkanCommandProcessor::WriteTransientTextureBindings( used_texture_descriptor_set.frame = frame_current_; used_texture_descriptor_set.layout = texture_descriptor_set_layout_key; used_texture_descriptor_set.set = texture_descriptor_set; - write_descriptor_set_out.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; - write_descriptor_set_out.pNext = nullptr; - write_descriptor_set_out.dstSet = texture_descriptor_set; - write_descriptor_set_out.dstBinding = 0; - write_descriptor_set_out.dstArrayElement = 0; - write_descriptor_set_out.descriptorCount = binding_count; - write_descriptor_set_out.descriptorType = - is_samplers ? 
VK_DESCRIPTOR_TYPE_SAMPLER - : VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE; - write_descriptor_set_out.pImageInfo = image_info; - write_descriptor_set_out.pBufferInfo = nullptr; - write_descriptor_set_out.pTexelBufferView = nullptr; - return true; + uint32_t descriptor_set_write_count = 0; + if (texture_count) { + VkWriteDescriptorSet& descriptor_set_write = + descriptor_set_writes_out[descriptor_set_write_count++]; + descriptor_set_write.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; + descriptor_set_write.pNext = nullptr; + descriptor_set_write.dstSet = texture_descriptor_set; + descriptor_set_write.dstBinding = 0; + descriptor_set_write.dstArrayElement = 0; + descriptor_set_write.descriptorCount = texture_count; + descriptor_set_write.descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE; + descriptor_set_write.pImageInfo = texture_image_info; + descriptor_set_write.pBufferInfo = nullptr; + descriptor_set_write.pTexelBufferView = nullptr; + } + if (sampler_count) { + VkWriteDescriptorSet& descriptor_set_write = + descriptor_set_writes_out[descriptor_set_write_count++]; + descriptor_set_write.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; + descriptor_set_write.pNext = nullptr; + descriptor_set_write.dstSet = texture_descriptor_set; + descriptor_set_write.dstBinding = texture_count; + descriptor_set_write.dstArrayElement = 0; + descriptor_set_write.descriptorCount = sampler_count; + descriptor_set_write.descriptorType = VK_DESCRIPTOR_TYPE_SAMPLER; + descriptor_set_write.pImageInfo = sampler_image_info; + descriptor_set_write.pBufferInfo = nullptr; + descriptor_set_write.pTexelBufferView = nullptr; + } + assert_not_zero(descriptor_set_write_count); + return descriptor_set_write_count; } } // namespace vulkan diff --git a/src/xenia/gpu/vulkan/vulkan_command_processor.h b/src/xenia/gpu/vulkan/vulkan_command_processor.h index 1186310f2..215fa3ee9 100644 --- a/src/xenia/gpu/vulkan/vulkan_command_processor.h +++ b/src/xenia/gpu/vulkan/vulkan_command_processor.h @@ -36,7 +36,7 @@ #include "xenia/gpu/vulkan/vulkan_texture_cache.h" #include "xenia/gpu/xenos.h" #include "xenia/kernel/kernel_state.h" -#include "xenia/ui/vulkan/single_type_descriptor_set_allocator.h" +#include "xenia/ui/vulkan/linked_type_descriptor_set_allocator.h" #include "xenia/ui/vulkan/vulkan_presenter.h" #include "xenia/ui/vulkan/vulkan_provider.h" #include "xenia/ui/vulkan/vulkan_upload_buffer_pool.h" @@ -49,10 +49,6 @@ class VulkanCommandProcessor : public CommandProcessor { public: // Single-descriptor layouts for use within a single frame. enum class SingleTransientDescriptorLayout { - kUniformBufferGuestVertex, - kUniformBufferFragment, - kUniformBufferGuestShader, - kUniformBufferSystemConstants, kUniformBufferCompute, kStorageBufferCompute, kCount, @@ -231,9 +227,9 @@ class VulkanCommandProcessor : public CommandProcessor { VkDescriptorSet& descriptor_set_out); // The returned reference is valid until a cache clear. - VkDescriptorSetLayout GetTextureDescriptorSetLayout(bool is_samplers, - bool is_vertex, - size_t binding_count); + VkDescriptorSetLayout GetTextureDescriptorSetLayout(bool is_vertex, + size_t texture_count, + size_t sampler_count); // The returned reference is valid until a cache clear. const VulkanPipelineCache::PipelineLayoutProvider* GetPipelineLayout( size_t texture_count_pixel, size_t sampler_count_pixel, @@ -298,12 +294,11 @@ class VulkanCommandProcessor : public CommandProcessor { union TextureDescriptorSetLayoutKey { uint32_t key; struct { - // 0 - sampled image descriptors, 1 - sampler descriptors. 
- uint32_t is_samplers : 1; + // If texture and sampler counts are both 0, use + // descriptor_set_layout_empty_ instead as these are owning references. + uint32_t texture_count : 16; + uint32_t sampler_count : 15; uint32_t is_vertex : 1; - // For 0, use descriptor_set_layout_empty_ instead as these are owning - // references. - uint32_t binding_count : 30; }; TextureDescriptorSetLayoutKey() : key(0) { @@ -354,40 +349,26 @@ class VulkanCommandProcessor : public CommandProcessor { explicit PipelineLayout( VkPipelineLayout pipeline_layout, VkDescriptorSetLayout descriptor_set_layout_textures_vertex_ref, - VkDescriptorSetLayout descriptor_set_layout_samplers_vertex_ref, - VkDescriptorSetLayout descriptor_set_layout_textures_pixel_ref, - VkDescriptorSetLayout descriptor_set_layout_samplers_pixel_ref) + VkDescriptorSetLayout descriptor_set_layout_textures_pixel_ref) : pipeline_layout_(pipeline_layout), descriptor_set_layout_textures_vertex_ref_( descriptor_set_layout_textures_vertex_ref), - descriptor_set_layout_samplers_vertex_ref_( - descriptor_set_layout_samplers_vertex_ref), descriptor_set_layout_textures_pixel_ref_( - descriptor_set_layout_textures_pixel_ref), - descriptor_set_layout_samplers_pixel_ref_( - descriptor_set_layout_samplers_pixel_ref) {} + descriptor_set_layout_textures_pixel_ref) {} VkPipelineLayout GetPipelineLayout() const override { return pipeline_layout_; } VkDescriptorSetLayout descriptor_set_layout_textures_vertex_ref() const { return descriptor_set_layout_textures_vertex_ref_; } - VkDescriptorSetLayout descriptor_set_layout_samplers_vertex_ref() const { - return descriptor_set_layout_samplers_vertex_ref_; - } VkDescriptorSetLayout descriptor_set_layout_textures_pixel_ref() const { return descriptor_set_layout_textures_pixel_ref_; } - VkDescriptorSetLayout descriptor_set_layout_samplers_pixel_ref() const { - return descriptor_set_layout_samplers_pixel_ref_; - } private: VkPipelineLayout pipeline_layout_; VkDescriptorSetLayout descriptor_set_layout_textures_vertex_ref_; - VkDescriptorSetLayout descriptor_set_layout_samplers_vertex_ref_; VkDescriptorSetLayout descriptor_set_layout_textures_pixel_ref_; - VkDescriptorSetLayout descriptor_set_layout_samplers_pixel_ref_; }; struct UsedSingleTransientDescriptor { @@ -458,16 +439,20 @@ class VulkanCommandProcessor : public CommandProcessor { uint32_t used_texture_mask); bool UpdateBindings(const VulkanShader* vertex_shader, const VulkanShader* pixel_shader); - // Allocates a descriptor set and fills the VkWriteDescriptorSet structure. - // The descriptor set layout must be the one for the given is_samplers, - // is_vertex, binding_count (from GetTextureDescriptorSetLayout - may be + // Allocates a descriptor set and fills one or two VkWriteDescriptorSet + // structure instances (for images and samplers). + // The descriptor set layout must be the one for the given is_vertex, + // texture_count, sampler_count (from GetTextureDescriptorSetLayout - may be // already available at the moment of the call, no need to locate it again). - // Returns whether the allocation was successful. - bool WriteTransientTextureBindings( - bool is_samplers, bool is_vertex, uint32_t binding_count, + // Returns how many VkWriteDescriptorSet structure instances have been + // written, or 0 if there was a failure to allocate the descriptor set or no + // bindings were requested. 
+ uint32_t WriteTransientTextureBindings( + bool is_vertex, uint32_t texture_count, uint32_t sampler_count, VkDescriptorSetLayout descriptor_set_layout, - const VkDescriptorImageInfo* image_info, - VkWriteDescriptorSet& write_descriptor_set_out); + const VkDescriptorImageInfo* texture_image_info, + const VkDescriptorImageInfo* sampler_image_info, + VkWriteDescriptorSet* descriptor_set_writes_out); bool device_lost_ = false; @@ -530,6 +515,7 @@ class VulkanCommandProcessor : public CommandProcessor { VkDescriptorSetLayout descriptor_set_layout_empty_ = VK_NULL_HANDLE; VkDescriptorSetLayout descriptor_set_layout_shared_memory_and_edram_ = VK_NULL_HANDLE; + VkDescriptorSetLayout descriptor_set_layout_constants_ = VK_NULL_HANDLE; std::array descriptor_set_layouts_single_transient_{}; @@ -543,19 +529,27 @@ class VulkanCommandProcessor : public CommandProcessor { PipelineLayoutKey::Hasher> pipeline_layouts_; - ui::vulkan::SingleTypeDescriptorSetAllocator + // No specific reason for 32768, just the "too much" descriptor count from + // Direct3D 12 PIX warnings. + static constexpr uint32_t kLinkedTypeDescriptorPoolSetCount = 32768; + static const VkDescriptorPoolSize kDescriptorPoolSizeUniformBuffer; + static const VkDescriptorPoolSize kDescriptorPoolSizeStorageBuffer; + static const VkDescriptorPoolSize kDescriptorPoolSizeTextures[2]; + ui::vulkan::LinkedTypeDescriptorSetAllocator transient_descriptor_allocator_uniform_buffer_; - ui::vulkan::SingleTypeDescriptorSetAllocator + ui::vulkan::LinkedTypeDescriptorSetAllocator transient_descriptor_allocator_storage_buffer_; std::deque single_transient_descriptors_used_; std::array, size_t(SingleTransientDescriptorLayout::kCount)> single_transient_descriptors_free_; + // . + std::deque> + constants_transient_descriptors_used_; + std::vector constants_transient_descriptors_free_; - ui::vulkan::SingleTypeDescriptorSetAllocator - transient_descriptor_allocator_sampled_image_; - ui::vulkan::SingleTypeDescriptorSetAllocator - transient_descriptor_allocator_sampler_; + ui::vulkan::LinkedTypeDescriptorSetAllocator + transient_descriptor_allocator_textures_; std::deque texture_transient_descriptor_sets_used_; std::unordered_map +#include +#include + +#include "xenia/base/assert.h" +#include "xenia/base/logging.h" +#include "xenia/ui/vulkan/vulkan_util.h" + +namespace xe { +namespace ui { +namespace vulkan { + +void LinkedTypeDescriptorSetAllocator::Reset() { + const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider_.dfn(); + VkDevice device = provider_.device(); + ui::vulkan::util::DestroyAndNullHandle(dfn.vkDestroyDescriptorPool, device, + page_usable_latest_.pool); + page_usable_latest_.descriptors_remaining.reset(); + for (const std::pair& page_pair : pages_usable_) { + dfn.vkDestroyDescriptorPool(device, page_pair.second.pool, nullptr); + } + pages_usable_.clear(); + for (VkDescriptorPool pool : pages_full_) { + dfn.vkDestroyDescriptorPool(device, pool, nullptr); + } + pages_full_.clear(); +} + +VkDescriptorSet LinkedTypeDescriptorSetAllocator::Allocate( + VkDescriptorSetLayout descriptor_set_layout, + const VkDescriptorPoolSize* descriptor_counts, + uint32_t descriptor_type_count) { + assert_not_zero(descriptor_type_count); +#ifndef NDEBUG + for (uint32_t i = 0; i < descriptor_type_count; ++i) { + const VkDescriptorPoolSize& descriptor_count_for_type = + descriptor_counts[i]; + assert_not_zero(descriptor_count_for_type.descriptorCount); + for (uint32_t j = 0; j < i; ++j) { + assert_true(descriptor_counts[j].type != descriptor_count_for_type.type); 
+ } + } +#endif + + const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider_.dfn(); + VkDevice device = provider_.device(); + + VkDescriptorSetAllocateInfo descriptor_set_allocate_info; + descriptor_set_allocate_info.sType = + VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO; + descriptor_set_allocate_info.pNext = nullptr; + descriptor_set_allocate_info.descriptorSetCount = 1; + descriptor_set_allocate_info.pSetLayouts = &descriptor_set_layout; + VkDescriptorSet descriptor_set; + + // Check if more descriptors have been requested than a page can hold, or + // descriptors of types not provided by this allocator, and if that's the + // case, create a dedicated pool for this allocation. + bool dedicated_descriptor_pool_needed = false; + for (uint32_t i = 0; i < descriptor_type_count; ++i) { + const VkDescriptorPoolSize& descriptor_count_for_type = + descriptor_counts[i]; + // If the type is one that's not supported by the allocator, a dedicated + // pool is required. If it's supported, and the allocator has large enough + // pools to hold the requested number of descriptors, + // dedicated_descriptor_pool_needed will be set to false for this iteration, + // and the loop will continue. Otherwise, if that doesn't happen, a + // dedicated pool is required. + dedicated_descriptor_pool_needed = true; + for (uint32_t j = 0; j < descriptor_pool_size_count_; ++j) { + const VkDescriptorPoolSize& descriptor_pool_size = + descriptor_pool_sizes_[j]; + if (descriptor_count_for_type.type != descriptor_pool_size.type) { + continue; + } + if (descriptor_count_for_type.descriptorCount <= + descriptor_pool_size.descriptorCount) { + // For this type, pages can hold enough descriptors. + dedicated_descriptor_pool_needed = false; + } + break; + } + if (dedicated_descriptor_pool_needed) { + // For at least one requested type, pages can't hold enough descriptors. + break; + } + } + if (dedicated_descriptor_pool_needed) { + VkDescriptorPoolCreateInfo dedicated_descriptor_pool_create_info; + dedicated_descriptor_pool_create_info.sType = + VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO; + dedicated_descriptor_pool_create_info.pNext = nullptr; + dedicated_descriptor_pool_create_info.flags = 0; + dedicated_descriptor_pool_create_info.maxSets = 1; + dedicated_descriptor_pool_create_info.poolSizeCount = descriptor_type_count; + dedicated_descriptor_pool_create_info.pPoolSizes = descriptor_counts; + VkDescriptorPool dedicated_descriptor_pool; + if (dfn.vkCreateDescriptorPool( + device, &dedicated_descriptor_pool_create_info, nullptr, + &dedicated_descriptor_pool) != VK_SUCCESS) { + XELOGE( + "LinkedTypeDescriptorSetAllocator: Failed to create a dedicated " + "descriptor pool for a descriptor set that is too large for a pool " + "page"); + return VK_NULL_HANDLE; + } + descriptor_set_allocate_info.descriptorPool = dedicated_descriptor_pool; + if (dfn.vkAllocateDescriptorSets(device, &descriptor_set_allocate_info, + &descriptor_set) != VK_SUCCESS) { + XELOGE( + "LinkedTypeDescriptorSetAllocator: Failed to allocate descriptors in " + "a dedicated pool"); + dfn.vkDestroyDescriptorPool(device, dedicated_descriptor_pool, nullptr); + return VK_NULL_HANDLE; + } + pages_full_.push_back(dedicated_descriptor_pool); + return descriptor_set; + } + + // Try allocating from the latest page an allocation has happened from, to + // avoid detaching from the map and re-attaching for every allocation. 
+ if (page_usable_latest_.pool != VK_NULL_HANDLE) { + assert_not_zero(page_usable_latest_.descriptor_sets_remaining); + bool allocate_from_latest_page = true; + bool latest_page_becomes_full = + page_usable_latest_.descriptor_sets_remaining == 1; + for (uint32_t i = 0; i < descriptor_type_count; ++i) { + const VkDescriptorPoolSize& descriptor_count_for_type = + descriptor_counts[i]; + for (uint32_t j = 0; j < descriptor_pool_size_count_; ++j) { + const VkDescriptorPoolSize& descriptors_remaining_for_type = + page_usable_latest_.descriptors_remaining[j]; + if (descriptor_count_for_type.type != + descriptors_remaining_for_type.type) { + continue; + } + if (descriptor_count_for_type.descriptorCount >= + descriptors_remaining_for_type.descriptorCount) { + if (descriptor_count_for_type.descriptorCount > + descriptors_remaining_for_type.descriptorCount) { + allocate_from_latest_page = false; + break; + } + latest_page_becomes_full = true; + } + } + if (!allocate_from_latest_page) { + break; + } + } + if (allocate_from_latest_page) { + descriptor_set_allocate_info.descriptorPool = page_usable_latest_.pool; + if (dfn.vkAllocateDescriptorSets(device, &descriptor_set_allocate_info, + &descriptor_set) != VK_SUCCESS) { + descriptor_set = VK_NULL_HANDLE; + // Failed to allocate internally even though there should be enough + // space, don't try to allocate from this pool again at all. + latest_page_becomes_full = true; + } + if (latest_page_becomes_full) { + pages_full_.push_back(page_usable_latest_.pool); + page_usable_latest_.pool = VK_NULL_HANDLE; + page_usable_latest_.descriptors_remaining.reset(); + } else { + --page_usable_latest_.descriptor_sets_remaining; + for (uint32_t i = 0; i < descriptor_type_count; ++i) { + const VkDescriptorPoolSize& descriptor_count_for_type = + descriptor_counts[i]; + for (uint32_t j = 0; j < descriptor_pool_size_count_; ++j) { + VkDescriptorPoolSize& descriptors_remaining_for_type = + page_usable_latest_.descriptors_remaining[j]; + if (descriptor_count_for_type.type != + descriptors_remaining_for_type.type) { + continue; + } + descriptors_remaining_for_type.descriptorCount -= + descriptor_count_for_type.descriptorCount; + } + } + } + if (descriptor_set != VK_NULL_HANDLE) { + return descriptor_set; + } + } + } + + // Count the maximum number of descriptors requested for any type to stop + // searching for pages once they can't satisfy this requirement. + uint32_t max_descriptors_per_type = descriptor_counts[0].descriptorCount; + for (uint32_t i = 1; i < descriptor_type_count; ++i) { + max_descriptors_per_type = std::max(max_descriptors_per_type, + descriptor_counts[i].descriptorCount); + } + + // If allocating from the latest pool wasn't possible, pick any that has + // enough free space. Prefer filling pages that have the most free space as + // they can more likely be used for more allocations later. + auto page_usable_it_next = pages_usable_.rbegin(); + while (page_usable_it_next != pages_usable_.rend()) { + auto page_usable_it = page_usable_it_next; + ++page_usable_it_next; + if (page_usable_it->first < max_descriptors_per_type) { + // All other pages_usable_ entries have smaller maximum number of free + // descriptor for any type (it's the map key). + break; + } + // Check if the page has enough free descriptors for all requested types, + // and whether allocating the requested number of descriptors in it will + // result in the page becoming full. 
+ bool map_page_has_sufficient_space = true; + bool map_page_becomes_full = + page_usable_it->second.descriptor_sets_remaining == 1; + for (uint32_t i = 0; i < descriptor_type_count; ++i) { + const VkDescriptorPoolSize& descriptor_count_for_type = + descriptor_counts[i]; + for (uint32_t j = 0; j < descriptor_pool_size_count_; ++j) { + const VkDescriptorPoolSize& descriptors_remaining_for_type = + page_usable_it->second.descriptors_remaining[j]; + if (descriptor_count_for_type.type != + descriptors_remaining_for_type.type) { + continue; + } + if (descriptor_count_for_type.descriptorCount >= + descriptors_remaining_for_type.descriptorCount) { + if (descriptor_count_for_type.descriptorCount > + descriptors_remaining_for_type.descriptorCount) { + map_page_has_sufficient_space = false; + break; + } + map_page_becomes_full = true; + } + } + if (!map_page_has_sufficient_space) { + break; + } + } + if (!map_page_has_sufficient_space) { + // Even though the coarse (maximum number of descriptors for any type) + // check has passed, for the exact types requested this page doesn't have + // sufficient space - try another one. + continue; + } + // Remove the page from the map unconditionally - in case of a successful + // allocation, it will have a different number of free descriptors for + // different types, thus potentially a new map key (but it will also become + // page_usable_latest_ instead even), or will become full, and in case of a + // failure to allocate internally even though there still should be enough + // space, it should never be allocated from again. + Page map_page = std::move(page_usable_it->second); + // Convert the reverse iterator to a forward iterator for erasing. + pages_usable_.erase(std::next(page_usable_it).base()); + descriptor_set_allocate_info.descriptorPool = map_page.pool; + if (dfn.vkAllocateDescriptorSets(device, &descriptor_set_allocate_info, + &descriptor_set) != VK_SUCCESS) { + descriptor_set = VK_NULL_HANDLE; + // Failed to allocate internally even though there should be enough space, + // don't try to allocate from this pool again at all. + map_page_becomes_full = true; + } + if (map_page_becomes_full) { + map_page.descriptors_remaining.reset(); + pages_full_.push_back(map_page.pool); + } else { + --map_page.descriptor_sets_remaining; + for (uint32_t i = 0; i < descriptor_type_count; ++i) { + const VkDescriptorPoolSize& descriptor_count_for_type = + descriptor_counts[i]; + for (uint32_t j = 0; j < descriptor_pool_size_count_; ++j) { + VkDescriptorPoolSize& descriptors_remaining_for_type = + map_page.descriptors_remaining[j]; + if (descriptor_count_for_type.type != + descriptors_remaining_for_type.type) { + continue; + } + descriptors_remaining_for_type.descriptorCount -= + descriptor_count_for_type.descriptorCount; + } + } + // Move the latest page that allocation couldn't be done in to the usable + // pages to replace it with the new one. + if (page_usable_latest_.pool != VK_NULL_HANDLE) { + // Calculate the map key (the maximum number of remaining descriptors of + // any type). 
+ uint32_t latest_page_max_descriptors_remaining = + page_usable_latest_.descriptors_remaining[0].descriptorCount; + for (uint32_t i = 1; i < descriptor_pool_size_count_; ++i) { + latest_page_max_descriptors_remaining = std::max( + latest_page_max_descriptors_remaining, + page_usable_latest_.descriptors_remaining[i].descriptorCount); + } + assert_not_zero(latest_page_max_descriptors_remaining); + pages_usable_.emplace(latest_page_max_descriptors_remaining, + std::move(page_usable_latest_)); + } + page_usable_latest_ = std::move(map_page); + } + if (descriptor_set != VK_NULL_HANDLE) { + return descriptor_set; + } + } + + // Try allocating from a new page. + // See if the new page has instantly become full. + bool new_page_becomes_full = descriptor_sets_per_page_ == 1; + for (uint32_t i = 0; !new_page_becomes_full && i < descriptor_type_count; + ++i) { + const VkDescriptorPoolSize& descriptor_count_for_type = + descriptor_counts[i]; + for (uint32_t j = 0; j < descriptor_pool_size_count_; ++j) { + const VkDescriptorPoolSize& descriptors_remaining_for_type = + descriptor_pool_sizes_[j]; + if (descriptor_count_for_type.type != + descriptors_remaining_for_type.type) { + continue; + } + assert_true(descriptor_count_for_type.descriptorCount <= + descriptors_remaining_for_type.descriptorCount); + if (descriptor_count_for_type.descriptorCount >= + descriptors_remaining_for_type.descriptorCount) { + new_page_becomes_full = true; + break; + } + } + } + // Allocate from a new page. However, if the new page becomes full + // immediately, create a dedicated pool instead for the exact number of + // descriptors not to leave any unused space in the pool. + VkDescriptorPoolCreateInfo new_descriptor_pool_create_info; + new_descriptor_pool_create_info.sType = + VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO; + new_descriptor_pool_create_info.pNext = nullptr; + new_descriptor_pool_create_info.flags = 0; + if (new_page_becomes_full) { + new_descriptor_pool_create_info.maxSets = 1; + new_descriptor_pool_create_info.poolSizeCount = descriptor_type_count; + new_descriptor_pool_create_info.pPoolSizes = descriptor_counts; + } else { + new_descriptor_pool_create_info.maxSets = descriptor_sets_per_page_; + new_descriptor_pool_create_info.poolSizeCount = descriptor_pool_size_count_; + new_descriptor_pool_create_info.pPoolSizes = descriptor_pool_sizes_.get(); + } + VkDescriptorPool new_descriptor_pool; + if (dfn.vkCreateDescriptorPool(device, &new_descriptor_pool_create_info, + nullptr, &new_descriptor_pool) != VK_SUCCESS) { + XELOGE( + "LinkedTypeDescriptorSetAllocator: Failed to create a descriptor pool"); + return VK_NULL_HANDLE; + } + descriptor_set_allocate_info.descriptorPool = new_descriptor_pool; + if (dfn.vkAllocateDescriptorSets(device, &descriptor_set_allocate_info, + &descriptor_set) != VK_SUCCESS) { + XELOGE("LinkedTypeDescriptorSetAllocator: Failed to allocate descriptors"); + dfn.vkDestroyDescriptorPool(device, new_descriptor_pool, nullptr); + return VK_NULL_HANDLE; + } + if (new_page_becomes_full) { + pages_full_.push_back(new_descriptor_pool); + } else { + // Move the latest page that allocation couldn't be done in to the usable + // pages to replace it with the new one. + if (page_usable_latest_.pool != VK_NULL_HANDLE) { + // Calculate the map key (the maximum number of remaining descriptors of + // any type). 
+ uint32_t latest_page_max_descriptors_remaining =
+ page_usable_latest_.descriptors_remaining[0].descriptorCount;
+ for (uint32_t i = 1; i < descriptor_pool_size_count_; ++i) {
+ latest_page_max_descriptors_remaining = std::max(
+ latest_page_max_descriptors_remaining,
+ page_usable_latest_.descriptors_remaining[i].descriptorCount);
+ }
+ assert_not_zero(latest_page_max_descriptors_remaining);
+ pages_usable_.emplace(latest_page_max_descriptors_remaining,
+ std::move(page_usable_latest_));
+ }
+ page_usable_latest_.pool = new_descriptor_pool;
+ page_usable_latest_.descriptors_remaining =
+ std::unique_ptr<VkDescriptorPoolSize[]>(
+ new VkDescriptorPoolSize[descriptor_pool_size_count_]);
+ for (uint32_t i = 0; i < descriptor_pool_size_count_; ++i) {
+ const VkDescriptorPoolSize& descriptor_pool_size_for_type =
+ descriptor_pool_sizes_[i];
+ page_usable_latest_.descriptors_remaining[i] =
+ descriptor_pool_size_for_type;
+ for (uint32_t j = 0; j < descriptor_type_count; ++j) {
+ const VkDescriptorPoolSize& descriptor_count_for_type =
+ descriptor_counts[j];
+ if (descriptor_count_for_type.type !=
+ descriptor_pool_size_for_type.type) {
+ continue;
+ }
+ page_usable_latest_.descriptors_remaining[i].descriptorCount -=
+ descriptor_count_for_type.descriptorCount;
+ break;
+ }
+ }
+ page_usable_latest_.descriptor_sets_remaining =
+ descriptor_sets_per_page_ - 1;
+ }
+ return descriptor_set;
+}
+
+} // namespace vulkan
+} // namespace ui
+} // namespace xe
diff --git a/src/xenia/ui/vulkan/linked_type_descriptor_set_allocator.h b/src/xenia/ui/vulkan/linked_type_descriptor_set_allocator.h
new file mode 100644
index 000000000..999c616b1
--- /dev/null
+++ b/src/xenia/ui/vulkan/linked_type_descriptor_set_allocator.h
@@ -0,0 +1,125 @@
+/**
+ ******************************************************************************
+ * Xenia : Xbox 360 Emulator Research Project *
+ ******************************************************************************
+ * Copyright 2022 Ben Vanik. All rights reserved. *
+ * Released under the BSD license - see LICENSE in the root for more details. *
+ ******************************************************************************
+ */
+
+#ifndef XENIA_UI_VULKAN_LINKED_TYPE_DESCRIPTOR_SET_ALLOCATOR_H_
+#define XENIA_UI_VULKAN_LINKED_TYPE_DESCRIPTOR_SET_ALLOCATOR_H_
+
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "xenia/base/assert.h"
+#include "xenia/ui/vulkan/vulkan_provider.h"
+
+namespace xe {
+namespace ui {
+namespace vulkan {
+
+// Allocates descriptor sets with layouts consisting of descriptors of the
+// types specified during initialization.
+//
+// "LinkedType" means that the allocator is designed for allocating descriptor
+// sets containing descriptors of multiple types together - for instance, it
+// will mark the entire page as full as soon as there's no space left in it for
+// even one of the descriptor types (rather than requiring all of them to run
+// out).
+//
+// The primary usage scenario for this kind of allocator is allocating image
+// and sampler descriptors in a single descriptor set if they both are actually
+// used in one. It is expected that the ratio of the numbers of descriptors per
+// type specified during the initialization will roughly correspond to the ratio
+// of the numbers of descriptors that will actually be allocated. For instance,
+// if there are approximately 2 images for each 1 sampler, it's recommended to
+// make the image count per page twice the sampler count per page.
+// +// If some allocations use just one type, and some use just another, completely +// independently, it's preferable to use separate allocators rather than a +// single one. +// +// This allocator is also suitable for allocating variable-length descriptor +// sets containing descriptors of just a single type. +// +// There's no way to free these descriptors within the allocator object itself, +// per-layout free lists should be used externally. +class LinkedTypeDescriptorSetAllocator { + public: + // Multiple descriptor sizes for the same descriptor type, and zero sizes, are + // not allowed. + explicit LinkedTypeDescriptorSetAllocator( + const ui::vulkan::VulkanProvider& provider, + const VkDescriptorPoolSize* descriptor_sizes, + uint32_t descriptor_size_count, uint32_t descriptor_sets_per_page) + : provider_(provider), + descriptor_pool_sizes_(new VkDescriptorPoolSize[descriptor_size_count]), + descriptor_pool_size_count_(descriptor_size_count), + descriptor_sets_per_page_(descriptor_sets_per_page) { + assert_not_zero(descriptor_size_count); + assert_not_zero(descriptor_sets_per_page_); +#ifndef NDEBUG + for (uint32_t i = 0; i < descriptor_size_count; ++i) { + const VkDescriptorPoolSize& descriptor_size = descriptor_sizes[i]; + assert_not_zero(descriptor_size.descriptorCount); + for (uint32_t j = 0; j < i; ++j) { + assert_true(descriptor_sizes[j].type != descriptor_size.type); + } + } +#endif + std::memcpy(descriptor_pool_sizes_.get(), descriptor_sizes, + sizeof(VkDescriptorPoolSize) * descriptor_size_count); + } + LinkedTypeDescriptorSetAllocator( + const LinkedTypeDescriptorSetAllocator& allocator) = delete; + LinkedTypeDescriptorSetAllocator& operator=( + const LinkedTypeDescriptorSetAllocator& allocator) = delete; + ~LinkedTypeDescriptorSetAllocator() { Reset(); } + + void Reset(); + + VkDescriptorSet Allocate(VkDescriptorSetLayout descriptor_set_layout, + const VkDescriptorPoolSize* descriptor_counts, + uint32_t descriptor_type_count); + + private: + struct Page { + VkDescriptorPool pool; + std::unique_ptr descriptors_remaining; + uint32_t descriptor_sets_remaining; + }; + + const ui::vulkan::VulkanProvider& provider_; + + std::unique_ptr descriptor_pool_sizes_; + uint32_t descriptor_pool_size_count_; + uint32_t descriptor_sets_per_page_; + + std::vector pages_full_; + // Because allocations must be contiguous, overflow may happen even if a page + // still has free descriptors, so multiple pages may have free space. + // To avoid removing and re-adding the page to the map that keeps them sorted + // (the key is the maximum number of free descriptors remaining across all + // types - and lookups need to be made with the maximum of the requested + // number of descriptors across all types since it's pointless to check the + // pages that can't even potentially fit the largest amount of descriptors of + // a requested type, and unlike using the minimum as the key, this doesn't + // degenerate if, for example, 0 descriptors are requested for some type - and + // it changes at every allocation from a page), instead of always looking for + // a free space in the map, maintaining one page outside the map, and + // allocation attempts will be made from that page first. + std::multimap pages_usable_; + // Doesn't exist if page_usable_latest_.pool == VK_NULL_HANDLE. 
+  Page page_usable_latest_ = {};
+};
+
+}  // namespace vulkan
+}  // namespace ui
+}  // namespace xe
+
+#endif  // XENIA_UI_VULKAN_LINKED_TYPE_DESCRIPTOR_SET_ALLOCATOR_H_
diff --git a/src/xenia/ui/vulkan/single_type_descriptor_set_allocator.cc b/src/xenia/ui/vulkan/single_type_descriptor_set_allocator.cc
deleted file mode 100644
index 44a3d31fe..000000000
--- a/src/xenia/ui/vulkan/single_type_descriptor_set_allocator.cc
+++ /dev/null
@@ -1,216 +0,0 @@
-/**
- ******************************************************************************
- * Xenia : Xbox 360 Emulator Research Project *
- ******************************************************************************
- * Copyright 2022 Ben Vanik. All rights reserved. *
- * Released under the BSD license - see LICENSE in the root for more details. *
- ******************************************************************************
- */
-
-#include "xenia/ui/vulkan/single_type_descriptor_set_allocator.h"
-
-#include "xenia/base/logging.h"
-#include "xenia/ui/vulkan/vulkan_util.h"
-
-namespace xe {
-namespace ui {
-namespace vulkan {
-
-void SingleTypeDescriptorSetAllocator::Reset() {
-  const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider_.dfn();
-  VkDevice device = provider_.device();
-  ui::vulkan::util::DestroyAndNullHandle(dfn.vkDestroyDescriptorPool, device,
-                                         page_usable_latest_.pool);
-  for (const std::pair<uint32_t, Page>& page_pair : pages_usable_) {
-    dfn.vkDestroyDescriptorPool(device, page_pair.second.pool, nullptr);
-  }
-  pages_usable_.clear();
-  for (VkDescriptorPool pool : pages_full_) {
-    dfn.vkDestroyDescriptorPool(device, pool, nullptr);
-  }
-  pages_full_.clear();
-}
-
-VkDescriptorSet SingleTypeDescriptorSetAllocator::Allocate(
-    VkDescriptorSetLayout descriptor_set_layout, uint32_t descriptor_count) {
-  assert_not_zero(descriptor_count);
-  if (descriptor_count == 0) {
-    return VK_NULL_HANDLE;
-  }
-
-  const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider_.dfn();
-  VkDevice device = provider_.device();
-
-  VkDescriptorSetAllocateInfo descriptor_set_allocate_info;
-  descriptor_set_allocate_info.sType =
-      VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO;
-  descriptor_set_allocate_info.pNext = nullptr;
-  descriptor_set_allocate_info.descriptorSetCount = 1;
-  descriptor_set_allocate_info.pSetLayouts = &descriptor_set_layout;
-  VkDescriptorSet descriptor_set;
-
-  if (descriptor_count > descriptor_pool_size_.descriptorCount) {
-    // Can't allocate in the pool, need a dedicated allocation.
- VkDescriptorPoolSize dedicated_descriptor_pool_size; - dedicated_descriptor_pool_size.type = descriptor_pool_size_.type; - dedicated_descriptor_pool_size.descriptorCount = descriptor_count; - VkDescriptorPoolCreateInfo dedicated_descriptor_pool_create_info; - dedicated_descriptor_pool_create_info.sType = - VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO; - dedicated_descriptor_pool_create_info.pNext = nullptr; - dedicated_descriptor_pool_create_info.flags = 0; - dedicated_descriptor_pool_create_info.maxSets = 1; - dedicated_descriptor_pool_create_info.poolSizeCount = 1; - dedicated_descriptor_pool_create_info.pPoolSizes = - &dedicated_descriptor_pool_size; - VkDescriptorPool dedicated_descriptor_pool; - if (dfn.vkCreateDescriptorPool( - device, &dedicated_descriptor_pool_create_info, nullptr, - &dedicated_descriptor_pool) != VK_SUCCESS) { - XELOGE( - "SingleTypeDescriptorSetAllocator: Failed to create a dedicated pool " - "for {} descriptors", - dedicated_descriptor_pool_size.descriptorCount); - return VK_NULL_HANDLE; - } - descriptor_set_allocate_info.descriptorPool = dedicated_descriptor_pool; - if (dfn.vkAllocateDescriptorSets(device, &descriptor_set_allocate_info, - &descriptor_set) != VK_SUCCESS) { - XELOGE( - "SingleTypeDescriptorSetAllocator: Failed to allocate {} descriptors " - "in a dedicated pool", - descriptor_count); - dfn.vkDestroyDescriptorPool(device, dedicated_descriptor_pool, nullptr); - return VK_NULL_HANDLE; - } - pages_full_.push_back(dedicated_descriptor_pool); - return descriptor_set; - } - - // Try allocating from the latest page an allocation has happened from, to - // avoid detaching from the map and re-attaching for every allocation. - if (page_usable_latest_.pool != VK_NULL_HANDLE) { - assert_not_zero(page_usable_latest_.descriptors_remaining); - assert_not_zero(page_usable_latest_.descriptor_sets_remaining); - if (page_usable_latest_.descriptors_remaining >= descriptor_count) { - descriptor_set_allocate_info.descriptorPool = page_usable_latest_.pool; - if (dfn.vkAllocateDescriptorSets(device, &descriptor_set_allocate_info, - &descriptor_set) == VK_SUCCESS) { - page_usable_latest_.descriptors_remaining -= descriptor_count; - --page_usable_latest_.descriptor_sets_remaining; - if (!page_usable_latest_.descriptors_remaining || - !page_usable_latest_.descriptor_sets_remaining) { - pages_full_.push_back(page_usable_latest_.pool); - page_usable_latest_.pool = VK_NULL_HANDLE; - } - return descriptor_set; - } - // Failed to allocate internally even though there should be enough space, - // don't try to allocate from this pool again at all. - pages_full_.push_back(page_usable_latest_.pool); - page_usable_latest_.pool = VK_NULL_HANDLE; - } - } - - // If allocating from the latest pool wasn't possible, pick any that has free - // space. Prefer filling pages that have the most free space as they can more - // likely be used for more allocations later. - while (!pages_usable_.empty()) { - auto page_usable_last_it = std::prev(pages_usable_.cend()); - if (page_usable_last_it->second.descriptors_remaining < descriptor_count) { - // All other pages_usable_ entries have fewer free descriptors too (the - // remaining count is the map key). 
- break; - } - // Remove the page from the map unconditionally - in case of a successful - // allocation, it will have a different number of free descriptors, thus a - // new map key (but it will also become page_usable_latest_ instead even), - // or will become full, and in case of a failure to allocate internally even - // though there still should be enough space, it should never be allocated - // from again. - Page map_page = page_usable_last_it->second; - pages_usable_.erase(page_usable_last_it); - descriptor_set_allocate_info.descriptorPool = map_page.pool; - if (dfn.vkAllocateDescriptorSets(device, &descriptor_set_allocate_info, - &descriptor_set) != VK_SUCCESS) { - pages_full_.push_back(map_page.pool); - continue; - } - map_page.descriptors_remaining -= descriptor_count; - --map_page.descriptor_sets_remaining; - if (!map_page.descriptors_remaining || - !map_page.descriptor_sets_remaining) { - pages_full_.push_back(map_page.pool); - } else { - if (page_usable_latest_.pool != VK_NULL_HANDLE) { - // Make the page with more free descriptors the next to allocate from. - if (map_page.descriptors_remaining > - page_usable_latest_.descriptors_remaining) { - pages_usable_.emplace(page_usable_latest_.descriptors_remaining, - page_usable_latest_); - page_usable_latest_ = map_page; - } else { - pages_usable_.emplace(map_page.descriptors_remaining, map_page); - } - } else { - page_usable_latest_ = map_page; - } - } - return descriptor_set; - } - - // Try allocating from a new page. - VkDescriptorPoolCreateInfo new_descriptor_pool_create_info; - new_descriptor_pool_create_info.sType = - VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO; - new_descriptor_pool_create_info.pNext = nullptr; - new_descriptor_pool_create_info.flags = 0; - new_descriptor_pool_create_info.maxSets = descriptor_sets_per_page_; - new_descriptor_pool_create_info.poolSizeCount = 1; - new_descriptor_pool_create_info.pPoolSizes = &descriptor_pool_size_; - VkDescriptorPool new_descriptor_pool; - if (dfn.vkCreateDescriptorPool(device, &new_descriptor_pool_create_info, - nullptr, &new_descriptor_pool) != VK_SUCCESS) { - XELOGE( - "SingleTypeDescriptorSetAllocator: Failed to create a pool for {} sets " - "with {} descriptors", - descriptor_sets_per_page_, descriptor_pool_size_.descriptorCount); - return VK_NULL_HANDLE; - } - descriptor_set_allocate_info.descriptorPool = new_descriptor_pool; - if (dfn.vkAllocateDescriptorSets(device, &descriptor_set_allocate_info, - &descriptor_set) != VK_SUCCESS) { - XELOGE( - "SingleTypeDescriptorSetAllocator: Failed to allocate {} descriptors", - descriptor_count); - dfn.vkDestroyDescriptorPool(device, new_descriptor_pool, nullptr); - return VK_NULL_HANDLE; - } - Page new_page; - new_page.pool = new_descriptor_pool; - new_page.descriptors_remaining = - descriptor_pool_size_.descriptorCount - descriptor_count; - new_page.descriptor_sets_remaining = descriptor_sets_per_page_ - 1; - if (!new_page.descriptors_remaining || !new_page.descriptor_sets_remaining) { - pages_full_.push_back(new_page.pool); - } else { - if (page_usable_latest_.pool != VK_NULL_HANDLE) { - // Make the page with more free descriptors the next to allocate from. 
-      if (new_page.descriptors_remaining >
-          page_usable_latest_.descriptors_remaining) {
-        pages_usable_.emplace(page_usable_latest_.descriptors_remaining,
-                              page_usable_latest_);
-        page_usable_latest_ = new_page;
-      } else {
-        pages_usable_.emplace(new_page.descriptors_remaining, new_page);
-      }
-    } else {
-      page_usable_latest_ = new_page;
-    }
-  }
-  return descriptor_set;
-}
-
-}  // namespace vulkan
-}  // namespace ui
-}  // namespace xe
diff --git a/src/xenia/ui/vulkan/single_type_descriptor_set_allocator.h b/src/xenia/ui/vulkan/single_type_descriptor_set_allocator.h
deleted file mode 100644
index 7a21f6f35..000000000
--- a/src/xenia/ui/vulkan/single_type_descriptor_set_allocator.h
+++ /dev/null
@@ -1,84 +0,0 @@
-/**
- ******************************************************************************
- * Xenia : Xbox 360 Emulator Research Project *
- ******************************************************************************
- * Copyright 2022 Ben Vanik. All rights reserved. *
- * Released under the BSD license - see LICENSE in the root for more details. *
- ******************************************************************************
- */
-
-#ifndef XENIA_UI_VULKAN_SINGLE_TYPE_DESCRIPTOR_SET_ALLOCATOR_H_
-#define XENIA_UI_VULKAN_SINGLE_TYPE_DESCRIPTOR_SET_ALLOCATOR_H_
-
-#include <algorithm>
-#include <cstdint>
-#include <map>
-#include <vector>
-
-#include "xenia/base/assert.h"
-#include "xenia/ui/vulkan/vulkan_provider.h"
-
-namespace xe {
-namespace ui {
-namespace vulkan {
-
-// Allocates multiple descriptors of a single type in descriptor set layouts
-// consisting of descriptors of only that type. There's no way to free these
-// descriptors within the SingleTypeDescriptorSetAllocator, per-layout free
-// lists should be used externally.
-class SingleTypeDescriptorSetAllocator {
- public:
-  explicit SingleTypeDescriptorSetAllocator(
-      const ui::vulkan::VulkanProvider& provider,
-      VkDescriptorType descriptor_type, uint32_t descriptors_per_page,
-      uint32_t descriptor_sets_per_page)
-      : provider_(provider),
-        descriptor_sets_per_page_(descriptor_sets_per_page) {
-    assert_not_zero(descriptor_sets_per_page_);
-    descriptor_pool_size_.type = descriptor_type;
-    // Not allocating sets with 0 descriptors using the allocator - pointless to
-    // have the descriptor count below the set count.
-    descriptor_pool_size_.descriptorCount =
-        std::max(descriptors_per_page, descriptor_sets_per_page);
-  }
-  SingleTypeDescriptorSetAllocator(
-      const SingleTypeDescriptorSetAllocator& allocator) = delete;
-  SingleTypeDescriptorSetAllocator& operator=(
-      const SingleTypeDescriptorSetAllocator& allocator) = delete;
-  ~SingleTypeDescriptorSetAllocator() { Reset(); }
-
-  void Reset();
-
-  VkDescriptorSet Allocate(VkDescriptorSetLayout descriptor_set_layout,
-                           uint32_t descriptor_count);
-
- private:
-  struct Page {
-    VkDescriptorPool pool;
-    uint32_t descriptors_remaining;
-    uint32_t descriptor_sets_remaining;
-  };
-
-  const ui::vulkan::VulkanProvider& provider_;
-
-  VkDescriptorPoolSize descriptor_pool_size_;
-  uint32_t descriptor_sets_per_page_;
-
-  std::vector<VkDescriptorPool> pages_full_;
-  // Because allocations must be contiguous, overflow may happen even if a page
-  // still has free descriptors, so multiple pages may have free space.
-  // To avoid removing and re-adding the page to the map that keeps them sorted
-  // (the key is the number of free descriptors remaining, and it changes at
-  // every allocation from a page), instead of always looking for a free space
-  // in the map, maintaining one page outside the map, and allocation attempts
-  // will be made from that page first.
-  std::multimap<uint32_t, Page> pages_usable_;
-  // Doesn't exist if page_usable_latest_.pool == VK_NULL_HANDLE.
-  Page page_usable_latest_ = {};
-};
-
-}  // namespace vulkan
-}  // namespace ui
-}  // namespace xe
-
-#endif  // XENIA_UI_VULKAN_SINGLE_TYPE_DESCRIPTOR_SET_ALLOCATOR_H_
diff --git a/third_party/premake-core b/third_party/premake-core
index 7eba28258..fe71eb790 160000
--- a/third_party/premake-core
+++ b/third_party/premake-core
@@ -1 +1 @@
-Subproject commit 7eba2825887e49d3a72b30e0a7480bd427a5bab0
+Subproject commit fe71eb790c7d085cd3c6a7b71a50167b4da06e69
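
Reviewer note: below is a minimal usage sketch of the new LinkedTypeDescriptorSetAllocator, based only on the constructor and Allocate() signatures introduced above. Every name and count in it (the owning code, texture_set_layout, the 2:1 image-to-sampler ratio, 256 sets per page) is an illustrative assumption, not something taken from this change.

#include "xenia/ui/vulkan/linked_type_descriptor_set_allocator.h"

// Illustrative sketch only, not part of the diff. The allocator is assumed to
// be a long-lived member of its owner (for example, a texture cache),
// constructed once with per-page sizes roughly matching the expected per-type
// ratio, e.g. two sampled images per sampler (made-up numbers):
//
//   const VkDescriptorPoolSize kPageSizes[] = {
//       {VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, 2048},
//       {VK_DESCRIPTOR_TYPE_SAMPLER, 1024},
//   };
//   LinkedTypeDescriptorSetAllocator allocator(provider, kPageSizes, 2, 256);
//
// Allocating one set with 4 images and 2 samplers then looks like this
// (`texture_set_layout` is a hypothetical layout created elsewhere):
VkDescriptorSet AllocateExampleTextureSet(
    xe::ui::vulkan::LinkedTypeDescriptorSetAllocator& allocator,
    VkDescriptorSetLayout texture_set_layout) {
  const VkDescriptorPoolSize kSetCounts[] = {
      {VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, 4},
      {VK_DESCRIPTOR_TYPE_SAMPLER, 2},
  };
  // A page is considered full as soon as either type runs out of space in it,
  // which is the "LinkedType" behavior described in the new header. Returns
  // VK_NULL_HANDLE if pool creation or vkAllocateDescriptorSets fails.
  return allocator.Allocate(texture_set_layout, kSetCounts, 2);
}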