Merge branch 'master' of https://github.com/xenia-project/xenia into canary_experimental

Gliniak 2022-07-10 10:50:39 +02:00
commit 1d00372e6b
49 changed files with 2378 additions and 1159 deletions

View File

@ -38,51 +38,40 @@ def targets_android(platform):
'imgui', 'imgui',
'mspack', 'mspack',
'snappy', 'snappy',
'spirv-tools',
'xxhash', 'xxhash',
# 'xenia-core', 'xenia-core',
# 'xenia-app-discord', # 'xenia-app-discord',
# 'xenia-apu', 'xenia-apu',
# 'xenia-apu-nop', 'xenia-apu-nop',
'xenia-base', 'xenia-base',
'xenia-base-tests', 'xenia-base-tests',
# 'xenia-cpu', 'xenia-cpu',
# 'xenia-cpu-tests', # 'xenia-cpu-tests',
# 'xenia-cpu-ppc-tests', # 'xenia-cpu-ppc-tests',
# 'xenia-cpu-backend-x64', # 'xenia-cpu-backend-x64',
# 'xenia-debug-ui', # 'xenia-debug-ui',
# 'xenia-gpu', 'xenia-gpu',
# 'xenia-gpu-shader-compiler', 'xenia-gpu-shader-compiler',
# 'xenia-gpu-null', 'xenia-gpu-null',
# 'xenia-gpu-vulkan', 'xenia-gpu-vulkan',
# 'xenia-gpu-vulkan-trace-viewer', # 'xenia-gpu-vulkan-trace-viewer',
# 'xenia-gpu-vulkan-trace-dump', 'xenia-gpu-vulkan-trace-dump',
'xenia-hid', 'xenia-hid',
# 'xenia-hid-demo', # 'xenia-hid-demo',
'xenia-hid-nop', 'xenia-hid-nop',
# 'xenia-kernel', 'xenia-kernel',
'xenia-ui', 'xenia-ui',
'xenia-ui-spirv', 'xenia-ui-vulkan',
# 'xenia-ui-vulkan',
# 'xenia-ui-window-vulkan-demo', # 'xenia-ui-window-vulkan-demo',
'xenia-vfs', 'xenia-vfs',
'xenia-vfs-dump', 'xenia-vfs-dump',
] ]
if platform == 'Android-x86_64': if platform == 'Android-x86_64':
targets.extend([ targets.extend([
'xenia-core',
'xenia-apu',
'xenia-apu-nop',
'xenia-cpu',
'xenia-cpu-tests', 'xenia-cpu-tests',
'xenia-cpu-ppc-tests', 'xenia-cpu-ppc-tests',
'xenia-cpu-backend-x64', 'xenia-cpu-backend-x64',
'xenia-debug-ui', 'xenia-debug-ui',
'xenia-gpu',
'xenia-gpu-null',
'xenia-gpu-vulkan',
'xenia-gpu-shader-compiler',
'xenia-kernel',
]) ])
return targets return targets

View File

@ -15,7 +15,6 @@ project("xenia-app")
"xenia-base", "xenia-base",
"xenia-core", "xenia-core",
"xenia-cpu", "xenia-cpu",
"xenia-cpu-backend-x64",
"xenia-debug-ui", "xenia-debug-ui",
"xenia-gpu", "xenia-gpu",
"xenia-gpu-null", "xenia-gpu-null",
@ -60,6 +59,11 @@ project("xenia-app")
project_root, project_root,
}) })
filter("architecture:x86_64")
links({
"xenia-cpu-backend-x64",
})
filter("platforms:Windows") filter("platforms:Windows")
files({ files({
"main_resources.rc", "main_resources.rc",

View File

@ -477,7 +477,7 @@ void EmulatorApp::EmulatorThread() {
// Setup and initialize all subsystems. If we can't do something // Setup and initialize all subsystems. If we can't do something
// (unsupported system, memory issues, etc) this will fail early. // (unsupported system, memory issues, etc) this will fail early.
X_STATUS result = emulator_->Setup( X_STATUS result = emulator_->Setup(
emulator_window_->window(), emulator_window_->imgui_drawer(), emulator_window_->window(), emulator_window_->imgui_drawer(), true,
CreateAudioSystem, CreateGraphicsSystem, CreateInputDrivers); CreateAudioSystem, CreateGraphicsSystem, CreateInputDrivers);
if (XFAILED(result)) { if (XFAILED(result)) {
XELOGE("Failed to setup emulator: {:08X}", result); XELOGE("Failed to setup emulator: {:08X}", result);

View File

@ -0,0 +1,88 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2022 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#include "xenia/base/exception_handler.h"
namespace xe {
// Based on VIXL Instruction::IsLoad and IsStore.
// https://github.com/Linaro/vixl/blob/d48909dd0ac62197edb75d26ed50927e4384a199/src/aarch64/instructions-aarch64.cc#L484
//
// Copyright 2015, VIXL authors
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// * Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
// * Neither the name of ARM Limited nor the names of its contributors may be
// used to endorse or promote products derived from this software without
// specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
bool IsArm64LoadPrefetchStore(uint32_t instruction, bool& is_store_out) {
if ((instruction & kArm64LoadLiteralFMask) == kArm64LoadLiteralFixed) {
return true;
}
if ((instruction & kArm64LoadStoreAnyFMask) != kArm64LoadStoreAnyFixed) {
return false;
}
if ((instruction & kArm64LoadStorePairAnyFMask) ==
kArm64LoadStorePairAnyFixed) {
is_store_out = !(instruction & kArm64LoadStorePairLoadBit);
return true;
}
switch (Arm64LoadStoreOp(instruction & kArm64LoadStoreMask)) {
case Arm64LoadStoreOp::kLDRB_w:
case Arm64LoadStoreOp::kLDRH_w:
case Arm64LoadStoreOp::kLDR_w:
case Arm64LoadStoreOp::kLDR_x:
case Arm64LoadStoreOp::kLDRSB_x:
case Arm64LoadStoreOp::kLDRSH_x:
case Arm64LoadStoreOp::kLDRSW_x:
case Arm64LoadStoreOp::kLDRSB_w:
case Arm64LoadStoreOp::kLDRSH_w:
case Arm64LoadStoreOp::kLDR_b:
case Arm64LoadStoreOp::kLDR_h:
case Arm64LoadStoreOp::kLDR_s:
case Arm64LoadStoreOp::kLDR_d:
case Arm64LoadStoreOp::kLDR_q:
case Arm64LoadStoreOp::kPRFM:
is_store_out = false;
return true;
case Arm64LoadStoreOp::kSTRB_w:
case Arm64LoadStoreOp::kSTRH_w:
case Arm64LoadStoreOp::kSTR_w:
case Arm64LoadStoreOp::kSTR_x:
case Arm64LoadStoreOp::kSTR_b:
case Arm64LoadStoreOp::kSTR_h:
case Arm64LoadStoreOp::kSTR_s:
case Arm64LoadStoreOp::kSTR_d:
case Arm64LoadStoreOp::kSTR_q:
is_store_out = true;
return true;
default:
return false;
}
}
} // namespace xe
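
The mask tests above can be sanity-checked with hand-assembled encodings. A minimal illustration (not part of the commit; the instruction words below are assumed to be the standard unsigned-offset encodings of STR w1, [x2] and LDR w1, [x2]):

#include <cassert>
#include <cstdint>
#include "xenia/base/exception_handler.h"

void CheckArm64Classification() {
  bool is_store = false;
  // STR w1, [x2] - assumed encoding 0xB9000041: matches kSTR_w, so a store.
  assert(xe::IsArm64LoadPrefetchStore(UINT32_C(0xB9000041), is_store));
  assert(is_store);
  // LDR w1, [x2] - assumed encoding 0xB9400041: matches kLDR_w, so a load.
  assert(xe::IsArm64LoadPrefetchStore(UINT32_C(0xB9400041), is_store));
  assert(!is_store);
}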

View File

@ -2,7 +2,7 @@
****************************************************************************** ******************************************************************************
* Xenia : Xbox 360 Emulator Research Project * * Xenia : Xbox 360 Emulator Research Project *
****************************************************************************** ******************************************************************************
* Copyright 2015 Ben Vanik. All rights reserved. * * Copyright 2022 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. * * Released under the BSD license - see LICENSE in the root for more details. *
****************************************************************************** ******************************************************************************
*/ */
@ -10,14 +10,97 @@
#ifndef XENIA_BASE_EXCEPTION_HANDLER_H_ #ifndef XENIA_BASE_EXCEPTION_HANDLER_H_
#define XENIA_BASE_EXCEPTION_HANDLER_H_ #define XENIA_BASE_EXCEPTION_HANDLER_H_
#include <cstdint>
#include <functional> #include <functional>
#include <vector> #include <vector>
#include "xenia/base/assert.h" #include "xenia/base/assert.h"
#include "xenia/base/x64_context.h" #include "xenia/base/host_thread_context.h"
namespace xe { namespace xe {
// AArch64 load and store decoding based on VIXL.
// https://github.com/Linaro/vixl/blob/ae5957cd66517b3f31dbf37e9bf39db6594abfe3/src/aarch64/constants-aarch64.h
//
// Copyright 2015, VIXL authors
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// * Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
// * Neither the name of ARM Limited nor the names of its contributors may be
// used to endorse or promote products derived from this software without
// specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
// `Instruction address + literal offset` loads.
// This includes PRFM_lit.
constexpr uint32_t kArm64LoadLiteralFMask = UINT32_C(0x3B000000);
constexpr uint32_t kArm64LoadLiteralFixed = UINT32_C(0x18000000);
constexpr uint32_t kArm64LoadStoreAnyFMask = UINT32_C(0x0A000000);
constexpr uint32_t kArm64LoadStoreAnyFixed = UINT32_C(0x08000000);
constexpr uint32_t kArm64LoadStorePairAnyFMask = UINT32_C(0x3A000000);
constexpr uint32_t kArm64LoadStorePairAnyFixed = UINT32_C(0x28000000);
constexpr uint32_t kArm64LoadStorePairLoadBit = UINT32_C(1) << 22;
constexpr uint32_t kArm64LoadStoreMask = UINT32_C(0xC4C00000);
enum class Arm64LoadStoreOp : uint32_t {
kSTRB_w = UINT32_C(0x00000000),
kSTRH_w = UINT32_C(0x40000000),
kSTR_w = UINT32_C(0x80000000),
kSTR_x = UINT32_C(0xC0000000),
kLDRB_w = UINT32_C(0x00400000),
kLDRH_w = UINT32_C(0x40400000),
kLDR_w = UINT32_C(0x80400000),
kLDR_x = UINT32_C(0xC0400000),
kLDRSB_x = UINT32_C(0x00800000),
kLDRSH_x = UINT32_C(0x40800000),
kLDRSW_x = UINT32_C(0x80800000),
kLDRSB_w = UINT32_C(0x00C00000),
kLDRSH_w = UINT32_C(0x40C00000),
kSTR_b = UINT32_C(0x04000000),
kSTR_h = UINT32_C(0x44000000),
kSTR_s = UINT32_C(0x84000000),
kSTR_d = UINT32_C(0xC4000000),
kSTR_q = UINT32_C(0x04800000),
kLDR_b = UINT32_C(0x04400000),
kLDR_h = UINT32_C(0x44400000),
kLDR_s = UINT32_C(0x84400000),
kLDR_d = UINT32_C(0xC4400000),
kLDR_q = UINT32_C(0x04C00000),
kPRFM = UINT32_C(0xC0800000),
};
constexpr uint32_t kArm64LoadStoreOffsetFMask = UINT32_C(0x3B200C00);
enum class Arm64LoadStoreOffsetFixed : uint32_t {
kUnscaledOffset = UINT32_C(0x38000000),
kPostIndex = UINT32_C(0x38000400),
kPreIndex = UINT32_C(0x38000C00),
kRegisterOffset = UINT32_C(0x38200800),
};
constexpr uint32_t kArm64LoadStoreUnsignedOffsetFMask = UINT32_C(0x3B000000);
constexpr uint32_t kArm64LoadStoreUnsignedOffsetFixed = UINT32_C(0x39000000);
bool IsArm64LoadPrefetchStore(uint32_t instruction, bool& is_store_out);
class Exception { class Exception {
public: public:
enum class Code { enum class Code {
@ -32,7 +115,7 @@ class Exception {
kWrite, kWrite,
}; };
void InitializeAccessViolation(X64Context* thread_context, void InitializeAccessViolation(HostThreadContext* thread_context,
uint64_t fault_address, uint64_t fault_address,
AccessViolationOperation operation) { AccessViolationOperation operation) {
code_ = Code::kAccessViolation; code_ = Code::kAccessViolation;
@ -40,7 +123,7 @@ class Exception {
fault_address_ = fault_address; fault_address_ = fault_address;
access_violation_operation_ = operation; access_violation_operation_ = operation;
} }
void InitializeIllegalInstruction(X64Context* thread_context) { void InitializeIllegalInstruction(HostThreadContext* thread_context) {
code_ = Code::kIllegalInstruction; code_ = Code::kIllegalInstruction;
thread_context_ = thread_context; thread_context_ = thread_context;
} }
@ -48,24 +131,67 @@ class Exception {
Code code() const { return code_; } Code code() const { return code_; }
// Returns the platform-specific thread context info. // Returns the platform-specific thread context info.
X64Context* thread_context() const { return thread_context_; } // Note that certain registers must be modified through Modify* proxy
// functions rather than directly:
// x86-64:
// - General-purpose registers (r##, r8-r15).
// - XMM registers.
// AArch64:
// - General-purpose registers (Xn), including FP and LR.
// - SIMD and floating-point registers (Vn).
HostThreadContext* thread_context() const { return thread_context_; }
#if XE_ARCH_AMD64
// Returns the program counter where the exception occurred. // Returns the program counter where the exception occurred.
// RIP on x64.
uint64_t pc() const { return thread_context_->rip; }
// Sets the program counter where execution will resume.
void set_resume_pc(uint64_t pc) { thread_context_->rip = pc; }
#else
// Returns the program counter where the exception occurred.
// RIP on x64.
uint64_t pc() const { uint64_t pc() const {
#if XE_ARCH_AMD64
return thread_context_->rip;
#elif XE_ARCH_ARM64
return thread_context_->pc;
#else
assert_always(); assert_always();
return 0; return 0;
#endif // XE_ARCH
} }
// Sets the program counter where execution will resume. // Sets the program counter where execution will resume.
void set_resume_pc(uint64_t pc) { assert_always(); } void set_resume_pc(uint64_t pc) {
#endif #if XE_ARCH_AMD64
thread_context_->rip = pc;
#elif XE_ARCH_ARM64
thread_context_->pc = pc;
#else
assert_always();
#endif // XE_ARCH
}
#if XE_ARCH_AMD64
// The index is relative to X64Register::kIntRegisterFirst.
uint64_t& ModifyIntRegister(uint32_t index) {
assert_true(index <= 15);
modified_int_registers_ |= UINT16_C(1) << index;
return thread_context_->int_registers[index];
}
uint16_t modified_int_registers() const { return modified_int_registers_; }
vec128_t& ModifyXmmRegister(uint32_t index) {
assert_true(index <= 15);
modified_xmm_registers_ |= UINT16_C(1) << index;
return thread_context_->xmm_registers[index];
}
uint16_t modified_xmm_registers() const { return modified_xmm_registers_; }
#elif XE_ARCH_ARM64
uint64_t& ModifyXRegister(uint32_t index) {
assert_true(index <= 30);
modified_x_registers_ |= UINT32_C(1) << index;
return thread_context_->x[index];
}
uint32_t modified_x_registers() const { return modified_x_registers_; }
vec128_t& ModifyVRegister(uint32_t index) {
assert_true(index <= 31);
modified_v_registers_ |= UINT32_C(1) << index;
return thread_context_->v[index];
}
uint32_t modified_v_registers() const { return modified_v_registers_; }
#endif // XE_ARCH
// In case of AV, address that was read from/written to. // In case of AV, address that was read from/written to.
uint64_t fault_address() const { return fault_address_; } uint64_t fault_address() const { return fault_address_; }
@ -77,7 +203,14 @@ class Exception {
private: private:
Code code_ = Code::kInvalidException; Code code_ = Code::kInvalidException;
X64Context* thread_context_ = nullptr; HostThreadContext* thread_context_ = nullptr;
#if XE_ARCH_AMD64
uint16_t modified_int_registers_ = 0;
uint16_t modified_xmm_registers_ = 0;
#elif XE_ARCH_ARM64
uint32_t modified_x_registers_ = 0;
uint32_t modified_v_registers_ = 0;
#endif // XE_ARCH
uint64_t fault_address_ = 0; uint64_t fault_address_ = 0;
AccessViolationOperation access_violation_operation_ = AccessViolationOperation access_violation_operation_ =
AccessViolationOperation::kUnknown; AccessViolationOperation::kUnknown;
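
With register writes now routed through the Modify* proxies, a handler can patch thread state and resume without the platform code copying the whole context back. A rough sketch of a consumer on x86-64 (not from the commit; the skipped-length constant is hypothetical):

#include <cstdint>
#include "xenia/base/exception_handler.h"

// Hypothetical; a real handler would decode the faulting instruction's length.
constexpr uint64_t kFaultingInstructionLength = 7;

bool SkipFaultingLoad(xe::Exception* ex, void* /*data*/) {
  if (ex->code() != xe::Exception::Code::kAccessViolation) {
    return false;  // Let the next handler (or the OS) deal with it.
  }
  // Pretend the faulting load returned zero: index 0 is RAX, the first entry
  // relative to X64Register::kIntRegisterFirst.
  ex->ModifyIntRegister(0) = 0;
  ex->set_resume_pc(ex->pc() + kFaultingInstructionLength);
  return true;
}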

View File

@ -1,35 +0,0 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2015 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#include "xenia/base/exception_handler.h"
#include "xenia/base/assert.h"
#include "xenia/base/math.h"
#include "xenia/base/platform_linux.h"
namespace xe {
// This can be as large as needed, but isn't often needed.
// As we will be sometimes firing many exceptions we want to avoid having to
// scan the table too much or invoke many custom handlers.
constexpr size_t kMaxHandlerCount = 8;
// All custom handlers, left-aligned and null terminated.
// Executed in order.
std::pair<ExceptionHandler::Handler, void*> handlers_[kMaxHandlerCount];
void ExceptionHandler::Install(Handler fn, void* data) {
// TODO(dougvj) stub
}
void ExceptionHandler::Uninstall(Handler fn, void* data) {
// TODO(dougvj) stub
}
} // namespace xe

View File

@ -2,17 +2,285 @@
****************************************************************************** ******************************************************************************
* Xenia : Xbox 360 Emulator Research Project * * Xenia : Xbox 360 Emulator Research Project *
****************************************************************************** ******************************************************************************
* Copyright 2017 Ben Vanik. All rights reserved. * * Copyright 2022 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. * * Released under the BSD license - see LICENSE in the root for more details. *
****************************************************************************** ******************************************************************************
*/ */
#include "xenia/base/exception_handler.h" #include "xenia/base/exception_handler.h"
#include <signal.h>
#include <ucontext.h>
#include <cstdint>
#include "xenia/base/assert.h"
#include "xenia/base/host_thread_context.h"
#include "xenia/base/logging.h"
#include "xenia/base/math.h"
#include "xenia/base/platform.h"
namespace xe { namespace xe {
// TODO(DrChat): Exception handling on linux. bool signal_handlers_installed_ = false;
void ExceptionHandler::Install(Handler fn, void* data) {} struct sigaction original_sigill_handler_;
void ExceptionHandler::Uninstall(Handler fn, void* data) {} struct sigaction original_sigsegv_handler_;
} // namespace xe // This can be as large as needed, but isn't often needed.
// As we will be sometimes firing many exceptions we want to avoid having to
// scan the table too much or invoke many custom handlers.
constexpr size_t kMaxHandlerCount = 8;
// All custom handlers, left-aligned and null terminated.
// Executed in order.
std::pair<ExceptionHandler::Handler, void*> handlers_[kMaxHandlerCount];
static void ExceptionHandlerCallback(int signal_number, siginfo_t* signal_info,
void* signal_context) {
mcontext_t& mcontext =
reinterpret_cast<ucontext_t*>(signal_context)->uc_mcontext;
HostThreadContext thread_context;
#if XE_ARCH_AMD64
thread_context.rip = uint64_t(mcontext.gregs[REG_RIP]);
thread_context.eflags = uint32_t(mcontext.gregs[REG_EFL]);
// The REG_ order may be different than the register indices in the
// instruction encoding.
thread_context.rax = uint64_t(mcontext.gregs[REG_RAX]);
thread_context.rcx = uint64_t(mcontext.gregs[REG_RCX]);
thread_context.rdx = uint64_t(mcontext.gregs[REG_RDX]);
thread_context.rbx = uint64_t(mcontext.gregs[REG_RBX]);
thread_context.rsp = uint64_t(mcontext.gregs[REG_RSP]);
thread_context.rbp = uint64_t(mcontext.gregs[REG_RBP]);
thread_context.rsi = uint64_t(mcontext.gregs[REG_RSI]);
thread_context.rdi = uint64_t(mcontext.gregs[REG_RDI]);
thread_context.r8 = uint64_t(mcontext.gregs[REG_R8]);
thread_context.r9 = uint64_t(mcontext.gregs[REG_R9]);
thread_context.r10 = uint64_t(mcontext.gregs[REG_R10]);
thread_context.r11 = uint64_t(mcontext.gregs[REG_R11]);
thread_context.r12 = uint64_t(mcontext.gregs[REG_R12]);
thread_context.r13 = uint64_t(mcontext.gregs[REG_R13]);
thread_context.r14 = uint64_t(mcontext.gregs[REG_R14]);
thread_context.r15 = uint64_t(mcontext.gregs[REG_R15]);
std::memcpy(thread_context.xmm_registers, mcontext.fpregs->_xmm,
sizeof(thread_context.xmm_registers));
#elif XE_ARCH_ARM64
std::memcpy(thread_context.x, mcontext.regs, sizeof(thread_context.x));
thread_context.sp = mcontext.sp;
thread_context.pc = mcontext.pc;
thread_context.pstate = mcontext.pstate;
struct fpsimd_context* mcontext_fpsimd = nullptr;
struct esr_context* mcontext_esr = nullptr;
for (struct _aarch64_ctx* mcontext_extension =
reinterpret_cast<struct _aarch64_ctx*>(mcontext.__reserved);
mcontext_extension->magic;
mcontext_extension = reinterpret_cast<struct _aarch64_ctx*>(
reinterpret_cast<uint8_t*>(mcontext_extension) +
mcontext_extension->size)) {
switch (mcontext_extension->magic) {
case FPSIMD_MAGIC:
mcontext_fpsimd =
reinterpret_cast<struct fpsimd_context*>(mcontext_extension);
break;
case ESR_MAGIC:
mcontext_esr =
reinterpret_cast<struct esr_context*>(mcontext_extension);
break;
default:
break;
}
}
assert_not_null(mcontext_fpsimd);
if (mcontext_fpsimd) {
thread_context.fpsr = mcontext_fpsimd->fpsr;
thread_context.fpcr = mcontext_fpsimd->fpcr;
std::memcpy(thread_context.v, mcontext_fpsimd->vregs,
sizeof(thread_context.v));
}
#endif // XE_ARCH
Exception ex;
switch (signal_number) {
case SIGILL:
ex.InitializeIllegalInstruction(&thread_context);
break;
case SIGSEGV: {
Exception::AccessViolationOperation access_violation_operation;
#if XE_ARCH_AMD64
// x86_pf_error_code::X86_PF_WRITE
constexpr uint64_t kX86PageFaultErrorCodeWrite = UINT64_C(1) << 1;
access_violation_operation =
(uint64_t(mcontext.gregs[REG_ERR]) & kX86PageFaultErrorCodeWrite)
? Exception::AccessViolationOperation::kWrite
: Exception::AccessViolationOperation::kRead;
#elif XE_ARCH_ARM64
// For a Data Abort (EC - ESR_EL1 bits 31:26 - 0b100100 from a lower
// Exception Level, 0b100101 without a change in the Exception Level),
// bit 6 is 0 for reading from a memory location, 1 for writing to a
// memory location.
if (mcontext_esr && ((mcontext_esr->esr >> 26) & 0b111110) == 0b100100) {
access_violation_operation =
(mcontext_esr->esr & (UINT64_C(1) << 6))
? Exception::AccessViolationOperation::kWrite
: Exception::AccessViolationOperation::kRead;
} else {
// Determine the memory access direction based on which instruction has
// requested it.
// esr_context may be unavailable on certain hosts (for instance, on
// Android, it was added only in NDK r16 - which is the first NDK
// version to support the Android API level 27, while NDK r15 doesn't
// have esr_context in its API 26 sigcontext.h).
// On AArch64 (unlike on AArch32), the program counter is the address of
// the currently executing instruction.
bool instruction_is_store;
if (IsArm64LoadPrefetchStore(
*reinterpret_cast<const uint32_t*>(mcontext.pc),
instruction_is_store)) {
access_violation_operation =
instruction_is_store ? Exception::AccessViolationOperation::kWrite
: Exception::AccessViolationOperation::kRead;
} else {
assert_always(
"No ESR in the exception thread context, or it's not a Data "
"Abort, and the faulting instruction is not a known load, "
"prefetch or store instruction");
access_violation_operation =
Exception::AccessViolationOperation::kUnknown;
}
}
#else
access_violation_operation =
Exception::AccessViolationOperation::kUnknown;
#endif // XE_ARCH
ex.InitializeAccessViolation(
&thread_context, reinterpret_cast<uint64_t>(signal_info->si_addr),
access_violation_operation);
} break;
default:
assert_unhandled_case(signal_number);
}
for (size_t i = 0; i < xe::countof(handlers_) && handlers_[i].first; ++i) {
if (handlers_[i].first(&ex, handlers_[i].second)) {
// Exception handled.
#if XE_ARCH_AMD64
mcontext.gregs[REG_RIP] = greg_t(thread_context.rip);
mcontext.gregs[REG_EFL] = greg_t(thread_context.eflags);
uint32_t modified_register_index;
// The order must match the order in X64Register.
static const size_t kIntRegisterMap[] = {
REG_RAX, REG_RCX, REG_RDX, REG_RBX, REG_RSP, REG_RBP,
REG_RSI, REG_RDI, REG_R8, REG_R9, REG_R10, REG_R11,
REG_R12, REG_R13, REG_R14, REG_R15,
};
uint16_t modified_int_registers_remaining = ex.modified_int_registers();
while (xe::bit_scan_forward(modified_int_registers_remaining,
&modified_register_index)) {
modified_int_registers_remaining &=
~(UINT16_C(1) << modified_register_index);
mcontext.gregs[kIntRegisterMap[modified_register_index]] =
thread_context.int_registers[modified_register_index];
}
uint16_t modified_xmm_registers_remaining = ex.modified_xmm_registers();
while (xe::bit_scan_forward(modified_xmm_registers_remaining,
&modified_register_index)) {
modified_xmm_registers_remaining &=
~(UINT16_C(1) << modified_register_index);
std::memcpy(&mcontext.fpregs->_xmm[modified_register_index],
&thread_context.xmm_registers[modified_register_index],
sizeof(vec128_t));
}
#elif XE_ARCH_ARM64
uint32_t modified_register_index;
uint32_t modified_x_registers_remaining = ex.modified_x_registers();
while (xe::bit_scan_forward(modified_x_registers_remaining,
&modified_register_index)) {
modified_x_registers_remaining &=
~(UINT32_C(1) << modified_register_index);
mcontext.regs[modified_register_index] =
thread_context.x[modified_register_index];
}
mcontext.sp = thread_context.sp;
mcontext.pc = thread_context.pc;
mcontext.pstate = thread_context.pstate;
if (mcontext_fpsimd) {
mcontext_fpsimd->fpsr = thread_context.fpsr;
mcontext_fpsimd->fpcr = thread_context.fpcr;
uint32_t modified_v_registers_remaining = ex.modified_v_registers();
while (xe::bit_scan_forward(modified_v_registers_remaining,
&modified_register_index)) {
modified_v_registers_remaining &=
~(UINT32_C(1) << modified_register_index);
std::memcpy(&mcontext_fpsimd->vregs[modified_register_index],
&thread_context.v[modified_register_index],
sizeof(vec128_t));
mcontext.regs[modified_register_index] =
thread_context.x[modified_register_index];
}
}
#endif // XE_ARCH
return;
}
}
}
void ExceptionHandler::Install(Handler fn, void* data) {
if (!signal_handlers_installed_) {
struct sigaction signal_handler;
std::memset(&signal_handler, 0, sizeof(signal_handler));
signal_handler.sa_sigaction = ExceptionHandlerCallback;
signal_handler.sa_flags = SA_SIGINFO;
if (sigaction(SIGILL, &signal_handler, &original_sigill_handler_) != 0) {
assert_always("Failed to install new SIGILL handler");
}
if (sigaction(SIGSEGV, &signal_handler, &original_sigsegv_handler_) != 0) {
assert_always("Failed to install new SIGSEGV handler");
}
signal_handlers_installed_ = true;
}
for (size_t i = 0; i < xe::countof(handlers_); ++i) {
if (!handlers_[i].first) {
handlers_[i].first = fn;
handlers_[i].second = data;
return;
}
}
assert_always("Too many exception handlers installed");
}
void ExceptionHandler::Uninstall(Handler fn, void* data) {
for (size_t i = 0; i < xe::countof(handlers_); ++i) {
if (handlers_[i].first == fn && handlers_[i].second == data) {
for (; i < xe::countof(handlers_) - 1; ++i) {
handlers_[i] = handlers_[i + 1];
}
handlers_[i].first = nullptr;
handlers_[i].second = nullptr;
break;
}
}
bool has_any = false;
for (size_t i = 0; i < xe::countof(handlers_); ++i) {
if (handlers_[i].first) {
has_any = true;
break;
}
}
if (!has_any) {
if (signal_handlers_installed_) {
if (sigaction(SIGILL, &original_sigill_handler_, NULL) != 0) {
assert_always("Failed to restore original SIGILL handler");
}
if (sigaction(SIGSEGV, &original_sigsegv_handler_, NULL) != 0) {
assert_always("Failed to restore original SIGSEGV handler");
}
signal_handlers_installed_ = false;
}
}
}
} // namespace xe
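
The Data Abort test in the handler above (EC in ESR_EL1 bits 31:26, write-not-read in bit 6) can be illustrated with made-up ESR values; this is not code from the commit:

#include <cstdint>

constexpr bool IsDataAbortWrite(uint64_t esr) {
  // EC == 0b10010x (Data Abort, with or without an Exception Level change)
  // and WnR (bit 6) set means the faulting access was a write.
  return ((esr >> 26) & 0b111110) == 0b100100 &&
         (esr & (UINT64_C(1) << 6)) != 0;
}
static_assert(IsDataAbortWrite(UINT64_C(0x92000046)));   // EC=0b100100, WnR=1
static_assert(!IsDataAbortWrite(UINT64_C(0x92000006)));  // WnR=0: a read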

View File

@ -35,8 +35,7 @@ LONG CALLBACK ExceptionHandlerCallback(PEXCEPTION_POINTERS ex_info) {
return EXCEPTION_CONTINUE_SEARCH; return EXCEPTION_CONTINUE_SEARCH;
} }
// TODO(benvanik): avoid this by mapping X64Context virtual? HostThreadContext thread_context;
X64Context thread_context;
thread_context.rip = ex_info->ContextRecord->Rip; thread_context.rip = ex_info->ContextRecord->Rip;
thread_context.eflags = ex_info->ContextRecord->EFlags; thread_context.eflags = ex_info->ContextRecord->EFlags;
std::memcpy(thread_context.int_registers, &ex_info->ContextRecord->Rax, std::memcpy(thread_context.int_registers, &ex_info->ContextRecord->Rax,
@ -79,8 +78,26 @@ LONG CALLBACK ExceptionHandlerCallback(PEXCEPTION_POINTERS ex_info) {
for (size_t i = 0; i < xe::countof(handlers_) && handlers_[i].first; ++i) { for (size_t i = 0; i < xe::countof(handlers_) && handlers_[i].first; ++i) {
if (handlers_[i].first(&ex, handlers_[i].second)) { if (handlers_[i].first(&ex, handlers_[i].second)) {
// Exception handled. // Exception handled.
// TODO(benvanik): update all thread state? Dirty flags?
ex_info->ContextRecord->Rip = thread_context.rip; ex_info->ContextRecord->Rip = thread_context.rip;
ex_info->ContextRecord->EFlags = thread_context.eflags;
uint32_t modified_register_index;
uint16_t modified_int_registers_remaining = ex.modified_int_registers();
while (xe::bit_scan_forward(modified_int_registers_remaining,
&modified_register_index)) {
modified_int_registers_remaining &=
~(UINT16_C(1) << modified_register_index);
(&ex_info->ContextRecord->Rax)[modified_register_index] =
thread_context.int_registers[modified_register_index];
}
uint16_t modified_xmm_registers_remaining = ex.modified_xmm_registers();
while (xe::bit_scan_forward(modified_xmm_registers_remaining,
&modified_register_index)) {
modified_xmm_registers_remaining &=
~(UINT16_C(1) << modified_register_index);
std::memcpy(&ex_info->ContextRecord->Xmm0 + modified_register_index,
&thread_context.xmm_registers[modified_register_index],
sizeof(vec128_t));
}
return EXCEPTION_CONTINUE_EXECUTION; return EXCEPTION_CONTINUE_EXECUTION;
} }
} }
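
The write-back loops above only touch registers whose bits are set in the modified-register masks. The same pattern in isolation, using std::countr_zero (C++20) instead of xe::bit_scan_forward, purely as an illustration:

#include <bit>
#include <cstdint>

void WriteBackModifiedIntRegisters(uint16_t modified_mask, const uint64_t* src,
                                   uint64_t* dst) {
  while (modified_mask) {
    uint32_t index = uint32_t(std::countr_zero(modified_mask));
    modified_mask &= uint16_t(modified_mask - 1);  // Clear the lowest set bit.
    // Copy only the registers the exception handler actually modified.
    dst[index] = src[index];
  }
}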

View File

@ -0,0 +1,95 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2022 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#include "xenia/base/host_thread_context.h"
#include "xenia/base/assert.h"
#include "xenia/base/platform.h"
#include "xenia/base/string_util.h"
namespace xe {
// NOTE: this order matches 1:1 with the HostRegister enums.
static const char* kRegisterNames[] = {
#if XE_ARCH_AMD64
"rip", "eflags", "rax", "rcx", "rdx", "rbx", "rsp",
"rbp", "rsi", "rdi", "r8", "r9", "r10", "r11",
"r12", "r13", "r14", "r15", "xmm0", "xmm1", "xmm2",
"xmm3", "xmm4", "xmm5", "xmm6", "xmm7", "xmm8", "xmm9",
"xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15",
#elif XE_ARCH_ARM64
"x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9",
"x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x18", "x19",
"x20", "x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28", "x29",
"x30", "sp", "pc", "pstate", "fpsr", "fpcr", "v0", "v1", "v2", "v3",
"v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13",
"v14", "v15", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23",
"v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31",
#endif // XE_ARCH
};
const char* HostThreadContext::GetRegisterName(HostRegister reg) {
return kRegisterNames[int(reg)];
}
std::string HostThreadContext::GetStringFromValue(HostRegister reg,
bool hex) const {
#if XE_ARCH_AMD64
switch (reg) {
case X64Register::kRip:
return hex ? string_util::to_hex_string(rip) : std::to_string(rip);
case X64Register::kEflags:
return hex ? string_util::to_hex_string(eflags) : std::to_string(eflags);
default:
if (reg >= X64Register::kIntRegisterFirst &&
reg <= X64Register::kIntRegisterLast) {
auto value =
int_registers[int(reg) - int(X64Register::kIntRegisterFirst)];
return hex ? string_util::to_hex_string(value) : std::to_string(value);
} else if (reg >= X64Register::kXmm0 && reg <= X64Register::kXmm15) {
auto value = xmm_registers[int(reg) - int(X64Register::kXmm0)];
return hex ? string_util::to_hex_string(value) : xe::to_string(value);
} else {
assert_unhandled_case(reg);
return std::string();
}
}
#elif XE_ARCH_ARM64
switch (reg) {
case Arm64Register::kSp:
return hex ? string_util::to_hex_string(sp) : std::to_string(sp);
case Arm64Register::kPc:
return hex ? string_util::to_hex_string(pc) : std::to_string(pc);
case Arm64Register::kPstate:
return hex ? string_util::to_hex_string(pstate) : std::to_string(pstate);
case Arm64Register::kFpsr:
return hex ? string_util::to_hex_string(fpsr) : std::to_string(fpsr);
case Arm64Register::kFpcr:
return hex ? string_util::to_hex_string(fpcr) : std::to_string(fpcr);
default:
if (reg >= Arm64Register::kX0 && reg <= Arm64Register::kX30) {
auto value = x[int(reg) - int(Arm64Register::kX0)];
return hex ? string_util::to_hex_string(value) : std::to_string(value);
} else if (reg >= Arm64Register::kV0 && reg <= Arm64Register::kV31) {
auto value = v[int(reg) - int(Arm64Register::kV0)];
return hex ? string_util::to_hex_string(value) : xe::to_string(value);
} else {
assert_unhandled_case(reg);
return std::string();
}
}
#else
assert_always(
"HostThreadContext::GetStringFromValue not implemented for the target "
"CPU architecture");
return std::string();
#endif // XE_ARCH
}
} // namespace xe
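
Hypothetical usage of the helpers above when dumping thread state on an x86-64 build (the exact string format is whatever string_util::to_hex_string produces; not code from the commit):

#include <string>
#include "xenia/base/host_thread_context.h"
#include "xenia/base/logging.h"

void LogRip(const xe::HostThreadContext& ctx) {
  // "rip" - names come from the kRegisterNames table above.
  const char* name =
      xe::HostThreadContext::GetRegisterName(xe::X64Register::kRip);
  // Hexadecimal form of the value, via string_util::to_hex_string.
  std::string value = ctx.GetStringFromValue(xe::X64Register::kRip, true);
  XELOGI("{} = {}", name, value);
}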

View File

@ -2,13 +2,13 @@
****************************************************************************** ******************************************************************************
* Xenia : Xbox 360 Emulator Research Project * * Xenia : Xbox 360 Emulator Research Project *
****************************************************************************** ******************************************************************************
* Copyright 2015 Ben Vanik. All rights reserved. * * Copyright 2022 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. * * Released under the BSD license - see LICENSE in the root for more details. *
****************************************************************************** ******************************************************************************
*/ */
#ifndef XENIA_BASE_X64_CONTEXT_H_ #ifndef XENIA_BASE_HOST_THREAD_CONTEXT_H_
#define XENIA_BASE_X64_CONTEXT_H_ #define XENIA_BASE_HOST_THREAD_CONTEXT_H_
#include <cstdint> #include <cstdint>
#include <string> #include <string>
@ -22,15 +22,18 @@
namespace xe { namespace xe {
class X64Context; // NOTE: The order of the registers in the enumerations must match the order in
// the string table in host_thread_context.cc, as well as remapping tables in
// exception handler implementations.
#if XE_ARCH_AMD64
enum class X64Register { enum class X64Register {
// NOTE: this order matches 1:1 with the order in the X64Context.
// NOTE: this order matches 1:1 with a string table in the x64_context.cc.
kRip, kRip,
kEflags, kEflags,
kRax,
kIntRegisterFirst,
// The order matches the indices in the instruction encoding, as well as the
// Windows CONTEXT structure.
kRax = kIntRegisterFirst,
kRcx, kRcx,
kRdx, kRdx,
kRbx, kRbx,
@ -46,6 +49,8 @@ enum class X64Register {
kR13, kR13,
kR14, kR14,
kR15, kR15,
kIntRegisterLast = kR15,
kXmm0, kXmm0,
kXmm1, kXmm1,
kXmm2, kXmm2,
@ -64,8 +69,91 @@ enum class X64Register {
kXmm15, kXmm15,
}; };
class X64Context { enum class Arm64Register {
kX0,
kX1,
kX2,
kX3,
kX4,
kX5,
kX6,
kX7,
kX8,
kX9,
kX10,
kX11,
kX12,
kX13,
kX14,
kX15,
kX16,
kX17,
kX18,
kX19,
kX20,
kX21,
kX22,
kX23,
kX24,
kX25,
kX26,
kX27,
kX28,
// FP (frame pointer).
kX29,
// LR (link register).
kX30,
kSp,
kPc,
kPstate,
kFpsr,
kFpcr,
// The whole 128 bits of a Vn register are also known as Qn (quadword).
kV0,
kV1,
kV2,
kV3,
kV4,
kV5,
kV6,
kV7,
kV8,
kV9,
kV10,
kV11,
kV12,
kV13,
kV14,
kV15,
kV16,
kV17,
kV18,
kV19,
kV20,
kV21,
kV22,
kV23,
kV24,
kV25,
kV26,
kV27,
kV28,
kV29,
kV30,
kV31,
};
#if XE_ARCH_AMD64
using HostRegister = X64Register;
#elif XE_ARCH_ARM64
using HostRegister = Arm64Register;
#else
enum class HostRegister {};
#endif // XE_ARCH
class HostThreadContext {
public: public:
#if XE_ARCH_AMD64
uint64_t rip; uint64_t rip;
uint32_t eflags; uint32_t eflags;
union { union {
@ -89,7 +177,6 @@ class X64Context {
}; };
uint64_t int_registers[16]; uint64_t int_registers[16];
}; };
union { union {
struct { struct {
vec128_t xmm0; vec128_t xmm0;
@ -111,12 +198,19 @@ class X64Context {
}; };
vec128_t xmm_registers[16]; vec128_t xmm_registers[16];
}; };
#elif XE_ARCH_ARM64
uint64_t x[31];
uint64_t sp;
uint64_t pc;
uint64_t pstate;
uint32_t fpsr;
uint32_t fpcr;
vec128_t v[32];
#endif // XE_ARCH
static const char* GetRegisterName(X64Register reg); static const char* GetRegisterName(HostRegister reg);
std::string GetStringFromValue(X64Register reg, bool hex) const; std::string GetStringFromValue(HostRegister reg, bool hex) const;
void SetValueFromString(X64Register reg, std::string value, bool hex);
}; };
#endif // XE_ARCH_AMD64
} // namespace xe } // namespace xe
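
In the x86-64 layout above, the named fields and the int_registers array occupy the same union storage, and kIntRegisterFirst anchors the enum-to-index mapping. A quick illustration (assumes an x86-64 build and relies on the same union-aliasing idiom the exception handlers use):

#include <cassert>
#include <cstddef>
#include "xenia/base/host_thread_context.h"

void CheckIntRegisterIndexing() {
  xe::HostThreadContext ctx = {};
  ctx.rcx = 0x1234;
  // rax, rcx, rdx, rbx, ... is the instruction-encoding order, so rcx is 1.
  size_t rcx_index = size_t(xe::X64Register::kRcx) -
                     size_t(xe::X64Register::kIntRegisterFirst);
  assert(rcx_index == 1);
  assert(ctx.int_registers[rcx_index] == 0x1234);
}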

View File

@ -1,67 +0,0 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2015 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#include "xenia/base/x64_context.h"
#include "xenia/base/assert.h"
#include "xenia/base/platform.h"
#include "xenia/base/string_util.h"
namespace xe {
#if XE_ARCH_AMD64
// NOTE: this order matches 1:1 with the X64Register enum.
static const char* kRegisterNames[] = {
"rip", "eflags", "rax", "rcx", "rdx", "rbx", "rsp",
"rbp", "rsi", "rdi", "r8", "r9", "r10", "r11",
"r12", "r13", "r14", "r15", "xmm0", "xmm1", "xmm2",
"xmm3", "xmm4", "xmm5", "xmm6", "xmm7", "xmm8", "xmm9",
"xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15",
};
const char* X64Context::GetRegisterName(X64Register reg) {
return kRegisterNames[static_cast<int>(reg)];
}
std::string X64Context::GetStringFromValue(X64Register reg, bool hex) const {
switch (reg) {
case X64Register::kRip:
return hex ? string_util::to_hex_string(rip) : std::to_string(rip);
case X64Register::kEflags:
return hex ? string_util::to_hex_string(eflags) : std::to_string(eflags);
default:
if (static_cast<int>(reg) >= static_cast<int>(X64Register::kRax) &&
static_cast<int>(reg) <= static_cast<int>(X64Register::kR15)) {
auto value = int_registers[static_cast<int>(reg) -
static_cast<int>(X64Register::kRax)];
return hex ? string_util::to_hex_string(value) : std::to_string(value);
} else if (static_cast<int>(reg) >=
static_cast<int>(X64Register::kXmm0) &&
static_cast<int>(reg) <=
static_cast<int>(X64Register::kXmm15)) {
auto value = xmm_registers[static_cast<int>(reg) -
static_cast<int>(X64Register::kXmm0)];
return hex ? string_util::to_hex_string(value) : xe::to_string(value);
} else {
assert_unhandled_case(reg);
return "";
}
}
}
void X64Context::SetValueFromString(X64Register reg, std::string value,
bool hex) {
// TODO(benvanik): set value from string.
assert_always(false);
}
#endif // XE_ARCH_AMD64
} // namespace xe

View File

@ -0,0 +1,36 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2022 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#include "xenia/cpu/backend/null_backend.h"
#include "xenia/cpu/backend/assembler.h"
#include "xenia/cpu/function.h"
namespace xe {
namespace cpu {
namespace backend {
void NullBackend::CommitExecutableRange(uint32_t guest_low,
uint32_t guest_high) {}
std::unique_ptr<Assembler> NullBackend::CreateAssembler() { return nullptr; }
std::unique_ptr<GuestFunction> NullBackend::CreateGuestFunction(
Module* module, uint32_t address) {
return nullptr;
}
uint64_t NullBackend::CalculateNextHostInstruction(ThreadDebugInfo* thread_info,
uint64_t current_pc) {
return current_pc;
}
} // namespace backend
} // namespace cpu
} // namespace xe

View File

@ -0,0 +1,36 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2022 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#ifndef XENIA_CPU_BACKEND_NULL_BACKEND_H_
#define XENIA_CPU_BACKEND_NULL_BACKEND_H_
#include "xenia/cpu/backend/backend.h"
namespace xe {
namespace cpu {
namespace backend {
class NullBackend : public Backend {
public:
void CommitExecutableRange(uint32_t guest_low, uint32_t guest_high) override;
std::unique_ptr<Assembler> CreateAssembler() override;
std::unique_ptr<GuestFunction> CreateGuestFunction(Module* module,
uint32_t address) override;
uint64_t CalculateNextHostInstruction(ThreadDebugInfo* thread_info,
uint64_t current_pc) override;
};
} // namespace backend
} // namespace cpu
} // namespace xe
#endif // XENIA_CPU_BACKEND_NULL_BACKEND_H_
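
A rough illustration of what the new backend provides: every code-generation entry point is a no-op, which is what lets CPU components link on hosts without a JIT backend (such as the non-x86-64 Android targets enabled in the build changes above). This sketch assumes the Backend base class in this commit is default-constructible and its remaining virtuals have defaults; it is not code from the commit:

#include <cassert>
#include <memory>
#include "xenia/cpu/backend/null_backend.h"

void CheckNullBackend() {
  auto backend = std::make_unique<xe::cpu::backend::NullBackend>();
  assert(backend->CreateAssembler() == nullptr);
  assert(backend->CreateGuestFunction(/*module=*/nullptr, 0x82000000) ==
         nullptr);
}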

View File

@ -163,7 +163,7 @@ std::unique_ptr<GuestFunction> X64Backend::CreateGuestFunction(
return std::make_unique<X64Function>(module, address); return std::make_unique<X64Function>(module, address);
} }
uint64_t ReadCapstoneReg(X64Context* context, x86_reg reg) { uint64_t ReadCapstoneReg(HostThreadContext* context, x86_reg reg) {
switch (reg) { switch (reg) {
case X86_REG_RAX: case X86_REG_RAX:
return context->rax; return context->rax;

View File

@ -27,8 +27,6 @@ namespace x64 {
class X64CodeCache; class X64CodeCache;
#define XENIA_HAS_X64_BACKEND 1
typedef void* (*HostToGuestThunk)(void* target, void* arg0, void* arg1); typedef void* (*HostToGuestThunk)(void* target, void* arg0, void* arg1);
typedef void* (*GuestToHostThunk)(void* target, void* arg0, void* arg1); typedef void* (*GuestToHostThunk)(void* target, void* arg0, void* arg1);
typedef void (*ResolveFunctionThunk)(); typedef void (*ResolveFunctionThunk)();

View File

@ -1414,14 +1414,17 @@ void Value::DotProduct3(Value* other) {
assert_true(this->type == VEC128_TYPE && other->type == VEC128_TYPE); assert_true(this->type == VEC128_TYPE && other->type == VEC128_TYPE);
switch (type) { switch (type) {
case VEC128_TYPE: { case VEC128_TYPE: {
alignas(16) float result[4];
__m128 src1 = _mm_load_ps(constant.v128.f32);
__m128 src2 = _mm_load_ps(other->constant.v128.f32);
__m128 dest = _mm_dp_ps(src1, src2, 0b01110001);
_mm_store_ps(result, dest);
// TODO(rick): is this sane? // TODO(rick): is this sane?
type = FLOAT32_TYPE; type = FLOAT32_TYPE;
constant.f32 = result[0]; // Using x86 DPPS ordering for consistency with x86-64 code generation:
// (X1 * X2 + Y1 * Y2) + (Z1 * Z2 + 0.0f)
// (+ 0.0f for zero sign, as zero imm8[4:7] bits result in zero terms,
// not in complete exclusion of them)
// TODO(Triang3l): NaN on overflow.
constant.f32 =
(constant.v128.f32[0] * other->constant.v128.f32[0] +
constant.v128.f32[1] * other->constant.v128.f32[1]) +
(constant.v128.f32[2] * other->constant.v128.f32[2] + 0.0f);
} break; } break;
default: default:
assert_unhandled_case(type); assert_unhandled_case(type);
@ -1433,14 +1436,15 @@ void Value::DotProduct4(Value* other) {
assert_true(this->type == VEC128_TYPE && other->type == VEC128_TYPE); assert_true(this->type == VEC128_TYPE && other->type == VEC128_TYPE);
switch (type) { switch (type) {
case VEC128_TYPE: { case VEC128_TYPE: {
alignas(16) float result[4];
__m128 src1 = _mm_load_ps(constant.v128.f32);
__m128 src2 = _mm_load_ps(other->constant.v128.f32);
__m128 dest = _mm_dp_ps(src1, src2, 0b11110001);
_mm_store_ps(result, dest);
// TODO(rick): is this sane? // TODO(rick): is this sane?
type = FLOAT32_TYPE; type = FLOAT32_TYPE;
constant.f32 = result[0]; // Using x86 DPPS ordering for consistency with x86-64 code generation:
// (X1 * X2 + Y1 * Y2) + (Z1 * Z2 + W1 * W2)
// TODO(Triang3l): NaN on overflow.
constant.f32 = (constant.v128.f32[0] * other->constant.v128.f32[0] +
constant.v128.f32[1] * other->constant.v128.f32[1]) +
(constant.v128.f32[2] * other->constant.v128.f32[2] +
constant.v128.f32[3] * other->constant.v128.f32[3]);
} break; } break;
default: default:
assert_unhandled_case(type); assert_unhandled_case(type);
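
The new constant-folding path reproduces the DPPS association order in scalar code. The same ordering in isolation (illustrative, not xenia code):

// Matches the (x + y) + (z + w) grouping of _mm_dp_ps with imm8 = 0b11110001.
float DotProduct4DppsOrder(const float a[4], const float b[4]) {
  return (a[0] * b[0] + a[1] * b[1]) + (a[2] * b[2] + a[3] * b[3]);
}

// DotProduct3 keeps the fourth term as + 0.0f rather than dropping it.
float DotProduct3DppsOrder(const float a[4], const float b[4]) {
  return (a[0] * b[0] + a[1] * b[1]) + (a[2] * b[2] + 0.0f);
}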

View File

@ -18,6 +18,7 @@
#include "xenia/base/exception_handler.h" #include "xenia/base/exception_handler.h"
#include "xenia/base/logging.h" #include "xenia/base/logging.h"
#include "xenia/base/memory.h" #include "xenia/base/memory.h"
#include "xenia/base/platform.h"
namespace xe { namespace xe {
namespace cpu { namespace cpu {
@ -114,28 +115,10 @@ bool MMIOHandler::CheckStore(uint32_t virtual_address, uint32_t value) {
return false; return false;
} }
struct DecodedMov { bool MMIOHandler::TryDecodeLoadStore(const uint8_t* p,
size_t length; DecodedLoadStore& decoded_out) {
// Inidicates this is a load (or conversely a store). std::memset(&decoded_out, 0, sizeof(decoded_out));
bool is_load; #if XE_ARCH_AMD64
// Indicates the memory must be swapped.
bool byte_swap;
// Source (for store) or target (for load) register.
// AX CX DX BX SP BP SI DI // REX.R=0
// R8 R9 R10 R11 R12 R13 R14 R15 // REX.R=1
uint32_t value_reg;
// [base + (index * scale) + displacement]
bool mem_has_base;
uint8_t mem_base_reg;
bool mem_has_index;
uint8_t mem_index_reg;
uint8_t mem_scale;
int32_t mem_displacement;
bool is_constant;
int32_t constant;
};
bool TryDecodeMov(const uint8_t* p, DecodedMov* mov) {
uint8_t i = 0; // Current byte decode index. uint8_t i = 0; // Current byte decode index.
uint8_t rex = 0; uint8_t rex = 0;
if ((p[i] & 0xF0) == 0x40) { if ((p[i] & 0xF0) == 0x40) {
@ -148,8 +131,8 @@ bool TryDecodeMov(const uint8_t* p, DecodedMov* mov) {
// 44 0f 38 f1 a4 02 00 movbe DWORD PTR [rdx+rax*1+0x0],r12d // 44 0f 38 f1 a4 02 00 movbe DWORD PTR [rdx+rax*1+0x0],r12d
// 42 0f 38 f1 8c 22 00 movbe DWORD PTR [rdx+r12*1+0x0],ecx // 42 0f 38 f1 8c 22 00 movbe DWORD PTR [rdx+r12*1+0x0],ecx
// 0f 38 f1 8c 02 00 00 movbe DWORD PTR [rdx + rax * 1 + 0x0], ecx // 0f 38 f1 8c 02 00 00 movbe DWORD PTR [rdx + rax * 1 + 0x0], ecx
mov->is_load = false; decoded_out.is_load = false;
mov->byte_swap = true; decoded_out.byte_swap = true;
i += 3; i += 3;
} else if (p[i] == 0x0F && p[i + 1] == 0x38 && p[i + 2] == 0xF0) { } else if (p[i] == 0x0F && p[i + 1] == 0x38 && p[i + 2] == 0xF0) {
// MOVBE r32, m32 (load) // MOVBE r32, m32 (load)
@ -159,8 +142,8 @@ bool TryDecodeMov(const uint8_t* p, DecodedMov* mov) {
// 46 0f 38 f0 a4 22 00 movbe r12d,DWORD PTR [rdx+r12*1+0x0] // 46 0f 38 f0 a4 22 00 movbe r12d,DWORD PTR [rdx+r12*1+0x0]
// 0f 38 f0 8c 02 00 00 movbe ecx,DWORD PTR [rdx+rax*1+0x0] // 0f 38 f0 8c 02 00 00 movbe ecx,DWORD PTR [rdx+rax*1+0x0]
// 0F 38 F0 1C 02 movbe ebx,dword ptr [rdx+rax] // 0F 38 F0 1C 02 movbe ebx,dword ptr [rdx+rax]
mov->is_load = true; decoded_out.is_load = true;
mov->byte_swap = true; decoded_out.byte_swap = true;
i += 3; i += 3;
} else if (p[i] == 0x89) { } else if (p[i] == 0x89) {
// MOV m32, r32 (store) // MOV m32, r32 (store)
@ -168,8 +151,8 @@ bool TryDecodeMov(const uint8_t* p, DecodedMov* mov) {
// 44 89 24 02 mov DWORD PTR[rdx + rax * 1], r12d // 44 89 24 02 mov DWORD PTR[rdx + rax * 1], r12d
// 42 89 0c 22 mov DWORD PTR[rdx + r12 * 1], ecx // 42 89 0c 22 mov DWORD PTR[rdx + r12 * 1], ecx
// 89 0c 02 mov DWORD PTR[rdx + rax * 1], ecx // 89 0c 02 mov DWORD PTR[rdx + rax * 1], ecx
mov->is_load = false; decoded_out.is_load = false;
mov->byte_swap = false; decoded_out.byte_swap = false;
++i; ++i;
} else if (p[i] == 0x8B) { } else if (p[i] == 0x8B) {
// MOV r32, m32 (load) // MOV r32, m32 (load)
@ -178,16 +161,16 @@ bool TryDecodeMov(const uint8_t* p, DecodedMov* mov) {
// 42 8b 0c 22 mov ecx, DWORD PTR[rdx + r12 * 1] // 42 8b 0c 22 mov ecx, DWORD PTR[rdx + r12 * 1]
// 46 8b 24 22 mov r12d, DWORD PTR[rdx + r12 * 1] // 46 8b 24 22 mov r12d, DWORD PTR[rdx + r12 * 1]
// 8b 0c 02 mov ecx, DWORD PTR[rdx + rax * 1] // 8b 0c 02 mov ecx, DWORD PTR[rdx + rax * 1]
mov->is_load = true; decoded_out.is_load = true;
mov->byte_swap = false; decoded_out.byte_swap = false;
++i; ++i;
} else if (p[i] == 0xC7) { } else if (p[i] == 0xC7) {
// MOV m32, simm32 // MOV m32, simm32
// https://web.archive.org/web/20161017042413/https://www.asmpedia.org/index.php?title=MOV // https://web.archive.org/web/20161017042413/https://www.asmpedia.org/index.php?title=MOV
// C7 04 02 02 00 00 00 mov dword ptr [rdx+rax],2 // C7 04 02 02 00 00 00 mov dword ptr [rdx+rax],2
mov->is_load = false; decoded_out.is_load = false;
mov->byte_swap = false; decoded_out.byte_swap = false;
mov->is_constant = true; decoded_out.is_constant = true;
++i; ++i;
} else { } else {
return false; return false;
@ -204,13 +187,13 @@ bool TryDecodeMov(const uint8_t* p, DecodedMov* mov) {
uint8_t mod = (modrm & 0b11000000) >> 6; uint8_t mod = (modrm & 0b11000000) >> 6;
uint8_t reg = (modrm & 0b00111000) >> 3; uint8_t reg = (modrm & 0b00111000) >> 3;
uint8_t rm = (modrm & 0b00000111); uint8_t rm = (modrm & 0b00000111);
mov->value_reg = reg + (rex_r ? 8 : 0); decoded_out.value_reg = reg + (rex_r ? 8 : 0);
mov->mem_has_base = false; decoded_out.mem_has_base = false;
mov->mem_base_reg = 0; decoded_out.mem_base_reg = 0;
mov->mem_has_index = false; decoded_out.mem_has_index = false;
mov->mem_index_reg = 0; decoded_out.mem_index_reg = 0;
mov->mem_scale = 1; decoded_out.mem_scale = 1;
mov->mem_displacement = 0; decoded_out.mem_displacement = 0;
bool has_sib = false; bool has_sib = false;
switch (rm) { switch (rm) {
case 0b100: // SIB case 0b100: // SIB
@ -221,17 +204,17 @@ bool TryDecodeMov(const uint8_t* p, DecodedMov* mov) {
// RIP-relative not supported. // RIP-relative not supported.
return false; return false;
} }
mov->mem_has_base = true; decoded_out.mem_has_base = true;
mov->mem_base_reg = rm + (rex_b ? 8 : 0); decoded_out.mem_base_reg = rm + (rex_b ? 8 : 0);
break; break;
default: default:
mov->mem_has_base = true; decoded_out.mem_has_base = true;
mov->mem_base_reg = rm + (rex_b ? 8 : 0); decoded_out.mem_base_reg = rm + (rex_b ? 8 : 0);
break; break;
} }
if (has_sib) { if (has_sib) {
uint8_t sib = p[i++]; uint8_t sib = p[i++];
mov->mem_scale = 1 << ((sib & 0b11000000) >> 8); decoded_out.mem_scale = 1 << ((sib & 0b11000000) >> 8);
uint8_t sib_index = (sib & 0b00111000) >> 3; uint8_t sib_index = (sib & 0b00111000) >> 3;
uint8_t sib_base = (sib & 0b00000111); uint8_t sib_base = (sib & 0b00000111);
switch (sib_index) { switch (sib_index) {
@ -239,8 +222,9 @@ bool TryDecodeMov(const uint8_t* p, DecodedMov* mov) {
// No index. // No index.
break; break;
default: default:
mov->mem_has_index = true; decoded_out.mem_has_index = true;
mov->mem_index_reg = sib_index + (rex_x ? 8 : 0); decoded_out.mem_index_reg = sib_index + (rex_x ? 8 : 0);
decoded_out.mem_index_size = sizeof(uint64_t);
break; break;
} }
switch (sib_base) { switch (sib_base) {
@ -249,29 +233,162 @@ bool TryDecodeMov(const uint8_t* p, DecodedMov* mov) {
assert_zero(mod); assert_zero(mod);
return false; return false;
default: default:
mov->mem_has_base = true; decoded_out.mem_has_base = true;
mov->mem_base_reg = sib_base + (rex_b ? 8 : 0); decoded_out.mem_base_reg = sib_base + (rex_b ? 8 : 0);
break; break;
} }
} }
switch (mod) { switch (mod) {
case 0b00: { case 0b00: {
mov->mem_displacement += 0; decoded_out.mem_displacement += 0;
} break; } break;
case 0b01: { case 0b01: {
mov->mem_displacement += int8_t(p[i++]); decoded_out.mem_displacement += int8_t(p[i++]);
} break; } break;
case 0b10: { case 0b10: {
mov->mem_displacement += xe::load<int32_t>(p + i); decoded_out.mem_displacement += xe::load<int32_t>(p + i);
i += 4; i += 4;
} break; } break;
} }
if (mov->is_constant) { if (decoded_out.is_constant) {
mov->constant = xe::load<int32_t>(p + i); decoded_out.constant = xe::load<int32_t>(p + i);
i += 4; i += 4;
} }
mov->length = i; decoded_out.length = i;
return true; return true;
#elif XE_ARCH_ARM64
decoded_out.length = sizeof(uint32_t);
uint32_t instruction = *reinterpret_cast<const uint32_t*>(p);
// Literal loading (PC-relative) is not handled.
if ((instruction & kArm64LoadStoreAnyFMask) != kArm64LoadStoreAnyFixed) {
// Not a load or a store instruction.
return false;
}
if ((instruction & kArm64LoadStorePairAnyFMask) ==
kArm64LoadStorePairAnyFixed) {
// Handling MMIO only for single 32-bit values, not for pairs.
return false;
}
uint8_t value_reg_base;
switch (Arm64LoadStoreOp(instruction & kArm64LoadStoreMask)) {
case Arm64LoadStoreOp::kSTR_w:
decoded_out.is_load = false;
value_reg_base = DecodedLoadStore::kArm64ValueRegX0;
break;
case Arm64LoadStoreOp::kLDR_w:
decoded_out.is_load = true;
value_reg_base = DecodedLoadStore::kArm64ValueRegX0;
break;
case Arm64LoadStoreOp::kSTR_s:
decoded_out.is_load = false;
value_reg_base = DecodedLoadStore::kArm64ValueRegV0;
break;
case Arm64LoadStoreOp::kLDR_s:
decoded_out.is_load = true;
value_reg_base = DecodedLoadStore::kArm64ValueRegV0;
break;
default:
return false;
}
// `Rt` field (load / store register).
decoded_out.value_reg = value_reg_base + (instruction & 31);
if (decoded_out.is_load &&
decoded_out.value_reg == DecodedLoadStore::kArm64ValueRegZero) {
// Zero constant rather than a register read.
decoded_out.is_constant = true;
decoded_out.constant = 0;
}
decoded_out.mem_has_base = true;
// The base is Xn (for 0...30) or SP (for 31).
// `Rn` field (first source register).
decoded_out.mem_base_reg = (instruction >> 5) & 31;
bool is_unsigned_offset =
(instruction & kArm64LoadStoreUnsignedOffsetFMask) ==
kArm64LoadStoreUnsignedOffsetFixed;
if (is_unsigned_offset) {
// LDR|STR Wt|St, [Xn|SP{, #pimm}]
// pimm (positive immediate) is scaled by the size of the data (4 for
// words).
// `ImmLSUnsigned` field.
uint32_t unsigned_offset = (instruction >> 10) & 4095;
decoded_out.mem_displacement =
ptrdiff_t(sizeof(uint32_t) * unsigned_offset);
} else {
Arm64LoadStoreOffsetFixed offset =
Arm64LoadStoreOffsetFixed(instruction & kArm64LoadStoreOffsetFMask);
// simm (signed immediate) is not scaled.
// Only applicable to kUnscaledOffset, kPostIndex and kPreIndex.
// `ImmLS` field.
int32_t signed_offset = int32_t(instruction << (32 - (9 + 12))) >> (32 - 9);
// For both post- and pre-indexing, the new address is written to the
// register after the data register write, thus if Xt and Xn are the same,
// the final value in the register will be the new address.
// https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/LDR--immediate---Load-Register--immediate--
switch (offset) {
case Arm64LoadStoreOffsetFixed::kUnscaledOffset: {
// LDUR|STUR Wt|St, [Xn|SP{, #simm}]
decoded_out.mem_displacement = signed_offset;
} break;
case Arm64LoadStoreOffsetFixed::kPostIndex: {
// LDR|STR Wt|St, [Xn|SP], #simm
decoded_out.mem_base_writeback = true;
decoded_out.mem_base_writeback_offset = signed_offset;
} break;
case Arm64LoadStoreOffsetFixed::kPreIndex: {
// LDR|STR Wt|St, [Xn|SP, #simm]!
decoded_out.mem_base_writeback = true;
decoded_out.mem_base_writeback_offset = signed_offset;
decoded_out.mem_displacement = signed_offset;
} break;
case Arm64LoadStoreOffsetFixed::kRegisterOffset: {
// LDR|STR Wt|St, [Xn|SP, (Wm|Xm){, extend {amount}}]
// `Rm` field.
decoded_out.mem_index_reg = (instruction >> 16) & 31;
if (decoded_out.mem_index_reg != DecodedLoadStore::kArm64RegZero) {
decoded_out.mem_has_index = true;
// Allowed extend types in the `option` field are UXTW (0b010), LSL
// (0b011 - identical to UXTX), SXTW (0b110), SXTX (0b111).
// The shift (0 or 2 for 32-bit LDR/STR) can be applied regardless of
// the extend type ("LSL" is just a term for assembly readability,
// internally it's treated simply as UXTX).
// If bit 0 of the `option` field is 0 (UXTW, SXTW), the index
// register is treated as 32-bit (Wm) extended to 64-bit. If it's 1
// (LSL aka UXTX, SXTX), the index register is treated as 64-bit (Xm).
// `ExtendMode` (`option`) field.
uint32_t extend_mode = (instruction >> 13) & 0b111;
if (!(extend_mode & 0b010)) {
// Sub-word index - undefined.
return false;
}
decoded_out.mem_index_size =
(extend_mode & 0b001) ? sizeof(uint64_t) : sizeof(uint32_t);
decoded_out.mem_index_sign_extend = (extend_mode & 0b100) != 0;
// Shift is either 0 or log2(sizeof(load or store size)).
// Supporting MMIO only for 4-byte words.
// `ImmShiftLS` field.
decoded_out.mem_scale =
(instruction & (UINT32_C(1) << 12)) ? sizeof(uint32_t) : 1;
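// Illustrative example (hypothetical encoding): for
// `LDR W1, [X2, W3, UXTW #2]`, `option` is 0b010 and the shift bit is set,
// so the index is W3 zero-extended to 64 bits and scaled by sizeof(uint32_t).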
}
} break;
default:
return false;
}
}
return true;
#else
#error TryDecodeLoadStore not implemented for the target CPU architecture.
return false;
#endif // XE_ARCH
} }
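// Rough shape of how the decoded information is consumed by ExceptionCallback
// below (a condensed sketch, not the exact code):
//
//   DecodedLoadStore decoded;
//   if (!TryDecodeLoadStore(reinterpret_cast<const uint8_t*>(ex->pc()),
//                           decoded)) {
//     return false;  // Unknown instruction - the access can't be emulated.
//   }
//   // range->read(...) or range->write(...) depending on decoded.is_load,
//   // with a byte swap unless the instruction already swaps (movbe).
//   ex->set_resume_pc(ex->pc() + decoded.length);
//   return true;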
bool MMIOHandler::ExceptionCallbackThunk(Exception* ex, void* data) { bool MMIOHandler::ExceptionCallbackThunk(Exception* ex, void* data) {
@ -300,11 +417,13 @@ bool MMIOHandler::ExceptionCallback(Exception* ex) {
// Access violations are pretty rare, so we can do a linear search here. // Access violations are pretty rare, so we can do a linear search here.
// Only check if in the virtual range, as we only support virtual ranges. // Only check if in the virtual range, as we only support virtual ranges.
const MMIORange* range = nullptr; const MMIORange* range = nullptr;
uint32_t fault_guest_virtual_address = 0;
if (ex->fault_address() < uint64_t(physical_membase_)) { if (ex->fault_address() < uint64_t(physical_membase_)) {
uint32_t fault_virtual_address = host_to_guest_virtual_( fault_guest_virtual_address = host_to_guest_virtual_(
host_to_guest_virtual_context_, fault_host_address); host_to_guest_virtual_context_, fault_host_address);
for (const auto& test_range : mapped_ranges_) { for (const auto& test_range : mapped_ranges_) {
if ((fault_virtual_address & test_range.mask) == test_range.address) { if ((fault_guest_virtual_address & test_range.mask) ==
test_range.address) {
// Address is within the range of this mapping. // Address is within the range of this mapping.
range = &test_range; range = &test_range;
break; break;
@ -336,44 +455,114 @@ bool MMIOHandler::ExceptionCallback(Exception* ex) {
auto rip = ex->pc(); auto rip = ex->pc();
auto p = reinterpret_cast<const uint8_t*>(rip); auto p = reinterpret_cast<const uint8_t*>(rip);
DecodedMov mov = {0}; DecodedLoadStore decoded_load_store;
bool decoded = TryDecodeMov(p, &mov); if (!TryDecodeLoadStore(p, decoded_load_store)) {
if (!decoded) { XELOGE("Unable to decode MMIO load or store instruction at {}", p);
XELOGE("Unable to decode MMIO mov at {}", p);
assert_always("Unknown MMIO instruction type"); assert_always("Unknown MMIO instruction type");
return false; return false;
} }
if (mov.is_load) { HostThreadContext& thread_context = *ex->thread_context();
#if XE_ARCH_ARM64
// Preserve the base address with the pre- or the post-index offset to write
// it after writing the result (since the base address register and the
// register to load to may be the same, in which case it should receive the
// original base address with the offset).
uintptr_t mem_base_writeback_address = 0;
if (decoded_load_store.mem_has_base &&
decoded_load_store.mem_base_writeback) {
if (decoded_load_store.mem_base_reg ==
DecodedLoadStore::kArm64MemBaseRegSp) {
mem_base_writeback_address = thread_context.sp;
} else {
assert_true(decoded_load_store.mem_base_reg <= 30);
mem_base_writeback_address =
thread_context.x[decoded_load_store.mem_base_reg];
}
mem_base_writeback_address += decoded_load_store.mem_base_writeback_offset;
}
#endif // XE_ARCH_ARM64
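// Illustrative example (hypothetical instruction): for a post-indexed load
// like `LDR W0, [X0], #8`, the value register and the base register are the
// same, so the MMIO result written to W0 below would clobber the base;
// capturing X0 + 8 here first lets the writeback code further down store the
// architecturally expected final value - the updated address.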
uint8_t value_reg = decoded_load_store.value_reg;
if (decoded_load_store.is_load) {
// Load of a memory value - read from range, swap, and store in the // Load of a memory value - read from range, swap, and store in the
// register. // register.
uint32_t value = range->read(nullptr, range->callback_context, uint32_t value = range->read(nullptr, range->callback_context,
static_cast<uint32_t>(ex->fault_address())); fault_guest_virtual_address);
uint64_t* reg_ptr = &ex->thread_context()->int_registers[mov.value_reg]; if (!decoded_load_store.byte_swap) {
if (!mov.byte_swap) {
// We swap only if it's not a movbe, as otherwise we are swapping twice. // We swap only if it's not a movbe, as otherwise we are swapping twice.
value = xe::byte_swap(value); value = xe::byte_swap(value);
} }
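// Illustrative values: if the range callback returns 0x12345678 and the
// faulting instruction is a plain load (not movbe), the register receives
// 0x78563412 - the same bytes the guest would see when reading the
// big-endian memory directly.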
*reg_ptr = value; #if XE_ARCH_AMD64
ex->ModifyIntRegister(value_reg) = value;
#elif XE_ARCH_ARM64
if (value_reg >= DecodedLoadStore::kArm64ValueRegX0 &&
value_reg <= (DecodedLoadStore::kArm64ValueRegX0 + 30)) {
ex->ModifyXRegister(value_reg - DecodedLoadStore::kArm64ValueRegX0) =
value;
} else if (value_reg >= DecodedLoadStore::kArm64ValueRegV0 &&
value_reg <= (DecodedLoadStore::kArm64ValueRegV0 + 31)) {
ex->ModifyVRegister(value_reg - DecodedLoadStore::kArm64ValueRegV0)
.u32[0] = value;
} else {
assert_true(value_reg == DecodedLoadStore::kArm64ValueRegZero);
// Register write is ignored for X31.
}
#else
#error Register value writing not implemented for the target CPU architecture.
#endif // XE_ARCH
} else { } else {
// Store of a register value - read register, swap, write to range. // Store of a register value - read register, swap, write to range.
int32_t value; uint32_t value;
if (mov.is_constant) { if (decoded_load_store.is_constant) {
value = uint32_t(mov.constant); value = uint32_t(decoded_load_store.constant);
} else { } else {
uint64_t* reg_ptr = &ex->thread_context()->int_registers[mov.value_reg]; #if XE_ARCH_AMD64
value = static_cast<uint32_t>(*reg_ptr); value = uint32_t(thread_context.int_registers[value_reg]);
if (!mov.byte_swap) { #elif XE_ARCH_ARM64
if (value_reg >= DecodedLoadStore::kArm64ValueRegX0 &&
value_reg <= (DecodedLoadStore::kArm64ValueRegX0 + 30)) {
value = uint32_t(
thread_context.x[value_reg - DecodedLoadStore::kArm64ValueRegX0]);
} else if (value_reg >= DecodedLoadStore::kArm64ValueRegV0 &&
value_reg <= (DecodedLoadStore::kArm64ValueRegV0 + 31)) {
value = thread_context.v[value_reg - DecodedLoadStore::kArm64ValueRegV0]
.u32[0];
} else {
assert_true(value_reg == DecodedLoadStore::kArm64ValueRegZero);
value = 0;
}
#else
#error Register value reading not implemented for the target CPU architecture.
#endif // XE_ARCH
if (!decoded_load_store.byte_swap) {
// We swap only if it's not a movbe, as otherwise we are swapping twice. // We swap only if it's not a movbe, as otherwise we are swapping twice.
value = xe::byte_swap(static_cast<uint32_t>(value)); value = xe::byte_swap(value);
} }
} }
range->write(nullptr, range->callback_context, range->write(nullptr, range->callback_context, fault_guest_virtual_address,
static_cast<uint32_t>(ex->fault_address()), value); value);
} }
#if XE_ARCH_ARM64
// Write the base address with the pre- or the post-index offset, overwriting
// the register to load to if it's the same.
if (decoded_load_store.mem_has_base &&
decoded_load_store.mem_base_writeback) {
if (decoded_load_store.mem_base_reg ==
DecodedLoadStore::kArm64MemBaseRegSp) {
thread_context.sp = mem_base_writeback_address;
} else {
assert_true(decoded_load_store.mem_base_reg <= 30);
ex->ModifyXRegister(decoded_load_store.mem_base_reg) =
mem_base_writeback_address;
}
}
#endif // XE_ARCH_ARM64
// Advance RIP to the next instruction so that we resume properly. // Advance RIP to the next instruction so that we resume properly.
ex->set_resume_pc(rip + mov.length); ex->set_resume_pc(rip + decoded_load_store.length);
return true; return true;
} }


@ -15,10 +15,11 @@
#include <vector> #include <vector>
#include "xenia/base/mutex.h" #include "xenia/base/mutex.h"
#include "xenia/base/platform.h"
namespace xe { namespace xe {
class Exception; class Exception;
class X64Context; class HostThreadContext;
} // namespace xe } // namespace xe
namespace xe { namespace xe {
@ -93,6 +94,61 @@ class MMIOHandler {
static MMIOHandler* global_handler_; static MMIOHandler* global_handler_;
xe::global_critical_region global_critical_region_; xe::global_critical_region global_critical_region_;
private:
struct DecodedLoadStore {
// Matches the Xn/Wn register number that encodes the zero register (reads
// as 0, writes ignored) in most instruction encodings.
static constexpr uint8_t kArm64RegZero = 31;
// Matches the actual register number encoding for an SP base in AArch64
// load and store instructions.
static constexpr uint8_t kArm64MemBaseRegSp = kArm64RegZero;
static constexpr uint8_t kArm64ValueRegX0 = 0;
static constexpr uint8_t kArm64ValueRegZero =
kArm64ValueRegX0 + kArm64RegZero;
static constexpr uint8_t kArm64ValueRegV0 = 32;
size_t length;
// Indicates whether this is a load (as opposed to a store).
bool is_load;
// Indicates that the memory value must be byte-swapped.
bool byte_swap;
// Source (for store) or target (for load) register.
// For x86-64:
// AX CX DX BX SP BP SI DI // REX.R=0
// R8 R9 R10 R11 R12 R13 R14 R15 // REX.R=1
// For AArch64:
// - kArm64ValueRegX0 + [0...30]: Xn (Wn for 32 bits - upper 32 bits of Xn
// are zeroed on Wn write).
// - kArm64ValueRegZero: Zero constant for register read, ignored register
// write (though memory must still be accessed - an MMIO load may have side
// effects even if the result is discarded).
// - kArm64ValueRegV0 + [0...31]: Vn (Sn for 32 bits).
uint8_t value_reg;
// [base + (index * scale) + displacement]
bool mem_has_base;
// On AArch64, if mem_base_reg is kArm64MemBaseRegSp, the base register is
// SP, not Xn.
uint8_t mem_base_reg;
// For AArch64 pre- and post-indexing. In case of a load, the base register
// is written back after the loaded data is written to the register,
// overwriting the value register if it's the same.
bool mem_base_writeback;
int32_t mem_base_writeback_offset;
bool mem_has_index;
uint8_t mem_index_reg;
uint8_t mem_index_size;
bool mem_index_sign_extend;
uint8_t mem_scale;
ptrdiff_t mem_displacement;
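// Putting the addressing fields together (an illustrative formula mirroring
// the comment above, not code used by the handler):
//   effective_address = base
//                       + (mem_has_index ? index * mem_scale : 0)
//                       + mem_displacement
// where the index is read from mem_index_reg as a mem_index_size-byte value,
// sign- or zero-extended according to mem_index_sign_extend.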
bool is_constant;
int32_t constant;
};
static bool TryDecodeLoadStore(const uint8_t* p,
DecodedLoadStore& decoded_out);
}; };
} // namespace cpu } // namespace cpu


@ -15,13 +15,16 @@
#include "xenia/base/math.h" #include "xenia/base/math.h"
#include "xenia/base/platform.h" #include "xenia/base/platform.h"
#include "xenia/base/string_buffer.h" #include "xenia/base/string_buffer.h"
#include "xenia/cpu/backend/x64/x64_backend.h"
#include "xenia/cpu/cpu_flags.h" #include "xenia/cpu/cpu_flags.h"
#include "xenia/cpu/ppc/ppc_context.h" #include "xenia/cpu/ppc/ppc_context.h"
#include "xenia/cpu/ppc/ppc_frontend.h" #include "xenia/cpu/ppc/ppc_frontend.h"
#include "xenia/cpu/processor.h" #include "xenia/cpu/processor.h"
#include "xenia/cpu/raw_module.h" #include "xenia/cpu/raw_module.h"
#if XE_ARCH_AMD64
#include "xenia/cpu/backend/x64/x64_backend.h"
#endif // XE_ARCH
#if XE_COMPILER_MSVC #if XE_COMPILER_MSVC
#include "xenia/base/platform_win.h" #include "xenia/base/platform_win.h"
#endif // XE_COMPILER_MSVC #endif // XE_COMPILER_MSVC
@ -196,17 +199,17 @@ class TestRunner {
std::unique_ptr<xe::cpu::backend::Backend> backend; std::unique_ptr<xe::cpu::backend::Backend> backend;
if (!backend) { if (!backend) {
#if defined(XENIA_HAS_X64_BACKEND) && XENIA_HAS_X64_BACKEND #if XE_ARCH_AMD64
if (cvars::cpu == "x64") { if (cvars::cpu == "x64") {
backend.reset(new xe::cpu::backend::x64::X64Backend()); backend.reset(new xe::cpu::backend::x64::X64Backend());
} }
#endif // XENIA_HAS_X64_BACKEND #endif // XE_ARCH
if (cvars::cpu == "any") { if (cvars::cpu == "any") {
#if defined(XENIA_HAS_X64_BACKEND) && XENIA_HAS_X64_BACKEND
if (!backend) { if (!backend) {
#if XE_ARCH_AMD64
backend.reset(new xe::cpu::backend::x64::X64Backend()); backend.reset(new xe::cpu::backend::x64::X64Backend());
#endif // XE_ARCH
} }
#endif // XENIA_HAS_X64_BACKEND
} }
} }


@ -11,7 +11,6 @@ project("xenia-cpu-ppc-tests")
"fmt", "fmt",
"mspack", "mspack",
"xenia-core", "xenia-core",
"xenia-cpu-backend-x64",
"xenia-cpu", "xenia-cpu",
"xenia-base", "xenia-base",
}) })
@ -24,6 +23,10 @@ project("xenia-cpu-ppc-tests")
}) })
filter("files:*.s") filter("files:*.s")
flags({"ExcludeFromBuild"}) flags({"ExcludeFromBuild"})
filter("architecture:x86_64")
links({
"xenia-cpu-backend-x64",
})
filter("platforms:Windows") filter("platforms:Windows")
debugdir(project_root) debugdir(project_root)
debugargs({ debugargs({


@ -19,6 +19,7 @@
#include "xenia/base/literals.h" #include "xenia/base/literals.h"
#include "xenia/base/logging.h" #include "xenia/base/logging.h"
#include "xenia/base/memory.h" #include "xenia/base/memory.h"
#include "xenia/base/platform.h"
#include "xenia/base/profiling.h" #include "xenia/base/profiling.h"
#include "xenia/base/threading.h" #include "xenia/base/threading.h"
#include "xenia/cpu/breakpoint.h" #include "xenia/cpu/breakpoint.h"
@ -133,7 +134,11 @@ bool Processor::Setup(std::unique_ptr<backend::Backend> backend) {
// Stack walker is used when profiling, debugging, and dumping. // Stack walker is used when profiling, debugging, and dumping.
// Note that creation may fail, in which case we'll have to disable those // Note that creation may fail, in which case we'll have to disable those
// features. // features.
stack_walker_ = StackWalker::Create(backend_->code_cache()); // The code cache may be unavailable in case of a "null" backend.
cpu::backend::CodeCache* code_cache = backend_->code_cache();
if (code_cache) {
stack_walker_ = StackWalker::Create(code_cache);
}
if (!stack_walker_) { if (!stack_walker_) {
// TODO(benvanik): disable features. // TODO(benvanik): disable features.
if (cvars::debug) { if (cvars::debug) {
@ -698,7 +703,13 @@ bool Processor::OnThreadBreakpointHit(Exception* ex) {
// Apply thread context changes. // Apply thread context changes.
// TODO(benvanik): apply to all threads? // TODO(benvanik): apply to all threads?
#if XE_ARCH_AMD64
ex->set_resume_pc(thread_info->host_context.rip); ex->set_resume_pc(thread_info->host_context.rip);
#elif XE_ARCH_ARM64
ex->set_resume_pc(thread_info->host_context.pc);
#else
#error Instruction pointer not specified for the target CPU architecture.
#endif // XE_ARCH
// Resume execution. // Resume execution.
return true; return true;
@ -828,8 +839,8 @@ bool Processor::ResumeAllThreads() {
return true; return true;
} }
void Processor::UpdateThreadExecutionStates(uint32_t override_thread_id, void Processor::UpdateThreadExecutionStates(
X64Context* override_context) { uint32_t override_thread_id, HostThreadContext* override_context) {
auto global_lock = global_critical_region_.Acquire(); auto global_lock = global_critical_region_.Acquire();
uint64_t frame_host_pcs[64]; uint64_t frame_host_pcs[64];
xe::cpu::StackFrame cpu_frames[64]; xe::cpu::StackFrame cpu_frames[64];
@ -851,7 +862,7 @@ void Processor::UpdateThreadExecutionStates(uint32_t override_thread_id,
// Grab stack trace and X64 context then resolve all symbols. // Grab stack trace and X64 context then resolve all symbols.
uint64_t hash; uint64_t hash;
X64Context* in_host_context = nullptr; HostThreadContext* in_host_context = nullptr;
if (override_thread_id == thread_info->thread_id) { if (override_thread_id == thread_info->thread_id) {
// If we were passed an override context we use that. Otherwise, ask the // If we were passed an override context we use that. Otherwise, ask the
// stack walker for a new context. // stack walker for a new context.


@ -215,8 +215,9 @@ class Processor {
// Updates all cached thread execution info (state, call stacks, etc). // Updates all cached thread execution info (state, call stacks, etc).
// The given override thread handle and context will be used in place of // The given override thread handle and context will be used in place of
// sampled values for that thread. // sampled values for that thread.
void UpdateThreadExecutionStates(uint32_t override_handle = 0, void UpdateThreadExecutionStates(
X64Context* override_context = nullptr); uint32_t override_handle = 0,
HostThreadContext* override_context = nullptr);
// Suspends all breakpoints, uninstalling them as required. // Suspends all breakpoints, uninstalling them as required.
// No breakpoints will be triggered until they are resumed. // No breakpoints will be triggered until they are resumed.


@ -13,7 +13,7 @@
#include <memory> #include <memory>
#include <string> #include <string>
#include "xenia/base/x64_context.h" #include "xenia/base/host_thread_context.h"
#include "xenia/cpu/function.h" #include "xenia/cpu/function.h"
namespace xe { namespace xe {
@ -83,8 +83,8 @@ class StackWalker {
virtual size_t CaptureStackTrace(void* thread_handle, virtual size_t CaptureStackTrace(void* thread_handle,
uint64_t* frame_host_pcs, uint64_t* frame_host_pcs,
size_t frame_offset, size_t frame_count, size_t frame_offset, size_t frame_count,
const X64Context* in_host_context, const HostThreadContext* in_host_context,
X64Context* out_host_context, HostThreadContext* out_host_context,
uint64_t* out_stack_hash = nullptr) = 0; uint64_t* out_stack_hash = nullptr) = 0;
// Resolves symbol information for the given stack frames. // Resolves symbol information for the given stack frames.


@ -153,8 +153,8 @@ class Win32StackWalker : public StackWalker {
size_t CaptureStackTrace(void* thread_handle, uint64_t* frame_host_pcs, size_t CaptureStackTrace(void* thread_handle, uint64_t* frame_host_pcs,
size_t frame_offset, size_t frame_count, size_t frame_offset, size_t frame_count,
const X64Context* in_host_context, const HostThreadContext* in_host_context,
X64Context* out_host_context, HostThreadContext* out_host_context,
uint64_t* out_stack_hash) override { uint64_t* out_stack_hash) override {
// TODO(benvanik): use xstate? // TODO(benvanik): use xstate?
// https://msdn.microsoft.com/en-us/library/windows/desktop/hh134240(v=vs.85).aspx // https://msdn.microsoft.com/en-us/library/windows/desktop/hh134240(v=vs.85).aspx


@ -12,7 +12,7 @@
#include <vector> #include <vector>
#include "xenia/base/x64_context.h" #include "xenia/base/host_thread_context.h"
#include "xenia/cpu/thread.h" #include "xenia/cpu/thread.h"
#include "xenia/cpu/thread_state.h" #include "xenia/cpu/thread_state.h"
@ -70,10 +70,10 @@ struct ThreadDebugInfo {
// Last-sampled PPC context. // Last-sampled PPC context.
// This is updated whenever the debugger stops. // This is updated whenever the debugger stops.
ppc::PPCContext guest_context; ppc::PPCContext guest_context;
// Last-sampled host x64 context. // Last-sampled host context.
// This is updated whenever the debugger stops and must be used instead of any // This is updated whenever the debugger stops and must be used instead of any
// value taken from the StackWalker as it properly respects exception stacks. // value taken from the StackWalker as it properly respects exception stacks.
X64Context host_context; HostThreadContext host_context;
// A single frame in a call stack. // A single frame in a call stack.
struct Frame { struct Frame {


@ -960,7 +960,7 @@ void DebugWindow::DrawRegistersPane() {
auto reg = static_cast<X64Register>(i); auto reg = static_cast<X64Register>(i);
ImGui::BeginGroup(); ImGui::BeginGroup();
ImGui::AlignTextToFramePadding(); ImGui::AlignTextToFramePadding();
ImGui::Text("%3s", X64Context::GetRegisterName(reg)); ImGui::Text("%3s", HostThreadContext::GetRegisterName(reg));
ImGui::SameLine(); ImGui::SameLine();
ImGui::Dummy(ImVec2(4, 0)); ImGui::Dummy(ImVec2(4, 0));
ImGui::SameLine(); ImGui::SameLine();
@ -985,7 +985,7 @@ void DebugWindow::DrawRegistersPane() {
static_cast<X64Register>(static_cast<int>(X64Register::kXmm0) + i); static_cast<X64Register>(static_cast<int>(X64Register::kXmm0) + i);
ImGui::BeginGroup(); ImGui::BeginGroup();
ImGui::AlignTextToFramePadding(); ImGui::AlignTextToFramePadding();
ImGui::Text("%5s", X64Context::GetRegisterName(reg)); ImGui::Text("%5s", HostThreadContext::GetRegisterName(reg));
ImGui::SameLine(); ImGui::SameLine();
ImGui::Dummy(ImVec2(4, 0)); ImGui::Dummy(ImVec2(4, 0));
ImGui::SameLine(); ImGui::SameLine();


@ -13,7 +13,7 @@
#include <memory> #include <memory>
#include <vector> #include <vector>
#include "xenia/base/x64_context.h" #include "xenia/base/host_thread_context.h"
#include "xenia/cpu/breakpoint.h" #include "xenia/cpu/breakpoint.h"
#include "xenia/cpu/debug_listener.h" #include "xenia/cpu/debug_listener.h"
#include "xenia/cpu/processor.h" #include "xenia/cpu/processor.h"


@ -24,9 +24,10 @@
#include "xenia/base/literals.h" #include "xenia/base/literals.h"
#include "xenia/base/logging.h" #include "xenia/base/logging.h"
#include "xenia/base/mapped_memory.h" #include "xenia/base/mapped_memory.h"
#include "xenia/base/platform.h"
#include "xenia/base/string.h" #include "xenia/base/string.h"
#include "xenia/cpu/backend/code_cache.h" #include "xenia/cpu/backend/code_cache.h"
#include "xenia/cpu/backend/x64/x64_backend.h" #include "xenia/cpu/backend/null_backend.h"
#include "xenia/cpu/cpu_flags.h" #include "xenia/cpu/cpu_flags.h"
#include "xenia/cpu/thread_state.h" #include "xenia/cpu/thread_state.h"
#include "xenia/gpu/graphics_system.h" #include "xenia/gpu/graphics_system.h"
@ -50,6 +51,10 @@
#include "xenia/vfs/devices/null_device.h" #include "xenia/vfs/devices/null_device.h"
#include "xenia/vfs/devices/stfs_container_device.h" #include "xenia/vfs/devices/stfs_container_device.h"
#if XE_ARCH_AMD64
#include "xenia/cpu/backend/x64/x64_backend.h"
#endif // XE_ARCH
DEFINE_double(time_scalar, 1.0, DEFINE_double(time_scalar, 1.0,
"Scalar used to speed or slow time (1x, 2x, 1/2x, etc).", "Scalar used to speed or slow time (1x, 2x, 1/2x, etc).",
"General"); "General");
@ -127,6 +132,7 @@ Emulator::~Emulator() {
X_STATUS Emulator::Setup( X_STATUS Emulator::Setup(
ui::Window* display_window, ui::ImGuiDrawer* imgui_drawer, ui::Window* display_window, ui::ImGuiDrawer* imgui_drawer,
bool require_cpu_backend,
std::function<std::unique_ptr<apu::AudioSystem>(cpu::Processor*)> std::function<std::unique_ptr<apu::AudioSystem>(cpu::Processor*)>
audio_system_factory, audio_system_factory,
std::function<std::unique_ptr<gpu::GraphicsSystem>()> std::function<std::unique_ptr<gpu::GraphicsSystem>()>
@ -160,19 +166,20 @@ X_STATUS Emulator::Setup(
export_resolver_ = std::make_unique<xe::cpu::ExportResolver>(); export_resolver_ = std::make_unique<xe::cpu::ExportResolver>();
std::unique_ptr<xe::cpu::backend::Backend> backend; std::unique_ptr<xe::cpu::backend::Backend> backend;
if (!backend) { #if XE_ARCH_AMD64
#if defined(XENIA_HAS_X64_BACKEND) && XENIA_HAS_X64_BACKEND if (cvars::cpu == "x64") {
if (cvars::cpu == "x64") { backend.reset(new xe::cpu::backend::x64::X64Backend());
}
#endif // XE_ARCH
if (cvars::cpu == "any") {
if (!backend) {
#if XE_ARCH_AMD64
backend.reset(new xe::cpu::backend::x64::X64Backend()); backend.reset(new xe::cpu::backend::x64::X64Backend());
#endif // XE_ARCH
} }
#endif // XENIA_HAS_X64_BACKEND }
if (cvars::cpu == "any") { if (!backend && !require_cpu_backend) {
#if defined(XENIA_HAS_X64_BACKEND) && XENIA_HAS_X64_BACKEND backend.reset(new xe::cpu::backend::NullBackend());
if (!backend) {
backend.reset(new xe::cpu::backend::x64::X64Backend());
}
#endif // XENIA_HAS_X64_BACKEND
}
} }
// Initialize the CPU. // Initialize the CPU.


@ -165,6 +165,7 @@ class Emulator {
// functions. // functions.
X_STATUS Setup( X_STATUS Setup(
ui::Window* display_window, ui::ImGuiDrawer* imgui_drawer, ui::Window* display_window, ui::ImGuiDrawer* imgui_drawer,
bool require_cpu_backend,
std::function<std::unique_ptr<apu::AudioSystem>(cpu::Processor*)> std::function<std::unique_ptr<apu::AudioSystem>(cpu::Processor*)>
audio_system_factory, audio_system_factory,
std::function<std::unique_ptr<gpu::GraphicsSystem>()> std::function<std::unique_ptr<gpu::GraphicsSystem>()>


@ -497,7 +497,7 @@ class D3D12RenderTargetCache final : public RenderTargetCache {
TransferInvocation(const Transfer& transfer, TransferInvocation(const Transfer& transfer,
const TransferShaderKey& shader_key) const TransferShaderKey& shader_key)
: transfer(transfer), shader_key(shader_key) {} : transfer(transfer), shader_key(shader_key) {}
bool operator<(const TransferInvocation& other_invocation) { bool operator<(const TransferInvocation& other_invocation) const {
// TODO(Triang3l): See if it may be better to sort by the source in the // TODO(Triang3l): See if it may be better to sort by the source in the
// first place, especially when reading the same data multiple times (like // first place, especially when reading the same data multiple times (like
// to write the stencil bits after depth) for better read locality. // to write the stencil bits after depth) for better read locality.
@ -639,7 +639,7 @@ class D3D12RenderTargetCache final : public RenderTargetCache {
DumpInvocation(const ResolveCopyDumpRectangle& rectangle, DumpInvocation(const ResolveCopyDumpRectangle& rectangle,
const DumpPipelineKey& pipeline_key) const DumpPipelineKey& pipeline_key)
: rectangle(rectangle), pipeline_key(pipeline_key) {} : rectangle(rectangle), pipeline_key(pipeline_key) {}
bool operator<(const DumpInvocation& other_invocation) { bool operator<(const DumpInvocation& other_invocation) const {
// Sort by the pipeline key primarily to reduce pipeline state (context) // Sort by the pipeline key primarily to reduce pipeline state (context)
// switches. // switches.
if (pipeline_key != other_invocation.pipeline_key) { if (pipeline_key != other_invocation.pipeline_key) {


@ -30,7 +30,6 @@ project("xenia-gpu-d3d12-trace-viewer")
"xenia-base", "xenia-base",
"xenia-core", "xenia-core",
"xenia-cpu", "xenia-cpu",
"xenia-cpu-backend-x64",
"xenia-gpu", "xenia-gpu",
"xenia-gpu-d3d12", "xenia-gpu-d3d12",
"xenia-hid", "xenia-hid",
@ -68,6 +67,11 @@ project("xenia-gpu-d3d12-trace-viewer")
}) })
end end
filter("architecture:x86_64")
links({
"xenia-cpu-backend-x64",
})
group("src") group("src")
project("xenia-gpu-d3d12-trace-dump") project("xenia-gpu-d3d12-trace-dump")
uuid("686b859c-0046-44c4-a02c-41fc3fb75698") uuid("686b859c-0046-44c4-a02c-41fc3fb75698")
@ -79,7 +83,6 @@ project("xenia-gpu-d3d12-trace-dump")
"xenia-base", "xenia-base",
"xenia-core", "xenia-core",
"xenia-cpu", "xenia-cpu",
"xenia-cpu-backend-x64",
"xenia-gpu", "xenia-gpu",
"xenia-gpu-d3d12", "xenia-gpu-d3d12",
"xenia-hid", "xenia-hid",
@ -115,3 +118,8 @@ project("xenia-gpu-d3d12-trace-dump")
"1>scratch/stdout-trace-dump.txt", "1>scratch/stdout-trace-dump.txt",
}) })
end end
filter("architecture:x86_64")
links({
"xenia-cpu-backend-x64",
})


@ -942,7 +942,7 @@ void PrimitiveProcessor::Get16BitResetIndexUsage(
is_ffff_simd = is_ffff_simd =
_mm_or_si128(is_ffff_simd, _mm_cmpeq_epi16(source_simd, ffff_simd)); _mm_or_si128(is_ffff_simd, _mm_cmpeq_epi16(source_simd, ffff_simd));
#elif XE_ARCH_ARM64 #elif XE_ARCH_ARM64
is_reset_simd = vcorrq_u16( is_reset_simd = vorrq_u16(
is_reset_simd, vceqq_u16(source_simd, reset_index_guest_endian_simd)); is_reset_simd, vceqq_u16(source_simd, reset_index_guest_endian_simd));
is_ffff_simd = vmaxq_u16(is_ffff_simd, source_simd); is_ffff_simd = vmaxq_u16(is_ffff_simd, source_simd);
#else #else


@ -374,8 +374,14 @@ void RenderTargetCache::InitializeCommon() {
RenderTargetKey(), RenderTargetKey())); RenderTargetKey(), RenderTargetKey()));
} }
void RenderTargetCache::ShutdownCommon() { void RenderTargetCache::DestroyAllRenderTargets(bool shutting_down) {
ownership_ranges_.clear(); ownership_ranges_.clear();
if (!shutting_down) {
ownership_ranges_.emplace(
std::piecewise_construct, std::forward_as_tuple(uint32_t(0)),
std::forward_as_tuple(xenos::kEdramTileCount, RenderTargetKey(),
RenderTargetKey(), RenderTargetKey()));
}
for (const auto& render_target_pair : render_targets_) { for (const auto& render_target_pair : render_targets_) {
if (render_target_pair.second) { if (render_target_pair.second) {
@ -385,6 +391,8 @@ void RenderTargetCache::ShutdownCommon() {
render_targets_.clear(); render_targets_.clear();
} }
void RenderTargetCache::ShutdownCommon() { DestroyAllRenderTargets(true); }
void RenderTargetCache::ClearCache() { void RenderTargetCache::ClearCache() {
// Keep only render targets currently owning any EDRAM data. // Keep only render targets currently owning any EDRAM data.
if (!render_targets_.empty()) { if (!render_targets_.empty()) {


@ -193,6 +193,10 @@ class RenderTargetCache {
// Call last in implementation-specific initialization (when things like path // Call last in implementation-specific initialization (when things like path
// are initialized by the implementation). // are initialized by the implementation).
void InitializeCommon(); void InitializeCommon();
// May be called from the destructor, or from the implementation shutdown to
// destroy all render targets before destroying what they depend on in the
// implementation.
void DestroyAllRenderTargets(bool shutting_down);
// Call last in implementation-specific shutdown, also callable from the // Call last in implementation-specific shutdown, also callable from the
// destructor. // destructor.
void ShutdownCommon(); void ShutdownCommon();


@ -75,9 +75,6 @@ SpirvShaderTranslator::Features::Features(
} }
} }
const std::string SpirvShaderTranslator::kInterpolatorNamePrefix =
"xe_interpolator_";
SpirvShaderTranslator::SpirvShaderTranslator(const Features& features) SpirvShaderTranslator::SpirvShaderTranslator(const Features& features)
: features_(features) {} : features_(features) {}
@ -164,6 +161,8 @@ void SpirvShaderTranslator::StartTranslation() {
type_float2_ = builder_->makeVectorType(type_float_, 2); type_float2_ = builder_->makeVectorType(type_float_, 2);
type_float3_ = builder_->makeVectorType(type_float_, 3); type_float3_ = builder_->makeVectorType(type_float_, 3);
type_float4_ = builder_->makeVectorType(type_float_, 4); type_float4_ = builder_->makeVectorType(type_float_, 4);
type_interpolators_ = builder_->makeArrayType(
type_float4_, builder_->makeUintConstant(xenos::kMaxInterpolators), 0);
const_int_0_ = builder_->makeIntConstant(0); const_int_0_ = builder_->makeIntConstant(0);
id_vector_temp_.clear(); id_vector_temp_.clear();
@ -257,8 +256,9 @@ void SpirvShaderTranslator::StartTranslation() {
"xe_uniform_system_constants"); "xe_uniform_system_constants");
builder_->addDecoration(uniform_system_constants_, builder_->addDecoration(uniform_system_constants_,
spv::DecorationDescriptorSet, spv::DecorationDescriptorSet,
kDescriptorSetSystemConstants); int(kDescriptorSetConstants));
builder_->addDecoration(uniform_system_constants_, spv::DecorationBinding, 0); builder_->addDecoration(uniform_system_constants_, spv::DecorationBinding,
int(kConstantBufferSystem));
if (features_.spirv_version >= spv::Spv_1_4) { if (features_.spirv_version >= spv::Spv_1_4) {
main_interface_.push_back(uniform_system_constants_); main_interface_.push_back(uniform_system_constants_);
} }
@ -285,12 +285,13 @@ void SpirvShaderTranslator::StartTranslation() {
uniform_float_constants_ = builder_->createVariable( uniform_float_constants_ = builder_->createVariable(
spv::NoPrecision, spv::StorageClassUniform, type_float_constants, spv::NoPrecision, spv::StorageClassUniform, type_float_constants,
"xe_uniform_float_constants"); "xe_uniform_float_constants");
builder_->addDecoration(uniform_float_constants_,
spv::DecorationDescriptorSet,
int(kDescriptorSetConstants));
builder_->addDecoration( builder_->addDecoration(
uniform_float_constants_, spv::DecorationDescriptorSet, uniform_float_constants_, spv::DecorationBinding,
int(is_pixel_shader() ? kDescriptorSetFloatConstantsPixel int(is_pixel_shader() ? kConstantBufferFloatPixel
: kDescriptorSetFloatConstantsVertex)); : kConstantBufferFloatVertex));
builder_->addDecoration(uniform_float_constants_, spv::DecorationBinding,
0);
if (features_.spirv_version >= spv::Spv_1_4) { if (features_.spirv_version >= spv::Spv_1_4) {
main_interface_.push_back(uniform_float_constants_); main_interface_.push_back(uniform_float_constants_);
} }
@ -326,9 +327,9 @@ void SpirvShaderTranslator::StartTranslation() {
"xe_uniform_bool_loop_constants"); "xe_uniform_bool_loop_constants");
builder_->addDecoration(uniform_bool_loop_constants_, builder_->addDecoration(uniform_bool_loop_constants_,
spv::DecorationDescriptorSet, spv::DecorationDescriptorSet,
int(kDescriptorSetBoolLoopConstants)); int(kDescriptorSetConstants));
builder_->addDecoration(uniform_bool_loop_constants_, spv::DecorationBinding, builder_->addDecoration(uniform_bool_loop_constants_, spv::DecorationBinding,
0); int(kConstantBufferBoolLoop));
if (features_.spirv_version >= spv::Spv_1_4) { if (features_.spirv_version >= spv::Spv_1_4) {
main_interface_.push_back(uniform_bool_loop_constants_); main_interface_.push_back(uniform_bool_loop_constants_);
} }
@ -352,8 +353,9 @@ void SpirvShaderTranslator::StartTranslation() {
"xe_uniform_fetch_constants"); "xe_uniform_fetch_constants");
builder_->addDecoration(uniform_fetch_constants_, builder_->addDecoration(uniform_fetch_constants_,
spv::DecorationDescriptorSet, spv::DecorationDescriptorSet,
int(kDescriptorSetFetchConstants)); int(kDescriptorSetConstants));
builder_->addDecoration(uniform_fetch_constants_, spv::DecorationBinding, 0); builder_->addDecoration(uniform_fetch_constants_, spv::DecorationBinding,
int(kConstantBufferFetch));
if (features_.spirv_version >= spv::Spv_1_4) { if (features_.spirv_version >= spv::Spv_1_4) {
main_interface_.push_back(uniform_fetch_constants_); main_interface_.push_back(uniform_fetch_constants_);
} }
@ -639,6 +641,16 @@ std::vector<uint8_t> SpirvShaderTranslator::CompleteTranslation() {
entry_point->addIdOperand(interface_id); entry_point->addIdOperand(interface_id);
} }
// Specify the binding indices for samplers when the number of textures is
// known, as samplers are located after images in the texture descriptor set.
size_t texture_binding_count = texture_bindings_.size();
size_t sampler_binding_count = sampler_bindings_.size();
for (size_t i = 0; i < sampler_binding_count; ++i) {
builder_->addDecoration(sampler_bindings_[i].variable,
spv::DecorationBinding,
int(texture_binding_count + i));
}
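// For example (hypothetical counts), a shader binding 3 textures and 2
// samplers ends up with image bindings 0..2 and sampler bindings 3..4 within
// the same texture descriptor set.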
// TODO(Triang3l): Avoid copy? // TODO(Triang3l): Avoid copy?
std::vector<unsigned int> module_uints; std::vector<unsigned int> module_uints;
builder_->dump(module_uints); builder_->dump(module_uints);
@ -1056,17 +1068,15 @@ void SpirvShaderTranslator::StartVertexOrTessEvalShaderBeforeMain() {
main_interface_.push_back(input_vertex_index_); main_interface_.push_back(input_vertex_index_);
} }
// Create the Xenia-specific outputs. // Create the interpolator output.
// TODO(Triang3l): Change to an interpolator array. input_output_interpolators_ =
for (uint32_t i = 0; i < xenos::kMaxInterpolators; ++i) { builder_->createVariable(spv::NoPrecision, spv::StorageClassOutput,
spv::Id interpolator = builder_->createVariable( type_interpolators_, "xe_out_interpolators");
spv::NoPrecision, spv::StorageClassOutput, type_float4_, builder_->addDecoration(input_output_interpolators_, spv::DecorationLocation,
(kInterpolatorNamePrefix + std::to_string(i)).c_str()); 0);
input_output_interpolators_[i] = interpolator; builder_->addDecoration(input_output_interpolators_,
builder_->addDecoration(interpolator, spv::DecorationLocation, int(i)); spv::DecorationInvariant);
builder_->addDecoration(interpolator, spv::DecorationInvariant); main_interface_.push_back(input_output_interpolators_);
main_interface_.push_back(interpolator);
}
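// In GLSL terms the interpolator output is now roughly equivalent to
// (illustrative only - the module is emitted directly as SPIR-V):
//   layout(location = 0) invariant out vec4
//       xe_out_interpolators[xenos::kMaxInterpolators];
// replacing the previous separate per-interpolator vec4 outputs at
// consecutive locations.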
// Create the gl_PerVertex output for used system outputs. // Create the gl_PerVertex output for used system outputs.
std::vector<spv::Id> struct_per_vertex_members; std::vector<spv::Id> struct_per_vertex_members;
@ -1095,7 +1105,12 @@ void SpirvShaderTranslator::StartVertexOrTessEvalShaderInMain() {
// Zero the interpolators. // Zero the interpolators.
for (uint32_t i = 0; i < xenos::kMaxInterpolators; ++i) { for (uint32_t i = 0; i < xenos::kMaxInterpolators; ++i) {
builder_->createStore(const_float4_0_, input_output_interpolators_[i]); id_vector_temp_.clear();
id_vector_temp_.push_back(builder_->makeIntConstant(int(i)));
builder_->createStore(const_float4_0_,
builder_->createAccessChain(
spv::StorageClassOutput,
input_output_interpolators_, id_vector_temp_));
} }
// Load the vertex index or the tessellation parameters. // Load the vertex index or the tessellation parameters.
@ -1269,17 +1284,13 @@ void SpirvShaderTranslator::CompleteVertexOrTessEvalShaderInMain() {
} }
void SpirvShaderTranslator::StartFragmentShaderBeforeMain() { void SpirvShaderTranslator::StartFragmentShaderBeforeMain() {
// Interpolator inputs. // Interpolator input.
uint32_t interpolator_count = input_output_interpolators_ =
std::min(xenos::kMaxInterpolators, register_count()); builder_->createVariable(spv::NoPrecision, spv::StorageClassInput,
for (uint32_t i = 0; i < interpolator_count; ++i) { type_interpolators_, "xe_in_interpolators");
spv::Id interpolator = builder_->createVariable( builder_->addDecoration(input_output_interpolators_, spv::DecorationLocation,
spv::NoPrecision, spv::StorageClassInput, type_float4_, 0);
(kInterpolatorNamePrefix + std::to_string(i)).c_str()); main_interface_.push_back(input_output_interpolators_);
input_output_interpolators_[i] = interpolator;
builder_->addDecoration(interpolator, spv::DecorationLocation, int(i));
main_interface_.push_back(interpolator);
}
bool param_gen_needed = GetPsParamGenInterpolator() != UINT32_MAX; bool param_gen_needed = GetPsParamGenInterpolator() != UINT32_MAX;
@ -1347,7 +1358,10 @@ void SpirvShaderTranslator::StartFragmentShaderInMain() {
// Register array element. // Register array element.
id_vector_temp_.push_back(builder_->makeIntConstant(int(i))); id_vector_temp_.push_back(builder_->makeIntConstant(int(i)));
builder_->createStore( builder_->createStore(
builder_->createLoad(input_output_interpolators_[i], spv::NoPrecision), builder_->createLoad(builder_->createAccessChain(
spv::StorageClassInput,
input_output_interpolators_, id_vector_temp_),
spv::NoPrecision),
builder_->createAccessChain(spv::StorageClassFunction, builder_->createAccessChain(spv::StorageClassFunction,
var_main_registers_, id_vector_temp_)); var_main_registers_, id_vector_temp_));
} }
@ -1824,7 +1838,12 @@ void SpirvShaderTranslator::StoreResult(const InstructionResult& result,
} break; } break;
case InstructionStorageTarget::kInterpolator: case InstructionStorageTarget::kInterpolator:
assert_true(is_vertex_shader()); assert_true(is_vertex_shader());
target_pointer = input_output_interpolators_[result.storage_index]; id_vector_temp_util_.clear();
id_vector_temp_util_.push_back(
builder_->makeIntConstant(int(result.storage_index)));
target_pointer = builder_->createAccessChain(spv::StorageClassOutput,
input_output_interpolators_,
id_vector_temp_util_);
break; break;
case InstructionStorageTarget::kPosition: case InstructionStorageTarget::kPosition:
assert_true(is_vertex_shader()); assert_true(is_vertex_shader());


@ -131,6 +131,16 @@ class SpirvShaderTranslator : public ShaderTranslator {
float color_exp_bias[4]; float color_exp_bias[4];
}; };
enum ConstantBuffer : uint32_t {
kConstantBufferSystem,
kConstantBufferFloatVertex,
kConstantBufferFloatPixel,
kConstantBufferBoolLoop,
kConstantBufferFetch,
kConstantBufferCount,
};
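// A minimal sketch (an assumption about how the command processor consumes
// these values, not actual Xenia code) of building the corresponding
// kDescriptorSetConstants set layout, with one uniform buffer per enum entry
// bound at the matching index:
//   VkDescriptorSetLayoutBinding bindings[kConstantBufferCount] = {};
//   for (uint32_t i = 0; i < kConstantBufferCount; ++i) {
//     bindings[i].binding = i;
//     bindings[i].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
//     bindings[i].descriptorCount = 1;
//     bindings[i].stageFlags =
//         VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT;
//   }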
// The minimum limit for maxPerStageDescriptorStorageBuffers is 4, and for // The minimum limit for maxPerStageDescriptorStorageBuffers is 4, and for
// maxStorageBufferRange it's 128 MB. These are the values of those limits on // maxStorageBufferRange it's 128 MB. These are the values of those limits on
// Arm Mali as of November 2020. Xenia needs 512 MB shared memory to be bound, // Arm Mali as of November 2020. Xenia needs 512 MB shared memory to be bound,
@ -159,31 +169,28 @@ class SpirvShaderTranslator : public ShaderTranslator {
// Never changed. // Never changed.
kDescriptorSetSharedMemoryAndEdram, kDescriptorSetSharedMemoryAndEdram,
// Pretty rarely used and rarely changed - flow control constants. // Changed in case of changes in the data.
kDescriptorSetBoolLoopConstants, kDescriptorSetConstants,
// May stay the same across many draws.
kDescriptorSetSystemConstants,
// Less frequently changed (per-material).
kDescriptorSetFloatConstantsPixel,
// Quite frequently changed (for one object drawn multiple times, for
// instance - may contain projection matrices).
kDescriptorSetFloatConstantsVertex,
// Very frequently changed, especially for UI draws, and for models drawn in
// multiple parts - contains vertex and texture fetch constants.
kDescriptorSetFetchConstants,
// Mutable part of the pipeline layout: // Mutable part of the pipeline layout:
kDescriptorSetMutableLayoutsStart, kDescriptorSetMutableLayoutsStart,
// Rarely used at all, but may be changed at an unpredictable rate when // Rarely used at all, but may be changed at an unpredictable rate when
// vertex textures are used. // vertex textures are used (for example, for bones of an object, which may
kDescriptorSetSamplersVertex = kDescriptorSetMutableLayoutsStart, // consist of multiple draw commands with different materials).
kDescriptorSetTexturesVertex, kDescriptorSetTexturesVertex = kDescriptorSetMutableLayoutsStart,
// Per-material textures. // Per-material textures.
kDescriptorSetSamplersPixel,
kDescriptorSetTexturesPixel, kDescriptorSetTexturesPixel,
kDescriptorSetCount, kDescriptorSetCount,
}; };
static_assert(
kDescriptorSetCount <= 4,
"The number of descriptor sets used by translated shaders must be within "
"the minimum Vulkan maxBoundDescriptorSets requirement of 4, which is "
"the limit on most GPUs used in Android devices - Arm Mali, Imagination "
"PowerVR, Qualcomm Adreno 6xx and older, as well as on old PC Nvidia "
"drivers");
// "Xenia Emulator Microcode Translator". // "Xenia Emulator Microcode Translator".
// https://github.com/KhronosGroup/SPIRV-Headers/blob/c43a43c7cc3af55910b9bec2a71e3e8a622443cf/include/spirv/spir-v.xml#L79 // https://github.com/KhronosGroup/SPIRV-Headers/blob/c43a43c7cc3af55910b9bec2a71e3e8a622443cf/include/spirv/spir-v.xml#L79
@ -522,6 +529,8 @@ class SpirvShaderTranslator : public ShaderTranslator {
spv::Id type_float_vectors_[4]; spv::Id type_float_vectors_[4];
}; };
spv::Id type_interpolators_;
spv::Id const_int_0_; spv::Id const_int_0_;
spv::Id const_int4_0_; spv::Id const_int4_0_;
spv::Id const_uint_0_; spv::Id const_uint_0_;
@ -582,11 +591,12 @@ class SpirvShaderTranslator : public ShaderTranslator {
// PS, only when needed - bool. // PS, only when needed - bool.
spv::Id input_front_facing_; spv::Id input_front_facing_;
// In vertex or tessellation evaluation shaders - outputs, always // VS output or PS input, only when needed - type_interpolators_.
// xenos::kMaxInterpolators. // The Qualcomm Adreno driver has strict requirements for stage linkage - if
// In pixel shaders - inputs, min(xenos::kMaxInterpolators, register_count()). // this is an array in one stage, it must be an array in the other (in case of
spv::Id input_output_interpolators_[xenos::kMaxInterpolators]; // Xenia, including geometry shaders); it must not be an array in one and just
static const std::string kInterpolatorNamePrefix; // elements in consecutive locations in another.
spv::Id input_output_interpolators_;
enum OutputPerVertexMember : unsigned int { enum OutputPerVertexMember : unsigned int {
kOutputPerVertexMemberPosition, kOutputPerVertexMemberPosition,


@ -2573,10 +2573,10 @@ size_t SpirvShaderTranslator::FindOrAddSamplerBinding(
builder_->makeSamplerType(), name.str().c_str()); builder_->makeSamplerType(), name.str().c_str());
builder_->addDecoration( builder_->addDecoration(
new_sampler_binding.variable, spv::DecorationDescriptorSet, new_sampler_binding.variable, spv::DecorationDescriptorSet,
int(is_vertex_shader() ? kDescriptorSetSamplersVertex int(is_vertex_shader() ? kDescriptorSetTexturesVertex
: kDescriptorSetSamplersPixel)); : kDescriptorSetTexturesPixel));
builder_->addDecoration(new_sampler_binding.variable, spv::DecorationBinding, // The binding indices will be specified later after all textures are added as
int(new_sampler_binding_index)); // samplers are located after images in the descriptor set.
if (features_.spirv_version >= spv::Spv_1_4) { if (features_.spirv_version >= spv::Spv_1_4) {
main_interface_.push_back(new_sampler_binding.variable); main_interface_.push_back(new_sampler_binding.variable);
} }


@ -95,8 +95,8 @@ bool TraceDump::Setup() {
// Create the emulator but don't initialize so we can setup the window. // Create the emulator but don't initialize so we can setup the window.
emulator_ = std::make_unique<Emulator>("", "", "", ""); emulator_ = std::make_unique<Emulator>("", "", "", "");
X_STATUS result = emulator_->Setup( X_STATUS result = emulator_->Setup(
nullptr, nullptr, nullptr, [this]() { return CreateGraphicsSystem(); }, nullptr, nullptr, false, nullptr,
nullptr); [this]() { return CreateGraphicsSystem(); }, nullptr);
if (XFAILED(result)) { if (XFAILED(result)) {
XELOGE("Failed to setup emulator: {:08X}", result); XELOGE("Failed to setup emulator: {:08X}", result);
return false; return false;


@ -125,7 +125,7 @@ bool TraceViewer::Setup() {
// Create the emulator but don't initialize so we can setup the window. // Create the emulator but don't initialize so we can setup the window.
emulator_ = std::make_unique<Emulator>("", "", "", ""); emulator_ = std::make_unique<Emulator>("", "", "", "");
X_STATUS result = emulator_->Setup( X_STATUS result = emulator_->Setup(
window_.get(), nullptr, nullptr, window_.get(), nullptr, false, nullptr,
[this]() { return CreateGraphicsSystem(); }, nullptr); [this]() { return CreateGraphicsSystem(); }, nullptr);
if (XFAILED(result)) { if (XFAILED(result)) {
XELOGE("Failed to setup emulator: {:08X}", result); XELOGE("Failed to setup emulator: {:08X}", result);


@ -34,7 +34,6 @@ project("xenia-gpu-vulkan-trace-viewer")
"xenia-base", "xenia-base",
"xenia-core", "xenia-core",
"xenia-cpu", "xenia-cpu",
"xenia-cpu-backend-x64",
"xenia-gpu", "xenia-gpu",
"xenia-gpu-vulkan", "xenia-gpu-vulkan",
"xenia-hid", "xenia-hid",
@ -66,6 +65,11 @@ project("xenia-gpu-vulkan-trace-viewer")
"../../ui/windowed_app_main_"..platform_suffix..".cc", "../../ui/windowed_app_main_"..platform_suffix..".cc",
}) })
filter("architecture:x86_64")
links({
"xenia-cpu-backend-x64",
})
filter("platforms:Linux") filter("platforms:Linux")
links({ links({
"X11", "X11",
@ -95,7 +99,6 @@ project("xenia-gpu-vulkan-trace-dump")
"xenia-base", "xenia-base",
"xenia-core", "xenia-core",
"xenia-cpu", "xenia-cpu",
"xenia-cpu-backend-x64",
"xenia-gpu", "xenia-gpu",
"xenia-gpu-vulkan", "xenia-gpu-vulkan",
"xenia-hid", "xenia-hid",
@ -126,6 +129,11 @@ project("xenia-gpu-vulkan-trace-dump")
"../../base/console_app_main_"..platform_suffix..".cc", "../../base/console_app_main_"..platform_suffix..".cc",
}) })
filter("architecture:x86_64")
links({
"xenia-cpu-backend-x64",
})
filter("platforms:Linux") filter("platforms:Linux")
links({ links({
"X11", "X11",

File diff suppressed because it is too large


@ -36,7 +36,7 @@
#include "xenia/gpu/vulkan/vulkan_texture_cache.h" #include "xenia/gpu/vulkan/vulkan_texture_cache.h"
#include "xenia/gpu/xenos.h" #include "xenia/gpu/xenos.h"
#include "xenia/kernel/kernel_state.h" #include "xenia/kernel/kernel_state.h"
#include "xenia/ui/vulkan/single_type_descriptor_set_allocator.h" #include "xenia/ui/vulkan/linked_type_descriptor_set_allocator.h"
#include "xenia/ui/vulkan/vulkan_presenter.h" #include "xenia/ui/vulkan/vulkan_presenter.h"
#include "xenia/ui/vulkan/vulkan_provider.h" #include "xenia/ui/vulkan/vulkan_provider.h"
#include "xenia/ui/vulkan/vulkan_upload_buffer_pool.h" #include "xenia/ui/vulkan/vulkan_upload_buffer_pool.h"
@ -49,10 +49,6 @@ class VulkanCommandProcessor : public CommandProcessor {
public: public:
// Single-descriptor layouts for use within a single frame. // Single-descriptor layouts for use within a single frame.
enum class SingleTransientDescriptorLayout { enum class SingleTransientDescriptorLayout {
kUniformBufferGuestVertex,
kUniformBufferFragment,
kUniformBufferGuestShader,
kUniformBufferSystemConstants,
kUniformBufferCompute, kUniformBufferCompute,
kStorageBufferCompute, kStorageBufferCompute,
kCount, kCount,
@ -231,9 +227,9 @@ class VulkanCommandProcessor : public CommandProcessor {
VkDescriptorSet& descriptor_set_out); VkDescriptorSet& descriptor_set_out);
// The returned reference is valid until a cache clear. // The returned reference is valid until a cache clear.
VkDescriptorSetLayout GetTextureDescriptorSetLayout(bool is_samplers, VkDescriptorSetLayout GetTextureDescriptorSetLayout(bool is_vertex,
bool is_vertex, size_t texture_count,
size_t binding_count); size_t sampler_count);
// The returned reference is valid until a cache clear. // The returned reference is valid until a cache clear.
const VulkanPipelineCache::PipelineLayoutProvider* GetPipelineLayout( const VulkanPipelineCache::PipelineLayoutProvider* GetPipelineLayout(
size_t texture_count_pixel, size_t sampler_count_pixel, size_t texture_count_pixel, size_t sampler_count_pixel,
@ -298,12 +294,11 @@ class VulkanCommandProcessor : public CommandProcessor {
union TextureDescriptorSetLayoutKey { union TextureDescriptorSetLayoutKey {
uint32_t key; uint32_t key;
struct { struct {
// 0 - sampled image descriptors, 1 - sampler descriptors. // If texture and sampler counts are both 0, use
uint32_t is_samplers : 1; // descriptor_set_layout_empty_ instead as these are owning references.
uint32_t texture_count : 16;
uint32_t sampler_count : 15;
uint32_t is_vertex : 1; uint32_t is_vertex : 1;
// For 0, use descriptor_set_layout_empty_ instead as these are owning
// references.
uint32_t binding_count : 30;
}; };
TextureDescriptorSetLayoutKey() : key(0) { TextureDescriptorSetLayoutKey() : key(0) {
@ -354,40 +349,26 @@ class VulkanCommandProcessor : public CommandProcessor {
explicit PipelineLayout( explicit PipelineLayout(
VkPipelineLayout pipeline_layout, VkPipelineLayout pipeline_layout,
VkDescriptorSetLayout descriptor_set_layout_textures_vertex_ref, VkDescriptorSetLayout descriptor_set_layout_textures_vertex_ref,
VkDescriptorSetLayout descriptor_set_layout_samplers_vertex_ref, VkDescriptorSetLayout descriptor_set_layout_textures_pixel_ref)
VkDescriptorSetLayout descriptor_set_layout_textures_pixel_ref,
VkDescriptorSetLayout descriptor_set_layout_samplers_pixel_ref)
: pipeline_layout_(pipeline_layout), : pipeline_layout_(pipeline_layout),
descriptor_set_layout_textures_vertex_ref_( descriptor_set_layout_textures_vertex_ref_(
descriptor_set_layout_textures_vertex_ref), descriptor_set_layout_textures_vertex_ref),
descriptor_set_layout_samplers_vertex_ref_(
descriptor_set_layout_samplers_vertex_ref),
descriptor_set_layout_textures_pixel_ref_( descriptor_set_layout_textures_pixel_ref_(
descriptor_set_layout_textures_pixel_ref), descriptor_set_layout_textures_pixel_ref) {}
descriptor_set_layout_samplers_pixel_ref_(
descriptor_set_layout_samplers_pixel_ref) {}
VkPipelineLayout GetPipelineLayout() const override { VkPipelineLayout GetPipelineLayout() const override {
return pipeline_layout_; return pipeline_layout_;
} }
VkDescriptorSetLayout descriptor_set_layout_textures_vertex_ref() const { VkDescriptorSetLayout descriptor_set_layout_textures_vertex_ref() const {
return descriptor_set_layout_textures_vertex_ref_; return descriptor_set_layout_textures_vertex_ref_;
} }
VkDescriptorSetLayout descriptor_set_layout_samplers_vertex_ref() const {
return descriptor_set_layout_samplers_vertex_ref_;
}
VkDescriptorSetLayout descriptor_set_layout_textures_pixel_ref() const { VkDescriptorSetLayout descriptor_set_layout_textures_pixel_ref() const {
return descriptor_set_layout_textures_pixel_ref_; return descriptor_set_layout_textures_pixel_ref_;
} }
VkDescriptorSetLayout descriptor_set_layout_samplers_pixel_ref() const {
return descriptor_set_layout_samplers_pixel_ref_;
}
private: private:
VkPipelineLayout pipeline_layout_; VkPipelineLayout pipeline_layout_;
VkDescriptorSetLayout descriptor_set_layout_textures_vertex_ref_; VkDescriptorSetLayout descriptor_set_layout_textures_vertex_ref_;
VkDescriptorSetLayout descriptor_set_layout_samplers_vertex_ref_;
VkDescriptorSetLayout descriptor_set_layout_textures_pixel_ref_; VkDescriptorSetLayout descriptor_set_layout_textures_pixel_ref_;
VkDescriptorSetLayout descriptor_set_layout_samplers_pixel_ref_;
}; };
struct UsedSingleTransientDescriptor { struct UsedSingleTransientDescriptor {
@ -458,16 +439,20 @@ class VulkanCommandProcessor : public CommandProcessor {
uint32_t used_texture_mask); uint32_t used_texture_mask);
bool UpdateBindings(const VulkanShader* vertex_shader, bool UpdateBindings(const VulkanShader* vertex_shader,
const VulkanShader* pixel_shader); const VulkanShader* pixel_shader);
// Allocates a descriptor set and fills the VkWriteDescriptorSet structure. // Allocates a descriptor set and fills one or two VkWriteDescriptorSet
// The descriptor set layout must be the one for the given is_samplers, // structure instances (for images and samplers).
// is_vertex, binding_count (from GetTextureDescriptorSetLayout - may be // The descriptor set layout must be the one for the given is_vertex,
// texture_count, sampler_count (from GetTextureDescriptorSetLayout - may be
// already available at the moment of the call, no need to locate it again). // already available at the moment of the call, no need to locate it again).
// Returns whether the allocation was successful. // Returns how many VkWriteDescriptorSet structure instances have been
bool WriteTransientTextureBindings( // written, or 0 if there was a failure to allocate the descriptor set or no
bool is_samplers, bool is_vertex, uint32_t binding_count, // bindings were requested.
uint32_t WriteTransientTextureBindings(
bool is_vertex, uint32_t texture_count, uint32_t sampler_count,
VkDescriptorSetLayout descriptor_set_layout, VkDescriptorSetLayout descriptor_set_layout,
const VkDescriptorImageInfo* image_info, const VkDescriptorImageInfo* texture_image_info,
VkWriteDescriptorSet& write_descriptor_set_out); const VkDescriptorImageInfo* sampler_image_info,
VkWriteDescriptorSet* descriptor_set_writes_out);
bool device_lost_ = false; bool device_lost_ = false;
@ -530,6 +515,7 @@ class VulkanCommandProcessor : public CommandProcessor {
VkDescriptorSetLayout descriptor_set_layout_empty_ = VK_NULL_HANDLE; VkDescriptorSetLayout descriptor_set_layout_empty_ = VK_NULL_HANDLE;
VkDescriptorSetLayout descriptor_set_layout_shared_memory_and_edram_ = VkDescriptorSetLayout descriptor_set_layout_shared_memory_and_edram_ =
VK_NULL_HANDLE; VK_NULL_HANDLE;
VkDescriptorSetLayout descriptor_set_layout_constants_ = VK_NULL_HANDLE;
std::array<VkDescriptorSetLayout, std::array<VkDescriptorSetLayout,
size_t(SingleTransientDescriptorLayout::kCount)> size_t(SingleTransientDescriptorLayout::kCount)>
descriptor_set_layouts_single_transient_{}; descriptor_set_layouts_single_transient_{};
@ -543,19 +529,27 @@ class VulkanCommandProcessor : public CommandProcessor {
PipelineLayoutKey::Hasher> PipelineLayoutKey::Hasher>
pipeline_layouts_; pipeline_layouts_;
ui::vulkan::SingleTypeDescriptorSetAllocator // No specific reason for 32768, just the "too much" descriptor count from
// Direct3D 12 PIX warnings.
static constexpr uint32_t kLinkedTypeDescriptorPoolSetCount = 32768;
static const VkDescriptorPoolSize kDescriptorPoolSizeUniformBuffer;
static const VkDescriptorPoolSize kDescriptorPoolSizeStorageBuffer;
static const VkDescriptorPoolSize kDescriptorPoolSizeTextures[2];
ui::vulkan::LinkedTypeDescriptorSetAllocator
transient_descriptor_allocator_uniform_buffer_; transient_descriptor_allocator_uniform_buffer_;
ui::vulkan::SingleTypeDescriptorSetAllocator ui::vulkan::LinkedTypeDescriptorSetAllocator
transient_descriptor_allocator_storage_buffer_; transient_descriptor_allocator_storage_buffer_;
std::deque<UsedSingleTransientDescriptor> single_transient_descriptors_used_; std::deque<UsedSingleTransientDescriptor> single_transient_descriptors_used_;
std::array<std::vector<VkDescriptorSet>, std::array<std::vector<VkDescriptorSet>,
size_t(SingleTransientDescriptorLayout::kCount)> size_t(SingleTransientDescriptorLayout::kCount)>
single_transient_descriptors_free_; single_transient_descriptors_free_;
// <Usage frame, set>.
std::deque<std::pair<uint64_t, VkDescriptorSet>>
constants_transient_descriptors_used_;
std::vector<VkDescriptorSet> constants_transient_descriptors_free_;
ui::vulkan::SingleTypeDescriptorSetAllocator ui::vulkan::LinkedTypeDescriptorSetAllocator
transient_descriptor_allocator_sampled_image_; transient_descriptor_allocator_textures_;
ui::vulkan::SingleTypeDescriptorSetAllocator
transient_descriptor_allocator_sampler_;
std::deque<UsedTextureTransientDescriptorSet> std::deque<UsedTextureTransientDescriptorSet>
texture_transient_descriptor_sets_used_; texture_transient_descriptor_sets_used_;
std::unordered_map<TextureDescriptorSetLayoutKey, std::unordered_map<TextureDescriptorSetLayoutKey,
@ -701,6 +695,11 @@ class VulkanCommandProcessor : public CommandProcessor {
// Pipeline layout of the current guest graphics pipeline. // Pipeline layout of the current guest graphics pipeline.
const PipelineLayout* current_guest_graphics_pipeline_layout_; const PipelineLayout* current_guest_graphics_pipeline_layout_;
VkDescriptorBufferInfo current_constant_buffer_infos_
[SpirvShaderTranslator::kConstantBufferCount];
// Whether up-to-date data has been written to constant (uniform) buffers, and
// the buffer infos in current_constant_buffer_infos_ point to them.
uint32_t current_constant_buffers_up_to_date_;
VkDescriptorSet current_graphics_descriptor_sets_ VkDescriptorSet current_graphics_descriptor_sets_
[SpirvShaderTranslator::kDescriptorSetCount]; [SpirvShaderTranslator::kDescriptorSetCount];
// Whether descriptor sets in current_graphics_descriptor_sets_ point to // Whether descriptor sets in current_graphics_descriptor_sets_ point to

View File

@ -661,6 +661,12 @@ void VulkanRenderTargetCache::Shutdown(bool from_destructor) {
const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider.dfn(); const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider.dfn();
VkDevice device = provider.device(); VkDevice device = provider.device();
// Destroy all render targets before the descriptor set pool is destroyed -
// this may happen when the VulkanRenderTargetCache is shut down by being
// destroyed, in which case ShutdownCommon is called from the RenderTargetCache
// destructor, when it's already too late.
DestroyAllRenderTargets(true);
for (const auto& dump_pipeline_pair : dump_pipelines_) { for (const auto& dump_pipeline_pair : dump_pipelines_) {
// May be null to prevent recreation attempts. // May be null to prevent recreation attempts.
if (dump_pipeline_pair.second != VK_NULL_HANDLE) { if (dump_pipeline_pair.second != VK_NULL_HANDLE) {

View File

@ -647,7 +647,7 @@ class VulkanRenderTargetCache final : public RenderTargetCache {
TransferInvocation(const Transfer& transfer, TransferInvocation(const Transfer& transfer,
const TransferShaderKey& shader_key) const TransferShaderKey& shader_key)
: transfer(transfer), shader_key(shader_key) {} : transfer(transfer), shader_key(shader_key) {}
bool operator<(const TransferInvocation& other_invocation) { bool operator<(const TransferInvocation& other_invocation) const {
// TODO(Triang3l): See if it may be better to sort by the source in the // TODO(Triang3l): See if it may be better to sort by the source in the
// first place, especially when reading the same data multiple times (like // first place, especially when reading the same data multiple times (like
// to write the stencil bits after depth) for better read locality. // to write the stencil bits after depth) for better read locality.
@ -784,7 +784,7 @@ class VulkanRenderTargetCache final : public RenderTargetCache {
DumpInvocation(const ResolveCopyDumpRectangle& rectangle, DumpInvocation(const ResolveCopyDumpRectangle& rectangle,
const DumpPipelineKey& pipeline_key) const DumpPipelineKey& pipeline_key)
: rectangle(rectangle), pipeline_key(pipeline_key) {} : rectangle(rectangle), pipeline_key(pipeline_key) {}
bool operator<(const DumpInvocation& other_invocation) { bool operator<(const DumpInvocation& other_invocation) const {
// Sort by the pipeline key primarily to reduce pipeline state (context) // Sort by the pipeline key primarily to reduce pipeline state (context)
// switches. // switches.
if (pipeline_key != other_invocation.pipeline_key) { if (pipeline_key != other_invocation.pipeline_key) {

View File

@ -0,0 +1,415 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2022 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#include "xenia/ui/vulkan/linked_type_descriptor_set_allocator.h"
#include <algorithm>
#include <iterator>
#include <utility>
#include "xenia/base/assert.h"
#include "xenia/base/logging.h"
#include "xenia/ui/vulkan/vulkan_util.h"
namespace xe {
namespace ui {
namespace vulkan {
void LinkedTypeDescriptorSetAllocator::Reset() {
const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider_.dfn();
VkDevice device = provider_.device();
ui::vulkan::util::DestroyAndNullHandle(dfn.vkDestroyDescriptorPool, device,
page_usable_latest_.pool);
page_usable_latest_.descriptors_remaining.reset();
for (const std::pair<const uint32_t, Page>& page_pair : pages_usable_) {
dfn.vkDestroyDescriptorPool(device, page_pair.second.pool, nullptr);
}
pages_usable_.clear();
for (VkDescriptorPool pool : pages_full_) {
dfn.vkDestroyDescriptorPool(device, pool, nullptr);
}
pages_full_.clear();
}
VkDescriptorSet LinkedTypeDescriptorSetAllocator::Allocate(
VkDescriptorSetLayout descriptor_set_layout,
const VkDescriptorPoolSize* descriptor_counts,
uint32_t descriptor_type_count) {
assert_not_zero(descriptor_type_count);
#ifndef NDEBUG
for (uint32_t i = 0; i < descriptor_type_count; ++i) {
const VkDescriptorPoolSize& descriptor_count_for_type =
descriptor_counts[i];
assert_not_zero(descriptor_count_for_type.descriptorCount);
for (uint32_t j = 0; j < i; ++j) {
assert_true(descriptor_counts[j].type != descriptor_count_for_type.type);
}
}
#endif
const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider_.dfn();
VkDevice device = provider_.device();
VkDescriptorSetAllocateInfo descriptor_set_allocate_info;
descriptor_set_allocate_info.sType =
VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO;
descriptor_set_allocate_info.pNext = nullptr;
descriptor_set_allocate_info.descriptorSetCount = 1;
descriptor_set_allocate_info.pSetLayouts = &descriptor_set_layout;
VkDescriptorSet descriptor_set;
// Check whether more descriptors have been requested than a page can hold, or
// whether descriptors of types not provided by this allocator have been
// requested; if so, create a dedicated pool for this allocation.
bool dedicated_descriptor_pool_needed = false;
for (uint32_t i = 0; i < descriptor_type_count; ++i) {
const VkDescriptorPoolSize& descriptor_count_for_type =
descriptor_counts[i];
// If the type isn't supported by the allocator at all, a dedicated pool is
// required. If it is supported, and the allocator's pages are large enough to
// hold the requested number of descriptors of this type,
// dedicated_descriptor_pool_needed is reset to false for this iteration and
// the loop continues; otherwise a dedicated pool is required.
dedicated_descriptor_pool_needed = true;
for (uint32_t j = 0; j < descriptor_pool_size_count_; ++j) {
const VkDescriptorPoolSize& descriptor_pool_size =
descriptor_pool_sizes_[j];
if (descriptor_count_for_type.type != descriptor_pool_size.type) {
continue;
}
if (descriptor_count_for_type.descriptorCount <=
descriptor_pool_size.descriptorCount) {
// For this type, pages can hold enough descriptors.
dedicated_descriptor_pool_needed = false;
}
break;
}
if (dedicated_descriptor_pool_needed) {
// For at least one requested type, pages can't hold enough descriptors.
break;
}
}
if (dedicated_descriptor_pool_needed) {
VkDescriptorPoolCreateInfo dedicated_descriptor_pool_create_info;
dedicated_descriptor_pool_create_info.sType =
VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO;
dedicated_descriptor_pool_create_info.pNext = nullptr;
dedicated_descriptor_pool_create_info.flags = 0;
dedicated_descriptor_pool_create_info.maxSets = 1;
dedicated_descriptor_pool_create_info.poolSizeCount = descriptor_type_count;
dedicated_descriptor_pool_create_info.pPoolSizes = descriptor_counts;
VkDescriptorPool dedicated_descriptor_pool;
if (dfn.vkCreateDescriptorPool(
device, &dedicated_descriptor_pool_create_info, nullptr,
&dedicated_descriptor_pool) != VK_SUCCESS) {
XELOGE(
"LinkedTypeDescriptorSetAllocator: Failed to create a dedicated "
"descriptor pool for a descriptor set that is too large for a pool "
"page");
return VK_NULL_HANDLE;
}
descriptor_set_allocate_info.descriptorPool = dedicated_descriptor_pool;
if (dfn.vkAllocateDescriptorSets(device, &descriptor_set_allocate_info,
&descriptor_set) != VK_SUCCESS) {
XELOGE(
"LinkedTypeDescriptorSetAllocator: Failed to allocate descriptors in "
"a dedicated pool");
dfn.vkDestroyDescriptorPool(device, dedicated_descriptor_pool, nullptr);
return VK_NULL_HANDLE;
}
pages_full_.push_back(dedicated_descriptor_pool);
return descriptor_set;
}
// Try allocating from the most recent page an allocation has been made from,
// to avoid removing a page from the map and re-inserting it for every
// allocation.
if (page_usable_latest_.pool != VK_NULL_HANDLE) {
assert_not_zero(page_usable_latest_.descriptor_sets_remaining);
bool allocate_from_latest_page = true;
bool latest_page_becomes_full =
page_usable_latest_.descriptor_sets_remaining == 1;
for (uint32_t i = 0; i < descriptor_type_count; ++i) {
const VkDescriptorPoolSize& descriptor_count_for_type =
descriptor_counts[i];
for (uint32_t j = 0; j < descriptor_pool_size_count_; ++j) {
const VkDescriptorPoolSize& descriptors_remaining_for_type =
page_usable_latest_.descriptors_remaining[j];
if (descriptor_count_for_type.type !=
descriptors_remaining_for_type.type) {
continue;
}
if (descriptor_count_for_type.descriptorCount >=
descriptors_remaining_for_type.descriptorCount) {
if (descriptor_count_for_type.descriptorCount >
descriptors_remaining_for_type.descriptorCount) {
allocate_from_latest_page = false;
break;
}
latest_page_becomes_full = true;
}
}
if (!allocate_from_latest_page) {
break;
}
}
if (allocate_from_latest_page) {
descriptor_set_allocate_info.descriptorPool = page_usable_latest_.pool;
if (dfn.vkAllocateDescriptorSets(device, &descriptor_set_allocate_info,
&descriptor_set) != VK_SUCCESS) {
descriptor_set = VK_NULL_HANDLE;
// Failed to allocate internally even though there should be enough space;
// don't try to allocate from this pool again at all.
latest_page_becomes_full = true;
}
if (latest_page_becomes_full) {
pages_full_.push_back(page_usable_latest_.pool);
page_usable_latest_.pool = VK_NULL_HANDLE;
page_usable_latest_.descriptors_remaining.reset();
} else {
--page_usable_latest_.descriptor_sets_remaining;
for (uint32_t i = 0; i < descriptor_type_count; ++i) {
const VkDescriptorPoolSize& descriptor_count_for_type =
descriptor_counts[i];
for (uint32_t j = 0; j < descriptor_pool_size_count_; ++j) {
VkDescriptorPoolSize& descriptors_remaining_for_type =
page_usable_latest_.descriptors_remaining[j];
if (descriptor_count_for_type.type !=
descriptors_remaining_for_type.type) {
continue;
}
descriptors_remaining_for_type.descriptorCount -=
descriptor_count_for_type.descriptorCount;
}
}
}
if (descriptor_set != VK_NULL_HANDLE) {
return descriptor_set;
}
}
}
// Count the maximum number of descriptors requested for any type to stop
// searching for pages once they can't satisfy this requirement.
uint32_t max_descriptors_per_type = descriptor_counts[0].descriptorCount;
for (uint32_t i = 1; i < descriptor_type_count; ++i) {
max_descriptors_per_type = std::max(max_descriptors_per_type,
descriptor_counts[i].descriptorCount);
}
// If allocating from the latest pool wasn't possible, pick any that has
// enough free space. Prefer filling pages that have the most free space as
// they can more likely be used for more allocations later.
auto page_usable_it_next = pages_usable_.rbegin();
while (page_usable_it_next != pages_usable_.rend()) {
auto page_usable_it = page_usable_it_next;
++page_usable_it_next;
if (page_usable_it->first < max_descriptors_per_type) {
// All other pages_usable_ entries have a smaller maximum number of free
// descriptors for any type (that number is the map key).
break;
}
// Check if the page has enough free descriptors for all requested types,
// and whether allocating the requested number of descriptors in it will
// result in the page becoming full.
bool map_page_has_sufficient_space = true;
bool map_page_becomes_full =
page_usable_it->second.descriptor_sets_remaining == 1;
for (uint32_t i = 0; i < descriptor_type_count; ++i) {
const VkDescriptorPoolSize& descriptor_count_for_type =
descriptor_counts[i];
for (uint32_t j = 0; j < descriptor_pool_size_count_; ++j) {
const VkDescriptorPoolSize& descriptors_remaining_for_type =
page_usable_it->second.descriptors_remaining[j];
if (descriptor_count_for_type.type !=
descriptors_remaining_for_type.type) {
continue;
}
if (descriptor_count_for_type.descriptorCount >=
descriptors_remaining_for_type.descriptorCount) {
if (descriptor_count_for_type.descriptorCount >
descriptors_remaining_for_type.descriptorCount) {
map_page_has_sufficient_space = false;
break;
}
map_page_becomes_full = true;
}
}
if (!map_page_has_sufficient_space) {
break;
}
}
if (!map_page_has_sufficient_space) {
// Even though the coarse (maximum number of descriptors for any type)
// check has passed, for the exact types requested this page doesn't have
// sufficient space - try another one.
continue;
}
// Remove the page from the map unconditionally. In case of a successful
// allocation, it will have different numbers of free descriptors for the
// different types, and thus potentially a new map key (though it will become
// page_usable_latest_ instead anyway), or it will become full; and in case of
// a failure to allocate internally even though there should still be enough
// space, it should never be allocated from again.
Page map_page = std::move(page_usable_it->second);
// Convert the reverse iterator to a forward iterator for erasing.
pages_usable_.erase(std::next(page_usable_it).base());
descriptor_set_allocate_info.descriptorPool = map_page.pool;
if (dfn.vkAllocateDescriptorSets(device, &descriptor_set_allocate_info,
&descriptor_set) != VK_SUCCESS) {
descriptor_set = VK_NULL_HANDLE;
// Failed to allocate internally even though there should be enough space;
// don't try to allocate from this pool again at all.
map_page_becomes_full = true;
}
if (map_page_becomes_full) {
map_page.descriptors_remaining.reset();
pages_full_.push_back(map_page.pool);
} else {
--map_page.descriptor_sets_remaining;
for (uint32_t i = 0; i < descriptor_type_count; ++i) {
const VkDescriptorPoolSize& descriptor_count_for_type =
descriptor_counts[i];
for (uint32_t j = 0; j < descriptor_pool_size_count_; ++j) {
VkDescriptorPoolSize& descriptors_remaining_for_type =
map_page.descriptors_remaining[j];
if (descriptor_count_for_type.type !=
descriptors_remaining_for_type.type) {
continue;
}
descriptors_remaining_for_type.descriptorCount -=
descriptor_count_for_type.descriptorCount;
}
}
// Move the previous latest page, which this allocation couldn't be made
// from, into the usable pages map, and make this page the latest one.
if (page_usable_latest_.pool != VK_NULL_HANDLE) {
// Calculate the map key (the maximum number of remaining descriptors of
// any type).
uint32_t latest_page_max_descriptors_remaining =
page_usable_latest_.descriptors_remaining[0].descriptorCount;
for (uint32_t i = 1; i < descriptor_pool_size_count_; ++i) {
latest_page_max_descriptors_remaining = std::max(
latest_page_max_descriptors_remaining,
page_usable_latest_.descriptors_remaining[i].descriptorCount);
}
assert_not_zero(latest_page_max_descriptors_remaining);
pages_usable_.emplace(latest_page_max_descriptors_remaining,
std::move(page_usable_latest_));
}
page_usable_latest_ = std::move(map_page);
}
if (descriptor_set != VK_NULL_HANDLE) {
return descriptor_set;
}
}
// Try allocating from a new page.
// Check whether the new page would become full right away.
bool new_page_becomes_full = descriptor_sets_per_page_ == 1;
for (uint32_t i = 0; !new_page_becomes_full && i < descriptor_type_count;
++i) {
const VkDescriptorPoolSize& descriptor_count_for_type =
descriptor_counts[i];
for (uint32_t j = 0; j < descriptor_pool_size_count_; ++j) {
const VkDescriptorPoolSize& descriptors_remaining_for_type =
descriptor_pool_sizes_[j];
if (descriptor_count_for_type.type !=
descriptors_remaining_for_type.type) {
continue;
}
assert_true(descriptor_count_for_type.descriptorCount <=
descriptors_remaining_for_type.descriptorCount);
if (descriptor_count_for_type.descriptorCount >=
descriptors_remaining_for_type.descriptorCount) {
new_page_becomes_full = true;
break;
}
}
}
// Allocate from a new page. However, if the new page would become full
// immediately, create a dedicated pool sized for exactly the requested
// descriptors instead, so no space in the pool is left unused.
VkDescriptorPoolCreateInfo new_descriptor_pool_create_info;
new_descriptor_pool_create_info.sType =
VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO;
new_descriptor_pool_create_info.pNext = nullptr;
new_descriptor_pool_create_info.flags = 0;
if (new_page_becomes_full) {
new_descriptor_pool_create_info.maxSets = 1;
new_descriptor_pool_create_info.poolSizeCount = descriptor_type_count;
new_descriptor_pool_create_info.pPoolSizes = descriptor_counts;
} else {
new_descriptor_pool_create_info.maxSets = descriptor_sets_per_page_;
new_descriptor_pool_create_info.poolSizeCount = descriptor_pool_size_count_;
new_descriptor_pool_create_info.pPoolSizes = descriptor_pool_sizes_.get();
}
VkDescriptorPool new_descriptor_pool;
if (dfn.vkCreateDescriptorPool(device, &new_descriptor_pool_create_info,
nullptr, &new_descriptor_pool) != VK_SUCCESS) {
XELOGE(
"LinkedTypeDescriptorSetAllocator: Failed to create a descriptor pool");
return VK_NULL_HANDLE;
}
descriptor_set_allocate_info.descriptorPool = new_descriptor_pool;
if (dfn.vkAllocateDescriptorSets(device, &descriptor_set_allocate_info,
&descriptor_set) != VK_SUCCESS) {
XELOGE("LinkedTypeDescriptorSetAllocator: Failed to allocate descriptors");
dfn.vkDestroyDescriptorPool(device, new_descriptor_pool, nullptr);
return VK_NULL_HANDLE;
}
if (new_page_becomes_full) {
pages_full_.push_back(new_descriptor_pool);
} else {
// Move the previous latest page, which this allocation couldn't be made
// from, into the usable pages map, and make this page the latest one.
if (page_usable_latest_.pool != VK_NULL_HANDLE) {
// Calculate the map key (the maximum number of remaining descriptors of
// any type).
uint32_t latest_page_max_descriptors_remaining =
page_usable_latest_.descriptors_remaining[0].descriptorCount;
for (uint32_t i = 1; i < descriptor_pool_size_count_; ++i) {
latest_page_max_descriptors_remaining = std::max(
latest_page_max_descriptors_remaining,
page_usable_latest_.descriptors_remaining[i].descriptorCount);
}
assert_not_zero(latest_page_max_descriptors_remaining);
pages_usable_.emplace(latest_page_max_descriptors_remaining,
std::move(page_usable_latest_));
}
page_usable_latest_.pool = new_descriptor_pool;
page_usable_latest_.descriptors_remaining =
std::unique_ptr<VkDescriptorPoolSize[]>(
new VkDescriptorPoolSize[descriptor_pool_size_count_]);
for (uint32_t i = 0; i < descriptor_pool_size_count_; ++i) {
const VkDescriptorPoolSize& descriptor_pool_size_for_type =
descriptor_pool_sizes_[i];
page_usable_latest_.descriptors_remaining[i] =
descriptor_pool_size_for_type;
for (uint32_t j = 0; j < descriptor_type_count; ++j) {
const VkDescriptorPoolSize& descriptor_count_for_type =
descriptor_counts[j];
if (descriptor_count_for_type.type !=
descriptor_pool_size_for_type.type) {
continue;
}
page_usable_latest_.descriptors_remaining[i].descriptorCount -=
descriptor_count_for_type.descriptorCount;
break;
}
}
page_usable_latest_.descriptor_sets_remaining =
descriptor_sets_per_page_ - 1;
}
return descriptor_set;
}
} // namespace vulkan
} // namespace ui
} // namespace xe

View File

@ -0,0 +1,125 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2022 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#ifndef XENIA_UI_VULKAN_LINKED_TYPE_DESCRIPTOR_SET_ALLOCATOR_H_
#define XENIA_UI_VULKAN_LINKED_TYPE_DESCRIPTOR_SET_ALLOCATOR_H_
#include <algorithm>
#include <cstdint>
#include <cstring>
#include <map>
#include <memory>
#include <vector>
#include "xenia/base/assert.h"
#include "xenia/ui/vulkan/vulkan_provider.h"
namespace xe {
namespace ui {
namespace vulkan {
// Allocates multiple descriptors in descriptor set layouts consisting of
// descriptors of the types specified during initialization.
//
// "LinkedType" means that the allocator is designed for allocating descriptor
// sets containing descriptors of multiple types together - for instance, it
// will mark the entire page as full as soon as no space is left in it for just
// one of the descriptor types, not necessarily for all of them at once.
//
// The primary usage scenario for this kind of an allocator is allocating image
// and sampler descriptors in a single descriptor set if they both are actually
// used in one. It is expected that the ratio of the numbers of descriptors per
// type specified during the initialization will roughly correspond to the ratio
// of the numbers of descriptors that will actually be allocated. For instance,
// if there are approximately 2 images for each 1 sampler, it's recommended to
// make the image count per page twice the sampler count per page.
//
// If some allocations use just one type, and some use just another, completely
// independently, it's preferable to use separate allocators rather than a
// single one.
//
// This allocator is also suitable for allocating variable-length descriptor
// sets containing descriptors of just a single type.
//
// There's no way to free these descriptors within the allocator object itself;
// per-layout free lists should be used externally.
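//
// A minimal usage sketch (the provider, descriptor_set_layout, pool sizes,
// set count, and requested counts below are illustrative assumptions, not
// values taken from the emulator):
//
//   const VkDescriptorPoolSize kPoolSizes[] = {
//       {VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, 2048},
//       {VK_DESCRIPTOR_TYPE_SAMPLER, 1024},
//   };
//   LinkedTypeDescriptorSetAllocator allocator(provider, kPoolSizes, 2, 256);
//   const VkDescriptorPoolSize set_counts[] = {
//       {VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, 4},
//       {VK_DESCRIPTOR_TYPE_SAMPLER, 2},
//   };
//   VkDescriptorSet set =
//       allocator.Allocate(descriptor_set_layout, set_counts, 2);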
class LinkedTypeDescriptorSetAllocator {
public:
// Multiple descriptor sizes for the same descriptor type, and zero sizes, are
// not allowed.
explicit LinkedTypeDescriptorSetAllocator(
const ui::vulkan::VulkanProvider& provider,
const VkDescriptorPoolSize* descriptor_sizes,
uint32_t descriptor_size_count, uint32_t descriptor_sets_per_page)
: provider_(provider),
descriptor_pool_sizes_(new VkDescriptorPoolSize[descriptor_size_count]),
descriptor_pool_size_count_(descriptor_size_count),
descriptor_sets_per_page_(descriptor_sets_per_page) {
assert_not_zero(descriptor_size_count);
assert_not_zero(descriptor_sets_per_page_);
#ifndef NDEBUG
for (uint32_t i = 0; i < descriptor_size_count; ++i) {
const VkDescriptorPoolSize& descriptor_size = descriptor_sizes[i];
assert_not_zero(descriptor_size.descriptorCount);
for (uint32_t j = 0; j < i; ++j) {
assert_true(descriptor_sizes[j].type != descriptor_size.type);
}
}
#endif
std::memcpy(descriptor_pool_sizes_.get(), descriptor_sizes,
sizeof(VkDescriptorPoolSize) * descriptor_size_count);
}
LinkedTypeDescriptorSetAllocator(
const LinkedTypeDescriptorSetAllocator& allocator) = delete;
LinkedTypeDescriptorSetAllocator& operator=(
const LinkedTypeDescriptorSetAllocator& allocator) = delete;
~LinkedTypeDescriptorSetAllocator() { Reset(); }
void Reset();
VkDescriptorSet Allocate(VkDescriptorSetLayout descriptor_set_layout,
const VkDescriptorPoolSize* descriptor_counts,
uint32_t descriptor_type_count);
private:
struct Page {
VkDescriptorPool pool;
std::unique_ptr<VkDescriptorPoolSize[]> descriptors_remaining;
uint32_t descriptor_sets_remaining;
};
const ui::vulkan::VulkanProvider& provider_;
std::unique_ptr<VkDescriptorPoolSize[]> descriptor_pool_sizes_;
uint32_t descriptor_pool_size_count_;
uint32_t descriptor_sets_per_page_;
std::vector<VkDescriptorPool> pages_full_;
// Because allocations must be contiguous, overflow may happen even if a page
// still has free descriptors, so multiple pages may have free space.
// The map keeps the usable pages sorted by the maximum number of free
// descriptors remaining across all types. Lookups are made with the maximum
// of the requested descriptor counts across all types, since it's pointless
// to check pages that can't even potentially fit the largest requested count
// of a single type; and unlike using the minimum as the key, this doesn't
// degenerate if, for example, 0 descriptors are requested for some type.
// Since this key changes at every allocation from a page, one page is also
// kept outside the map instead of always searching the map for free space,
// and allocation attempts are made from that page first, to avoid removing
// and re-adding a map entry for every allocation.
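// For example (illustrative numbers), with sampled image and sampler types, a
// page with 5 free images and 0 free samplers has the key 5: a request for
// 3 images alone may still be attempted from it, while the per-type check in
// Allocate rejects it for a request of 3 images and 1 sampler.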
std::multimap<uint32_t, Page> pages_usable_;
// Doesn't exist if page_usable_latest_.pool == VK_NULL_HANDLE.
Page page_usable_latest_ = {};
};
} // namespace vulkan
} // namespace ui
} // namespace xe
#endif  // XENIA_UI_VULKAN_LINKED_TYPE_DESCRIPTOR_SET_ALLOCATOR_H_

View File

@ -1,216 +0,0 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2022 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#include "xenia/ui/vulkan/single_type_descriptor_set_allocator.h"
#include "xenia/base/logging.h"
#include "xenia/ui/vulkan/vulkan_util.h"
namespace xe {
namespace ui {
namespace vulkan {
void SingleTypeDescriptorSetAllocator::Reset() {
const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider_.dfn();
VkDevice device = provider_.device();
ui::vulkan::util::DestroyAndNullHandle(dfn.vkDestroyDescriptorPool, device,
page_usable_latest_.pool);
for (const std::pair<uint32_t, Page>& page_pair : pages_usable_) {
dfn.vkDestroyDescriptorPool(device, page_pair.second.pool, nullptr);
}
pages_usable_.clear();
for (VkDescriptorPool pool : pages_full_) {
dfn.vkDestroyDescriptorPool(device, pool, nullptr);
}
pages_full_.clear();
}
VkDescriptorSet SingleTypeDescriptorSetAllocator::Allocate(
VkDescriptorSetLayout descriptor_set_layout, uint32_t descriptor_count) {
assert_not_zero(descriptor_count);
if (descriptor_count == 0) {
return VK_NULL_HANDLE;
}
const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider_.dfn();
VkDevice device = provider_.device();
VkDescriptorSetAllocateInfo descriptor_set_allocate_info;
descriptor_set_allocate_info.sType =
VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO;
descriptor_set_allocate_info.pNext = nullptr;
descriptor_set_allocate_info.descriptorSetCount = 1;
descriptor_set_allocate_info.pSetLayouts = &descriptor_set_layout;
VkDescriptorSet descriptor_set;
if (descriptor_count > descriptor_pool_size_.descriptorCount) {
// Can't allocate in the pool, need a dedicated allocation.
VkDescriptorPoolSize dedicated_descriptor_pool_size;
dedicated_descriptor_pool_size.type = descriptor_pool_size_.type;
dedicated_descriptor_pool_size.descriptorCount = descriptor_count;
VkDescriptorPoolCreateInfo dedicated_descriptor_pool_create_info;
dedicated_descriptor_pool_create_info.sType =
VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO;
dedicated_descriptor_pool_create_info.pNext = nullptr;
dedicated_descriptor_pool_create_info.flags = 0;
dedicated_descriptor_pool_create_info.maxSets = 1;
dedicated_descriptor_pool_create_info.poolSizeCount = 1;
dedicated_descriptor_pool_create_info.pPoolSizes =
&dedicated_descriptor_pool_size;
VkDescriptorPool dedicated_descriptor_pool;
if (dfn.vkCreateDescriptorPool(
device, &dedicated_descriptor_pool_create_info, nullptr,
&dedicated_descriptor_pool) != VK_SUCCESS) {
XELOGE(
"SingleTypeDescriptorSetAllocator: Failed to create a dedicated pool "
"for {} descriptors",
dedicated_descriptor_pool_size.descriptorCount);
return VK_NULL_HANDLE;
}
descriptor_set_allocate_info.descriptorPool = dedicated_descriptor_pool;
if (dfn.vkAllocateDescriptorSets(device, &descriptor_set_allocate_info,
&descriptor_set) != VK_SUCCESS) {
XELOGE(
"SingleTypeDescriptorSetAllocator: Failed to allocate {} descriptors "
"in a dedicated pool",
descriptor_count);
dfn.vkDestroyDescriptorPool(device, dedicated_descriptor_pool, nullptr);
return VK_NULL_HANDLE;
}
pages_full_.push_back(dedicated_descriptor_pool);
return descriptor_set;
}
// Try allocating from the latest page an allocation has happened from, to
// avoid detaching from the map and re-attaching for every allocation.
if (page_usable_latest_.pool != VK_NULL_HANDLE) {
assert_not_zero(page_usable_latest_.descriptors_remaining);
assert_not_zero(page_usable_latest_.descriptor_sets_remaining);
if (page_usable_latest_.descriptors_remaining >= descriptor_count) {
descriptor_set_allocate_info.descriptorPool = page_usable_latest_.pool;
if (dfn.vkAllocateDescriptorSets(device, &descriptor_set_allocate_info,
&descriptor_set) == VK_SUCCESS) {
page_usable_latest_.descriptors_remaining -= descriptor_count;
--page_usable_latest_.descriptor_sets_remaining;
if (!page_usable_latest_.descriptors_remaining ||
!page_usable_latest_.descriptor_sets_remaining) {
pages_full_.push_back(page_usable_latest_.pool);
page_usable_latest_.pool = VK_NULL_HANDLE;
}
return descriptor_set;
}
// Failed to allocate internally even though there should be enough space,
// don't try to allocate from this pool again at all.
pages_full_.push_back(page_usable_latest_.pool);
page_usable_latest_.pool = VK_NULL_HANDLE;
}
}
// If allocating from the latest pool wasn't possible, pick any that has free
// space. Prefer filling pages that have the most free space as they can more
// likely be used for more allocations later.
while (!pages_usable_.empty()) {
auto page_usable_last_it = std::prev(pages_usable_.cend());
if (page_usable_last_it->second.descriptors_remaining < descriptor_count) {
// All other pages_usable_ entries have fewer free descriptors too (the
// remaining count is the map key).
break;
}
// Remove the page from the map unconditionally - in case of a successful
// allocation, it will have a different number of free descriptors, thus a
// new map key (but it will also become page_usable_latest_ instead even),
// or will become full, and in case of a failure to allocate internally even
// though there still should be enough space, it should never be allocated
// from again.
Page map_page = page_usable_last_it->second;
pages_usable_.erase(page_usable_last_it);
descriptor_set_allocate_info.descriptorPool = map_page.pool;
if (dfn.vkAllocateDescriptorSets(device, &descriptor_set_allocate_info,
&descriptor_set) != VK_SUCCESS) {
pages_full_.push_back(map_page.pool);
continue;
}
map_page.descriptors_remaining -= descriptor_count;
--map_page.descriptor_sets_remaining;
if (!map_page.descriptors_remaining ||
!map_page.descriptor_sets_remaining) {
pages_full_.push_back(map_page.pool);
} else {
if (page_usable_latest_.pool != VK_NULL_HANDLE) {
// Make the page with more free descriptors the next to allocate from.
if (map_page.descriptors_remaining >
page_usable_latest_.descriptors_remaining) {
pages_usable_.emplace(page_usable_latest_.descriptors_remaining,
page_usable_latest_);
page_usable_latest_ = map_page;
} else {
pages_usable_.emplace(map_page.descriptors_remaining, map_page);
}
} else {
page_usable_latest_ = map_page;
}
}
return descriptor_set;
}
// Try allocating from a new page.
VkDescriptorPoolCreateInfo new_descriptor_pool_create_info;
new_descriptor_pool_create_info.sType =
VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO;
new_descriptor_pool_create_info.pNext = nullptr;
new_descriptor_pool_create_info.flags = 0;
new_descriptor_pool_create_info.maxSets = descriptor_sets_per_page_;
new_descriptor_pool_create_info.poolSizeCount = 1;
new_descriptor_pool_create_info.pPoolSizes = &descriptor_pool_size_;
VkDescriptorPool new_descriptor_pool;
if (dfn.vkCreateDescriptorPool(device, &new_descriptor_pool_create_info,
nullptr, &new_descriptor_pool) != VK_SUCCESS) {
XELOGE(
"SingleTypeDescriptorSetAllocator: Failed to create a pool for {} sets "
"with {} descriptors",
descriptor_sets_per_page_, descriptor_pool_size_.descriptorCount);
return VK_NULL_HANDLE;
}
descriptor_set_allocate_info.descriptorPool = new_descriptor_pool;
if (dfn.vkAllocateDescriptorSets(device, &descriptor_set_allocate_info,
&descriptor_set) != VK_SUCCESS) {
XELOGE(
"SingleTypeDescriptorSetAllocator: Failed to allocate {} descriptors",
descriptor_count);
dfn.vkDestroyDescriptorPool(device, new_descriptor_pool, nullptr);
return VK_NULL_HANDLE;
}
Page new_page;
new_page.pool = new_descriptor_pool;
new_page.descriptors_remaining =
descriptor_pool_size_.descriptorCount - descriptor_count;
new_page.descriptor_sets_remaining = descriptor_sets_per_page_ - 1;
if (!new_page.descriptors_remaining || !new_page.descriptor_sets_remaining) {
pages_full_.push_back(new_page.pool);
} else {
if (page_usable_latest_.pool != VK_NULL_HANDLE) {
// Make the page with more free descriptors the next to allocate from.
if (new_page.descriptors_remaining >
page_usable_latest_.descriptors_remaining) {
pages_usable_.emplace(page_usable_latest_.descriptors_remaining,
page_usable_latest_);
page_usable_latest_ = new_page;
} else {
pages_usable_.emplace(new_page.descriptors_remaining, new_page);
}
} else {
page_usable_latest_ = new_page;
}
}
return descriptor_set;
}
} // namespace vulkan
} // namespace ui
} // namespace xe

View File

@ -1,84 +0,0 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2022 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#ifndef XENIA_UI_VULKAN_SINGLE_TYPE_DESCRIPTOR_SET_ALLOCATOR_H_
#define XENIA_UI_VULKAN_SINGLE_TYPE_DESCRIPTOR_SET_ALLOCATOR_H_
#include <algorithm>
#include <cstdint>
#include <map>
#include <vector>
#include "xenia/base/assert.h"
#include "xenia/ui/vulkan/vulkan_provider.h"
namespace xe {
namespace ui {
namespace vulkan {
// Allocates multiple descriptors of a single type in descriptor set layouts
// consisting of descriptors of only that type. There's no way to free these
// descriptors within the SingleTypeDescriptorSetAllocator, per-layout free
// lists should be used externally.
class SingleTypeDescriptorSetAllocator {
public:
explicit SingleTypeDescriptorSetAllocator(
const ui::vulkan::VulkanProvider& provider,
VkDescriptorType descriptor_type, uint32_t descriptors_per_page,
uint32_t descriptor_sets_per_page)
: provider_(provider),
descriptor_sets_per_page_(descriptor_sets_per_page) {
assert_not_zero(descriptor_sets_per_page_);
descriptor_pool_size_.type = descriptor_type;
// Not allocating sets with 0 descriptors using the allocator - pointless to
// have the descriptor count below the set count.
descriptor_pool_size_.descriptorCount =
std::max(descriptors_per_page, descriptor_sets_per_page);
}
SingleTypeDescriptorSetAllocator(
const SingleTypeDescriptorSetAllocator& allocator) = delete;
SingleTypeDescriptorSetAllocator& operator=(
const SingleTypeDescriptorSetAllocator& allocator) = delete;
~SingleTypeDescriptorSetAllocator() { Reset(); }
void Reset();
VkDescriptorSet Allocate(VkDescriptorSetLayout descriptor_set_layout,
uint32_t descriptor_count);
private:
struct Page {
VkDescriptorPool pool;
uint32_t descriptors_remaining;
uint32_t descriptor_sets_remaining;
};
const ui::vulkan::VulkanProvider& provider_;
VkDescriptorPoolSize descriptor_pool_size_;
uint32_t descriptor_sets_per_page_;
std::vector<VkDescriptorPool> pages_full_;
// Because allocations must be contiguous, overflow may happen even if a page
// still has free descriptors, so multiple pages may have free space.
// To avoid removing and re-adding the page to the map that keeps them sorted
// (the key is the number of free descriptors remaining, and it changes at
// every allocation from a page), instead of always looking for a free space
// in the map, maintaining one page outside the map, and allocation attempts
// will be made from that page first.
std::multimap<uint32_t, Page> pages_usable_;
// Doesn't exist if page_usable_latest_.pool == VK_NULL_HANDLE.
Page page_usable_latest_ = {};
};
} // namespace vulkan
} // namespace ui
} // namespace xe
#endif // XENIA_UI_VULKAN_SINGLE_TYPE_DESCRIPTOR_SET_ALLOCATOR_H_

@ -1 +1 @@
Subproject commit 7eba2825887e49d3a72b30e0a7480bd427a5bab0 Subproject commit fe71eb790c7d085cd3c6a7b71a50167b4da06e69