Merge branch 'master' of https://github.com/xenia-project/xenia into canary_experimental
commit 1d00372e6b

.drone.star | 33
@@ -38,51 +38,40 @@ def targets_android(platform):
'imgui',
'mspack',
'snappy',
'spirv-tools',
'xxhash',
# 'xenia-core',
'xenia-core',
# 'xenia-app-discord',
# 'xenia-apu',
# 'xenia-apu-nop',
'xenia-apu',
'xenia-apu-nop',
'xenia-base',
'xenia-base-tests',
# 'xenia-cpu',
'xenia-cpu',
# 'xenia-cpu-tests',
# 'xenia-cpu-ppc-tests',
# 'xenia-cpu-backend-x64',
# 'xenia-debug-ui',
# 'xenia-gpu',
# 'xenia-gpu-shader-compiler',
# 'xenia-gpu-null',
# 'xenia-gpu-vulkan',
'xenia-gpu',
'xenia-gpu-shader-compiler',
'xenia-gpu-null',
'xenia-gpu-vulkan',
# 'xenia-gpu-vulkan-trace-viewer',
# 'xenia-gpu-vulkan-trace-dump',
'xenia-gpu-vulkan-trace-dump',
'xenia-hid',
# 'xenia-hid-demo',
'xenia-hid-nop',
# 'xenia-kernel',
'xenia-kernel',
'xenia-ui',
'xenia-ui-spirv',
# 'xenia-ui-vulkan',
'xenia-ui-vulkan',
# 'xenia-ui-window-vulkan-demo',
'xenia-vfs',
'xenia-vfs-dump',
]
if platform == 'Android-x86_64':
targets.extend([
'xenia-core',
'xenia-apu',
'xenia-apu-nop',
'xenia-cpu',
'xenia-cpu-tests',
'xenia-cpu-ppc-tests',
'xenia-cpu-backend-x64',
'xenia-debug-ui',
'xenia-gpu',
'xenia-gpu-null',
'xenia-gpu-vulkan',
'xenia-gpu-shader-compiler',
'xenia-kernel',
])
return targets
@@ -15,7 +15,6 @@ project("xenia-app")
"xenia-base",
"xenia-core",
"xenia-cpu",
"xenia-cpu-backend-x64",
"xenia-debug-ui",
"xenia-gpu",
"xenia-gpu-null",
@@ -60,6 +59,11 @@ project("xenia-app")
project_root,
})

filter("architecture:x86_64")
links({
"xenia-cpu-backend-x64",
})

filter("platforms:Windows")
files({
"main_resources.rc",
@@ -477,7 +477,7 @@ void EmulatorApp::EmulatorThread() {
// Setup and initialize all subsystems. If we can't do something
// (unsupported system, memory issues, etc) this will fail early.
X_STATUS result = emulator_->Setup(
emulator_window_->window(), emulator_window_->imgui_drawer(),
emulator_window_->window(), emulator_window_->imgui_drawer(), true,
CreateAudioSystem, CreateGraphicsSystem, CreateInputDrivers);
if (XFAILED(result)) {
XELOGE("Failed to setup emulator: {:08X}", result);
@@ -0,0 +1,88 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2022 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/

#include "xenia/base/exception_handler.h"

namespace xe {

// Based on VIXL Instruction::IsLoad and IsStore.
// https://github.com/Linaro/vixl/blob/d48909dd0ac62197edb75d26ed50927e4384a199/src/aarch64/instructions-aarch64.cc#L484
//
// Copyright 2015, VIXL authors
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// * Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
// * Neither the name of ARM Limited nor the names of its contributors may be
// used to endorse or promote products derived from this software without
// specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND
// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
bool IsArm64LoadPrefetchStore(uint32_t instruction, bool& is_store_out) {
if ((instruction & kArm64LoadLiteralFMask) == kArm64LoadLiteralFixed) {
return true;
}
if ((instruction & kArm64LoadStoreAnyFMask) != kArm64LoadStoreAnyFixed) {
return false;
}
if ((instruction & kArm64LoadStorePairAnyFMask) ==
kArm64LoadStorePairAnyFixed) {
is_store_out = !(instruction & kArm64LoadStorePairLoadBit);
return true;
}
switch (Arm64LoadStoreOp(instruction & kArm64LoadStoreMask)) {
case Arm64LoadStoreOp::kLDRB_w:
case Arm64LoadStoreOp::kLDRH_w:
case Arm64LoadStoreOp::kLDR_w:
case Arm64LoadStoreOp::kLDR_x:
case Arm64LoadStoreOp::kLDRSB_x:
case Arm64LoadStoreOp::kLDRSH_x:
case Arm64LoadStoreOp::kLDRSW_x:
case Arm64LoadStoreOp::kLDRSB_w:
case Arm64LoadStoreOp::kLDRSH_w:
case Arm64LoadStoreOp::kLDR_b:
case Arm64LoadStoreOp::kLDR_h:
case Arm64LoadStoreOp::kLDR_s:
case Arm64LoadStoreOp::kLDR_d:
case Arm64LoadStoreOp::kLDR_q:
case Arm64LoadStoreOp::kPRFM:
is_store_out = false;
return true;
case Arm64LoadStoreOp::kSTRB_w:
case Arm64LoadStoreOp::kSTRH_w:
case Arm64LoadStoreOp::kSTR_w:
case Arm64LoadStoreOp::kSTR_x:
case Arm64LoadStoreOp::kSTR_b:
case Arm64LoadStoreOp::kSTR_h:
case Arm64LoadStoreOp::kSTR_s:
case Arm64LoadStoreOp::kSTR_d:
case Arm64LoadStoreOp::kSTR_q:
is_store_out = true;
return true;
default:
return false;
}
}

} // namespace xe
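For reference, a minimal sketch (not part of this diff) of how the new helper can be exercised, assuming only the declarations added in this commit and the include path shown above:

#include <cstdint>
#include <cstdio>

#include "xenia/base/exception_handler.h"

int main() {
  // LDR w0, [x0] assembles to 0xB9400000 (32-bit load, unsigned offset 0).
  // It passes the kArm64LoadStoreAnyFMask check, is not a pair access, and its
  // masked opcode equals Arm64LoadStoreOp::kLDR_w, so it is reported as a load.
  bool is_store = true;
  bool recognized =
      xe::IsArm64LoadPrefetchStore(UINT32_C(0xB9400000), is_store);
  std::printf("recognized=%d is_store=%d\n", recognized, is_store);  // 1, 0
  return 0;
}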
@@ -2,7 +2,7 @@
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2015 Ben Vanik. All rights reserved. *
* Copyright 2022 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
@@ -10,14 +10,97 @@
#ifndef XENIA_BASE_EXCEPTION_HANDLER_H_
#define XENIA_BASE_EXCEPTION_HANDLER_H_

#include <cstdint>
#include <functional>
#include <vector>

#include "xenia/base/assert.h"
#include "xenia/base/x64_context.h"
#include "xenia/base/host_thread_context.h"

namespace xe {

// AArch64 load and store decoding based on VIXL.
// https://github.com/Linaro/vixl/blob/ae5957cd66517b3f31dbf37e9bf39db6594abfe3/src/aarch64/constants-aarch64.h
//
// Copyright 2015, VIXL authors
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// * Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
// * Neither the name of ARM Limited nor the names of its contributors may be
// used to endorse or promote products derived from this software without
// specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND
// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

// `Instruction address + literal offset` loads.
// This includes PRFM_lit.
constexpr uint32_t kArm64LoadLiteralFMask = UINT32_C(0x3B000000);
constexpr uint32_t kArm64LoadLiteralFixed = UINT32_C(0x18000000);

constexpr uint32_t kArm64LoadStoreAnyFMask = UINT32_C(0x0A000000);
constexpr uint32_t kArm64LoadStoreAnyFixed = UINT32_C(0x08000000);

constexpr uint32_t kArm64LoadStorePairAnyFMask = UINT32_C(0x3A000000);
constexpr uint32_t kArm64LoadStorePairAnyFixed = UINT32_C(0x28000000);
constexpr uint32_t kArm64LoadStorePairLoadBit = UINT32_C(1) << 22;

constexpr uint32_t kArm64LoadStoreMask = UINT32_C(0xC4C00000);
enum class Arm64LoadStoreOp : uint32_t {
kSTRB_w = UINT32_C(0x00000000),
kSTRH_w = UINT32_C(0x40000000),
kSTR_w = UINT32_C(0x80000000),
kSTR_x = UINT32_C(0xC0000000),
kLDRB_w = UINT32_C(0x00400000),
kLDRH_w = UINT32_C(0x40400000),
kLDR_w = UINT32_C(0x80400000),
kLDR_x = UINT32_C(0xC0400000),
kLDRSB_x = UINT32_C(0x00800000),
kLDRSH_x = UINT32_C(0x40800000),
kLDRSW_x = UINT32_C(0x80800000),
kLDRSB_w = UINT32_C(0x00C00000),
kLDRSH_w = UINT32_C(0x40C00000),
kSTR_b = UINT32_C(0x04000000),
kSTR_h = UINT32_C(0x44000000),
kSTR_s = UINT32_C(0x84000000),
kSTR_d = UINT32_C(0xC4000000),
kSTR_q = UINT32_C(0x04800000),
kLDR_b = UINT32_C(0x04400000),
kLDR_h = UINT32_C(0x44400000),
kLDR_s = UINT32_C(0x84400000),
kLDR_d = UINT32_C(0xC4400000),
kLDR_q = UINT32_C(0x04C00000),
kPRFM = UINT32_C(0xC0800000),
};

constexpr uint32_t kArm64LoadStoreOffsetFMask = UINT32_C(0x3B200C00);
enum class Arm64LoadStoreOffsetFixed : uint32_t {
kUnscaledOffset = UINT32_C(0x38000000),
kPostIndex = UINT32_C(0x38000400),
kPreIndex = UINT32_C(0x38000C00),
kRegisterOffset = UINT32_C(0x38200800),
};

constexpr uint32_t kArm64LoadStoreUnsignedOffsetFMask = UINT32_C(0x3B000000);
constexpr uint32_t kArm64LoadStoreUnsignedOffsetFixed = UINT32_C(0x39000000);

bool IsArm64LoadPrefetchStore(uint32_t instruction, bool& is_store_out);

class Exception {
public:
enum class Code {
@@ -32,7 +115,7 @@ class Exception {
kWrite,
};

void InitializeAccessViolation(X64Context* thread_context,
void InitializeAccessViolation(HostThreadContext* thread_context,
uint64_t fault_address,
AccessViolationOperation operation) {
code_ = Code::kAccessViolation;
@@ -40,7 +123,7 @@ class Exception {
fault_address_ = fault_address;
access_violation_operation_ = operation;
}
void InitializeIllegalInstruction(X64Context* thread_context) {
void InitializeIllegalInstruction(HostThreadContext* thread_context) {
code_ = Code::kIllegalInstruction;
thread_context_ = thread_context;
}
@@ -48,24 +131,67 @@ class Exception {
Code code() const { return code_; }

// Returns the platform-specific thread context info.
X64Context* thread_context() const { return thread_context_; }
// Note that certain registers must be modified through Modify* proxy
// functions rather than directly:
// x86-64:
// - General-purpose registers (r##, r8-r15).
// - XMM registers.
// AArch64:
// - General-purpose registers (Xn), including FP and LR.
// - SIMD and floating-point registers (Vn).
HostThreadContext* thread_context() const { return thread_context_; }

#if XE_ARCH_AMD64
// Returns the program counter where the exception occurred.
// RIP on x64.
uint64_t pc() const { return thread_context_->rip; }
// Sets the program counter where execution will resume.
void set_resume_pc(uint64_t pc) { thread_context_->rip = pc; }
#else
// Returns the program counter where the exception occurred.
// RIP on x64.
uint64_t pc() const {
#if XE_ARCH_AMD64
return thread_context_->rip;
#elif XE_ARCH_ARM64
return thread_context_->pc;
#else
assert_always();
return 0;
#endif // XE_ARCH
}

// Sets the program counter where execution will resume.
void set_resume_pc(uint64_t pc) { assert_always(); }
#endif
void set_resume_pc(uint64_t pc) {
#if XE_ARCH_AMD64
thread_context_->rip = pc;
#elif XE_ARCH_ARM64
thread_context_->pc = pc;
#else
assert_always();
#endif // XE_ARCH
}

#if XE_ARCH_AMD64
// The index is relative to X64Register::kIntRegisterFirst.
uint64_t& ModifyIntRegister(uint32_t index) {
assert_true(index <= 15);
modified_int_registers_ |= UINT16_C(1) << index;
return thread_context_->int_registers[index];
}
uint16_t modified_int_registers() const { return modified_int_registers_; }
vec128_t& ModifyXmmRegister(uint32_t index) {
assert_true(index <= 15);
modified_xmm_registers_ |= UINT16_C(1) << index;
return thread_context_->xmm_registers[index];
}
uint16_t modified_xmm_registers() const { return modified_xmm_registers_; }
#elif XE_ARCH_ARM64
uint64_t& ModifyXRegister(uint32_t index) {
assert_true(index <= 30);
modified_x_registers_ |= UINT32_C(1) << index;
return thread_context_->x[index];
}
uint32_t modified_x_registers() const { return modified_x_registers_; }
vec128_t& ModifyVRegister(uint32_t index) {
assert_true(index <= 31);
modified_v_registers_ |= UINT32_C(1) << index;
return thread_context_->v[index];
}
uint32_t modified_v_registers() const { return modified_v_registers_; }
#endif // XE_ARCH

// In case of AV, address that was read from/written to.
uint64_t fault_address() const { return fault_address_; }
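To illustrate how the Modify* accessors and set_resume_pc are meant to be used together, here is a hedged sketch (not part of this diff) of an x86-64 handler of the kind registered through ExceptionHandler::Install. The bool(Exception*, void*) signature is inferred from how handlers_ entries are invoked in the platform implementations below, and the 3-byte skip length is only a placeholder for whatever an instruction decoder would report.

bool HandleMmioAccess(xe::Exception* ex, void* data) {
  if (ex->code() != xe::Exception::Code::kAccessViolation) {
    return false;  // Not handled; let the next handler or the OS see it.
  }
  // Pretend the faulting load was emulated: write the emulated result into the
  // destination register (index 0 is relative to X64Register::kIntRegisterFirst,
  // i.e. rax), which also marks that register as dirty, then resume past the
  // faulting instruction.
  ex->ModifyIntRegister(0) = 0;
  ex->set_resume_pc(ex->pc() + 3);
  return true;  // Handled; dirty registers are written back to the OS context.
}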
@@ -77,7 +203,14 @@ class Exception {

private:
Code code_ = Code::kInvalidException;
X64Context* thread_context_ = nullptr;
HostThreadContext* thread_context_ = nullptr;
#if XE_ARCH_AMD64
uint16_t modified_int_registers_ = 0;
uint16_t modified_xmm_registers_ = 0;
#elif XE_ARCH_ARM64
uint32_t modified_x_registers_ = 0;
uint32_t modified_v_registers_ = 0;
#endif // XE_ARCH
uint64_t fault_address_ = 0;
AccessViolationOperation access_violation_operation_ =
AccessViolationOperation::kUnknown;
@@ -1,35 +0,0 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2015 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/

#include "xenia/base/exception_handler.h"

#include "xenia/base/assert.h"
#include "xenia/base/math.h"
#include "xenia/base/platform_linux.h"

namespace xe {

// This can be as large as needed, but isn't often needed.
// As we will be sometimes firing many exceptions we want to avoid having to
// scan the table too much or invoke many custom handlers.
constexpr size_t kMaxHandlerCount = 8;

// All custom handlers, left-aligned and null terminated.
// Executed in order.
std::pair<ExceptionHandler::Handler, void*> handlers_[kMaxHandlerCount];

void ExceptionHandler::Install(Handler fn, void* data) {
// TODO(dougvj) stub
}

void ExceptionHandler::Uninstall(Handler fn, void* data) {
// TODO(dougvj) stub
}

} // namespace xe
@@ -2,17 +2,285 @@
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2017 Ben Vanik. All rights reserved. *
* Copyright 2022 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/

#include "xenia/base/exception_handler.h"

#include <signal.h>
#include <ucontext.h>
#include <cstdint>

#include "xenia/base/assert.h"
#include "xenia/base/host_thread_context.h"
#include "xenia/base/logging.h"
#include "xenia/base/math.h"
#include "xenia/base/platform.h"

namespace xe {

// TODO(DrChat): Exception handling on linux.
void ExceptionHandler::Install(Handler fn, void* data) {}
void ExceptionHandler::Uninstall(Handler fn, void* data) {}
bool signal_handlers_installed_ = false;
struct sigaction original_sigill_handler_;
struct sigaction original_sigsegv_handler_;

} // namespace xe
// This can be as large as needed, but isn't often needed.
// As we will be sometimes firing many exceptions we want to avoid having to
// scan the table too much or invoke many custom handlers.
constexpr size_t kMaxHandlerCount = 8;

// All custom handlers, left-aligned and null terminated.
// Executed in order.
std::pair<ExceptionHandler::Handler, void*> handlers_[kMaxHandlerCount];

static void ExceptionHandlerCallback(int signal_number, siginfo_t* signal_info,
void* signal_context) {
mcontext_t& mcontext =
reinterpret_cast<ucontext_t*>(signal_context)->uc_mcontext;

HostThreadContext thread_context;

#if XE_ARCH_AMD64
thread_context.rip = uint64_t(mcontext.gregs[REG_RIP]);
thread_context.eflags = uint32_t(mcontext.gregs[REG_EFL]);
// The REG_ order may be different than the register indices in the
// instruction encoding.
thread_context.rax = uint64_t(mcontext.gregs[REG_RAX]);
thread_context.rcx = uint64_t(mcontext.gregs[REG_RCX]);
thread_context.rdx = uint64_t(mcontext.gregs[REG_RDX]);
thread_context.rbx = uint64_t(mcontext.gregs[REG_RBX]);
thread_context.rsp = uint64_t(mcontext.gregs[REG_RSP]);
thread_context.rbp = uint64_t(mcontext.gregs[REG_RBP]);
thread_context.rsi = uint64_t(mcontext.gregs[REG_RSI]);
thread_context.rdi = uint64_t(mcontext.gregs[REG_RDI]);
thread_context.r8 = uint64_t(mcontext.gregs[REG_R8]);
thread_context.r9 = uint64_t(mcontext.gregs[REG_R9]);
thread_context.r10 = uint64_t(mcontext.gregs[REG_R10]);
thread_context.r11 = uint64_t(mcontext.gregs[REG_R11]);
thread_context.r12 = uint64_t(mcontext.gregs[REG_R12]);
thread_context.r13 = uint64_t(mcontext.gregs[REG_R13]);
thread_context.r14 = uint64_t(mcontext.gregs[REG_R14]);
thread_context.r15 = uint64_t(mcontext.gregs[REG_R15]);
std::memcpy(thread_context.xmm_registers, mcontext.fpregs->_xmm,
sizeof(thread_context.xmm_registers));
#elif XE_ARCH_ARM64
std::memcpy(thread_context.x, mcontext.regs, sizeof(thread_context.x));
thread_context.sp = mcontext.sp;
thread_context.pc = mcontext.pc;
thread_context.pstate = mcontext.pstate;
struct fpsimd_context* mcontext_fpsimd = nullptr;
struct esr_context* mcontext_esr = nullptr;
for (struct _aarch64_ctx* mcontext_extension =
reinterpret_cast<struct _aarch64_ctx*>(mcontext.__reserved);
mcontext_extension->magic;
mcontext_extension = reinterpret_cast<struct _aarch64_ctx*>(
reinterpret_cast<uint8_t*>(mcontext_extension) +
mcontext_extension->size)) {
switch (mcontext_extension->magic) {
case FPSIMD_MAGIC:
mcontext_fpsimd =
reinterpret_cast<struct fpsimd_context*>(mcontext_extension);
break;
case ESR_MAGIC:
mcontext_esr =
reinterpret_cast<struct esr_context*>(mcontext_extension);
break;
default:
break;
}
}
assert_not_null(mcontext_fpsimd);
if (mcontext_fpsimd) {
thread_context.fpsr = mcontext_fpsimd->fpsr;
thread_context.fpcr = mcontext_fpsimd->fpcr;
std::memcpy(thread_context.v, mcontext_fpsimd->vregs,
sizeof(thread_context.v));
}
#endif // XE_ARCH

Exception ex;
switch (signal_number) {
case SIGILL:
ex.InitializeIllegalInstruction(&thread_context);
break;
case SIGSEGV: {
Exception::AccessViolationOperation access_violation_operation;
#if XE_ARCH_AMD64
// x86_pf_error_code::X86_PF_WRITE
constexpr uint64_t kX86PageFaultErrorCodeWrite = UINT64_C(1) << 1;
access_violation_operation =
(uint64_t(mcontext.gregs[REG_ERR]) & kX86PageFaultErrorCodeWrite)
? Exception::AccessViolationOperation::kWrite
: Exception::AccessViolationOperation::kRead;
#elif XE_ARCH_ARM64
// For a Data Abort (EC - ESR_EL1 bits 31:26 - 0b100100 from a lower
// Exception Level, 0b100101 without a change in the Exception Level),
// bit 6 is 0 for reading from a memory location, 1 for writing to a
// memory location.
if (mcontext_esr && ((mcontext_esr->esr >> 26) & 0b111110) == 0b100100) {
access_violation_operation =
(mcontext_esr->esr & (UINT64_C(1) << 6))
? Exception::AccessViolationOperation::kWrite
: Exception::AccessViolationOperation::kRead;
} else {
// Determine the memory access direction based on which instruction has
// requested it.
// esr_context may be unavailable on certain hosts (for instance, on
// Android, it was added only in NDK r16 - which is the first NDK
// version to support the Android API level 27, while NDK r15 doesn't
// have esr_context in its API 26 sigcontext.h).
// On AArch64 (unlike on AArch32), the program counter is the address of
// the currently executing instruction.
bool instruction_is_store;
if (IsArm64LoadPrefetchStore(
*reinterpret_cast<const uint32_t*>(mcontext.pc),
instruction_is_store)) {
access_violation_operation =
instruction_is_store ? Exception::AccessViolationOperation::kWrite
: Exception::AccessViolationOperation::kRead;
} else {
assert_always(
"No ESR in the exception thread context, or it's not a Data "
"Abort, and the faulting instruction is not a known load, "
"prefetch or store instruction");
access_violation_operation =
Exception::AccessViolationOperation::kUnknown;
}
}
#else
access_violation_operation =
Exception::AccessViolationOperation::kUnknown;
#endif // XE_ARCH
ex.InitializeAccessViolation(
&thread_context, reinterpret_cast<uint64_t>(signal_info->si_addr),
access_violation_operation);
} break;
default:
assert_unhandled_case(signal_number);
}

for (size_t i = 0; i < xe::countof(handlers_) && handlers_[i].first; ++i) {
if (handlers_[i].first(&ex, handlers_[i].second)) {
// Exception handled.
#if XE_ARCH_AMD64
mcontext.gregs[REG_RIP] = greg_t(thread_context.rip);
mcontext.gregs[REG_EFL] = greg_t(thread_context.eflags);
uint32_t modified_register_index;
// The order must match the order in X64Register.
static const size_t kIntRegisterMap[] = {
REG_RAX, REG_RCX, REG_RDX, REG_RBX, REG_RSP, REG_RBP,
REG_RSI, REG_RDI, REG_R8, REG_R9, REG_R10, REG_R11,
REG_R12, REG_R13, REG_R14, REG_R15,
};
uint16_t modified_int_registers_remaining = ex.modified_int_registers();
while (xe::bit_scan_forward(modified_int_registers_remaining,
&modified_register_index)) {
modified_int_registers_remaining &=
~(UINT16_C(1) << modified_register_index);
mcontext.gregs[kIntRegisterMap[modified_register_index]] =
thread_context.int_registers[modified_register_index];
}
uint16_t modified_xmm_registers_remaining = ex.modified_xmm_registers();
while (xe::bit_scan_forward(modified_xmm_registers_remaining,
&modified_register_index)) {
modified_xmm_registers_remaining &=
~(UINT16_C(1) << modified_register_index);
std::memcpy(&mcontext.fpregs->_xmm[modified_register_index],
&thread_context.xmm_registers[modified_register_index],
sizeof(vec128_t));
}
#elif XE_ARCH_ARM64
uint32_t modified_register_index;
uint32_t modified_x_registers_remaining = ex.modified_x_registers();
while (xe::bit_scan_forward(modified_x_registers_remaining,
&modified_register_index)) {
modified_x_registers_remaining &=
~(UINT32_C(1) << modified_register_index);
mcontext.regs[modified_register_index] =
thread_context.x[modified_register_index];
}
mcontext.sp = thread_context.sp;
mcontext.pc = thread_context.pc;
mcontext.pstate = thread_context.pstate;
if (mcontext_fpsimd) {
mcontext_fpsimd->fpsr = thread_context.fpsr;
mcontext_fpsimd->fpcr = thread_context.fpcr;
uint32_t modified_v_registers_remaining = ex.modified_v_registers();
while (xe::bit_scan_forward(modified_v_registers_remaining,
&modified_register_index)) {
modified_v_registers_remaining &=
~(UINT32_C(1) << modified_register_index);
std::memcpy(&mcontext_fpsimd->vregs[modified_register_index],
&thread_context.v[modified_register_index],
sizeof(vec128_t));
mcontext.regs[modified_register_index] =
thread_context.x[modified_register_index];
}
}
#endif // XE_ARCH
return;
}
}
}

void ExceptionHandler::Install(Handler fn, void* data) {
if (!signal_handlers_installed_) {
struct sigaction signal_handler;

std::memset(&signal_handler, 0, sizeof(signal_handler));
signal_handler.sa_sigaction = ExceptionHandlerCallback;
signal_handler.sa_flags = SA_SIGINFO;

if (sigaction(SIGILL, &signal_handler, &original_sigill_handler_) != 0) {
assert_always("Failed to install new SIGILL handler");
}
if (sigaction(SIGSEGV, &signal_handler, &original_sigsegv_handler_) != 0) {
assert_always("Failed to install new SIGSEGV handler");
}
signal_handlers_installed_ = true;
}

for (size_t i = 0; i < xe::countof(handlers_); ++i) {
if (!handlers_[i].first) {
handlers_[i].first = fn;
handlers_[i].second = data;
return;
}
}
assert_always("Too many exception handlers installed");
}

void ExceptionHandler::Uninstall(Handler fn, void* data) {
for (size_t i = 0; i < xe::countof(handlers_); ++i) {
if (handlers_[i].first == fn && handlers_[i].second == data) {
for (; i < xe::countof(handlers_) - 1; ++i) {
handlers_[i] = handlers_[i + 1];
}
handlers_[i].first = nullptr;
handlers_[i].second = nullptr;
break;
}
}

bool has_any = false;
for (size_t i = 0; i < xe::countof(handlers_); ++i) {
if (handlers_[i].first) {
has_any = true;
break;
}
}
if (!has_any) {
if (signal_handlers_installed_) {
if (sigaction(SIGILL, &original_sigill_handler_, NULL) != 0) {
assert_always("Failed to restore original SIGILL handler");
}
if (sigaction(SIGSEGV, &original_sigsegv_handler_, NULL) != 0) {
assert_always("Failed to restore original SIGSEGV handler");
}
signal_handlers_installed_ = false;
}
}
}

} // namespace xe
@@ -35,8 +35,7 @@ LONG CALLBACK ExceptionHandlerCallback(PEXCEPTION_POINTERS ex_info) {
return EXCEPTION_CONTINUE_SEARCH;
}

// TODO(benvanik): avoid this by mapping X64Context virtual?
X64Context thread_context;
HostThreadContext thread_context;
thread_context.rip = ex_info->ContextRecord->Rip;
thread_context.eflags = ex_info->ContextRecord->EFlags;
std::memcpy(thread_context.int_registers, &ex_info->ContextRecord->Rax,
@@ -79,8 +78,26 @@ LONG CALLBACK ExceptionHandlerCallback(PEXCEPTION_POINTERS ex_info) {
for (size_t i = 0; i < xe::countof(handlers_) && handlers_[i].first; ++i) {
if (handlers_[i].first(&ex, handlers_[i].second)) {
// Exception handled.
// TODO(benvanik): update all thread state? Dirty flags?
ex_info->ContextRecord->Rip = thread_context.rip;
ex_info->ContextRecord->EFlags = thread_context.eflags;
uint32_t modified_register_index;
uint16_t modified_int_registers_remaining = ex.modified_int_registers();
while (xe::bit_scan_forward(modified_int_registers_remaining,
&modified_register_index)) {
modified_int_registers_remaining &=
~(UINT16_C(1) << modified_register_index);
(&ex_info->ContextRecord->Rax)[modified_register_index] =
thread_context.int_registers[modified_register_index];
}
uint16_t modified_xmm_registers_remaining = ex.modified_xmm_registers();
while (xe::bit_scan_forward(modified_xmm_registers_remaining,
&modified_register_index)) {
modified_xmm_registers_remaining &=
~(UINT16_C(1) << modified_register_index);
std::memcpy(&ex_info->ContextRecord->Xmm0 + modified_register_index,
&thread_context.xmm_registers[modified_register_index],
sizeof(vec128_t));
}
return EXCEPTION_CONTINUE_EXECUTION;
}
}
@@ -0,0 +1,95 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2022 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/

#include "xenia/base/host_thread_context.h"

#include "xenia/base/assert.h"
#include "xenia/base/platform.h"
#include "xenia/base/string_util.h"

namespace xe {

// NOTE: this order matches 1:1 with the HostRegister enums.
static const char* kRegisterNames[] = {
#if XE_ARCH_AMD64
"rip", "eflags", "rax", "rcx", "rdx", "rbx", "rsp",
"rbp", "rsi", "rdi", "r8", "r9", "r10", "r11",
"r12", "r13", "r14", "r15", "xmm0", "xmm1", "xmm2",
"xmm3", "xmm4", "xmm5", "xmm6", "xmm7", "xmm8", "xmm9",
"xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15",
#elif XE_ARCH_ARM64
"x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9",
"x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x18", "x19",
"x20", "x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28", "x29",
"x30", "sp", "pc", "pstate", "fpsr", "fpcr", "v0", "v1", "v2", "v3",
"v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13",
"v14", "v15", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23",
"v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31",
#endif // XE_ARCH
};

const char* HostThreadContext::GetRegisterName(HostRegister reg) {
return kRegisterNames[int(reg)];
}

std::string HostThreadContext::GetStringFromValue(HostRegister reg,
bool hex) const {
#if XE_ARCH_AMD64
switch (reg) {
case X64Register::kRip:
return hex ? string_util::to_hex_string(rip) : std::to_string(rip);
case X64Register::kEflags:
return hex ? string_util::to_hex_string(eflags) : std::to_string(eflags);
default:
if (reg >= X64Register::kIntRegisterFirst &&
reg <= X64Register::kIntRegisterLast) {
auto value =
int_registers[int(reg) - int(X64Register::kIntRegisterFirst)];
return hex ? string_util::to_hex_string(value) : std::to_string(value);
} else if (reg >= X64Register::kXmm0 && reg <= X64Register::kXmm15) {
auto value = xmm_registers[int(reg) - int(X64Register::kXmm0)];
return hex ? string_util::to_hex_string(value) : xe::to_string(value);
} else {
assert_unhandled_case(reg);
return std::string();
}
}
#elif XE_ARCH_ARM64
switch (reg) {
case Arm64Register::kSp:
return hex ? string_util::to_hex_string(sp) : std::to_string(sp);
case Arm64Register::kPc:
return hex ? string_util::to_hex_string(pc) : std::to_string(pc);
case Arm64Register::kPstate:
return hex ? string_util::to_hex_string(pstate) : std::to_string(pstate);
case Arm64Register::kFpsr:
return hex ? string_util::to_hex_string(fpsr) : std::to_string(fpsr);
case Arm64Register::kFpcr:
return hex ? string_util::to_hex_string(fpcr) : std::to_string(fpcr);
default:
if (reg >= Arm64Register::kX0 && reg <= Arm64Register::kX30) {
auto value = x[int(reg) - int(Arm64Register::kX0)];
return hex ? string_util::to_hex_string(value) : std::to_string(value);
} else if (reg >= Arm64Register::kV0 && reg <= Arm64Register::kV31) {
auto value = v[int(reg) - int(Arm64Register::kV0)];
return hex ? string_util::to_hex_string(value) : xe::to_string(value);
} else {
assert_unhandled_case(reg);
return std::string();
}
}
#else
assert_always(
"HostThreadContext::GetStringFromValue not implemented for the target "
"CPU architecture");
return std::string();
#endif // XE_ARCH
}

} // namespace xe
@@ -2,13 +2,13 @@
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2015 Ben Vanik. All rights reserved. *
* Copyright 2022 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/

#ifndef XENIA_BASE_X64_CONTEXT_H_
#define XENIA_BASE_X64_CONTEXT_H_
#ifndef XENIA_BASE_HOST_THREAD_CONTEXT_H_
#define XENIA_BASE_HOST_THREAD_CONTEXT_H_

#include <cstdint>
#include <string>
@@ -22,15 +22,18 @@

namespace xe {

class X64Context;
// NOTE: The order of the registers in the enumerations must match the order in
// the string table in host_thread_context.cc, as well as remapping tables in
// exception handler implementations.

#if XE_ARCH_AMD64
enum class X64Register {
// NOTE: this order matches 1:1 with the order in the X64Context.
// NOTE: this order matches 1:1 with a string table in the x64_context.cc.
kRip,
kEflags,
kRax,

kIntRegisterFirst,
// The order matches the indices in the instruction encoding, as well as the
// Windows CONTEXT structure.
kRax = kIntRegisterFirst,
kRcx,
kRdx,
kRbx,
@@ -46,6 +49,8 @@ enum class X64Register {
kR13,
kR14,
kR15,
kIntRegisterLast = kR15,

kXmm0,
kXmm1,
kXmm2,
@@ -64,8 +69,91 @@ enum class X64Register {
kXmm15,
};

class X64Context {
enum class Arm64Register {
kX0,
kX1,
kX2,
kX3,
kX4,
kX5,
kX6,
kX7,
kX8,
kX9,
kX10,
kX11,
kX12,
kX13,
kX14,
kX15,
kX16,
kX17,
kX18,
kX19,
kX20,
kX21,
kX22,
kX23,
kX24,
kX25,
kX26,
kX27,
kX28,
// FP (frame pointer).
kX29,
// LR (link register).
kX30,
kSp,
kPc,
kPstate,
kFpsr,
kFpcr,
// The whole 128 bits of a Vn register are also known as Qn (quadword).
kV0,
kV1,
kV2,
kV3,
kV4,
kV5,
kV6,
kV7,
kV8,
kV9,
kV10,
kV11,
kV12,
kV13,
kV14,
kV15,
kV16,
kV17,
kV18,
kV19,
kV20,
kV21,
kV22,
kV23,
kV24,
kV25,
kV26,
kV27,
kV28,
kV29,
kV30,
kV31,
};

#if XE_ARCH_AMD64
using HostRegister = X64Register;
#elif XE_ARCH_ARM64
using HostRegister = Arm64Register;
#else
enum class HostRegister {};
#endif // XE_ARCH

class HostThreadContext {
public:
#if XE_ARCH_AMD64
uint64_t rip;
uint32_t eflags;
union {
@@ -89,7 +177,6 @@ class X64Context {
};
uint64_t int_registers[16];
};

union {
struct {
vec128_t xmm0;
@@ -111,12 +198,19 @@ class X64Context {
};
vec128_t xmm_registers[16];
};
#elif XE_ARCH_ARM64
uint64_t x[31];
uint64_t sp;
uint64_t pc;
uint64_t pstate;
uint32_t fpsr;
uint32_t fpcr;
vec128_t v[32];
#endif // XE_ARCH

static const char* GetRegisterName(X64Register reg);
std::string GetStringFromValue(X64Register reg, bool hex) const;
void SetValueFromString(X64Register reg, std::string value, bool hex);
static const char* GetRegisterName(HostRegister reg);
std::string GetStringFromValue(HostRegister reg, bool hex) const;
};
#endif // XE_ARCH_AMD64

} // namespace xe
@@ -1,67 +0,0 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2015 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/

#include "xenia/base/x64_context.h"

#include "xenia/base/assert.h"
#include "xenia/base/platform.h"
#include "xenia/base/string_util.h"

namespace xe {

#if XE_ARCH_AMD64

// NOTE: this order matches 1:1 with the X64Register enum.
static const char* kRegisterNames[] = {
"rip", "eflags", "rax", "rcx", "rdx", "rbx", "rsp",
"rbp", "rsi", "rdi", "r8", "r9", "r10", "r11",
"r12", "r13", "r14", "r15", "xmm0", "xmm1", "xmm2",
"xmm3", "xmm4", "xmm5", "xmm6", "xmm7", "xmm8", "xmm9",
"xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15",
};

const char* X64Context::GetRegisterName(X64Register reg) {
return kRegisterNames[static_cast<int>(reg)];
}

std::string X64Context::GetStringFromValue(X64Register reg, bool hex) const {
switch (reg) {
case X64Register::kRip:
return hex ? string_util::to_hex_string(rip) : std::to_string(rip);
case X64Register::kEflags:
return hex ? string_util::to_hex_string(eflags) : std::to_string(eflags);
default:
if (static_cast<int>(reg) >= static_cast<int>(X64Register::kRax) &&
static_cast<int>(reg) <= static_cast<int>(X64Register::kR15)) {
auto value = int_registers[static_cast<int>(reg) -
static_cast<int>(X64Register::kRax)];
return hex ? string_util::to_hex_string(value) : std::to_string(value);
} else if (static_cast<int>(reg) >=
static_cast<int>(X64Register::kXmm0) &&
static_cast<int>(reg) <=
static_cast<int>(X64Register::kXmm15)) {
auto value = xmm_registers[static_cast<int>(reg) -
static_cast<int>(X64Register::kXmm0)];
return hex ? string_util::to_hex_string(value) : xe::to_string(value);
} else {
assert_unhandled_case(reg);
return "";
}
}
}

void X64Context::SetValueFromString(X64Register reg, std::string value,
bool hex) {
// TODO(benvanik): set value from string.
assert_always(false);
}

#endif // XE_ARCH_AMD64

} // namespace xe
@@ -0,0 +1,36 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2022 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/

#include "xenia/cpu/backend/null_backend.h"

#include "xenia/cpu/backend/assembler.h"
#include "xenia/cpu/function.h"

namespace xe {
namespace cpu {
namespace backend {

void NullBackend::CommitExecutableRange(uint32_t guest_low,
uint32_t guest_high) {}

std::unique_ptr<Assembler> NullBackend::CreateAssembler() { return nullptr; }

std::unique_ptr<GuestFunction> NullBackend::CreateGuestFunction(
Module* module, uint32_t address) {
return nullptr;
}

uint64_t NullBackend::CalculateNextHostInstruction(ThreadDebugInfo* thread_info,
uint64_t current_pc) {
return current_pc;
}

} // namespace backend
} // namespace cpu
} // namespace xe
@@ -0,0 +1,36 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2022 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/

#ifndef XENIA_CPU_BACKEND_NULL_BACKEND_H_
#define XENIA_CPU_BACKEND_NULL_BACKEND_H_

#include "xenia/cpu/backend/backend.h"

namespace xe {
namespace cpu {
namespace backend {

class NullBackend : public Backend {
public:
void CommitExecutableRange(uint32_t guest_low, uint32_t guest_high) override;

std::unique_ptr<Assembler> CreateAssembler() override;

std::unique_ptr<GuestFunction> CreateGuestFunction(Module* module,
uint32_t address) override;

uint64_t CalculateNextHostInstruction(ThreadDebugInfo* thread_info,
uint64_t current_pc) override;
};

} // namespace backend
} // namespace cpu
} // namespace xe

#endif // XENIA_CPU_BACKEND_NULL_BACKEND_H_
@@ -163,7 +163,7 @@ std::unique_ptr<GuestFunction> X64Backend::CreateGuestFunction(
return std::make_unique<X64Function>(module, address);
}

uint64_t ReadCapstoneReg(X64Context* context, x86_reg reg) {
uint64_t ReadCapstoneReg(HostThreadContext* context, x86_reg reg) {
switch (reg) {
case X86_REG_RAX:
return context->rax;
@@ -27,8 +27,6 @@ namespace x64 {

class X64CodeCache;

#define XENIA_HAS_X64_BACKEND 1

typedef void* (*HostToGuestThunk)(void* target, void* arg0, void* arg1);
typedef void* (*GuestToHostThunk)(void* target, void* arg0, void* arg1);
typedef void (*ResolveFunctionThunk)();
@@ -1414,14 +1414,17 @@ void Value::DotProduct3(Value* other) {
assert_true(this->type == VEC128_TYPE && other->type == VEC128_TYPE);
switch (type) {
case VEC128_TYPE: {
alignas(16) float result[4];
__m128 src1 = _mm_load_ps(constant.v128.f32);
__m128 src2 = _mm_load_ps(other->constant.v128.f32);
__m128 dest = _mm_dp_ps(src1, src2, 0b01110001);
_mm_store_ps(result, dest);
// TODO(rick): is this sane?
type = FLOAT32_TYPE;
constant.f32 = result[0];
// Using x86 DPPS ordering for consistency with x86-64 code generation:
// (X1 * X2 + Y1 * Y2) + (Z1 * Z2 + 0.0f)
// (+ 0.0f for zero sign, as zero imm8[4:7] bits result in zero terms,
// not in complete exclusion of them)
// TODO(Triang3l): NaN on overflow.
constant.f32 =
(constant.v128.f32[0] * other->constant.v128.f32[0] +
constant.v128.f32[1] * other->constant.v128.f32[1]) +
(constant.v128.f32[2] * other->constant.v128.f32[2] + 0.0f);
} break;
default:
assert_unhandled_case(type);
@@ -1433,14 +1436,15 @@ void Value::DotProduct4(Value* other) {
assert_true(this->type == VEC128_TYPE && other->type == VEC128_TYPE);
switch (type) {
case VEC128_TYPE: {
alignas(16) float result[4];
__m128 src1 = _mm_load_ps(constant.v128.f32);
__m128 src2 = _mm_load_ps(other->constant.v128.f32);
__m128 dest = _mm_dp_ps(src1, src2, 0b11110001);
_mm_store_ps(result, dest);
// TODO(rick): is this sane?
type = FLOAT32_TYPE;
constant.f32 = result[0];
// Using x86 DPPS ordering for consistency with x86-64 code generation:
// (X1 * X2 + Y1 * Y2) + (Z1 * Z2 + W1 * W2)
// TODO(Triang3l): NaN on overflow.
constant.f32 = (constant.v128.f32[0] * other->constant.v128.f32[0] +
constant.v128.f32[1] * other->constant.v128.f32[1]) +
(constant.v128.f32[2] * other->constant.v128.f32[2] +
constant.v128.f32[3] * other->constant.v128.f32[3]);
} break;
default:
assert_unhandled_case(type);
@@ -18,6 +18,7 @@
#include "xenia/base/exception_handler.h"
#include "xenia/base/logging.h"
#include "xenia/base/memory.h"
#include "xenia/base/platform.h"

namespace xe {
namespace cpu {
@@ -114,28 +115,10 @@ bool MMIOHandler::CheckStore(uint32_t virtual_address, uint32_t value) {
return false;
}

struct DecodedMov {
size_t length;
// Indicates this is a load (or conversely a store).
bool is_load;
// Indicates the memory must be swapped.
bool byte_swap;
// Source (for store) or target (for load) register.
// AX CX DX BX SP BP SI DI // REX.R=0
// R8 R9 R10 R11 R12 R13 R14 R15 // REX.R=1
uint32_t value_reg;
// [base + (index * scale) + displacement]
bool mem_has_base;
uint8_t mem_base_reg;
bool mem_has_index;
uint8_t mem_index_reg;
uint8_t mem_scale;
int32_t mem_displacement;
bool is_constant;
int32_t constant;
};

bool TryDecodeMov(const uint8_t* p, DecodedMov* mov) {
bool MMIOHandler::TryDecodeLoadStore(const uint8_t* p,
DecodedLoadStore& decoded_out) {
std::memset(&decoded_out, 0, sizeof(decoded_out));
#if XE_ARCH_AMD64
uint8_t i = 0; // Current byte decode index.
uint8_t rex = 0;
if ((p[i] & 0xF0) == 0x40) {
@@ -148,8 +131,8 @@ bool TryDecodeMov(const uint8_t* p, DecodedMov* mov) {
// 44 0f 38 f1 a4 02 00 movbe DWORD PTR [rdx+rax*1+0x0],r12d
// 42 0f 38 f1 8c 22 00 movbe DWORD PTR [rdx+r12*1+0x0],ecx
// 0f 38 f1 8c 02 00 00 movbe DWORD PTR [rdx + rax * 1 + 0x0], ecx
mov->is_load = false;
mov->byte_swap = true;
decoded_out.is_load = false;
decoded_out.byte_swap = true;
i += 3;
} else if (p[i] == 0x0F && p[i + 1] == 0x38 && p[i + 2] == 0xF0) {
// MOVBE r32, m32 (load)
@@ -159,8 +142,8 @@ bool TryDecodeMov(const uint8_t* p, DecodedMov* mov) {
// 46 0f 38 f0 a4 22 00 movbe r12d,DWORD PTR [rdx+r12*1+0x0]
// 0f 38 f0 8c 02 00 00 movbe ecx,DWORD PTR [rdx+rax*1+0x0]
// 0F 38 F0 1C 02 movbe ebx,dword ptr [rdx+rax]
mov->is_load = true;
mov->byte_swap = true;
decoded_out.is_load = true;
decoded_out.byte_swap = true;
i += 3;
} else if (p[i] == 0x89) {
// MOV m32, r32 (store)
@@ -168,8 +151,8 @@ bool TryDecodeMov(const uint8_t* p, DecodedMov* mov) {
// 44 89 24 02 mov DWORD PTR[rdx + rax * 1], r12d
// 42 89 0c 22 mov DWORD PTR[rdx + r12 * 1], ecx
// 89 0c 02 mov DWORD PTR[rdx + rax * 1], ecx
mov->is_load = false;
mov->byte_swap = false;
decoded_out.is_load = false;
decoded_out.byte_swap = false;
++i;
} else if (p[i] == 0x8B) {
// MOV r32, m32 (load)
@@ -178,16 +161,16 @@ bool TryDecodeMov(const uint8_t* p, DecodedMov* mov) {
// 42 8b 0c 22 mov ecx, DWORD PTR[rdx + r12 * 1]
// 46 8b 24 22 mov r12d, DWORD PTR[rdx + r12 * 1]
// 8b 0c 02 mov ecx, DWORD PTR[rdx + rax * 1]
mov->is_load = true;
mov->byte_swap = false;
decoded_out.is_load = true;
decoded_out.byte_swap = false;
++i;
} else if (p[i] == 0xC7) {
// MOV m32, simm32
// https://web.archive.org/web/20161017042413/https://www.asmpedia.org/index.php?title=MOV
// C7 04 02 02 00 00 00 mov dword ptr [rdx+rax],2
mov->is_load = false;
mov->byte_swap = false;
mov->is_constant = true;
decoded_out.is_load = false;
decoded_out.byte_swap = false;
decoded_out.is_constant = true;
++i;
} else {
return false;
@@ -204,13 +187,13 @@ bool TryDecodeMov(const uint8_t* p, DecodedMov* mov) {
uint8_t mod = (modrm & 0b11000000) >> 6;
uint8_t reg = (modrm & 0b00111000) >> 3;
uint8_t rm = (modrm & 0b00000111);
mov->value_reg = reg + (rex_r ? 8 : 0);
mov->mem_has_base = false;
mov->mem_base_reg = 0;
mov->mem_has_index = false;
mov->mem_index_reg = 0;
mov->mem_scale = 1;
mov->mem_displacement = 0;
decoded_out.value_reg = reg + (rex_r ? 8 : 0);
decoded_out.mem_has_base = false;
decoded_out.mem_base_reg = 0;
decoded_out.mem_has_index = false;
decoded_out.mem_index_reg = 0;
decoded_out.mem_scale = 1;
decoded_out.mem_displacement = 0;
bool has_sib = false;
switch (rm) {
case 0b100: // SIB
@@ -221,17 +204,17 @@ bool TryDecodeMov(const uint8_t* p, DecodedMov* mov) {
// RIP-relative not supported.
return false;
}
mov->mem_has_base = true;
mov->mem_base_reg = rm + (rex_b ? 8 : 0);
decoded_out.mem_has_base = true;
decoded_out.mem_base_reg = rm + (rex_b ? 8 : 0);
break;
default:
mov->mem_has_base = true;
mov->mem_base_reg = rm + (rex_b ? 8 : 0);
decoded_out.mem_has_base = true;
decoded_out.mem_base_reg = rm + (rex_b ? 8 : 0);
break;
}
if (has_sib) {
uint8_t sib = p[i++];
mov->mem_scale = 1 << ((sib & 0b11000000) >> 8);
decoded_out.mem_scale = 1 << ((sib & 0b11000000) >> 8);
uint8_t sib_index = (sib & 0b00111000) >> 3;
uint8_t sib_base = (sib & 0b00000111);
switch (sib_index) {
@@ -239,8 +222,9 @@ bool TryDecodeMov(const uint8_t* p, DecodedMov* mov) {
// No index.
break;
default:
mov->mem_has_index = true;
mov->mem_index_reg = sib_index + (rex_x ? 8 : 0);
decoded_out.mem_has_index = true;
decoded_out.mem_index_reg = sib_index + (rex_x ? 8 : 0);
decoded_out.mem_index_size = sizeof(uint64_t);
break;
}
switch (sib_base) {
@ -249,29 +233,162 @@ bool TryDecodeMov(const uint8_t* p, DecodedMov* mov) {
|
|||
assert_zero(mod);
|
||||
return false;
|
||||
default:
|
||||
mov->mem_has_base = true;
|
||||
mov->mem_base_reg = sib_base + (rex_b ? 8 : 0);
|
||||
decoded_out.mem_has_base = true;
|
||||
decoded_out.mem_base_reg = sib_base + (rex_b ? 8 : 0);
|
||||
break;
|
||||
}
|
||||
}
|
||||
switch (mod) {
|
||||
case 0b00: {
|
||||
mov->mem_displacement += 0;
|
||||
decoded_out.mem_displacement += 0;
|
||||
} break;
|
||||
case 0b01: {
|
||||
mov->mem_displacement += int8_t(p[i++]);
|
||||
decoded_out.mem_displacement += int8_t(p[i++]);
|
||||
} break;
|
||||
case 0b10: {
|
||||
mov->mem_displacement += xe::load<int32_t>(p + i);
|
||||
decoded_out.mem_displacement += xe::load<int32_t>(p + i);
|
||||
i += 4;
|
||||
} break;
|
||||
}
|
||||
if (mov->is_constant) {
|
||||
mov->constant = xe::load<int32_t>(p + i);
|
||||
if (decoded_out.is_constant) {
|
||||
decoded_out.constant = xe::load<int32_t>(p + i);
|
||||
i += 4;
|
||||
}
|
||||
mov->length = i;
|
||||
decoded_out.length = i;
|
||||
return true;
|
||||
|
||||
#elif XE_ARCH_ARM64
  decoded_out.length = sizeof(uint32_t);
  uint32_t instruction = *reinterpret_cast<const uint32_t*>(p);

  // Literal loading (PC-relative) is not handled.

  if ((instruction & kArm64LoadStoreAnyFMask) != kArm64LoadStoreAnyFixed) {
    // Not a load or a store instruction.
    return false;
  }

  if ((instruction & kArm64LoadStorePairAnyFMask) ==
      kArm64LoadStorePairAnyFixed) {
    // Handling MMIO only for single 32-bit values, not for pairs.
    return false;
  }

  uint8_t value_reg_base;
  switch (Arm64LoadStoreOp(instruction & kArm64LoadStoreMask)) {
    case Arm64LoadStoreOp::kSTR_w:
      decoded_out.is_load = false;
      value_reg_base = DecodedLoadStore::kArm64ValueRegX0;
      break;
    case Arm64LoadStoreOp::kLDR_w:
      decoded_out.is_load = true;
      value_reg_base = DecodedLoadStore::kArm64ValueRegX0;
      break;
    case Arm64LoadStoreOp::kSTR_s:
      decoded_out.is_load = false;
      value_reg_base = DecodedLoadStore::kArm64ValueRegV0;
      break;
    case Arm64LoadStoreOp::kLDR_s:
      decoded_out.is_load = true;
      value_reg_base = DecodedLoadStore::kArm64ValueRegV0;
      break;
    default:
      return false;
  }

  // `Rt` field (load / store register).
  decoded_out.value_reg = value_reg_base + (instruction & 31);
  if (decoded_out.is_load &&
      decoded_out.value_reg == DecodedLoadStore::kArm64ValueRegZero) {
    // Zero constant rather than a register read.
    decoded_out.is_constant = true;
    decoded_out.constant = 0;
  }

  decoded_out.mem_has_base = true;
  // The base is Xn (for 0...30) or SP (for 31).
  // `Rn` field (first source register).
  decoded_out.mem_base_reg = (instruction >> 5) & 31;

  bool is_unsigned_offset =
      (instruction & kArm64LoadStoreUnsignedOffsetFMask) ==
      kArm64LoadStoreUnsignedOffsetFixed;
  if (is_unsigned_offset) {
    // LDR|STR Wt|St, [Xn|SP{, #pimm}]
    // pimm (positive immediate) is scaled by the size of the data (4 for
    // words).
    // `ImmLSUnsigned` field.
    uint32_t unsigned_offset = (instruction >> 10) & 4095;
    decoded_out.mem_displacement =
        ptrdiff_t(sizeof(uint32_t) * unsigned_offset);
  } else {
    Arm64LoadStoreOffsetFixed offset =
        Arm64LoadStoreOffsetFixed(instruction & kArm64LoadStoreOffsetFMask);
    // simm (signed immediate) is not scaled.
    // Only applicable to kUnscaledOffset, kPostIndex and kPreIndex.
    // `ImmLS` field.
    int32_t signed_offset = int32_t(instruction << (32 - (9 + 12))) >> (32 - 9);
    // For both post- and pre-indexing, the new address is written to the
    // register after the data register write, thus if Xt and Xn are the same,
    // the final value in the register will be the new address.
    // https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/LDR--immediate---Load-Register--immediate--
    switch (offset) {
      case Arm64LoadStoreOffsetFixed::kUnscaledOffset: {
        // LDUR|STUR Wt|St, [Xn|SP{, #simm}]
        decoded_out.mem_displacement = signed_offset;
      } break;
      case Arm64LoadStoreOffsetFixed::kPostIndex: {
        // LDR|STR Wt|St, [Xn|SP], #simm
        decoded_out.mem_base_writeback = true;
        decoded_out.mem_base_writeback_offset = signed_offset;
      } break;
      case Arm64LoadStoreOffsetFixed::kPreIndex: {
        // LDR|STR Wt|St, [Xn|SP, #simm]!
        decoded_out.mem_base_writeback = true;
        decoded_out.mem_base_writeback_offset = signed_offset;
        decoded_out.mem_displacement = signed_offset;
      } break;
      case Arm64LoadStoreOffsetFixed::kRegisterOffset: {
        // LDR|STR Wt|St, [Xn|SP, (Wm|Xm){, extend {amount}}]
        // `Rm` field.
        decoded_out.mem_index_reg = (instruction >> 16) & 31;
        if (decoded_out.mem_index_reg != DecodedLoadStore::kArm64RegZero) {
          decoded_out.mem_has_index = true;
          // Allowed extend types in the `option` field are UXTW (0b010), LSL
          // (0b011 - identical to UXTX), SXTW (0b110), SXTX (0b111).
          // The shift (0 or 2 for 32-bit LDR/STR) can be applied regardless of
          // the extend type ("LSL" is just a term for assembly readability,
          // internally it's treated simply as UXTX).
          // If bit 0 of the `option` field is 0 (UXTW, SXTW), the index
          // register is treated as 32-bit (Wm) extended to 64-bit. If it's 1
          // (LSL aka UXTX, SXTX), the index register is treated as 64-bit (Xm).
          // `ExtendMode` (`option`) field.
          uint32_t extend_mode = (instruction >> 13) & 0b111;
          if (!(extend_mode & 0b010)) {
            // Sub-word index - undefined.
            return false;
          }
          decoded_out.mem_index_size =
              (extend_mode & 0b001) ? sizeof(uint64_t) : sizeof(uint32_t);
          decoded_out.mem_index_sign_extend = (extend_mode & 0b100) != 0;
          // Shift is either 0 or log2(sizeof(load or store size)).
          // Supporting MMIO only for 4-byte words.
          // `ImmShiftLS` field.
          decoded_out.mem_scale =
              (instruction & (UINT32_C(1) << 12)) ? sizeof(uint32_t) : 1;
        }
      } break;
      default:
        return false;
    }
  }

  return true;

#else
#error TryDecodeLoadStore not implemented for the target CPU architecture.
  return false;
#endif  // XE_ARCH
}

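As a side note on the decoder above: the 9-bit ImmLS immediate sits at bits [20:12] of the instruction word, and the code sign-extends it with a pair of shifts. A minimal standalone sketch of that trick (the helper name and example values are illustrative only, not part of this commit):

#include <cstdint>

// Hypothetical helper mirroring the decoder above: extract the signed 9-bit
// ImmLS field stored at bits [20:12] of an AArch64 load/store encoding.
inline int32_t ExtractImmLS(uint32_t instruction) {
  // Shift left so bit 20 (the field's sign bit) becomes bit 31, then shift
  // right arithmetically to sign-extend the value down into bits [8:0].
  return int32_t(instruction << (32 - (9 + 12))) >> (32 - 9);
}
// Example: for LDUR W0, [X1, #-4], the field holds 0x1FC and decodes to -4.
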
bool MMIOHandler::ExceptionCallbackThunk(Exception* ex, void* data) {

@ -300,11 +417,13 @@ bool MMIOHandler::ExceptionCallback(Exception* ex) {
  // Access violations are pretty rare, so we can do a linear search here.
  // Only check if in the virtual range, as we only support virtual ranges.
  const MMIORange* range = nullptr;
  uint32_t fault_guest_virtual_address = 0;
  if (ex->fault_address() < uint64_t(physical_membase_)) {
    uint32_t fault_virtual_address = host_to_guest_virtual_(
    fault_guest_virtual_address = host_to_guest_virtual_(
        host_to_guest_virtual_context_, fault_host_address);
    for (const auto& test_range : mapped_ranges_) {
      if ((fault_virtual_address & test_range.mask) == test_range.address) {
      if ((fault_guest_virtual_address & test_range.mask) ==
          test_range.address) {
        // Address is within the range of this mapping.
        range = &test_range;
        break;

@ -336,44 +455,114 @@ bool MMIOHandler::ExceptionCallback(Exception* ex) {

  auto rip = ex->pc();
  auto p = reinterpret_cast<const uint8_t*>(rip);
  DecodedMov mov = {0};
  bool decoded = TryDecodeMov(p, &mov);
  if (!decoded) {
    XELOGE("Unable to decode MMIO mov at {}", p);
  DecodedLoadStore decoded_load_store;
  if (!TryDecodeLoadStore(p, decoded_load_store)) {
    XELOGE("Unable to decode MMIO load or store instruction at {}", p);
    assert_always("Unknown MMIO instruction type");
    return false;
  }

  if (mov.is_load) {
  HostThreadContext& thread_context = *ex->thread_context();

#if XE_ARCH_ARM64
  // Preserve the base address with the pre- or the post-index offset to write
  // it after writing the result (since the base address register and the
  // register to load to may be the same, in which case it should receive the
  // original base address with the offset).
  uintptr_t mem_base_writeback_address = 0;
  if (decoded_load_store.mem_has_base &&
      decoded_load_store.mem_base_writeback) {
    if (decoded_load_store.mem_base_reg ==
        DecodedLoadStore::kArm64MemBaseRegSp) {
      mem_base_writeback_address = thread_context.sp;
    } else {
      assert_true(decoded_load_store.mem_base_reg <= 30);
      mem_base_writeback_address =
          thread_context.x[decoded_load_store.mem_base_reg];
    }
    mem_base_writeback_address += decoded_load_store.mem_base_writeback_offset;
  }
#endif  // XE_ARCH_ARM64

  uint8_t value_reg = decoded_load_store.value_reg;
  if (decoded_load_store.is_load) {
    // Load of a memory value - read from range, swap, and store in the
    // register.
    uint32_t value = range->read(nullptr, range->callback_context,
                                 static_cast<uint32_t>(ex->fault_address()));
    uint64_t* reg_ptr = &ex->thread_context()->int_registers[mov.value_reg];
    if (!mov.byte_swap) {
                                 fault_guest_virtual_address);
    if (!decoded_load_store.byte_swap) {
      // We swap only if it's not a movbe, as otherwise we are swapping twice.
      value = xe::byte_swap(value);
    }
    *reg_ptr = value;
#if XE_ARCH_AMD64
    ex->ModifyIntRegister(value_reg) = value;
#elif XE_ARCH_ARM64
    if (value_reg >= DecodedLoadStore::kArm64ValueRegX0 &&
        value_reg <= (DecodedLoadStore::kArm64ValueRegX0 + 30)) {
      ex->ModifyXRegister(value_reg - DecodedLoadStore::kArm64ValueRegX0) =
          value;
    } else if (value_reg >= DecodedLoadStore::kArm64ValueRegV0 &&
               value_reg <= (DecodedLoadStore::kArm64ValueRegV0 + 31)) {
      ex->ModifyVRegister(value_reg - DecodedLoadStore::kArm64ValueRegV0)
          .u32[0] = value;
    } else {
      assert_true(value_reg == DecodedLoadStore::kArm64ValueRegZero);
      // Register write is ignored for X31.
    }
#else
#error Register value writing not implemented for the target CPU architecture.
#endif  // XE_ARCH
  } else {
    // Store of a register value - read register, swap, write to range.
    int32_t value;
    if (mov.is_constant) {
      value = uint32_t(mov.constant);
    uint32_t value;
    if (decoded_load_store.is_constant) {
      value = uint32_t(decoded_load_store.constant);
    } else {
      uint64_t* reg_ptr = &ex->thread_context()->int_registers[mov.value_reg];
      value = static_cast<uint32_t>(*reg_ptr);
      if (!mov.byte_swap) {
#if XE_ARCH_AMD64
      value = uint32_t(thread_context.int_registers[value_reg]);
#elif XE_ARCH_ARM64
      if (value_reg >= DecodedLoadStore::kArm64ValueRegX0 &&
          value_reg <= (DecodedLoadStore::kArm64ValueRegX0 + 30)) {
        value = uint32_t(
            thread_context.x[value_reg - DecodedLoadStore::kArm64ValueRegX0]);
      } else if (value_reg >= DecodedLoadStore::kArm64ValueRegV0 &&
                 value_reg <= (DecodedLoadStore::kArm64ValueRegV0 + 31)) {
        value = thread_context.v[value_reg - DecodedLoadStore::kArm64ValueRegV0]
                    .u32[0];
      } else {
        assert_true(value_reg == DecodedLoadStore::kArm64ValueRegZero);
        value = 0;
      }
#else
#error Register value reading not implemented for the target CPU architecture.
#endif  // XE_ARCH
      if (!decoded_load_store.byte_swap) {
        // We swap only if it's not a movbe, as otherwise we are swapping twice.
        value = xe::byte_swap(static_cast<uint32_t>(value));
        value = xe::byte_swap(value);
      }
    }
    range->write(nullptr, range->callback_context,
                 static_cast<uint32_t>(ex->fault_address()), value);
    range->write(nullptr, range->callback_context, fault_guest_virtual_address,
                 value);
  }

#if XE_ARCH_ARM64
  // Write the base address with the pre- or the post-index offset, overwriting
  // the register to load to if it's the same.
  if (decoded_load_store.mem_has_base &&
      decoded_load_store.mem_base_writeback) {
    if (decoded_load_store.mem_base_reg ==
        DecodedLoadStore::kArm64MemBaseRegSp) {
      thread_context.sp = mem_base_writeback_address;
    } else {
      assert_true(decoded_load_store.mem_base_reg <= 30);
      ex->ModifyXRegister(decoded_load_store.mem_base_reg) =
          mem_base_writeback_address;
    }
  }
#endif  // XE_ARCH_ARM64

  // Advance RIP to the next instruction so that we resume properly.
  ex->set_resume_pc(rip + mov.length);
  ex->set_resume_pc(rip + decoded_load_store.length);

  return true;
}

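The swap logic above exists because the guest is big-endian while the host is little-endian: a plain mov/ldr needs exactly one explicit swap, whereas a movbe-style access has already swapped, so swapping again would cancel it out. A self-contained sketch of that rule (illustrative helper, not emulator code):

#include <cstdint>

// Returns the value the guest register should observe for a 32-bit MMIO read.
inline uint32_t GuestVisibleValue(uint32_t mmio_value, bool instruction_swaps) {
  auto byte_swap32 = [](uint32_t v) {
    return (v >> 24) | ((v >> 8) & 0x0000FF00u) | ((v << 8) & 0x00FF0000u) |
           (v << 24);
  };
  // Swap exactly once in total: either here or in the instruction itself.
  return instruction_swaps ? mmio_value : byte_swap32(mmio_value);
}
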
@ -15,10 +15,11 @@
#include <vector>

#include "xenia/base/mutex.h"
#include "xenia/base/platform.h"

namespace xe {
class Exception;
class X64Context;
class HostThreadContext;
}  // namespace xe

namespace xe {
@ -93,6 +94,61 @@ class MMIOHandler {
  static MMIOHandler* global_handler_;

  xe::global_critical_region global_critical_region_;

 private:
  struct DecodedLoadStore {
    // Matches the Xn/Wn register number for 0 reads and ignored writes in many
    // usage cases.
    static constexpr uint8_t kArm64RegZero = 31;

    // Matches the actual register number encoding for an SP base in AArch64
    // load and store instructions.
    static constexpr uint8_t kArm64MemBaseRegSp = kArm64RegZero;

    static constexpr uint8_t kArm64ValueRegX0 = 0;
    static constexpr uint8_t kArm64ValueRegZero =
        kArm64ValueRegX0 + kArm64RegZero;
    static constexpr uint8_t kArm64ValueRegV0 = 32;

    size_t length;
    // Indicates this is a load (or conversely a store).
    bool is_load;
    // Indicates the memory must be swapped.
    bool byte_swap;
    // Source (for store) or target (for load) register.
    // For x86-64:
    // AX  CX  DX  BX  SP  BP  SI  DI   // REX.R=0
    // R8  R9  R10 R11 R12 R13 R14 R15  // REX.R=1
    // For AArch64:
    // - kArm64ValueRegX0 + [0...30]: Xn (Wn for 32 bits - upper 32 bits of Xn
    //   are zeroed on Wn write).
    // - kArm64ValueRegZero: Zero constant for register read, ignored register
    //   write (though memory must still be accessed - a MMIO load may have side
    //   effects even if the result is discarded).
    // - kArm64ValueRegV0 + [0...31]: Vn (Sn for 32 bits).
    uint8_t value_reg;
    // [base + (index * scale) + displacement]
    bool mem_has_base;
    // On AArch64, if mem_base_reg is kArm64MemBaseRegSp, the base register is
    // SP, not Xn.
    uint8_t mem_base_reg;
    // For AArch64 pre- and post-indexing. In case of a load, the base register
    // is written back after the loaded data is written to the register,
    // overwriting the value register if it's the same.
    bool mem_base_writeback;
    int32_t mem_base_writeback_offset;
    bool mem_has_index;
    uint8_t mem_index_reg;
    uint8_t mem_index_size;
    bool mem_index_sign_extend;
    uint8_t mem_scale;
    ptrdiff_t mem_displacement;
    bool is_constant;
    int32_t constant;
  };

  static bool TryDecodeLoadStore(const uint8_t* p,
                                 DecodedLoadStore& decoded_out);
};

}  // namespace cpu

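For orientation, the addressing fields of DecodedLoadStore describe the effective address [base + (index * scale) + displacement]. A hypothetical, simplified illustration of how those fields combine once the register values are known (the struct and function names here are made up for the example):

#include <cstdint>

struct ResolvedAddressParts {
  bool has_base;
  bool has_index;
  uint64_t base_value;   // Contents of mem_base_reg at fault time.
  uint64_t index_value;  // Contents of mem_index_reg at fault time.
  uint8_t scale;         // mem_scale: 1, 2, 4 or 8.
  int64_t displacement;  // mem_displacement, already sign-extended.
};

inline uint64_t EffectiveAddress(const ResolvedAddressParts& parts) {
  uint64_t address = 0;
  if (parts.has_base) address += parts.base_value;
  if (parts.has_index) address += parts.index_value * parts.scale;
  return address + uint64_t(parts.displacement);
}
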
@ -15,13 +15,16 @@
|
|||
#include "xenia/base/math.h"
|
||||
#include "xenia/base/platform.h"
|
||||
#include "xenia/base/string_buffer.h"
|
||||
#include "xenia/cpu/backend/x64/x64_backend.h"
|
||||
#include "xenia/cpu/cpu_flags.h"
|
||||
#include "xenia/cpu/ppc/ppc_context.h"
|
||||
#include "xenia/cpu/ppc/ppc_frontend.h"
|
||||
#include "xenia/cpu/processor.h"
|
||||
#include "xenia/cpu/raw_module.h"
|
||||
|
||||
#if XE_ARCH_AMD64
|
||||
#include "xenia/cpu/backend/x64/x64_backend.h"
|
||||
#endif // XE_ARCH
|
||||
|
||||
#if XE_COMPILER_MSVC
|
||||
#include "xenia/base/platform_win.h"
|
||||
#endif // XE_COMPILER_MSVC
|
||||
|
@ -196,17 +199,17 @@ class TestRunner {
|
|||
|
||||
std::unique_ptr<xe::cpu::backend::Backend> backend;
|
||||
if (!backend) {
|
||||
#if defined(XENIA_HAS_X64_BACKEND) && XENIA_HAS_X64_BACKEND
|
||||
#if XE_ARCH_AMD64
|
||||
if (cvars::cpu == "x64") {
|
||||
backend.reset(new xe::cpu::backend::x64::X64Backend());
|
||||
}
|
||||
#endif // XENIA_HAS_X64_BACKEND
|
||||
#endif // XE_ARCH
|
||||
if (cvars::cpu == "any") {
|
||||
#if defined(XENIA_HAS_X64_BACKEND) && XENIA_HAS_X64_BACKEND
|
||||
if (!backend) {
|
||||
#if XE_ARCH_AMD64
|
||||
backend.reset(new xe::cpu::backend::x64::X64Backend());
|
||||
#endif // XE_ARCH
|
||||
}
|
||||
#endif // XENIA_HAS_X64_BACKEND
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -11,7 +11,6 @@ project("xenia-cpu-ppc-tests")
|
|||
"fmt",
|
||||
"mspack",
|
||||
"xenia-core",
|
||||
"xenia-cpu-backend-x64",
|
||||
"xenia-cpu",
|
||||
"xenia-base",
|
||||
})
|
||||
|
@ -24,6 +23,10 @@ project("xenia-cpu-ppc-tests")
|
|||
})
|
||||
filter("files:*.s")
|
||||
flags({"ExcludeFromBuild"})
|
||||
filter("architecture:x86_64")
|
||||
links({
|
||||
"xenia-cpu-backend-x64",
|
||||
})
|
||||
filter("platforms:Windows")
|
||||
debugdir(project_root)
|
||||
debugargs({
|
||||
|
|
|
@ -19,6 +19,7 @@
|
|||
#include "xenia/base/literals.h"
|
||||
#include "xenia/base/logging.h"
|
||||
#include "xenia/base/memory.h"
|
||||
#include "xenia/base/platform.h"
|
||||
#include "xenia/base/profiling.h"
|
||||
#include "xenia/base/threading.h"
|
||||
#include "xenia/cpu/breakpoint.h"
|
||||
|
@ -133,7 +134,11 @@ bool Processor::Setup(std::unique_ptr<backend::Backend> backend) {
|
|||
// Stack walker is used when profiling, debugging, and dumping.
|
||||
// Note that creation may fail, in which case we'll have to disable those
|
||||
// features.
|
||||
stack_walker_ = StackWalker::Create(backend_->code_cache());
|
||||
// The code cache may be unavailable in case of a "null" backend.
|
||||
cpu::backend::CodeCache* code_cache = backend_->code_cache();
|
||||
if (code_cache) {
|
||||
stack_walker_ = StackWalker::Create(code_cache);
|
||||
}
|
||||
if (!stack_walker_) {
|
||||
// TODO(benvanik): disable features.
|
||||
if (cvars::debug) {
|
||||
|
@ -698,7 +703,13 @@ bool Processor::OnThreadBreakpointHit(Exception* ex) {
|
|||
|
||||
// Apply thread context changes.
|
||||
// TODO(benvanik): apply to all threads?
|
||||
#if XE_ARCH_AMD64
|
||||
ex->set_resume_pc(thread_info->host_context.rip);
|
||||
#elif XE_ARCH_ARM64
|
||||
ex->set_resume_pc(thread_info->host_context.pc);
|
||||
#else
|
||||
#error Instruction pointer not specified for the target CPU architecture.
|
||||
#endif // XE_ARCH
|
||||
|
||||
// Resume execution.
|
||||
return true;
|
||||
|
@ -828,8 +839,8 @@ bool Processor::ResumeAllThreads() {
|
|||
return true;
|
||||
}
|
||||
|
||||
void Processor::UpdateThreadExecutionStates(uint32_t override_thread_id,
|
||||
X64Context* override_context) {
|
||||
void Processor::UpdateThreadExecutionStates(
|
||||
uint32_t override_thread_id, HostThreadContext* override_context) {
|
||||
auto global_lock = global_critical_region_.Acquire();
|
||||
uint64_t frame_host_pcs[64];
|
||||
xe::cpu::StackFrame cpu_frames[64];
|
||||
|
@ -851,7 +862,7 @@ void Processor::UpdateThreadExecutionStates(uint32_t override_thread_id,
|
|||
|
||||
// Grab stack trace and X64 context then resolve all symbols.
|
||||
uint64_t hash;
|
||||
X64Context* in_host_context = nullptr;
|
||||
HostThreadContext* in_host_context = nullptr;
|
||||
if (override_thread_id == thread_info->thread_id) {
|
||||
// If we were passed an override context we use that. Otherwise, ask the
|
||||
// stack walker for a new context.
|
||||
|
|
|
@ -215,8 +215,9 @@ class Processor {
|
|||
// Updates all cached thread execution info (state, call stacks, etc).
|
||||
// The given override thread handle and context will be used in place of
|
||||
// sampled values for that thread.
|
||||
void UpdateThreadExecutionStates(uint32_t override_handle = 0,
|
||||
X64Context* override_context = nullptr);
|
||||
void UpdateThreadExecutionStates(
|
||||
uint32_t override_handle = 0,
|
||||
HostThreadContext* override_context = nullptr);
|
||||
|
||||
// Suspends all breakpoints, uninstalling them as required.
|
||||
// No breakpoints will be triggered until they are resumed.
|
||||
|
|
|
@ -13,7 +13,7 @@
|
|||
#include <memory>
|
||||
#include <string>
|
||||
|
||||
#include "xenia/base/x64_context.h"
|
||||
#include "xenia/base/host_thread_context.h"
|
||||
#include "xenia/cpu/function.h"
|
||||
|
||||
namespace xe {
|
||||
|
@ -83,8 +83,8 @@ class StackWalker {
|
|||
virtual size_t CaptureStackTrace(void* thread_handle,
|
||||
uint64_t* frame_host_pcs,
|
||||
size_t frame_offset, size_t frame_count,
|
||||
const X64Context* in_host_context,
|
||||
X64Context* out_host_context,
|
||||
const HostThreadContext* in_host_context,
|
||||
HostThreadContext* out_host_context,
|
||||
uint64_t* out_stack_hash = nullptr) = 0;
|
||||
|
||||
// Resolves symbol information for the given stack frames.
|
||||
|
|
|
@ -153,8 +153,8 @@ class Win32StackWalker : public StackWalker {
|
|||
|
||||
size_t CaptureStackTrace(void* thread_handle, uint64_t* frame_host_pcs,
|
||||
size_t frame_offset, size_t frame_count,
|
||||
const X64Context* in_host_context,
|
||||
X64Context* out_host_context,
|
||||
const HostThreadContext* in_host_context,
|
||||
HostThreadContext* out_host_context,
|
||||
uint64_t* out_stack_hash) override {
|
||||
// TODO(benvanik): use xstate?
|
||||
// https://msdn.microsoft.com/en-us/library/windows/desktop/hh134240(v=vs.85).aspx
|
||||
|
|
|
@ -12,7 +12,7 @@
|
|||
|
||||
#include <vector>
|
||||
|
||||
#include "xenia/base/x64_context.h"
|
||||
#include "xenia/base/host_thread_context.h"
|
||||
#include "xenia/cpu/thread.h"
|
||||
#include "xenia/cpu/thread_state.h"
|
||||
|
||||
|
@ -70,10 +70,10 @@ struct ThreadDebugInfo {
|
|||
// Last-sampled PPC context.
|
||||
// This is updated whenever the debugger stops.
|
||||
ppc::PPCContext guest_context;
|
||||
// Last-sampled host x64 context.
|
||||
// Last-sampled host context.
|
||||
// This is updated whenever the debugger stops and must be used instead of any
|
||||
// value taken from the StackWalker as it properly respects exception stacks.
|
||||
X64Context host_context;
|
||||
HostThreadContext host_context;
|
||||
|
||||
// A single frame in a call stack.
|
||||
struct Frame {
|
||||
|
|
|
@ -960,7 +960,7 @@ void DebugWindow::DrawRegistersPane() {
|
|||
auto reg = static_cast<X64Register>(i);
|
||||
ImGui::BeginGroup();
|
||||
ImGui::AlignTextToFramePadding();
|
||||
ImGui::Text("%3s", X64Context::GetRegisterName(reg));
|
||||
ImGui::Text("%3s", HostThreadContext::GetRegisterName(reg));
|
||||
ImGui::SameLine();
|
||||
ImGui::Dummy(ImVec2(4, 0));
|
||||
ImGui::SameLine();
|
||||
|
@ -985,7 +985,7 @@ void DebugWindow::DrawRegistersPane() {
|
|||
static_cast<X64Register>(static_cast<int>(X64Register::kXmm0) + i);
|
||||
ImGui::BeginGroup();
|
||||
ImGui::AlignTextToFramePadding();
|
||||
ImGui::Text("%5s", X64Context::GetRegisterName(reg));
|
||||
ImGui::Text("%5s", HostThreadContext::GetRegisterName(reg));
|
||||
ImGui::SameLine();
|
||||
ImGui::Dummy(ImVec2(4, 0));
|
||||
ImGui::SameLine();
|
||||
|
|
|
@ -13,7 +13,7 @@
|
|||
#include <memory>
|
||||
#include <vector>
|
||||
|
||||
#include "xenia/base/x64_context.h"
|
||||
#include "xenia/base/host_thread_context.h"
|
||||
#include "xenia/cpu/breakpoint.h"
|
||||
#include "xenia/cpu/debug_listener.h"
|
||||
#include "xenia/cpu/processor.h"
|
||||
|
|
|
@ -24,9 +24,10 @@
|
|||
#include "xenia/base/literals.h"
|
||||
#include "xenia/base/logging.h"
|
||||
#include "xenia/base/mapped_memory.h"
|
||||
#include "xenia/base/platform.h"
|
||||
#include "xenia/base/string.h"
|
||||
#include "xenia/cpu/backend/code_cache.h"
|
||||
#include "xenia/cpu/backend/x64/x64_backend.h"
|
||||
#include "xenia/cpu/backend/null_backend.h"
|
||||
#include "xenia/cpu/cpu_flags.h"
|
||||
#include "xenia/cpu/thread_state.h"
|
||||
#include "xenia/gpu/graphics_system.h"
|
||||
|
@ -50,6 +51,10 @@
|
|||
#include "xenia/vfs/devices/null_device.h"
|
||||
#include "xenia/vfs/devices/stfs_container_device.h"
|
||||
|
||||
#if XE_ARCH_AMD64
|
||||
#include "xenia/cpu/backend/x64/x64_backend.h"
|
||||
#endif // XE_ARCH
|
||||
|
||||
DEFINE_double(time_scalar, 1.0,
|
||||
"Scalar used to speed or slow time (1x, 2x, 1/2x, etc).",
|
||||
"General");
|
||||
|
@ -127,6 +132,7 @@ Emulator::~Emulator() {
|
|||
|
||||
X_STATUS Emulator::Setup(
|
||||
ui::Window* display_window, ui::ImGuiDrawer* imgui_drawer,
|
||||
bool require_cpu_backend,
|
||||
std::function<std::unique_ptr<apu::AudioSystem>(cpu::Processor*)>
|
||||
audio_system_factory,
|
||||
std::function<std::unique_ptr<gpu::GraphicsSystem>()>
|
||||
|
@ -160,19 +166,20 @@ X_STATUS Emulator::Setup(
  export_resolver_ = std::make_unique<xe::cpu::ExportResolver>();

  std::unique_ptr<xe::cpu::backend::Backend> backend;
  if (!backend) {
#if defined(XENIA_HAS_X64_BACKEND) && XENIA_HAS_X64_BACKEND
    if (cvars::cpu == "x64") {
#if XE_ARCH_AMD64
  if (cvars::cpu == "x64") {
    backend.reset(new xe::cpu::backend::x64::X64Backend());
  }
#endif  // XE_ARCH
  if (cvars::cpu == "any") {
    if (!backend) {
#if XE_ARCH_AMD64
      backend.reset(new xe::cpu::backend::x64::X64Backend());
#endif  // XE_ARCH
    }
#endif  // XENIA_HAS_X64_BACKEND
    if (cvars::cpu == "any") {
#if defined(XENIA_HAS_X64_BACKEND) && XENIA_HAS_X64_BACKEND
      if (!backend) {
        backend.reset(new xe::cpu::backend::x64::X64Backend());
      }
#endif  // XENIA_HAS_X64_BACKEND
    }
  }
  if (!backend && !require_cpu_backend) {
    backend.reset(new xe::cpu::backend::NullBackend());
  }

  // Initialize the CPU.
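The hunk above rewrites backend selection around the new require_cpu_backend parameter: the x64 JIT is chosen when the cpu cvar asks for it (or allows "any") and the host is x86-64, and the null backend is only an acceptable fallback when the caller did not require a real CPU backend. A condensed sketch of that decision, with simplified stand-in types rather than the emulator's actual classes:

#include <memory>
#include <string>

struct Backend { virtual ~Backend() = default; };
struct X64Backend final : Backend {};
struct NullBackend final : Backend {};

std::unique_ptr<Backend> SelectBackend(const std::string& cpu_cvar,
                                       bool host_is_x86_64,
                                       bool require_cpu_backend) {
  std::unique_ptr<Backend> backend;
  if (host_is_x86_64 && (cpu_cvar == "x64" || cpu_cvar == "any")) {
    backend = std::make_unique<X64Backend>();
  }
  if (!backend && !require_cpu_backend) {
    backend = std::make_unique<NullBackend>();  // Enough for GPU-only tools.
  }
  return backend;  // Null means Setup must fail.
}
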
@ -165,6 +165,7 @@ class Emulator {
|
|||
// functions.
|
||||
X_STATUS Setup(
|
||||
ui::Window* display_window, ui::ImGuiDrawer* imgui_drawer,
|
||||
bool require_cpu_backend,
|
||||
std::function<std::unique_ptr<apu::AudioSystem>(cpu::Processor*)>
|
||||
audio_system_factory,
|
||||
std::function<std::unique_ptr<gpu::GraphicsSystem>()>
|
||||
|
|
|
@ -497,7 +497,7 @@ class D3D12RenderTargetCache final : public RenderTargetCache {
|
|||
TransferInvocation(const Transfer& transfer,
|
||||
const TransferShaderKey& shader_key)
|
||||
: transfer(transfer), shader_key(shader_key) {}
|
||||
bool operator<(const TransferInvocation& other_invocation) {
|
||||
bool operator<(const TransferInvocation& other_invocation) const {
|
||||
// TODO(Triang3l): See if it may be better to sort by the source in the
|
||||
// first place, especially when reading the same data multiple times (like
|
||||
// to write the stencil bits after depth) for better read locality.
|
||||
|
@ -639,7 +639,7 @@ class D3D12RenderTargetCache final : public RenderTargetCache {
|
|||
DumpInvocation(const ResolveCopyDumpRectangle& rectangle,
|
||||
const DumpPipelineKey& pipeline_key)
|
||||
: rectangle(rectangle), pipeline_key(pipeline_key) {}
|
||||
bool operator<(const DumpInvocation& other_invocation) {
|
||||
bool operator<(const DumpInvocation& other_invocation) const {
|
||||
// Sort by the pipeline key primarily to reduce pipeline state (context)
|
||||
// switches.
|
||||
if (pipeline_key != other_invocation.pipeline_key) {
|
||||
|
|
|
@ -30,7 +30,6 @@ project("xenia-gpu-d3d12-trace-viewer")
|
|||
"xenia-base",
|
||||
"xenia-core",
|
||||
"xenia-cpu",
|
||||
"xenia-cpu-backend-x64",
|
||||
"xenia-gpu",
|
||||
"xenia-gpu-d3d12",
|
||||
"xenia-hid",
|
||||
|
@ -68,6 +67,11 @@ project("xenia-gpu-d3d12-trace-viewer")
|
|||
})
|
||||
end
|
||||
|
||||
filter("architecture:x86_64")
|
||||
links({
|
||||
"xenia-cpu-backend-x64",
|
||||
})
|
||||
|
||||
group("src")
|
||||
project("xenia-gpu-d3d12-trace-dump")
|
||||
uuid("686b859c-0046-44c4-a02c-41fc3fb75698")
|
||||
|
@ -79,7 +83,6 @@ project("xenia-gpu-d3d12-trace-dump")
|
|||
"xenia-base",
|
||||
"xenia-core",
|
||||
"xenia-cpu",
|
||||
"xenia-cpu-backend-x64",
|
||||
"xenia-gpu",
|
||||
"xenia-gpu-d3d12",
|
||||
"xenia-hid",
|
||||
|
@ -115,3 +118,8 @@ project("xenia-gpu-d3d12-trace-dump")
|
|||
"1>scratch/stdout-trace-dump.txt",
|
||||
})
|
||||
end
|
||||
|
||||
filter("architecture:x86_64")
|
||||
links({
|
||||
"xenia-cpu-backend-x64",
|
||||
})
|
||||
|
|
|
@ -942,7 +942,7 @@ void PrimitiveProcessor::Get16BitResetIndexUsage(
      is_ffff_simd =
          _mm_or_si128(is_ffff_simd, _mm_cmpeq_epi16(source_simd, ffff_simd));
#elif XE_ARCH_ARM64
      is_reset_simd = vcorrq_u16(
      is_reset_simd = vorrq_u16(
          is_reset_simd, vceqq_u16(source_simd, reset_index_guest_endian_simd));
      is_ffff_simd = vmaxq_u16(is_ffff_simd, source_simd);
#else
|
||||
|
|
|
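The one-line change above replaces vcorrq_u16, which is not a NEON intrinsic, with vorrq_u16, the actual bitwise OR, when accumulating whether any 16-bit index equals the primitive-reset value. A standalone sketch of the accumulation pattern (assumes an AArch64 build; struct and member names are illustrative):

#include <arm_neon.h>
#include <cstdint>

// Accumulates, across batches of 16-bit indices, whether any lane equals the
// primitive-reset index and tracks the per-lane maximum (later compared
// against 0xFFFF).
struct ResetIndexScan {
  uint16x8_t any_reset = vdupq_n_u16(0);
  uint16x8_t max_value = vdupq_n_u16(0);
  void Accumulate(uint16x8_t source, uint16x8_t reset_index) {
    any_reset = vorrq_u16(any_reset, vceqq_u16(source, reset_index));
    max_value = vmaxq_u16(max_value, source);
  }
};
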
@ -374,8 +374,14 @@ void RenderTargetCache::InitializeCommon() {
|
|||
RenderTargetKey(), RenderTargetKey()));
|
||||
}
|
||||
|
||||
void RenderTargetCache::ShutdownCommon() {
|
||||
void RenderTargetCache::DestroyAllRenderTargets(bool shutting_down) {
|
||||
ownership_ranges_.clear();
|
||||
if (!shutting_down) {
|
||||
ownership_ranges_.emplace(
|
||||
std::piecewise_construct, std::forward_as_tuple(uint32_t(0)),
|
||||
std::forward_as_tuple(xenos::kEdramTileCount, RenderTargetKey(),
|
||||
RenderTargetKey(), RenderTargetKey()));
|
||||
}
|
||||
|
||||
for (const auto& render_target_pair : render_targets_) {
|
||||
if (render_target_pair.second) {
|
||||
|
@ -385,6 +391,8 @@ void RenderTargetCache::ShutdownCommon() {
|
|||
render_targets_.clear();
|
||||
}
|
||||
|
||||
void RenderTargetCache::ShutdownCommon() { DestroyAllRenderTargets(true); }
|
||||
|
||||
void RenderTargetCache::ClearCache() {
|
||||
// Keep only render targets currently owning any EDRAM data.
|
||||
if (!render_targets_.empty()) {
|
||||
|
|
|
@ -193,6 +193,10 @@ class RenderTargetCache {
|
|||
// Call last in implementation-specific initialization (when things like path
|
||||
// are initialized by the implementation).
|
||||
void InitializeCommon();
|
||||
// May be called from the destructor, or from the implementation shutdown to
|
||||
// destroy all render targets before destroying what they depend on in the
|
||||
// implementation.
|
||||
void DestroyAllRenderTargets(bool shutting_down);
|
||||
// Call last in implementation-specific shutdown, also callable from the
|
||||
// destructor.
|
||||
void ShutdownCommon();
|
||||
|
|
|
@ -75,9 +75,6 @@ SpirvShaderTranslator::Features::Features(
|
|||
}
|
||||
}
|
||||
|
||||
const std::string SpirvShaderTranslator::kInterpolatorNamePrefix =
|
||||
"xe_interpolator_";
|
||||
|
||||
SpirvShaderTranslator::SpirvShaderTranslator(const Features& features)
|
||||
: features_(features) {}
|
||||
|
||||
|
@ -164,6 +161,8 @@ void SpirvShaderTranslator::StartTranslation() {
|
|||
type_float2_ = builder_->makeVectorType(type_float_, 2);
|
||||
type_float3_ = builder_->makeVectorType(type_float_, 3);
|
||||
type_float4_ = builder_->makeVectorType(type_float_, 4);
|
||||
type_interpolators_ = builder_->makeArrayType(
|
||||
type_float4_, builder_->makeUintConstant(xenos::kMaxInterpolators), 0);
|
||||
|
||||
const_int_0_ = builder_->makeIntConstant(0);
|
||||
id_vector_temp_.clear();
|
||||
|
@ -257,8 +256,9 @@ void SpirvShaderTranslator::StartTranslation() {
|
|||
"xe_uniform_system_constants");
|
||||
builder_->addDecoration(uniform_system_constants_,
|
||||
spv::DecorationDescriptorSet,
|
||||
kDescriptorSetSystemConstants);
|
||||
builder_->addDecoration(uniform_system_constants_, spv::DecorationBinding, 0);
|
||||
int(kDescriptorSetConstants));
|
||||
builder_->addDecoration(uniform_system_constants_, spv::DecorationBinding,
|
||||
int(kConstantBufferSystem));
|
||||
if (features_.spirv_version >= spv::Spv_1_4) {
|
||||
main_interface_.push_back(uniform_system_constants_);
|
||||
}
|
||||
|
@ -285,12 +285,13 @@ void SpirvShaderTranslator::StartTranslation() {
|
|||
uniform_float_constants_ = builder_->createVariable(
|
||||
spv::NoPrecision, spv::StorageClassUniform, type_float_constants,
|
||||
"xe_uniform_float_constants");
|
||||
builder_->addDecoration(uniform_float_constants_,
|
||||
spv::DecorationDescriptorSet,
|
||||
int(kDescriptorSetConstants));
|
||||
builder_->addDecoration(
|
||||
uniform_float_constants_, spv::DecorationDescriptorSet,
|
||||
int(is_pixel_shader() ? kDescriptorSetFloatConstantsPixel
|
||||
: kDescriptorSetFloatConstantsVertex));
|
||||
builder_->addDecoration(uniform_float_constants_, spv::DecorationBinding,
|
||||
0);
|
||||
uniform_float_constants_, spv::DecorationBinding,
|
||||
int(is_pixel_shader() ? kConstantBufferFloatPixel
|
||||
: kConstantBufferFloatVertex));
|
||||
if (features_.spirv_version >= spv::Spv_1_4) {
|
||||
main_interface_.push_back(uniform_float_constants_);
|
||||
}
|
||||
|
@ -326,9 +327,9 @@ void SpirvShaderTranslator::StartTranslation() {
|
|||
"xe_uniform_bool_loop_constants");
|
||||
builder_->addDecoration(uniform_bool_loop_constants_,
|
||||
spv::DecorationDescriptorSet,
|
||||
int(kDescriptorSetBoolLoopConstants));
|
||||
int(kDescriptorSetConstants));
|
||||
builder_->addDecoration(uniform_bool_loop_constants_, spv::DecorationBinding,
|
||||
0);
|
||||
int(kConstantBufferBoolLoop));
|
||||
if (features_.spirv_version >= spv::Spv_1_4) {
|
||||
main_interface_.push_back(uniform_bool_loop_constants_);
|
||||
}
|
||||
|
@ -352,8 +353,9 @@ void SpirvShaderTranslator::StartTranslation() {
|
|||
"xe_uniform_fetch_constants");
|
||||
builder_->addDecoration(uniform_fetch_constants_,
|
||||
spv::DecorationDescriptorSet,
|
||||
int(kDescriptorSetFetchConstants));
|
||||
builder_->addDecoration(uniform_fetch_constants_, spv::DecorationBinding, 0);
|
||||
int(kDescriptorSetConstants));
|
||||
builder_->addDecoration(uniform_fetch_constants_, spv::DecorationBinding,
|
||||
int(kConstantBufferFetch));
|
||||
if (features_.spirv_version >= spv::Spv_1_4) {
|
||||
main_interface_.push_back(uniform_fetch_constants_);
|
||||
}
|
||||
|
@ -639,6 +641,16 @@ std::vector<uint8_t> SpirvShaderTranslator::CompleteTranslation() {
|
|||
entry_point->addIdOperand(interface_id);
|
||||
}
|
||||
|
||||
// Specify the binding indices for samplers when the number of textures is
|
||||
// known, as samplers are located after images in the texture descriptor set.
|
||||
size_t texture_binding_count = texture_bindings_.size();
|
||||
size_t sampler_binding_count = sampler_bindings_.size();
|
||||
for (size_t i = 0; i < sampler_binding_count; ++i) {
|
||||
builder_->addDecoration(sampler_bindings_[i].variable,
|
||||
spv::DecorationBinding,
|
||||
int(texture_binding_count + i));
|
||||
}
|
||||
|
||||
// TODO(Triang3l): Avoid copy?
|
||||
std::vector<unsigned int> module_uints;
|
||||
builder_->dump(module_uints);
|
||||
|
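The new loop above assigns sampler bindings only once all textures are known, because within the texture descriptor set the sampled images come first and the samplers follow them. The assumed binding layout, written out as a small sketch:

#include <cstdint>

// Images occupy bindings [0, texture_count); samplers follow at
// [texture_count, texture_count + sampler_count).
constexpr uint32_t TextureBinding(uint32_t texture_index) {
  return texture_index;
}
constexpr uint32_t SamplerBinding(uint32_t texture_count,
                                  uint32_t sampler_index) {
  return texture_count + sampler_index;
}
static_assert(SamplerBinding(3, 0) == 3,
              "The first sampler follows the last image");
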
@ -1056,17 +1068,15 @@ void SpirvShaderTranslator::StartVertexOrTessEvalShaderBeforeMain() {
|
|||
main_interface_.push_back(input_vertex_index_);
|
||||
}
|
||||
|
||||
// Create the Xenia-specific outputs.
|
||||
// TODO(Triang3l): Change to an interpolator array.
|
||||
for (uint32_t i = 0; i < xenos::kMaxInterpolators; ++i) {
|
||||
spv::Id interpolator = builder_->createVariable(
|
||||
spv::NoPrecision, spv::StorageClassOutput, type_float4_,
|
||||
(kInterpolatorNamePrefix + std::to_string(i)).c_str());
|
||||
input_output_interpolators_[i] = interpolator;
|
||||
builder_->addDecoration(interpolator, spv::DecorationLocation, int(i));
|
||||
builder_->addDecoration(interpolator, spv::DecorationInvariant);
|
||||
main_interface_.push_back(interpolator);
|
||||
}
|
||||
// Create the interpolator output.
|
||||
input_output_interpolators_ =
|
||||
builder_->createVariable(spv::NoPrecision, spv::StorageClassOutput,
|
||||
type_interpolators_, "xe_out_interpolators");
|
||||
builder_->addDecoration(input_output_interpolators_, spv::DecorationLocation,
|
||||
0);
|
||||
builder_->addDecoration(input_output_interpolators_,
|
||||
spv::DecorationInvariant);
|
||||
main_interface_.push_back(input_output_interpolators_);
|
||||
|
||||
// Create the gl_PerVertex output for used system outputs.
|
||||
std::vector<spv::Id> struct_per_vertex_members;
|
||||
|
@ -1095,7 +1105,12 @@ void SpirvShaderTranslator::StartVertexOrTessEvalShaderInMain() {
|
|||
|
||||
// Zero the interpolators.
|
||||
for (uint32_t i = 0; i < xenos::kMaxInterpolators; ++i) {
|
||||
builder_->createStore(const_float4_0_, input_output_interpolators_[i]);
|
||||
id_vector_temp_.clear();
|
||||
id_vector_temp_.push_back(builder_->makeIntConstant(int(i)));
|
||||
builder_->createStore(const_float4_0_,
|
||||
builder_->createAccessChain(
|
||||
spv::StorageClassOutput,
|
||||
input_output_interpolators_, id_vector_temp_));
|
||||
}
|
||||
|
||||
// Load the vertex index or the tessellation parameters.
|
||||
|
@ -1269,17 +1284,13 @@ void SpirvShaderTranslator::CompleteVertexOrTessEvalShaderInMain() {
|
|||
}
|
||||
|
||||
void SpirvShaderTranslator::StartFragmentShaderBeforeMain() {
|
||||
// Interpolator inputs.
|
||||
uint32_t interpolator_count =
|
||||
std::min(xenos::kMaxInterpolators, register_count());
|
||||
for (uint32_t i = 0; i < interpolator_count; ++i) {
|
||||
spv::Id interpolator = builder_->createVariable(
|
||||
spv::NoPrecision, spv::StorageClassInput, type_float4_,
|
||||
(kInterpolatorNamePrefix + std::to_string(i)).c_str());
|
||||
input_output_interpolators_[i] = interpolator;
|
||||
builder_->addDecoration(interpolator, spv::DecorationLocation, int(i));
|
||||
main_interface_.push_back(interpolator);
|
||||
}
|
||||
// Interpolator input.
|
||||
input_output_interpolators_ =
|
||||
builder_->createVariable(spv::NoPrecision, spv::StorageClassInput,
|
||||
type_interpolators_, "xe_in_interpolators");
|
||||
builder_->addDecoration(input_output_interpolators_, spv::DecorationLocation,
|
||||
0);
|
||||
main_interface_.push_back(input_output_interpolators_);
|
||||
|
||||
bool param_gen_needed = GetPsParamGenInterpolator() != UINT32_MAX;
|
||||
|
||||
|
@ -1347,7 +1358,10 @@ void SpirvShaderTranslator::StartFragmentShaderInMain() {
|
|||
// Register array element.
|
||||
id_vector_temp_.push_back(builder_->makeIntConstant(int(i)));
|
||||
builder_->createStore(
|
||||
builder_->createLoad(input_output_interpolators_[i], spv::NoPrecision),
|
||||
builder_->createLoad(builder_->createAccessChain(
|
||||
spv::StorageClassInput,
|
||||
input_output_interpolators_, id_vector_temp_),
|
||||
spv::NoPrecision),
|
||||
builder_->createAccessChain(spv::StorageClassFunction,
|
||||
var_main_registers_, id_vector_temp_));
|
||||
}
|
||||
|
@ -1824,7 +1838,12 @@ void SpirvShaderTranslator::StoreResult(const InstructionResult& result,
|
|||
} break;
|
||||
case InstructionStorageTarget::kInterpolator:
|
||||
assert_true(is_vertex_shader());
|
||||
target_pointer = input_output_interpolators_[result.storage_index];
|
||||
id_vector_temp_util_.clear();
|
||||
id_vector_temp_util_.push_back(
|
||||
builder_->makeIntConstant(int(result.storage_index)));
|
||||
target_pointer = builder_->createAccessChain(spv::StorageClassOutput,
|
||||
input_output_interpolators_,
|
||||
id_vector_temp_util_);
|
||||
break;
|
||||
case InstructionStorageTarget::kPosition:
|
||||
assert_true(is_vertex_shader());
|
||||
|
|
|
@ -131,6 +131,16 @@ class SpirvShaderTranslator : public ShaderTranslator {
|
|||
float color_exp_bias[4];
|
||||
};
|
||||
|
||||
enum ConstantBuffer : uint32_t {
|
||||
kConstantBufferSystem,
|
||||
kConstantBufferFloatVertex,
|
||||
kConstantBufferFloatPixel,
|
||||
kConstantBufferBoolLoop,
|
||||
kConstantBufferFetch,
|
||||
|
||||
kConstantBufferCount,
|
||||
};
|
||||
|
||||
// The minimum limit for maxPerStageDescriptorStorageBuffers is 4, and for
|
||||
// maxStorageBufferRange it's 128 MB. These are the values of those limits on
|
||||
// Arm Mali as of November 2020. Xenia needs 512 MB shared memory to be bound,
|
||||
|
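With all guest constant buffers folded into the single kDescriptorSetConstants set, each ConstantBuffer enumerator effectively doubles as the binding index used in the DecorationBinding calls earlier in this commit. A tiny sketch of that assumed mapping:

#include <cstdint>

enum ConstantBuffer : uint32_t {
  kConstantBufferSystem,
  kConstantBufferFloatVertex,
  kConstantBufferFloatPixel,
  kConstantBufferBoolLoop,
  kConstantBufferFetch,
  kConstantBufferCount,
};

constexpr uint32_t BindingForConstantBuffer(ConstantBuffer buffer) {
  return static_cast<uint32_t>(buffer);  // Binding index == enumerator value.
}
static_assert(BindingForConstantBuffer(kConstantBufferFetch) == 4, "");
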
@ -159,31 +169,28 @@ class SpirvShaderTranslator : public ShaderTranslator {
|
|||
|
||||
// Never changed.
|
||||
kDescriptorSetSharedMemoryAndEdram,
|
||||
// Pretty rarely used and rarely changed - flow control constants.
|
||||
kDescriptorSetBoolLoopConstants,
|
||||
// May stay the same across many draws.
|
||||
kDescriptorSetSystemConstants,
|
||||
// Less frequently changed (per-material).
|
||||
kDescriptorSetFloatConstantsPixel,
|
||||
// Quite frequently changed (for one object drawn multiple times, for
|
||||
// instance - may contain projection matrices).
|
||||
kDescriptorSetFloatConstantsVertex,
|
||||
// Very frequently changed, especially for UI draws, and for models drawn in
|
||||
// multiple parts - contains vertex and texture fetch constants.
|
||||
kDescriptorSetFetchConstants,
|
||||
// Changed in case of changes in the data.
|
||||
kDescriptorSetConstants,
|
||||
|
||||
// Mutable part of the pipeline layout:
|
||||
kDescriptorSetMutableLayoutsStart,
|
||||
|
||||
// Rarely used at all, but may be changed at an unpredictable rate when
|
||||
// vertex textures are used.
|
||||
kDescriptorSetSamplersVertex = kDescriptorSetMutableLayoutsStart,
|
||||
kDescriptorSetTexturesVertex,
|
||||
// vertex textures are used (for example, for bones of an object, which may
|
||||
// consist of multiple draw commands with different materials).
|
||||
kDescriptorSetTexturesVertex = kDescriptorSetMutableLayoutsStart,
|
||||
// Per-material textures.
|
||||
kDescriptorSetSamplersPixel,
|
||||
kDescriptorSetTexturesPixel,
|
||||
|
||||
kDescriptorSetCount,
|
||||
};
|
||||
static_assert(
|
||||
kDescriptorSetCount <= 4,
|
||||
"The number of descriptor sets used by translated shaders must be within "
|
||||
"the minimum Vulkan maxBoundDescriptorSets requirement of 4, which is "
|
||||
"the limit on most GPUs used in Android devices - Arm Mali, Imagination "
|
||||
"PowerVR, Qualcomm Adreno 6xx and older, as well as on old PC Nvidia "
|
||||
"drivers");
|
||||
|
||||
// "Xenia Emulator Microcode Translator".
|
||||
// https://github.com/KhronosGroup/SPIRV-Headers/blob/c43a43c7cc3af55910b9bec2a71e3e8a622443cf/include/spirv/spir-v.xml#L79
|
||||
|
@ -522,6 +529,8 @@ class SpirvShaderTranslator : public ShaderTranslator {
|
|||
spv::Id type_float_vectors_[4];
|
||||
};
|
||||
|
||||
spv::Id type_interpolators_;
|
||||
|
||||
spv::Id const_int_0_;
|
||||
spv::Id const_int4_0_;
|
||||
spv::Id const_uint_0_;
|
||||
|
@ -582,11 +591,12 @@ class SpirvShaderTranslator : public ShaderTranslator {
|
|||
// PS, only when needed - bool.
|
||||
spv::Id input_front_facing_;
|
||||
|
||||
// In vertex or tessellation evaluation shaders - outputs, always
|
||||
// xenos::kMaxInterpolators.
|
||||
// In pixel shaders - inputs, min(xenos::kMaxInterpolators, register_count()).
|
||||
spv::Id input_output_interpolators_[xenos::kMaxInterpolators];
|
||||
static const std::string kInterpolatorNamePrefix;
|
||||
// VS output or PS input, only when needed - type_interpolators_.
|
||||
// The Qualcomm Adreno driver has strict requirements for stage linkage - if
|
||||
// this is an array in one stage, it must be an array in the other (in case of
|
||||
// Xenia, including geometry shaders); it must not be an array in one and just
|
||||
// elements in consecutive locations in another.
|
||||
spv::Id input_output_interpolators_;
|
||||
|
||||
enum OutputPerVertexMember : unsigned int {
|
||||
kOutputPerVertexMemberPosition,
|
||||
|
|
|
@ -2573,10 +2573,10 @@ size_t SpirvShaderTranslator::FindOrAddSamplerBinding(
|
|||
builder_->makeSamplerType(), name.str().c_str());
|
||||
builder_->addDecoration(
|
||||
new_sampler_binding.variable, spv::DecorationDescriptorSet,
|
||||
int(is_vertex_shader() ? kDescriptorSetSamplersVertex
|
||||
: kDescriptorSetSamplersPixel));
|
||||
builder_->addDecoration(new_sampler_binding.variable, spv::DecorationBinding,
|
||||
int(new_sampler_binding_index));
|
||||
int(is_vertex_shader() ? kDescriptorSetTexturesVertex
|
||||
: kDescriptorSetTexturesPixel));
|
||||
// The binding indices will be specified later after all textures are added as
|
||||
// samplers are located after images in the descriptor set.
|
||||
if (features_.spirv_version >= spv::Spv_1_4) {
|
||||
main_interface_.push_back(new_sampler_binding.variable);
|
||||
}
|
||||
|
|
|
@ -95,8 +95,8 @@ bool TraceDump::Setup() {
|
|||
// Create the emulator but don't initialize so we can setup the window.
|
||||
emulator_ = std::make_unique<Emulator>("", "", "", "");
|
||||
X_STATUS result = emulator_->Setup(
|
||||
nullptr, nullptr, nullptr, [this]() { return CreateGraphicsSystem(); },
|
||||
nullptr);
|
||||
nullptr, nullptr, false, nullptr,
|
||||
[this]() { return CreateGraphicsSystem(); }, nullptr);
|
||||
if (XFAILED(result)) {
|
||||
XELOGE("Failed to setup emulator: {:08X}", result);
|
||||
return false;
|
||||
|
|
|
@ -125,7 +125,7 @@ bool TraceViewer::Setup() {
|
|||
// Create the emulator but don't initialize so we can setup the window.
|
||||
emulator_ = std::make_unique<Emulator>("", "", "", "");
|
||||
X_STATUS result = emulator_->Setup(
|
||||
window_.get(), nullptr, nullptr,
|
||||
window_.get(), nullptr, false, nullptr,
|
||||
[this]() { return CreateGraphicsSystem(); }, nullptr);
|
||||
if (XFAILED(result)) {
|
||||
XELOGE("Failed to setup emulator: {:08X}", result);
|
||||
|
|
|
@ -34,7 +34,6 @@ project("xenia-gpu-vulkan-trace-viewer")
|
|||
"xenia-base",
|
||||
"xenia-core",
|
||||
"xenia-cpu",
|
||||
"xenia-cpu-backend-x64",
|
||||
"xenia-gpu",
|
||||
"xenia-gpu-vulkan",
|
||||
"xenia-hid",
|
||||
|
@ -66,6 +65,11 @@ project("xenia-gpu-vulkan-trace-viewer")
|
|||
"../../ui/windowed_app_main_"..platform_suffix..".cc",
|
||||
})
|
||||
|
||||
filter("architecture:x86_64")
|
||||
links({
|
||||
"xenia-cpu-backend-x64",
|
||||
})
|
||||
|
||||
filter("platforms:Linux")
|
||||
links({
|
||||
"X11",
|
||||
|
@ -95,7 +99,6 @@ project("xenia-gpu-vulkan-trace-dump")
|
|||
"xenia-base",
|
||||
"xenia-core",
|
||||
"xenia-cpu",
|
||||
"xenia-cpu-backend-x64",
|
||||
"xenia-gpu",
|
||||
"xenia-gpu-vulkan",
|
||||
"xenia-hid",
|
||||
|
@ -126,6 +129,11 @@ project("xenia-gpu-vulkan-trace-dump")
|
|||
"../../base/console_app_main_"..platform_suffix..".cc",
|
||||
})
|
||||
|
||||
filter("architecture:x86_64")
|
||||
links({
|
||||
"xenia-cpu-backend-x64",
|
||||
})
|
||||
|
||||
filter("platforms:Linux")
|
||||
links({
|
||||
"X11",
|
||||
|
|
|
@ -36,7 +36,7 @@
|
|||
#include "xenia/gpu/vulkan/vulkan_texture_cache.h"
|
||||
#include "xenia/gpu/xenos.h"
|
||||
#include "xenia/kernel/kernel_state.h"
|
||||
#include "xenia/ui/vulkan/single_type_descriptor_set_allocator.h"
|
||||
#include "xenia/ui/vulkan/linked_type_descriptor_set_allocator.h"
|
||||
#include "xenia/ui/vulkan/vulkan_presenter.h"
|
||||
#include "xenia/ui/vulkan/vulkan_provider.h"
|
||||
#include "xenia/ui/vulkan/vulkan_upload_buffer_pool.h"
|
||||
|
@ -49,10 +49,6 @@ class VulkanCommandProcessor : public CommandProcessor {
|
|||
public:
|
||||
// Single-descriptor layouts for use within a single frame.
|
||||
enum class SingleTransientDescriptorLayout {
|
||||
kUniformBufferGuestVertex,
|
||||
kUniformBufferFragment,
|
||||
kUniformBufferGuestShader,
|
||||
kUniformBufferSystemConstants,
|
||||
kUniformBufferCompute,
|
||||
kStorageBufferCompute,
|
||||
kCount,
|
||||
|
@ -231,9 +227,9 @@ class VulkanCommandProcessor : public CommandProcessor {
|
|||
VkDescriptorSet& descriptor_set_out);
|
||||
|
||||
// The returned reference is valid until a cache clear.
|
||||
VkDescriptorSetLayout GetTextureDescriptorSetLayout(bool is_samplers,
|
||||
bool is_vertex,
|
||||
size_t binding_count);
|
||||
VkDescriptorSetLayout GetTextureDescriptorSetLayout(bool is_vertex,
|
||||
size_t texture_count,
|
||||
size_t sampler_count);
|
||||
// The returned reference is valid until a cache clear.
|
||||
const VulkanPipelineCache::PipelineLayoutProvider* GetPipelineLayout(
|
||||
size_t texture_count_pixel, size_t sampler_count_pixel,
|
||||
|
@ -298,12 +294,11 @@ class VulkanCommandProcessor : public CommandProcessor {
|
|||
union TextureDescriptorSetLayoutKey {
|
||||
uint32_t key;
|
||||
struct {
|
||||
// 0 - sampled image descriptors, 1 - sampler descriptors.
|
||||
uint32_t is_samplers : 1;
|
||||
// If texture and sampler counts are both 0, use
|
||||
// descriptor_set_layout_empty_ instead as these are owning references.
|
||||
uint32_t texture_count : 16;
|
||||
uint32_t sampler_count : 15;
|
||||
uint32_t is_vertex : 1;
|
||||
// For 0, use descriptor_set_layout_empty_ instead as these are owning
|
||||
// references.
|
||||
uint32_t binding_count : 30;
|
||||
};
|
||||
|
||||
TextureDescriptorSetLayoutKey() : key(0) {
|
||||
|
@ -354,40 +349,26 @@ class VulkanCommandProcessor : public CommandProcessor {
|
|||
explicit PipelineLayout(
|
||||
VkPipelineLayout pipeline_layout,
|
||||
VkDescriptorSetLayout descriptor_set_layout_textures_vertex_ref,
|
||||
VkDescriptorSetLayout descriptor_set_layout_samplers_vertex_ref,
|
||||
VkDescriptorSetLayout descriptor_set_layout_textures_pixel_ref,
|
||||
VkDescriptorSetLayout descriptor_set_layout_samplers_pixel_ref)
|
||||
VkDescriptorSetLayout descriptor_set_layout_textures_pixel_ref)
|
||||
: pipeline_layout_(pipeline_layout),
|
||||
descriptor_set_layout_textures_vertex_ref_(
|
||||
descriptor_set_layout_textures_vertex_ref),
|
||||
descriptor_set_layout_samplers_vertex_ref_(
|
||||
descriptor_set_layout_samplers_vertex_ref),
|
||||
descriptor_set_layout_textures_pixel_ref_(
|
||||
descriptor_set_layout_textures_pixel_ref),
|
||||
descriptor_set_layout_samplers_pixel_ref_(
|
||||
descriptor_set_layout_samplers_pixel_ref) {}
|
||||
descriptor_set_layout_textures_pixel_ref) {}
|
||||
VkPipelineLayout GetPipelineLayout() const override {
|
||||
return pipeline_layout_;
|
||||
}
|
||||
VkDescriptorSetLayout descriptor_set_layout_textures_vertex_ref() const {
|
||||
return descriptor_set_layout_textures_vertex_ref_;
|
||||
}
|
||||
VkDescriptorSetLayout descriptor_set_layout_samplers_vertex_ref() const {
|
||||
return descriptor_set_layout_samplers_vertex_ref_;
|
||||
}
|
||||
VkDescriptorSetLayout descriptor_set_layout_textures_pixel_ref() const {
|
||||
return descriptor_set_layout_textures_pixel_ref_;
|
||||
}
|
||||
VkDescriptorSetLayout descriptor_set_layout_samplers_pixel_ref() const {
|
||||
return descriptor_set_layout_samplers_pixel_ref_;
|
||||
}
|
||||
|
||||
private:
|
||||
VkPipelineLayout pipeline_layout_;
|
||||
VkDescriptorSetLayout descriptor_set_layout_textures_vertex_ref_;
|
||||
VkDescriptorSetLayout descriptor_set_layout_samplers_vertex_ref_;
|
||||
VkDescriptorSetLayout descriptor_set_layout_textures_pixel_ref_;
|
||||
VkDescriptorSetLayout descriptor_set_layout_samplers_pixel_ref_;
|
||||
};
|
||||
|
||||
struct UsedSingleTransientDescriptor {
|
||||
|
@ -458,16 +439,20 @@ class VulkanCommandProcessor : public CommandProcessor {
|
|||
uint32_t used_texture_mask);
|
||||
bool UpdateBindings(const VulkanShader* vertex_shader,
|
||||
const VulkanShader* pixel_shader);
|
||||
// Allocates a descriptor set and fills the VkWriteDescriptorSet structure.
|
||||
// The descriptor set layout must be the one for the given is_samplers,
|
||||
// is_vertex, binding_count (from GetTextureDescriptorSetLayout - may be
|
||||
// Allocates a descriptor set and fills one or two VkWriteDescriptorSet
|
||||
// structure instances (for images and samplers).
|
||||
// The descriptor set layout must be the one for the given is_vertex,
|
||||
// texture_count, sampler_count (from GetTextureDescriptorSetLayout - may be
|
||||
// already available at the moment of the call, no need to locate it again).
|
||||
// Returns whether the allocation was successful.
|
||||
bool WriteTransientTextureBindings(
|
||||
bool is_samplers, bool is_vertex, uint32_t binding_count,
|
||||
// Returns how many VkWriteDescriptorSet structure instances have been
|
||||
// written, or 0 if there was a failure to allocate the descriptor set or no
|
||||
// bindings were requested.
|
||||
uint32_t WriteTransientTextureBindings(
|
||||
bool is_vertex, uint32_t texture_count, uint32_t sampler_count,
|
||||
VkDescriptorSetLayout descriptor_set_layout,
|
||||
const VkDescriptorImageInfo* image_info,
|
||||
VkWriteDescriptorSet& write_descriptor_set_out);
|
||||
const VkDescriptorImageInfo* texture_image_info,
|
||||
const VkDescriptorImageInfo* sampler_image_info,
|
||||
VkWriteDescriptorSet* descriptor_set_writes_out);
|
||||
|
||||
bool device_lost_ = false;
|
||||
|
||||
|
@ -530,6 +515,7 @@ class VulkanCommandProcessor : public CommandProcessor {
|
|||
VkDescriptorSetLayout descriptor_set_layout_empty_ = VK_NULL_HANDLE;
|
||||
VkDescriptorSetLayout descriptor_set_layout_shared_memory_and_edram_ =
|
||||
VK_NULL_HANDLE;
|
||||
VkDescriptorSetLayout descriptor_set_layout_constants_ = VK_NULL_HANDLE;
|
||||
std::array<VkDescriptorSetLayout,
|
||||
size_t(SingleTransientDescriptorLayout::kCount)>
|
||||
descriptor_set_layouts_single_transient_{};
|
||||
|
@ -543,19 +529,27 @@ class VulkanCommandProcessor : public CommandProcessor {
|
|||
PipelineLayoutKey::Hasher>
|
||||
pipeline_layouts_;
|
||||
|
||||
ui::vulkan::SingleTypeDescriptorSetAllocator
|
||||
// No specific reason for 32768, just the "too much" descriptor count from
|
||||
// Direct3D 12 PIX warnings.
|
||||
static constexpr uint32_t kLinkedTypeDescriptorPoolSetCount = 32768;
|
||||
static const VkDescriptorPoolSize kDescriptorPoolSizeUniformBuffer;
|
||||
static const VkDescriptorPoolSize kDescriptorPoolSizeStorageBuffer;
|
||||
static const VkDescriptorPoolSize kDescriptorPoolSizeTextures[2];
|
||||
ui::vulkan::LinkedTypeDescriptorSetAllocator
|
||||
transient_descriptor_allocator_uniform_buffer_;
|
||||
ui::vulkan::SingleTypeDescriptorSetAllocator
|
||||
ui::vulkan::LinkedTypeDescriptorSetAllocator
|
||||
transient_descriptor_allocator_storage_buffer_;
|
||||
std::deque<UsedSingleTransientDescriptor> single_transient_descriptors_used_;
|
||||
std::array<std::vector<VkDescriptorSet>,
|
||||
size_t(SingleTransientDescriptorLayout::kCount)>
|
||||
single_transient_descriptors_free_;
|
||||
// <Usage frame, set>.
|
||||
std::deque<std::pair<uint64_t, VkDescriptorSet>>
|
||||
constants_transient_descriptors_used_;
|
||||
std::vector<VkDescriptorSet> constants_transient_descriptors_free_;
|
||||
|
||||
ui::vulkan::SingleTypeDescriptorSetAllocator
|
||||
transient_descriptor_allocator_sampled_image_;
|
||||
ui::vulkan::SingleTypeDescriptorSetAllocator
|
||||
transient_descriptor_allocator_sampler_;
|
||||
ui::vulkan::LinkedTypeDescriptorSetAllocator
|
||||
transient_descriptor_allocator_textures_;
|
||||
std::deque<UsedTextureTransientDescriptorSet>
|
||||
texture_transient_descriptor_sets_used_;
|
||||
std::unordered_map<TextureDescriptorSetLayoutKey,
|
||||
|
@ -701,6 +695,11 @@ class VulkanCommandProcessor : public CommandProcessor {
|
|||
|
||||
// Pipeline layout of the current guest graphics pipeline.
|
||||
const PipelineLayout* current_guest_graphics_pipeline_layout_;
|
||||
VkDescriptorBufferInfo current_constant_buffer_infos_
|
||||
[SpirvShaderTranslator::kConstantBufferCount];
|
||||
// Whether up-to-date data has been written to constant (uniform) buffers, and
|
||||
// the buffer infos in current_constant_buffer_infos_ point to them.
|
||||
uint32_t current_constant_buffers_up_to_date_;
|
||||
VkDescriptorSet current_graphics_descriptor_sets_
|
||||
[SpirvShaderTranslator::kDescriptorSetCount];
|
||||
// Whether descriptor sets in current_graphics_descriptor_sets_ point to
|
||||
|
|
|
@ -661,6 +661,12 @@ void VulkanRenderTargetCache::Shutdown(bool from_destructor) {
|
|||
const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider.dfn();
|
||||
VkDevice device = provider.device();
|
||||
|
||||
// Destroy all render targets before the descriptor set pool is destroyed -
|
||||
// may happen if shutting down the VulkanRenderTargetCache by destroying it,
|
||||
// so ShutdownCommon is called by the RenderTargetCache destructor, when it's
|
||||
// already too late.
|
||||
DestroyAllRenderTargets(true);
|
||||
|
||||
for (const auto& dump_pipeline_pair : dump_pipelines_) {
|
||||
// May be null to prevent recreation attempts.
|
||||
if (dump_pipeline_pair.second != VK_NULL_HANDLE) {
|
||||
|
|
|
@ -647,7 +647,7 @@ class VulkanRenderTargetCache final : public RenderTargetCache {
|
|||
TransferInvocation(const Transfer& transfer,
|
||||
const TransferShaderKey& shader_key)
|
||||
: transfer(transfer), shader_key(shader_key) {}
|
||||
bool operator<(const TransferInvocation& other_invocation) {
|
||||
bool operator<(const TransferInvocation& other_invocation) const {
|
||||
// TODO(Triang3l): See if it may be better to sort by the source in the
|
||||
// first place, especially when reading the same data multiple times (like
|
||||
// to write the stencil bits after depth) for better read locality.
|
||||
|
@ -784,7 +784,7 @@ class VulkanRenderTargetCache final : public RenderTargetCache {
|
|||
DumpInvocation(const ResolveCopyDumpRectangle& rectangle,
|
||||
const DumpPipelineKey& pipeline_key)
|
||||
: rectangle(rectangle), pipeline_key(pipeline_key) {}
|
||||
bool operator<(const DumpInvocation& other_invocation) {
|
||||
bool operator<(const DumpInvocation& other_invocation) const {
|
||||
// Sort by the pipeline key primarily to reduce pipeline state (context)
|
||||
// switches.
|
||||
if (pipeline_key != other_invocation.pipeline_key) {
|
||||
|
|
|
@ -0,0 +1,415 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2022 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/

#include "xenia/ui/vulkan/linked_type_descriptor_set_allocator.h"

#include <algorithm>
#include <iterator>
#include <utility>

#include "xenia/base/assert.h"
#include "xenia/base/logging.h"
#include "xenia/ui/vulkan/vulkan_util.h"

namespace xe {
namespace ui {
namespace vulkan {

void LinkedTypeDescriptorSetAllocator::Reset() {
const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider_.dfn();
VkDevice device = provider_.device();
ui::vulkan::util::DestroyAndNullHandle(dfn.vkDestroyDescriptorPool, device,
page_usable_latest_.pool);
page_usable_latest_.descriptors_remaining.reset();
for (const std::pair<const uint32_t, Page>& page_pair : pages_usable_) {
dfn.vkDestroyDescriptorPool(device, page_pair.second.pool, nullptr);
}
pages_usable_.clear();
for (VkDescriptorPool pool : pages_full_) {
dfn.vkDestroyDescriptorPool(device, pool, nullptr);
}
pages_full_.clear();
}

VkDescriptorSet LinkedTypeDescriptorSetAllocator::Allocate(
VkDescriptorSetLayout descriptor_set_layout,
const VkDescriptorPoolSize* descriptor_counts,
uint32_t descriptor_type_count) {
assert_not_zero(descriptor_type_count);
#ifndef NDEBUG
for (uint32_t i = 0; i < descriptor_type_count; ++i) {
const VkDescriptorPoolSize& descriptor_count_for_type =
descriptor_counts[i];
assert_not_zero(descriptor_count_for_type.descriptorCount);
for (uint32_t j = 0; j < i; ++j) {
assert_true(descriptor_counts[j].type != descriptor_count_for_type.type);
}
}
#endif

const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider_.dfn();
VkDevice device = provider_.device();

VkDescriptorSetAllocateInfo descriptor_set_allocate_info;
descriptor_set_allocate_info.sType =
VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO;
descriptor_set_allocate_info.pNext = nullptr;
descriptor_set_allocate_info.descriptorSetCount = 1;
descriptor_set_allocate_info.pSetLayouts = &descriptor_set_layout;
VkDescriptorSet descriptor_set;

// Check if more descriptors have been requested than a page can hold, or
// descriptors of types not provided by this allocator, and if that's the
// case, create a dedicated pool for this allocation.
bool dedicated_descriptor_pool_needed = false;
for (uint32_t i = 0; i < descriptor_type_count; ++i) {
const VkDescriptorPoolSize& descriptor_count_for_type =
descriptor_counts[i];
// If the type is one that's not supported by the allocator, a dedicated
// pool is required. If it's supported, and the allocator has large enough
// pools to hold the requested number of descriptors,
// dedicated_descriptor_pool_needed will be set to false for this iteration,
// and the loop will continue. Otherwise, if that doesn't happen, a
// dedicated pool is required.
dedicated_descriptor_pool_needed = true;
for (uint32_t j = 0; j < descriptor_pool_size_count_; ++j) {
const VkDescriptorPoolSize& descriptor_pool_size =
descriptor_pool_sizes_[j];
if (descriptor_count_for_type.type != descriptor_pool_size.type) {
continue;
}
if (descriptor_count_for_type.descriptorCount <=
descriptor_pool_size.descriptorCount) {
// For this type, pages can hold enough descriptors.
dedicated_descriptor_pool_needed = false;
}
break;
}
if (dedicated_descriptor_pool_needed) {
// For at least one requested type, pages can't hold enough descriptors.
break;
}
}
if (dedicated_descriptor_pool_needed) {
VkDescriptorPoolCreateInfo dedicated_descriptor_pool_create_info;
dedicated_descriptor_pool_create_info.sType =
VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO;
dedicated_descriptor_pool_create_info.pNext = nullptr;
dedicated_descriptor_pool_create_info.flags = 0;
dedicated_descriptor_pool_create_info.maxSets = 1;
dedicated_descriptor_pool_create_info.poolSizeCount = descriptor_type_count;
dedicated_descriptor_pool_create_info.pPoolSizes = descriptor_counts;
VkDescriptorPool dedicated_descriptor_pool;
if (dfn.vkCreateDescriptorPool(
device, &dedicated_descriptor_pool_create_info, nullptr,
&dedicated_descriptor_pool) != VK_SUCCESS) {
XELOGE(
"LinkedTypeDescriptorSetAllocator: Failed to create a dedicated "
"descriptor pool for a descriptor set that is too large for a pool "
"page");
return VK_NULL_HANDLE;
}
descriptor_set_allocate_info.descriptorPool = dedicated_descriptor_pool;
if (dfn.vkAllocateDescriptorSets(device, &descriptor_set_allocate_info,
&descriptor_set) != VK_SUCCESS) {
XELOGE(
"LinkedTypeDescriptorSetAllocator: Failed to allocate descriptors in "
"a dedicated pool");
dfn.vkDestroyDescriptorPool(device, dedicated_descriptor_pool, nullptr);
return VK_NULL_HANDLE;
}
pages_full_.push_back(dedicated_descriptor_pool);
return descriptor_set;
}

// Try allocating from the latest page an allocation has happened from, to
// avoid detaching from the map and re-attaching for every allocation.
if (page_usable_latest_.pool != VK_NULL_HANDLE) {
assert_not_zero(page_usable_latest_.descriptor_sets_remaining);
bool allocate_from_latest_page = true;
bool latest_page_becomes_full =
page_usable_latest_.descriptor_sets_remaining == 1;
for (uint32_t i = 0; i < descriptor_type_count; ++i) {
const VkDescriptorPoolSize& descriptor_count_for_type =
descriptor_counts[i];
for (uint32_t j = 0; j < descriptor_pool_size_count_; ++j) {
const VkDescriptorPoolSize& descriptors_remaining_for_type =
page_usable_latest_.descriptors_remaining[j];
if (descriptor_count_for_type.type !=
descriptors_remaining_for_type.type) {
continue;
}
if (descriptor_count_for_type.descriptorCount >=
descriptors_remaining_for_type.descriptorCount) {
if (descriptor_count_for_type.descriptorCount >
descriptors_remaining_for_type.descriptorCount) {
allocate_from_latest_page = false;
break;
}
latest_page_becomes_full = true;
}
}
if (!allocate_from_latest_page) {
break;
}
}
if (allocate_from_latest_page) {
descriptor_set_allocate_info.descriptorPool = page_usable_latest_.pool;
if (dfn.vkAllocateDescriptorSets(device, &descriptor_set_allocate_info,
&descriptor_set) != VK_SUCCESS) {
descriptor_set = VK_NULL_HANDLE;
// Failed to allocate internally even though there should be enough
// space, don't try to allocate from this pool again at all.
latest_page_becomes_full = true;
}
if (latest_page_becomes_full) {
pages_full_.push_back(page_usable_latest_.pool);
page_usable_latest_.pool = VK_NULL_HANDLE;
page_usable_latest_.descriptors_remaining.reset();
} else {
--page_usable_latest_.descriptor_sets_remaining;
for (uint32_t i = 0; i < descriptor_type_count; ++i) {
const VkDescriptorPoolSize& descriptor_count_for_type =
descriptor_counts[i];
for (uint32_t j = 0; j < descriptor_pool_size_count_; ++j) {
VkDescriptorPoolSize& descriptors_remaining_for_type =
page_usable_latest_.descriptors_remaining[j];
if (descriptor_count_for_type.type !=
descriptors_remaining_for_type.type) {
continue;
}
descriptors_remaining_for_type.descriptorCount -=
descriptor_count_for_type.descriptorCount;
}
}
}
if (descriptor_set != VK_NULL_HANDLE) {
return descriptor_set;
}
}
}

// Count the maximum number of descriptors requested for any type to stop
// searching for pages once they can't satisfy this requirement.
uint32_t max_descriptors_per_type = descriptor_counts[0].descriptorCount;
for (uint32_t i = 1; i < descriptor_type_count; ++i) {
max_descriptors_per_type = std::max(max_descriptors_per_type,
descriptor_counts[i].descriptorCount);
}

// If allocating from the latest pool wasn't possible, pick any that has
// enough free space. Prefer filling pages that have the most free space as
// they can more likely be used for more allocations later.
auto page_usable_it_next = pages_usable_.rbegin();
while (page_usable_it_next != pages_usable_.rend()) {
auto page_usable_it = page_usable_it_next;
++page_usable_it_next;
if (page_usable_it->first < max_descriptors_per_type) {
// All other pages_usable_ entries have smaller maximum number of free
// descriptor for any type (it's the map key).
break;
}
// Check if the page has enough free descriptors for all requested types,
// and whether allocating the requested number of descriptors in it will
// result in the page becoming full.
bool map_page_has_sufficient_space = true;
bool map_page_becomes_full =
page_usable_it->second.descriptor_sets_remaining == 1;
for (uint32_t i = 0; i < descriptor_type_count; ++i) {
const VkDescriptorPoolSize& descriptor_count_for_type =
descriptor_counts[i];
for (uint32_t j = 0; j < descriptor_pool_size_count_; ++j) {
const VkDescriptorPoolSize& descriptors_remaining_for_type =
page_usable_it->second.descriptors_remaining[j];
if (descriptor_count_for_type.type !=
descriptors_remaining_for_type.type) {
continue;
}
if (descriptor_count_for_type.descriptorCount >=
descriptors_remaining_for_type.descriptorCount) {
if (descriptor_count_for_type.descriptorCount >
descriptors_remaining_for_type.descriptorCount) {
map_page_has_sufficient_space = false;
break;
}
map_page_becomes_full = true;
}
}
if (!map_page_has_sufficient_space) {
break;
}
}
if (!map_page_has_sufficient_space) {
// Even though the coarse (maximum number of descriptors for any type)
// check has passed, for the exact types requested this page doesn't have
// sufficient space - try another one.
continue;
}
// Remove the page from the map unconditionally - in case of a successful
// allocation, it will have a different number of free descriptors for
// different types, thus potentially a new map key (but it will also become
// page_usable_latest_ instead even), or will become full, and in case of a
// failure to allocate internally even though there still should be enough
// space, it should never be allocated from again.
Page map_page = std::move(page_usable_it->second);
// Convert the reverse iterator to a forward iterator for erasing.
pages_usable_.erase(std::next(page_usable_it).base());
descriptor_set_allocate_info.descriptorPool = map_page.pool;
if (dfn.vkAllocateDescriptorSets(device, &descriptor_set_allocate_info,
&descriptor_set) != VK_SUCCESS) {
descriptor_set = VK_NULL_HANDLE;
// Failed to allocate internally even though there should be enough space,
// don't try to allocate from this pool again at all.
map_page_becomes_full = true;
}
if (map_page_becomes_full) {
map_page.descriptors_remaining.reset();
pages_full_.push_back(map_page.pool);
} else {
--map_page.descriptor_sets_remaining;
for (uint32_t i = 0; i < descriptor_type_count; ++i) {
const VkDescriptorPoolSize& descriptor_count_for_type =
descriptor_counts[i];
for (uint32_t j = 0; j < descriptor_pool_size_count_; ++j) {
VkDescriptorPoolSize& descriptors_remaining_for_type =
map_page.descriptors_remaining[j];
if (descriptor_count_for_type.type !=
descriptors_remaining_for_type.type) {
continue;
}
descriptors_remaining_for_type.descriptorCount -=
descriptor_count_for_type.descriptorCount;
}
}
// Move the latest page that allocation couldn't be done in to the usable
// pages to replace it with the new one.
if (page_usable_latest_.pool != VK_NULL_HANDLE) {
// Calculate the map key (the maximum number of remaining descriptors of
// any type).
uint32_t latest_page_max_descriptors_remaining =
page_usable_latest_.descriptors_remaining[0].descriptorCount;
for (uint32_t i = 1; i < descriptor_pool_size_count_; ++i) {
latest_page_max_descriptors_remaining = std::max(
latest_page_max_descriptors_remaining,
page_usable_latest_.descriptors_remaining[i].descriptorCount);
}
assert_not_zero(latest_page_max_descriptors_remaining);
pages_usable_.emplace(latest_page_max_descriptors_remaining,
std::move(page_usable_latest_));
}
page_usable_latest_ = std::move(map_page);
}
if (descriptor_set != VK_NULL_HANDLE) {
return descriptor_set;
}
}

// Try allocating from a new page.
// See if the new page has instantly become full.
bool new_page_becomes_full = descriptor_sets_per_page_ == 1;
for (uint32_t i = 0; !new_page_becomes_full && i < descriptor_type_count;
++i) {
const VkDescriptorPoolSize& descriptor_count_for_type =
descriptor_counts[i];
for (uint32_t j = 0; j < descriptor_pool_size_count_; ++j) {
const VkDescriptorPoolSize& descriptors_remaining_for_type =
descriptor_pool_sizes_[j];
if (descriptor_count_for_type.type !=
descriptors_remaining_for_type.type) {
continue;
}
assert_true(descriptor_count_for_type.descriptorCount <=
descriptors_remaining_for_type.descriptorCount);
if (descriptor_count_for_type.descriptorCount >=
descriptors_remaining_for_type.descriptorCount) {
new_page_becomes_full = true;
break;
}
}
}
// Allocate from a new page. However, if the new page becomes full
// immediately, create a dedicated pool instead for the exact number of
// descriptors not to leave any unused space in the pool.
VkDescriptorPoolCreateInfo new_descriptor_pool_create_info;
new_descriptor_pool_create_info.sType =
VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO;
new_descriptor_pool_create_info.pNext = nullptr;
new_descriptor_pool_create_info.flags = 0;
if (new_page_becomes_full) {
new_descriptor_pool_create_info.maxSets = 1;
new_descriptor_pool_create_info.poolSizeCount = descriptor_type_count;
new_descriptor_pool_create_info.pPoolSizes = descriptor_counts;
} else {
new_descriptor_pool_create_info.maxSets = descriptor_sets_per_page_;
new_descriptor_pool_create_info.poolSizeCount = descriptor_pool_size_count_;
new_descriptor_pool_create_info.pPoolSizes = descriptor_pool_sizes_.get();
}
VkDescriptorPool new_descriptor_pool;
if (dfn.vkCreateDescriptorPool(device, &new_descriptor_pool_create_info,
nullptr, &new_descriptor_pool) != VK_SUCCESS) {
XELOGE(
"LinkedTypeDescriptorSetAllocator: Failed to create a descriptor pool");
return VK_NULL_HANDLE;
}
descriptor_set_allocate_info.descriptorPool = new_descriptor_pool;
if (dfn.vkAllocateDescriptorSets(device, &descriptor_set_allocate_info,
&descriptor_set) != VK_SUCCESS) {
XELOGE("LinkedTypeDescriptorSetAllocator: Failed to allocate descriptors");
dfn.vkDestroyDescriptorPool(device, new_descriptor_pool, nullptr);
return VK_NULL_HANDLE;
}
if (new_page_becomes_full) {
pages_full_.push_back(new_descriptor_pool);
} else {
// Move the latest page that allocation couldn't be done in to the usable
// pages to replace it with the new one.
if (page_usable_latest_.pool != VK_NULL_HANDLE) {
// Calculate the map key (the maximum number of remaining descriptors of
// any type).
uint32_t latest_page_max_descriptors_remaining =
page_usable_latest_.descriptors_remaining[0].descriptorCount;
for (uint32_t i = 1; i < descriptor_pool_size_count_; ++i) {
latest_page_max_descriptors_remaining = std::max(
latest_page_max_descriptors_remaining,
page_usable_latest_.descriptors_remaining[i].descriptorCount);
}
assert_not_zero(latest_page_max_descriptors_remaining);
pages_usable_.emplace(latest_page_max_descriptors_remaining,
std::move(page_usable_latest_));
}
page_usable_latest_.pool = new_descriptor_pool;
page_usable_latest_.descriptors_remaining =
std::unique_ptr<VkDescriptorPoolSize[]>(
new VkDescriptorPoolSize[descriptor_pool_size_count_]);
for (uint32_t i = 0; i < descriptor_pool_size_count_; ++i) {
const VkDescriptorPoolSize& descriptor_pool_size_for_type =
descriptor_pool_sizes_[i];
page_usable_latest_.descriptors_remaining[i] =
descriptor_pool_size_for_type;
for (uint32_t j = 0; j < descriptor_type_count; ++j) {
const VkDescriptorPoolSize& descriptor_count_for_type =
descriptor_counts[j];
if (descriptor_count_for_type.type !=
descriptor_pool_size_for_type.type) {
continue;
}
page_usable_latest_.descriptors_remaining[i].descriptorCount -=
descriptor_count_for_type.descriptorCount;
break;
}
}
page_usable_latest_.descriptor_sets_remaining =
descriptor_sets_per_page_ - 1;
}
return descriptor_set;
}

} // namespace vulkan
} // namespace ui
} // namespace xe
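The constructor and Allocate signatures above are in the new header, so a caller sketch can be grounded in them; everything else here (the provider, the layout handle, and all of the counts) is illustrative, not taken from the commit:

// Hypothetical usage; `provider` and `texture_set_layout` are assumed to exist.
VkDescriptorPoolSize page_sizes[] = {
    {VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, 2048},
    {VK_DESCRIPTOR_TYPE_SAMPLER, 1024},
};
xe::ui::vulkan::LinkedTypeDescriptorSetAllocator allocator(provider,
                                                           page_sizes, 2, 256);

// One set mixing both types: 4 images and 2 samplers requested together.
VkDescriptorPoolSize set_counts[] = {
    {VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, 4},
    {VK_DESCRIPTOR_TYPE_SAMPLER, 2},
};
VkDescriptorSet set = allocator.Allocate(texture_set_layout, set_counts, 2);
if (set == VK_NULL_HANDLE) {
  // Allocation failed; individual sets are never freed - the caller recycles
  // them externally and eventually calls allocator.Reset().
}

A request larger than a page, or one using a type the allocator was not initialized with, falls into the dedicated-pool path above, so the call still succeeds at the cost of an exactly-sized one-off pool.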
@ -0,0 +1,125 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2022 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/

#ifndef XENIA_UI_VULKAN_LINKED_TYPE_DESCRIPTOR_SET_ALLOCATOR_H_
#define XENIA_UI_VULKAN_LINKED_TYPE_DESCRIPTOR_SET_ALLOCATOR_H_

#include <algorithm>
#include <cstdint>
#include <cstring>
#include <map>
#include <memory>
#include <vector>

#include "xenia/base/assert.h"
#include "xenia/ui/vulkan/vulkan_provider.h"

namespace xe {
namespace ui {
namespace vulkan {

// Allocates multiple descriptors in descriptor set layouts consisting of
// descriptors of types specified during initialization.
//
// "LinkedType" means that the allocator is designed for allocating descriptor
// sets containing descriptors of multiple types together - for instance, it
// will mark the entire page as full even if no space is left in it for just one
// of the descriptor types (not all at once).
//
// The primary usage scenario for this kind of an allocator is allocating image
// and sampler descriptors in a single descriptor set if they both are actually
// used in one. It is expected that the ratio of the numbers of descriptors per
// type specified during the initialization will roughly correspond to the ratio
// of the numbers of descriptors that will actually be allocated. For instance,
// if there are approximately 2 images for each 1 sampler, it's recommended to
// make the image count per page twice the sampler count per page.
//
// If some allocations use just one type, and some use just another, completely
// independently, it's preferable to use separate allocators rather than a
// single one.
//
// This allocator is also suitable for allocating variable-length descriptor
// sets containing descriptors of just a single type.
//
// There's no way to free these descriptors within the allocator object itself,
// per-layout free lists should be used externally.
class LinkedTypeDescriptorSetAllocator {
public:
// Multiple descriptor sizes for the same descriptor type, and zero sizes, are
// not allowed.
explicit LinkedTypeDescriptorSetAllocator(
const ui::vulkan::VulkanProvider& provider,
const VkDescriptorPoolSize* descriptor_sizes,
uint32_t descriptor_size_count, uint32_t descriptor_sets_per_page)
    : provider_(provider),
      descriptor_pool_sizes_(new VkDescriptorPoolSize[descriptor_size_count]),
      descriptor_pool_size_count_(descriptor_size_count),
      descriptor_sets_per_page_(descriptor_sets_per_page) {
assert_not_zero(descriptor_size_count);
assert_not_zero(descriptor_sets_per_page_);
#ifndef NDEBUG
for (uint32_t i = 0; i < descriptor_size_count; ++i) {
const VkDescriptorPoolSize& descriptor_size = descriptor_sizes[i];
assert_not_zero(descriptor_size.descriptorCount);
for (uint32_t j = 0; j < i; ++j) {
assert_true(descriptor_sizes[j].type != descriptor_size.type);
}
}
#endif
std::memcpy(descriptor_pool_sizes_.get(), descriptor_sizes,
sizeof(VkDescriptorPoolSize) * descriptor_size_count);
}
LinkedTypeDescriptorSetAllocator(
const LinkedTypeDescriptorSetAllocator& allocator) = delete;
LinkedTypeDescriptorSetAllocator& operator=(
const LinkedTypeDescriptorSetAllocator& allocator) = delete;
~LinkedTypeDescriptorSetAllocator() { Reset(); }

void Reset();

VkDescriptorSet Allocate(VkDescriptorSetLayout descriptor_set_layout,
                         const VkDescriptorPoolSize* descriptor_counts,
                         uint32_t descriptor_type_count);

private:
struct Page {
VkDescriptorPool pool;
std::unique_ptr<VkDescriptorPoolSize[]> descriptors_remaining;
uint32_t descriptor_sets_remaining;
};

const ui::vulkan::VulkanProvider& provider_;

std::unique_ptr<VkDescriptorPoolSize[]> descriptor_pool_sizes_;
uint32_t descriptor_pool_size_count_;
uint32_t descriptor_sets_per_page_;

std::vector<VkDescriptorPool> pages_full_;
// Because allocations must be contiguous, overflow may happen even if a page
// still has free descriptors, so multiple pages may have free space.
// To avoid removing and re-adding the page to the map that keeps them sorted
// (the key is the maximum number of free descriptors remaining across all
// types - and lookups need to be made with the maximum of the requested
// number of descriptors across all types since it's pointless to check the
// pages that can't even potentially fit the largest amount of descriptors of
// a requested type, and unlike using the minimum as the key, this doesn't
// degenerate if, for example, 0 descriptors are requested for some type - and
// it changes at every allocation from a page), instead of always looking for
// a free space in the map, maintaining one page outside the map, and
// allocation attempts will be made from that page first.
std::multimap<uint32_t, Page> pages_usable_;
// Doesn't exist if page_usable_latest_.pool == VK_NULL_HANDLE.
Page page_usable_latest_ = {};
};

} // namespace vulkan
} // namespace ui
} // namespace xe

#endif // XENIA_UI_VULKAN_LINKED_TYPE_DESCRIPTOR_SET_ALLOCATOR_H_
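The pages_usable_ comment above describes the key as the maximum number of free descriptors remaining across all types: a lookup made with the maximum requested count per type can then skip every page that cannot possibly fit the request, with an exact per-type check still following. A simplified, self-contained sketch of that coarse filter (the real Allocate iterates from the largest key to prefer the emptiest page; the two types here are illustrative):

#include <algorithm>
#include <cstdint>
#include <map>

struct PageSketch {  // illustrative, mirrors the Page idea with two fixed types
  uint32_t free_images;
  uint32_t free_samplers;
};

uint32_t PageKey(const PageSketch& page) {
  // Key = maximum free count of any type.
  return std::max(page.free_images, page.free_samplers);
}

// Returns a page that might fit the request, or nullptr. A page whose key is
// below the largest per-type request can never fit it, whatever the types.
PageSketch* FindCandidate(std::multimap<uint32_t, PageSketch>& pages,
                          uint32_t request_images, uint32_t request_samplers) {
  uint32_t needed = std::max(request_images, request_samplers);
  auto it = pages.lower_bound(needed);  // first page whose key is >= needed
  return it != pages.end() ? &it->second : nullptr;
}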
@ -1,216 +0,0 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2022 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/

#include "xenia/ui/vulkan/single_type_descriptor_set_allocator.h"

#include "xenia/base/logging.h"
#include "xenia/ui/vulkan/vulkan_util.h"

namespace xe {
namespace ui {
namespace vulkan {

void SingleTypeDescriptorSetAllocator::Reset() {
const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider_.dfn();
VkDevice device = provider_.device();
ui::vulkan::util::DestroyAndNullHandle(dfn.vkDestroyDescriptorPool, device,
page_usable_latest_.pool);
for (const std::pair<uint32_t, Page>& page_pair : pages_usable_) {
dfn.vkDestroyDescriptorPool(device, page_pair.second.pool, nullptr);
}
pages_usable_.clear();
for (VkDescriptorPool pool : pages_full_) {
dfn.vkDestroyDescriptorPool(device, pool, nullptr);
}
pages_full_.clear();
}

VkDescriptorSet SingleTypeDescriptorSetAllocator::Allocate(
VkDescriptorSetLayout descriptor_set_layout, uint32_t descriptor_count) {
assert_not_zero(descriptor_count);
if (descriptor_count == 0) {
return VK_NULL_HANDLE;
}

const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider_.dfn();
VkDevice device = provider_.device();

VkDescriptorSetAllocateInfo descriptor_set_allocate_info;
descriptor_set_allocate_info.sType =
VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO;
descriptor_set_allocate_info.pNext = nullptr;
descriptor_set_allocate_info.descriptorSetCount = 1;
descriptor_set_allocate_info.pSetLayouts = &descriptor_set_layout;
VkDescriptorSet descriptor_set;

if (descriptor_count > descriptor_pool_size_.descriptorCount) {
// Can't allocate in the pool, need a dedicated allocation.
VkDescriptorPoolSize dedicated_descriptor_pool_size;
dedicated_descriptor_pool_size.type = descriptor_pool_size_.type;
dedicated_descriptor_pool_size.descriptorCount = descriptor_count;
VkDescriptorPoolCreateInfo dedicated_descriptor_pool_create_info;
dedicated_descriptor_pool_create_info.sType =
VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO;
dedicated_descriptor_pool_create_info.pNext = nullptr;
dedicated_descriptor_pool_create_info.flags = 0;
dedicated_descriptor_pool_create_info.maxSets = 1;
dedicated_descriptor_pool_create_info.poolSizeCount = 1;
dedicated_descriptor_pool_create_info.pPoolSizes =
&dedicated_descriptor_pool_size;
VkDescriptorPool dedicated_descriptor_pool;
if (dfn.vkCreateDescriptorPool(
device, &dedicated_descriptor_pool_create_info, nullptr,
&dedicated_descriptor_pool) != VK_SUCCESS) {
XELOGE(
"SingleTypeDescriptorSetAllocator: Failed to create a dedicated pool "
"for {} descriptors",
dedicated_descriptor_pool_size.descriptorCount);
return VK_NULL_HANDLE;
}
descriptor_set_allocate_info.descriptorPool = dedicated_descriptor_pool;
if (dfn.vkAllocateDescriptorSets(device, &descriptor_set_allocate_info,
&descriptor_set) != VK_SUCCESS) {
XELOGE(
"SingleTypeDescriptorSetAllocator: Failed to allocate {} descriptors "
"in a dedicated pool",
descriptor_count);
dfn.vkDestroyDescriptorPool(device, dedicated_descriptor_pool, nullptr);
return VK_NULL_HANDLE;
}
pages_full_.push_back(dedicated_descriptor_pool);
return descriptor_set;
}

// Try allocating from the latest page an allocation has happened from, to
// avoid detaching from the map and re-attaching for every allocation.
if (page_usable_latest_.pool != VK_NULL_HANDLE) {
assert_not_zero(page_usable_latest_.descriptors_remaining);
assert_not_zero(page_usable_latest_.descriptor_sets_remaining);
if (page_usable_latest_.descriptors_remaining >= descriptor_count) {
descriptor_set_allocate_info.descriptorPool = page_usable_latest_.pool;
if (dfn.vkAllocateDescriptorSets(device, &descriptor_set_allocate_info,
&descriptor_set) == VK_SUCCESS) {
page_usable_latest_.descriptors_remaining -= descriptor_count;
--page_usable_latest_.descriptor_sets_remaining;
if (!page_usable_latest_.descriptors_remaining ||
!page_usable_latest_.descriptor_sets_remaining) {
pages_full_.push_back(page_usable_latest_.pool);
page_usable_latest_.pool = VK_NULL_HANDLE;
}
return descriptor_set;
}
// Failed to allocate internally even though there should be enough space,
// don't try to allocate from this pool again at all.
pages_full_.push_back(page_usable_latest_.pool);
page_usable_latest_.pool = VK_NULL_HANDLE;
}
}

// If allocating from the latest pool wasn't possible, pick any that has free
// space. Prefer filling pages that have the most free space as they can more
// likely be used for more allocations later.
while (!pages_usable_.empty()) {
auto page_usable_last_it = std::prev(pages_usable_.cend());
if (page_usable_last_it->second.descriptors_remaining < descriptor_count) {
// All other pages_usable_ entries have fewer free descriptors too (the
// remaining count is the map key).
break;
}
// Remove the page from the map unconditionally - in case of a successful
// allocation, it will have a different number of free descriptors, thus a
// new map key (but it will also become page_usable_latest_ instead even),
// or will become full, and in case of a failure to allocate internally even
// though there still should be enough space, it should never be allocated
// from again.
Page map_page = page_usable_last_it->second;
pages_usable_.erase(page_usable_last_it);
descriptor_set_allocate_info.descriptorPool = map_page.pool;
if (dfn.vkAllocateDescriptorSets(device, &descriptor_set_allocate_info,
&descriptor_set) != VK_SUCCESS) {
pages_full_.push_back(map_page.pool);
continue;
}
map_page.descriptors_remaining -= descriptor_count;
--map_page.descriptor_sets_remaining;
if (!map_page.descriptors_remaining ||
!map_page.descriptor_sets_remaining) {
pages_full_.push_back(map_page.pool);
} else {
if (page_usable_latest_.pool != VK_NULL_HANDLE) {
// Make the page with more free descriptors the next to allocate from.
if (map_page.descriptors_remaining >
page_usable_latest_.descriptors_remaining) {
pages_usable_.emplace(page_usable_latest_.descriptors_remaining,
page_usable_latest_);
page_usable_latest_ = map_page;
} else {
pages_usable_.emplace(map_page.descriptors_remaining, map_page);
}
} else {
page_usable_latest_ = map_page;
}
}
return descriptor_set;
}

// Try allocating from a new page.
VkDescriptorPoolCreateInfo new_descriptor_pool_create_info;
new_descriptor_pool_create_info.sType =
VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO;
new_descriptor_pool_create_info.pNext = nullptr;
new_descriptor_pool_create_info.flags = 0;
new_descriptor_pool_create_info.maxSets = descriptor_sets_per_page_;
new_descriptor_pool_create_info.poolSizeCount = 1;
new_descriptor_pool_create_info.pPoolSizes = &descriptor_pool_size_;
VkDescriptorPool new_descriptor_pool;
if (dfn.vkCreateDescriptorPool(device, &new_descriptor_pool_create_info,
nullptr, &new_descriptor_pool) != VK_SUCCESS) {
XELOGE(
"SingleTypeDescriptorSetAllocator: Failed to create a pool for {} sets "
"with {} descriptors",
descriptor_sets_per_page_, descriptor_pool_size_.descriptorCount);
return VK_NULL_HANDLE;
}
descriptor_set_allocate_info.descriptorPool = new_descriptor_pool;
if (dfn.vkAllocateDescriptorSets(device, &descriptor_set_allocate_info,
&descriptor_set) != VK_SUCCESS) {
XELOGE(
"SingleTypeDescriptorSetAllocator: Failed to allocate {} descriptors",
descriptor_count);
dfn.vkDestroyDescriptorPool(device, new_descriptor_pool, nullptr);
return VK_NULL_HANDLE;
}
Page new_page;
new_page.pool = new_descriptor_pool;
new_page.descriptors_remaining =
descriptor_pool_size_.descriptorCount - descriptor_count;
new_page.descriptor_sets_remaining = descriptor_sets_per_page_ - 1;
if (!new_page.descriptors_remaining || !new_page.descriptor_sets_remaining) {
pages_full_.push_back(new_page.pool);
} else {
if (page_usable_latest_.pool != VK_NULL_HANDLE) {
// Make the page with more free descriptors the next to allocate from.
if (new_page.descriptors_remaining >
page_usable_latest_.descriptors_remaining) {
pages_usable_.emplace(page_usable_latest_.descriptors_remaining,
page_usable_latest_);
page_usable_latest_ = new_page;
} else {
pages_usable_.emplace(new_page.descriptors_remaining, new_page);
}
} else {
page_usable_latest_ = new_page;
}
}
return descriptor_set;
}

} // namespace vulkan
} // namespace ui
} // namespace xe
@ -1,84 +0,0 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2022 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/

#ifndef XENIA_UI_VULKAN_SINGLE_TYPE_DESCRIPTOR_SET_ALLOCATOR_H_
#define XENIA_UI_VULKAN_SINGLE_TYPE_DESCRIPTOR_SET_ALLOCATOR_H_

#include <algorithm>
#include <cstdint>
#include <map>
#include <vector>

#include "xenia/base/assert.h"
#include "xenia/ui/vulkan/vulkan_provider.h"

namespace xe {
namespace ui {
namespace vulkan {

// Allocates multiple descriptors of a single type in descriptor set layouts
// consisting of descriptors of only that type. There's no way to free these
// descriptors within the SingleTypeDescriptorSetAllocator, per-layout free
// lists should be used externally.
class SingleTypeDescriptorSetAllocator {
public:
explicit SingleTypeDescriptorSetAllocator(
const ui::vulkan::VulkanProvider& provider,
VkDescriptorType descriptor_type, uint32_t descriptors_per_page,
uint32_t descriptor_sets_per_page)
    : provider_(provider),
      descriptor_sets_per_page_(descriptor_sets_per_page) {
assert_not_zero(descriptor_sets_per_page_);
descriptor_pool_size_.type = descriptor_type;
// Not allocating sets with 0 descriptors using the allocator - pointless to
// have the descriptor count below the set count.
descriptor_pool_size_.descriptorCount =
std::max(descriptors_per_page, descriptor_sets_per_page);
}
SingleTypeDescriptorSetAllocator(
const SingleTypeDescriptorSetAllocator& allocator) = delete;
SingleTypeDescriptorSetAllocator& operator=(
const SingleTypeDescriptorSetAllocator& allocator) = delete;
~SingleTypeDescriptorSetAllocator() { Reset(); }

void Reset();

VkDescriptorSet Allocate(VkDescriptorSetLayout descriptor_set_layout,
                         uint32_t descriptor_count);

private:
struct Page {
VkDescriptorPool pool;
uint32_t descriptors_remaining;
uint32_t descriptor_sets_remaining;
};

const ui::vulkan::VulkanProvider& provider_;

VkDescriptorPoolSize descriptor_pool_size_;
uint32_t descriptor_sets_per_page_;

std::vector<VkDescriptorPool> pages_full_;
// Because allocations must be contiguous, overflow may happen even if a page
// still has free descriptors, so multiple pages may have free space.
// To avoid removing and re-adding the page to the map that keeps them sorted
// (the key is the number of free descriptors remaining, and it changes at
// every allocation from a page), instead of always looking for a free space
// in the map, maintaining one page outside the map, and allocation attempts
// will be made from that page first.
std::multimap<uint32_t, Page> pages_usable_;
// Doesn't exist if page_usable_latest_.pool == VK_NULL_HANDLE.
Page page_usable_latest_ = {};
};

} // namespace vulkan
} // namespace ui
} // namespace xe

#endif // XENIA_UI_VULKAN_SINGLE_TYPE_DESCRIPTOR_SET_ALLOCATOR_H_
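The structural difference between the removed allocator and its replacement is visible in the Page structs: the single-type page tracks one remaining-descriptor counter, while the linked-type page tracks one per configured type and treats the page as unusable for a request as soon as any requested type runs out. A small illustrative contrast, not the emulator's code:

#include <cstdint>

// Illustrative contrast of the per-page bookkeeping only.
struct SingleTypePageSketch {  // removed allocator: one counter is enough
  uint32_t descriptors_remaining;
};

struct LinkedTypePageSketch {  // new allocator: one counter per linked type
  uint32_t remaining[2];       // e.g. [0] images, [1] samplers (assumed order)
};

// A linked-type page cannot serve a request if any one of the requested types
// has run out, even when the other types still have plenty of space.
bool CanFit(const LinkedTypePageSketch& page, const uint32_t (&request)[2]) {
  return request[0] <= page.remaining[0] && request[1] <= page.remaining[1];
}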
@ -1 +1 @@
Subproject commit 7eba2825887e49d3a72b30e0a7480bd427a5bab0
Subproject commit fe71eb790c7d085cd3c6a7b71a50167b4da06e69