Commit 4a008c624e by Wunk, 2024-06-23 21:44:07 +00:00 (committed via GitHub)
GPG key ID: B5690EEEBB952194 (no known key found for this signature in database)
60 changed files with 11530 additions and 73 deletions

.gitmodules (vendored, 3 lines changed)
View File

@ -85,3 +85,6 @@
[submodule "third_party/VulkanMemoryAllocator"]
path = third_party/VulkanMemoryAllocator
url = https://github.com/GPUOpen-LibrariesAndSDKs/VulkanMemoryAllocator.git
[submodule "third_party/oaknut"]
path = third_party/oaknut
url = https://github.com/merryhime/oaknut.git

View File

@ -54,7 +54,7 @@ filter("configurations:Checked")
defines({
"DEBUG",
})
filter({"configurations:Checked", "platforms:Windows"})
filter({"configurations:Checked", "platforms:Windows-*"})
buildoptions({
"/RTCsu", -- Full Run-Time Checks.
})
@ -153,7 +153,7 @@ filter("platforms:Android-*")
"log",
})
filter("platforms:Windows")
filter("platforms:Windows-*")
system("windows")
toolset("msc")
buildoptions({
@ -179,8 +179,12 @@ filter("platforms:Windows")
"_CRT_SECURE_NO_WARNINGS",
"WIN32",
"_WIN64=1",
"_AMD64=1",
})
filter("architecture:x86_64")
defines({
"_AMD64=1",
})
filter({})
linkoptions({
"/ignore:4006", -- Ignores complaints about empty obj files.
"/ignore:4221",
@ -198,7 +202,7 @@ filter("platforms:Windows")
})
-- Embed the manifest for things like dependencies and DPI awareness.
filter({"platforms:Windows", "kind:ConsoleApp or WindowedApp"})
filter({"platforms:Windows-*", "kind:ConsoleApp or WindowedApp"})
files({
"src/xenia/base/app_win32.manifest"
})
@ -228,7 +232,12 @@ workspace("xenia")
["ARCHS"] = "x86_64"
})
elseif os.istarget("windows") then
platforms({"Windows"})
platforms({"Windows-ARM64", "Windows-x86_64"})
filter("platforms:Windows-ARM64")
architecture("ARM64")
filter("platforms:Windows-x86_64")
architecture("x86_64")
filter({})
-- 10.0.15063.0: ID3D12GraphicsCommandList1::SetSamplePositions.
-- 10.0.19041.0: D3D12_HEAP_FLAG_CREATE_NOT_ZEROED.
-- 10.0.22000.0: DWMWA_WINDOW_CORNER_PREFERENCE.
@ -284,7 +293,13 @@ workspace("xenia")
include("src/xenia/apu/nop")
include("src/xenia/base")
include("src/xenia/cpu")
include("src/xenia/cpu/backend/x64")
filter("architecture:x86_64")
include("src/xenia/cpu/backend/x64")
filter("architecture:ARM64")
include("src/xenia/cpu/backend/a64")
filter({})
include("src/xenia/debug/ui")
include("src/xenia/gpu")
include("src/xenia/gpu/null")

View File

@ -32,6 +32,7 @@ project("xenia-app")
"libavcodec",
"libavutil",
"mspack",
"SDL2",
"snappy",
"xxhash",
})
@ -72,13 +73,18 @@ project("xenia-app")
"xenia-cpu-backend-x64",
})
filter("architecture:ARM64")
links({
"xenia-cpu-backend-a64",
})
-- TODO(Triang3l): The emulator itself on Android.
filter("platforms:not Android-*")
files({
"xenia_main.cc",
})
filter("platforms:Windows")
filter("platforms:Windows-*")
files({
"main_resources.rc",
})
@ -104,7 +110,7 @@ project("xenia-app")
"SDL2",
})
filter("platforms:Windows")
filter("platforms:Windows-*")
links({
"xenia-apu-xaudio2",
"xenia-gpu-d3d12",
@ -113,13 +119,13 @@ project("xenia-app")
"xenia-ui-d3d12",
})
filter({"platforms:Windows", SINGLE_LIBRARY_FILTER})
filter({"platforms:Windows-*", SINGLE_LIBRARY_FILTER})
links({
"xenia-gpu-d3d12-trace-viewer",
"xenia-ui-window-d3d12-demo",
})
filter("platforms:Windows")
filter("platforms:Windows-*")
-- Only create the .user file if it doesn't already exist.
local user_file = project_root.."/build/xenia-app.vcxproj.user"
if not os.isfile(user_file) then

View File

@ -21,8 +21,9 @@ DEFINE_bool(clock_no_scaling, false,
"Guest system time is directly pulled from host.",
"CPU");
DEFINE_bool(clock_source_raw, false,
"Use the RDTSC instruction as the time source. "
"Host CPU must support invariant TSC.",
"On x64, Use the RDTSC instruction as the time source. Requires "
"invariant TSC. "
"On a64, Use the CNTVCT_EL0 register as the time source",
"CPU");
namespace xe {
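The flag only selects where raw ticks come from (RDTSC on x64, the CNTVCT_EL0 virtual counter on a64); converting those ticks into guest time still goes through frequency scaling against host_tick_frequency_raw(). A minimal sketch of that scaling, with a hypothetical helper name (the emulator's own conversion path is not part of this diff):

#include <cstdint>

// Converts raw host ticks to 100ns units, splitting the division so large
// tick counts do not overflow 64 bits.
uint64_t RawTicksTo100ns(uint64_t ticks, uint64_t frequency) {
  const uint64_t seconds = ticks / frequency;
  const uint64_t remainder = ticks % frequency;
  return seconds * 10000000ull + remainder * 10000000ull / frequency;
}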

View File

@ -18,6 +18,8 @@
#if XE_ARCH_AMD64
#define XE_CLOCK_RAW_AVAILABLE 1
#elif XE_ARCH_ARM64
#define XE_CLOCK_RAW_AVAILABLE 1
#endif
DECLARE_bool(clock_no_scaling);

View File

@ -0,0 +1,50 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2024 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#include "xenia/base/clock.h"
#include "xenia/base/platform.h"
#if XE_ARCH_ARM64 && XE_CLOCK_RAW_AVAILABLE
#include "xenia/base/logging.h"
#ifdef _MSC_VER
#include <arm64_neon.h>
#include <intrin.h>
#else
#include <arm_neon.h>
#endif
// Wrap the per-compiler intrinsics used to read these system registers.
#if XE_COMPILER_MSVC
constexpr int32_t CNTFRQ_EL0 = ARM64_SYSREG(3, 3, 14, 0, 0);
constexpr int32_t CNTVCT_EL0 = ARM64_SYSREG(3, 3, 14, 0, 2);
#define xe_cpu_mrs(reg) _ReadStatusReg(reg)
#elif XE_COMPILER_CLANG || XE_COMPILER_GNUC
// GCC and Clang accept the architectural register names directly in `mrs`, so
// stringify the token rather than passing an encoded constant (a plain
// function cannot use #reg; stringification only works on macro parameters).
#define xe_cpu_mrs(reg)                                 \
  ([]() {                                               \
    uint64_t result;                                    \
    __asm__ volatile("mrs %0, " #reg : "=r"(result));   \
    return result;                                      \
  }())
#else
#error \
"No cpu instruction wrappers xe_cpu_mrs(CNTVCT_EL0); for current compiler implemented."
#endif
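// With these wrappers, xe_cpu_mrs(CNTVCT_EL0) reads the virtual counter: under
// MSVC it becomes _ReadStatusReg(ARM64_SYSREG(3, 3, 14, 0, 2)), and under
// Clang/GCC it emits an inline `mrs %0, CNTVCT_EL0`.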
namespace xe {
uint64_t Clock::host_tick_frequency_raw() { return xe_cpu_mrs(CNTFRQ_EL0); }
uint64_t Clock::host_tick_count_raw() { return xe_cpu_mrs(CNTVCT_EL0); }
} // namespace xe
#endif

View File

@ -36,12 +36,22 @@ LONG CALLBACK ExceptionHandlerCallback(PEXCEPTION_POINTERS ex_info) {
}
HostThreadContext thread_context;
#if XE_ARCH_AMD64
thread_context.rip = ex_info->ContextRecord->Rip;
thread_context.eflags = ex_info->ContextRecord->EFlags;
std::memcpy(thread_context.int_registers, &ex_info->ContextRecord->Rax,
sizeof(thread_context.int_registers));
std::memcpy(thread_context.xmm_registers, &ex_info->ContextRecord->Xmm0,
sizeof(thread_context.xmm_registers));
#elif XE_ARCH_ARM64
thread_context.pc = ex_info->ContextRecord->Pc;
thread_context.cpsr = ex_info->ContextRecord->Cpsr;
std::memcpy(thread_context.x, &ex_info->ContextRecord->X,
sizeof(thread_context.x));
std::memcpy(thread_context.v, &ex_info->ContextRecord->V,
sizeof(thread_context.v));
#endif
// https://msdn.microsoft.com/en-us/library/ms679331(v=vs.85).aspx
// https://msdn.microsoft.com/en-us/library/aa363082(v=vs.85).aspx
@ -78,6 +88,7 @@ LONG CALLBACK ExceptionHandlerCallback(PEXCEPTION_POINTERS ex_info) {
for (size_t i = 0; i < xe::countof(handlers_) && handlers_[i].first; ++i) {
if (handlers_[i].first(&ex, handlers_[i].second)) {
// Exception handled.
#if XE_ARCH_AMD64
ex_info->ContextRecord->Rip = thread_context.rip;
ex_info->ContextRecord->EFlags = thread_context.eflags;
uint32_t modified_register_index;
@ -98,6 +109,28 @@ LONG CALLBACK ExceptionHandlerCallback(PEXCEPTION_POINTERS ex_info) {
&thread_context.xmm_registers[modified_register_index],
sizeof(vec128_t));
}
#elif XE_ARCH_ARM64
ex_info->ContextRecord->Pc = thread_context.pc;
ex_info->ContextRecord->Cpsr = thread_context.cpsr;
uint32_t modified_register_index;
uint32_t modified_int_registers_remaining = ex.modified_x_registers();
while (xe::bit_scan_forward(modified_int_registers_remaining,
&modified_register_index)) {
modified_int_registers_remaining &=
~(UINT32_C(1) << modified_register_index);
ex_info->ContextRecord->X[modified_register_index] =
thread_context.x[modified_register_index];
}
uint32_t modified_v_registers_remaining = ex.modified_v_registers();
while (xe::bit_scan_forward(modified_v_registers_remaining,
&modified_register_index)) {
modified_v_registers_remaining &=
~(UINT32_C(1) << modified_register_index);
std::memcpy(&ex_info->ContextRecord->V[modified_register_index],
&thread_context.v[modified_register_index],
sizeof(vec128_t));
}
#endif
return EXCEPTION_CONTINUE_EXECUTION;
}
}

View File

@ -67,7 +67,7 @@ std::string HostThreadContext::GetStringFromValue(HostRegister reg,
case Arm64Register::kPc:
return hex ? string_util::to_hex_string(pc) : std::to_string(pc);
case Arm64Register::kPstate:
return hex ? string_util::to_hex_string(pstate) : std::to_string(pstate);
return hex ? string_util::to_hex_string(cpsr) : std::to_string(cpsr);
case Arm64Register::kFpsr:
return hex ? string_util::to_hex_string(fpsr) : std::to_string(fpsr);
case Arm64Register::kFpcr:

View File

@ -202,7 +202,7 @@ class HostThreadContext {
uint64_t x[31];
uint64_t sp;
uint64_t pc;
uint64_t pstate;
uint32_t cpsr;
uint32_t fpsr;
uint32_t fpcr;
vec128_t v[32];

View File

@ -11,6 +11,8 @@
#include <cstdlib>
#if XE_ARCH_AMD64
// Includes Windows headers, so it goes after platform_win.h.
#include "third_party/xbyak/xbyak/xbyak_util.h"
@ -39,3 +41,5 @@ class StartupAvxCheck {
#pragma warning(suppress : 4073)
#pragma init_seg(lib)
static StartupAvxCheck gStartupAvxCheck;
#endif

View File

@ -31,6 +31,8 @@
#if XE_ARCH_AMD64
#include <xmmintrin.h>
#elif XE_ARCH_ARM64
#include <arm64_neon.h>
#endif
namespace xe {
@ -135,10 +137,17 @@ constexpr inline uint32_t bit_count(T v) {
}
#else
#if XE_COMPILER_MSVC || XE_COMPILER_INTEL
#if XE_ARCH_AMD64
inline uint32_t bit_count(uint32_t v) { return __popcnt(v); }
inline uint32_t bit_count(uint64_t v) {
return static_cast<uint32_t>(__popcnt64(v));
}
#elif XE_ARCH_ARM64
inline uint32_t bit_count(uint32_t v) { return _CountOneBits(v); }
inline uint32_t bit_count(uint64_t v) {
return static_cast<uint32_t>(_CountOneBits64(v));
}
#endif
#elif XE_COMPILER_GCC || XE_COMPILER_CLANG
static_assert(sizeof(unsigned int) == sizeof(uint32_t));
static_assert(sizeof(unsigned long long) == sizeof(uint64_t));
@ -372,6 +381,24 @@ template <int N>
int64_t m128_i64(const __m128& v) {
return m128_i64<N>(_mm_castps_pd(v));
}
#elif XE_ARCH_ARM64
// Utilities for NEON values.
template <int N>
float m128_f32(const float32x4_t& v) {
return vgetq_lane_f32(v, N);
}
template <int N>
int32_t m128_i32(const int32x4_t& v) {
return vgetq_lane_s32(v, N);
}
template <int N>
double m128_f64(const float64x2_t& v) {
return vgetq_lane_f64(v, N);
}
template <int N>
int64_t m128_i64(const int64x2_t& v) {
return vgetq_lane_s64(v, N);
}
#endif
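// Usage sketch: on an a64 host these mirror the x64 __m128 helpers above, e.g.
// xe::m128_f32<2>(vdupq_n_f32(1.5f)) yields 1.5f and
// xe::m128_i64<0>(vdupq_n_s64(-1)) yields -1.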
// Similar to the C++ implementation of XMConvertFloatToHalf and

View File

@ -66,6 +66,14 @@
#define XE_ARCH_PPC 1
#endif
#ifdef XE_ARCH_AMD64
#define XE_HOST_ARCH_NAME "x64"
#elif XE_ARCH_ARM64
#define XE_HOST_ARCH_NAME "a64"
#elif XE_ARCH_PPC
#define XE_HOST_ARCH_NAME "ppc"
#endif
#if XE_PLATFORM_WIN32
#define WIN32_LEAN_AND_MEAN
#define NOMINMAX // Don't want windows.h including min/max macros.

View File

@ -0,0 +1,146 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2024 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#include "xenia/cpu/backend/a64/a64_assembler.h"
#include <climits>
#include "third_party/capstone/include/capstone/arm64.h"
#include "third_party/capstone/include/capstone/capstone.h"
#include "xenia/base/profiling.h"
#include "xenia/base/reset_scope.h"
#include "xenia/base/string.h"
#include "xenia/cpu/backend/a64/a64_backend.h"
#include "xenia/cpu/backend/a64/a64_code_cache.h"
#include "xenia/cpu/backend/a64/a64_emitter.h"
#include "xenia/cpu/backend/a64/a64_function.h"
#include "xenia/cpu/cpu_flags.h"
#include "xenia/cpu/hir/hir_builder.h"
#include "xenia/cpu/hir/label.h"
#include "xenia/cpu/processor.h"
namespace xe {
namespace cpu {
namespace backend {
namespace a64 {
using xe::cpu::hir::HIRBuilder;
A64Assembler::A64Assembler(A64Backend* backend)
: Assembler(backend), a64_backend_(backend), capstone_handle_(0) {
if (cs_open(CS_ARCH_ARM64, CS_MODE_LITTLE_ENDIAN, &capstone_handle_) !=
CS_ERR_OK) {
assert_always("Failed to initialize capstone");
}
cs_option(capstone_handle_, CS_OPT_SYNTAX, CS_OPT_SYNTAX_INTEL);
cs_option(capstone_handle_, CS_OPT_DETAIL, CS_OPT_OFF);
}
A64Assembler::~A64Assembler() {
// Emitter must be freed before the allocator.
emitter_.reset();
if (capstone_handle_) {
cs_close(&capstone_handle_);
}
}
bool A64Assembler::Initialize() {
if (!Assembler::Initialize()) {
return false;
}
emitter_.reset(new A64Emitter(a64_backend_));
return true;
}
void A64Assembler::Reset() {
string_buffer_.Reset();
Assembler::Reset();
}
bool A64Assembler::Assemble(GuestFunction* function, HIRBuilder* builder,
uint32_t debug_info_flags,
std::unique_ptr<FunctionDebugInfo> debug_info) {
SCOPE_profile_cpu_f("cpu");
// Reset when we leave.
auto reset_scope = xe::make_reset_scope(this);
// Lower HIR -> a64.
void* machine_code = nullptr;
size_t code_size = 0;
if (!emitter_->Emit(function, builder, debug_info_flags, debug_info.get(),
&machine_code, &code_size, &function->source_map())) {
return false;
}
// Stash generated machine code.
if (debug_info_flags & DebugInfoFlags::kDebugInfoDisasmMachineCode) {
DumpMachineCode(machine_code, code_size, function->source_map(),
&string_buffer_);
debug_info->set_machine_code_disasm(xe_strdup(string_buffer_.buffer()));
string_buffer_.Reset();
}
function->set_debug_info(std::move(debug_info));
static_cast<A64Function*>(function)->Setup(
reinterpret_cast<uint8_t*>(machine_code), code_size);
// Install into indirection table.
const uint64_t host_address = reinterpret_cast<uint64_t>(machine_code);
assert_true((host_address >> 32) == 0);
reinterpret_cast<A64CodeCache*>(backend_->code_cache())
->AddIndirection(function->address(),
static_cast<uint32_t>(host_address));
return true;
}
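// DumpMachineCode below walks the emitted buffer with capstone and interleaves
// guest addresses from the source map; its output looks roughly like this
// (addresses illustrative):
//   82012340 A0001000 add    x0, x1, x2
//            A0001004 ret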
void A64Assembler::DumpMachineCode(
void* machine_code, size_t code_size,
const std::vector<SourceMapEntry>& source_map, StringBuffer* str) {
if (source_map.empty()) {
return;
}
auto source_map_index = 0;
uint32_t next_code_offset = source_map[0].code_offset;
const uint8_t* code_ptr = reinterpret_cast<uint8_t*>(machine_code);
size_t remaining_code_size = code_size;
uint64_t address = uint64_t(machine_code);
cs_insn insn = {0};
while (remaining_code_size &&
cs_disasm_iter(capstone_handle_, &code_ptr, &remaining_code_size,
&address, &insn)) {
// Look up source offset.
auto code_offset =
uint32_t(code_ptr - reinterpret_cast<uint8_t*>(machine_code));
if (code_offset >= next_code_offset &&
source_map_index < source_map.size()) {
auto& source_map_entry = source_map[source_map_index];
str->AppendFormat("{:08X} ", source_map_entry.guest_address);
++source_map_index;
next_code_offset = source_map_index < source_map.size()
? source_map[source_map_index].code_offset
: UINT_MAX;
} else {
str->Append(" ");
}
str->AppendFormat("{:08X} {:<6} {}\n", uint32_t(insn.address),
insn.mnemonic, insn.op_str);
}
}
} // namespace a64
} // namespace backend
} // namespace cpu
} // namespace xe

View File

@ -0,0 +1,59 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2024 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#ifndef XENIA_CPU_BACKEND_A64_A64_ASSEMBLER_H_
#define XENIA_CPU_BACKEND_A64_A64_ASSEMBLER_H_
#include <memory>
#include <vector>
#include "xenia/base/string_buffer.h"
#include "xenia/cpu/backend/assembler.h"
#include "xenia/cpu/function.h"
namespace xe {
namespace cpu {
namespace backend {
namespace a64 {
class A64Backend;
class A64Emitter;
class A64Assembler : public Assembler {
public:
explicit A64Assembler(A64Backend* backend);
~A64Assembler() override;
bool Initialize() override;
void Reset() override;
bool Assemble(GuestFunction* function, hir::HIRBuilder* builder,
uint32_t debug_info_flags,
std::unique_ptr<FunctionDebugInfo> debug_info) override;
private:
void DumpMachineCode(void* machine_code, size_t code_size,
const std::vector<SourceMapEntry>& source_map,
StringBuffer* str);
private:
A64Backend* a64_backend_;
std::unique_ptr<A64Emitter> emitter_;
uintptr_t capstone_handle_;
StringBuffer string_buffer_;
};
} // namespace a64
} // namespace backend
} // namespace cpu
} // namespace xe
#endif // XENIA_CPU_BACKEND_A64_A64_ASSEMBLER_H_

View File

@ -0,0 +1,735 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2024 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#include "xenia/cpu/backend/a64/a64_backend.h"
#include <cstddef>
#include "third_party/capstone/include/capstone/arm64.h"
#include "third_party/capstone/include/capstone/capstone.h"
#include "xenia/base/exception_handler.h"
#include "xenia/base/logging.h"
#include "xenia/cpu/backend/a64/a64_assembler.h"
#include "xenia/cpu/backend/a64/a64_code_cache.h"
#include "xenia/cpu/backend/a64/a64_emitter.h"
#include "xenia/cpu/backend/a64/a64_function.h"
#include "xenia/cpu/backend/a64/a64_sequences.h"
#include "xenia/cpu/backend/a64/a64_stack_layout.h"
#include "xenia/cpu/breakpoint.h"
#include "xenia/cpu/processor.h"
#include "xenia/cpu/stack_walker.h"
DEFINE_int32(a64_extension_mask, -1,
"Allow the detection and utilization of specific instruction set "
"features.\n"
" 0 = armv8.0\n"
" 1 = LSE\n"
" 2 = F16C\n"
" -1 = Detect and utilize all possible processor features\n",
"a64");
namespace xe {
namespace cpu {
namespace backend {
namespace a64 {
using namespace oaknut::util;
class A64ThunkEmitter : public A64Emitter {
public:
A64ThunkEmitter(A64Backend* backend);
~A64ThunkEmitter() override;
HostToGuestThunk EmitHostToGuestThunk();
GuestToHostThunk EmitGuestToHostThunk();
ResolveFunctionThunk EmitResolveFunctionThunk();
private:
// The following four functions provide save/load functionality for registers.
// They assume at least StackLayout::THUNK_STACK_SIZE bytes have been
// allocated on the stack.
// Caller saved:
// Don't assume these registers will survive a subroutine call.
// x0 and v0 are not saved since they carry arg0 and the return value.
// x1-x15, x30 | v1-v7 and v16-v31
void EmitSaveVolatileRegs();
void EmitLoadVolatileRegs();
// Callee saved:
// Subroutines must preserve these registers if they intend to use them
// x19-x30 | d8-d15
void EmitSaveNonvolatileRegs();
void EmitLoadNonvolatileRegs();
};
A64Backend::A64Backend() : Backend(), code_cache_(nullptr) {
if (cs_open(CS_ARCH_ARM64, CS_MODE_LITTLE_ENDIAN, &capstone_handle_) !=
CS_ERR_OK) {
assert_always("Failed to initialize capstone");
}
cs_option(capstone_handle_, CS_OPT_SYNTAX, CS_OPT_SYNTAX_INTEL);
cs_option(capstone_handle_, CS_OPT_DETAIL, CS_OPT_ON);
cs_option(capstone_handle_, CS_OPT_SKIPDATA, CS_OPT_OFF);
}
A64Backend::~A64Backend() {
if (capstone_handle_) {
cs_close(&capstone_handle_);
}
A64Emitter::FreeConstData(emitter_data_);
ExceptionHandler::Uninstall(&ExceptionCallbackThunk, this);
}
bool A64Backend::Initialize(Processor* processor) {
if (!Backend::Initialize(processor)) {
return false;
}
auto& gprs = machine_info_.register_sets[0];
gprs.id = 0;
std::strcpy(gprs.name, "x");
gprs.types = MachineInfo::RegisterSet::INT_TYPES;
gprs.count = A64Emitter::GPR_COUNT;
auto& fprs = machine_info_.register_sets[1];
fprs.id = 1;
std::strcpy(fprs.name, "v");
fprs.types = MachineInfo::RegisterSet::FLOAT_TYPES |
MachineInfo::RegisterSet::VEC_TYPES;
fprs.count = A64Emitter::FPR_COUNT;
code_cache_ = A64CodeCache::Create();
Backend::code_cache_ = code_cache_.get();
if (!code_cache_->Initialize()) {
return false;
}
// Generate thunks used to transition between jitted code and host code.
A64ThunkEmitter thunk_emitter(this);
host_to_guest_thunk_ = thunk_emitter.EmitHostToGuestThunk();
guest_to_host_thunk_ = thunk_emitter.EmitGuestToHostThunk();
resolve_function_thunk_ = thunk_emitter.EmitResolveFunctionThunk();
// Set the code cache to use the ResolveFunction thunk for default
// indirections.
assert_zero(uint64_t(resolve_function_thunk_) & 0xFFFFFFFF00000000ull);
code_cache_->set_indirection_default(
uint32_t(uint64_t(resolve_function_thunk_)));
// Allocate some special indirections.
code_cache_->CommitExecutableRange(0x9FFF0000, 0x9FFFFFFF);
// Allocate emitter constant data.
emitter_data_ = A64Emitter::PlaceConstData();
// Setup exception callback
ExceptionHandler::Install(&ExceptionCallbackThunk, this);
return true;
}
void A64Backend::CommitExecutableRange(uint32_t guest_low,
uint32_t guest_high) {
code_cache_->CommitExecutableRange(guest_low, guest_high);
}
std::unique_ptr<Assembler> A64Backend::CreateAssembler() {
return std::make_unique<A64Assembler>(this);
}
std::unique_ptr<GuestFunction> A64Backend::CreateGuestFunction(
Module* module, uint32_t address) {
return std::make_unique<A64Function>(module, address);
}
uint64_t ReadCapstoneReg(HostThreadContext* context, arm64_reg reg) {
switch (reg) {
case ARM64_REG_X0:
return context->x[0];
case ARM64_REG_X1:
return context->x[1];
case ARM64_REG_X2:
return context->x[2];
case ARM64_REG_X3:
return context->x[3];
case ARM64_REG_X4:
return context->x[4];
case ARM64_REG_X5:
return context->x[5];
case ARM64_REG_X6:
return context->x[6];
case ARM64_REG_X7:
return context->x[7];
case ARM64_REG_X8:
return context->x[8];
case ARM64_REG_X9:
return context->x[9];
case ARM64_REG_X10:
return context->x[10];
case ARM64_REG_X11:
return context->x[11];
case ARM64_REG_X12:
return context->x[12];
case ARM64_REG_X13:
return context->x[13];
case ARM64_REG_X14:
return context->x[14];
case ARM64_REG_X15:
return context->x[15];
case ARM64_REG_X16:
return context->x[16];
case ARM64_REG_X17:
return context->x[17];
case ARM64_REG_X18:
return context->x[18];
case ARM64_REG_X19:
return context->x[19];
case ARM64_REG_X20:
return context->x[20];
case ARM64_REG_X21:
return context->x[21];
case ARM64_REG_X22:
return context->x[22];
case ARM64_REG_X23:
return context->x[23];
case ARM64_REG_X24:
return context->x[24];
case ARM64_REG_X25:
return context->x[25];
case ARM64_REG_X26:
return context->x[26];
case ARM64_REG_X27:
return context->x[27];
case ARM64_REG_X28:
return context->x[28];
case ARM64_REG_X29:
return context->x[29];
case ARM64_REG_X30:
return context->x[30];
case ARM64_REG_W0:
return uint32_t(context->x[0]);
case ARM64_REG_W1:
return uint32_t(context->x[1]);
case ARM64_REG_W2:
return uint32_t(context->x[2]);
case ARM64_REG_W3:
return uint32_t(context->x[3]);
case ARM64_REG_W4:
return uint32_t(context->x[4]);
case ARM64_REG_W5:
return uint32_t(context->x[5]);
case ARM64_REG_W6:
return uint32_t(context->x[6]);
case ARM64_REG_W7:
return uint32_t(context->x[7]);
case ARM64_REG_W8:
return uint32_t(context->x[8]);
case ARM64_REG_W9:
return uint32_t(context->x[9]);
case ARM64_REG_W10:
return uint32_t(context->x[10]);
case ARM64_REG_W11:
return uint32_t(context->x[11]);
case ARM64_REG_W12:
return uint32_t(context->x[12]);
case ARM64_REG_W13:
return uint32_t(context->x[13]);
case ARM64_REG_W14:
return uint32_t(context->x[14]);
case ARM64_REG_W15:
return uint32_t(context->x[15]);
case ARM64_REG_W16:
return uint32_t(context->x[16]);
case ARM64_REG_W17:
return uint32_t(context->x[17]);
case ARM64_REG_W18:
return uint32_t(context->x[18]);
case ARM64_REG_W19:
return uint32_t(context->x[19]);
case ARM64_REG_W20:
return uint32_t(context->x[20]);
case ARM64_REG_W21:
return uint32_t(context->x[21]);
case ARM64_REG_W22:
return uint32_t(context->x[22]);
case ARM64_REG_W23:
return uint32_t(context->x[23]);
case ARM64_REG_W24:
return uint32_t(context->x[24]);
case ARM64_REG_W25:
return uint32_t(context->x[25]);
case ARM64_REG_W26:
return uint32_t(context->x[26]);
case ARM64_REG_W27:
return uint32_t(context->x[27]);
case ARM64_REG_W28:
return uint32_t(context->x[28]);
case ARM64_REG_W29:
return uint32_t(context->x[29]);
case ARM64_REG_W30:
return uint32_t(context->x[30]);
default:
assert_unhandled_case(reg);
return 0;
}
}
bool TestCapstonePstate(arm64_cc cond, uint32_t pstate) {
// https://devblogs.microsoft.com/oldnewthing/20220815-00/?p=106975
// Upper 4 bits of pstate are NZCV
const bool N = !!(pstate & 0x80000000);
const bool Z = !!(pstate & 0x40000000);
const bool C = !!(pstate & 0x20000000);
const bool V = !!(pstate & 0x10000000);
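// Worked example: pstate = 0x6000'0000 sets Z and C (N = V = 0), so EQ, HS,
// LS, GE and LE pass while NE, LO, HI, MI and GT fail.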
switch (cond) {
case ARM64_CC_EQ:
return (Z == true);
case ARM64_CC_NE:
return (Z == false);
case ARM64_CC_HS:
return (C == true);
case ARM64_CC_LO:
return (C == false);
case ARM64_CC_MI:
return (N == true);
case ARM64_CC_PL:
return (N == false);
case ARM64_CC_VS:
return (V == true);
case ARM64_CC_VC:
return (V == false);
case ARM64_CC_HI:
return ((C == true) && (Z == false));
case ARM64_CC_LS:
return ((C == false) || (Z == true));
case ARM64_CC_GE:
return (N == V);
case ARM64_CC_LT:
return (N != V);
case ARM64_CC_GT:
return ((Z == false) && (N == V));
case ARM64_CC_LE:
return ((Z == true) || (N != V));
case ARM64_CC_AL:
return true;
case ARM64_CC_NV:
return false;
default:
assert_unhandled_case(cond);
return false;
}
}
uint64_t A64Backend::CalculateNextHostInstruction(ThreadDebugInfo* thread_info,
uint64_t current_pc) {
auto machine_code_ptr = reinterpret_cast<const uint8_t*>(current_pc);
size_t remaining_machine_code_size = 64;
uint64_t host_address = current_pc;
cs_insn insn = {0};
cs_detail all_detail = {0};
insn.detail = &all_detail;
cs_disasm_iter(capstone_handle_, &machine_code_ptr,
&remaining_machine_code_size, &host_address, &insn);
const auto& detail = all_detail.arm64;
switch (insn.id) {
case ARM64_INS_B:
case ARM64_INS_BL: {
assert_true(detail.operands[0].type == ARM64_OP_IMM);
const int64_t pc_offset = static_cast<int64_t>(detail.operands[0].imm);
const bool test_passed =
TestCapstonePstate(detail.cc, thread_info->host_context.cpsr);
if (test_passed) {
return current_pc + pc_offset;
} else {
return current_pc + insn.size;
}
} break;
case ARM64_INS_BR:
case ARM64_INS_BLR: {
assert_true(detail.operands[0].type == ARM64_OP_REG);
const uint64_t target_pc =
ReadCapstoneReg(&thread_info->host_context, detail.operands[0].reg);
return target_pc;
} break;
case ARM64_INS_RET: {
assert_true(detail.operands[0].type == ARM64_OP_REG);
const uint64_t target_pc =
ReadCapstoneReg(&thread_info->host_context, detail.operands[0].reg);
return target_pc;
} break;
case ARM64_INS_CBNZ: {
assert_true(detail.operands[0].type == ARM64_OP_REG);
assert_true(detail.operands[1].type == ARM64_OP_IMM);
const int64_t pc_offset = static_cast<int64_t>(detail.operands[1].imm);
const bool test_passed = (0 != ReadCapstoneReg(&thread_info->host_context,
detail.operands[0].reg));
if (test_passed) {
return current_pc + pc_offset;
} else {
return current_pc + insn.size;
}
} break;
case ARM64_INS_CBZ: {
assert_true(detail.operands[0].type == ARM64_OP_REG);
assert_true(detail.operands[1].type == ARM64_OP_IMM);
const int64_t pc_offset = static_cast<int64_t>(detail.operands[1].imm);
const bool test_passed = (0 == ReadCapstoneReg(&thread_info->host_context,
detail.operands[0].reg));
if (test_passed) {
return current_pc + pc_offset;
} else {
return current_pc + insn.size;
}
} break;
default: {
// Not a branching instruction - just move over it.
return current_pc + insn.size;
} break;
}
}
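// The breakpoint helpers below patch the target instruction word with
// 0x0000'dead, which the code treats as `udf #0xdead` (UDF is the permanently
// undefined encoding: upper sixteen bits zero, lower sixteen an immediate), so
// ExceptionCallback can tell its own traps apart from genuine illegal
// instructions.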
void A64Backend::InstallBreakpoint(Breakpoint* breakpoint) {
breakpoint->ForEachHostAddress([breakpoint](uint64_t host_address) {
auto ptr = reinterpret_cast<void*>(host_address);
auto original_bytes = xe::load_and_swap<uint32_t>(ptr);
assert_true(original_bytes != 0x0000'dead);
xe::store_and_swap<uint32_t>(ptr, 0x0000'dead);
breakpoint->backend_data().emplace_back(host_address, original_bytes);
});
}
void A64Backend::InstallBreakpoint(Breakpoint* breakpoint, Function* fn) {
assert_true(breakpoint->address_type() == Breakpoint::AddressType::kGuest);
assert_true(fn->is_guest());
auto guest_function = reinterpret_cast<cpu::GuestFunction*>(fn);
auto host_address =
guest_function->MapGuestAddressToMachineCode(breakpoint->guest_address());
if (!host_address) {
assert_always();
return;
}
// Assume we haven't already installed a breakpoint in this spot.
auto ptr = reinterpret_cast<void*>(host_address);
auto original_bytes = xe::load_and_swap<uint32_t>(ptr);
assert_true(original_bytes != 0x0000'dead);
xe::store_and_swap<uint32_t>(ptr, 0x0000'dead);
breakpoint->backend_data().emplace_back(host_address, original_bytes);
}
void A64Backend::UninstallBreakpoint(Breakpoint* breakpoint) {
for (auto& pair : breakpoint->backend_data()) {
auto ptr = reinterpret_cast<uint8_t*>(pair.first);
auto instruction_bytes = xe::load_and_swap<uint32_t>(ptr);
assert_true(instruction_bytes == 0x0000'dead);
xe::store_and_swap<uint32_t>(ptr, static_cast<uint32_t>(pair.second));
}
breakpoint->backend_data().clear();
}
bool A64Backend::ExceptionCallbackThunk(Exception* ex, void* data) {
auto backend = reinterpret_cast<A64Backend*>(data);
return backend->ExceptionCallback(ex);
}
bool A64Backend::ExceptionCallback(Exception* ex) {
if (ex->code() != Exception::Code::kIllegalInstruction) {
// We only care about illegal instructions. Other things will be handled by
// other handlers (probably). If nothing else picks it up we'll be called
// with OnUnhandledException to do real crash handling.
return false;
}
// Verify an expected illegal instruction.
auto instruction_bytes =
xe::load_and_swap<uint32_t>(reinterpret_cast<void*>(ex->pc()));
if (instruction_bytes != 0x0000'dead) {
// Not our `udf #0xdead` - not us.
return false;
}
// Let the processor handle things.
return processor()->OnThreadBreakpointHit(ex);
}
A64ThunkEmitter::A64ThunkEmitter(A64Backend* backend) : A64Emitter(backend) {}
A64ThunkEmitter::~A64ThunkEmitter() {}
HostToGuestThunk A64ThunkEmitter::EmitHostToGuestThunk() {
// X0 = target
// X1 = arg0 (context)
// X2 = arg1 (guest return address)
struct _code_offsets {
size_t prolog;
size_t prolog_stack_alloc;
size_t body;
size_t epilog;
size_t tail;
} code_offsets = {};
const size_t stack_size = StackLayout::THUNK_STACK_SIZE;
code_offsets.prolog = offset();
SUB(SP, SP, stack_size);
code_offsets.prolog_stack_alloc = offset();
code_offsets.body = offset();
EmitSaveNonvolatileRegs();
MOV(X16, X0);
MOV(GetContextReg(), X1); // context
MOV(X0, X2); // return address
BLR(X16);
EmitLoadNonvolatileRegs();
code_offsets.epilog = offset();
ADD(SP, SP, stack_size);
RET();
code_offsets.tail = offset();
assert_zero(code_offsets.prolog);
EmitFunctionInfo func_info = {};
func_info.code_size.total = offset();
func_info.code_size.prolog = code_offsets.body - code_offsets.prolog;
func_info.code_size.body = code_offsets.epilog - code_offsets.body;
func_info.code_size.epilog = code_offsets.tail - code_offsets.epilog;
func_info.code_size.tail = offset() - code_offsets.tail;
func_info.prolog_stack_alloc_offset =
code_offsets.prolog_stack_alloc - code_offsets.prolog;
func_info.stack_size = stack_size;
void* fn = Emplace(func_info);
return (HostToGuestThunk)fn;
}
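// How a host caller presumably enters guest code through the thunk above
// (call-site names are illustrative, not taken from this change):
//   void* result = backend->host_to_guest_thunk()(
//       guest_entry_point,                      // X0: jitted code to run
//       ppc_context,                            // X1: becomes the context reg
//       reinterpret_cast<void*>(return_site));  // X2: guest return address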
GuestToHostThunk A64ThunkEmitter::EmitGuestToHostThunk() {
// X0 = target function
// X1 = arg0
// X2 = arg1
// X3 = arg2
struct _code_offsets {
size_t prolog;
size_t prolog_stack_alloc;
size_t body;
size_t epilog;
size_t tail;
} code_offsets = {};
const size_t stack_size = StackLayout::THUNK_STACK_SIZE;
code_offsets.prolog = offset();
SUB(SP, SP, stack_size);
code_offsets.prolog_stack_alloc = offset();
code_offsets.body = offset();
EmitSaveVolatileRegs();
MOV(X16, X0); // function
MOV(X0, GetContextReg()); // context
BLR(X16);
EmitLoadVolatileRegs();
code_offsets.epilog = offset();
ADD(SP, SP, stack_size);
RET();
code_offsets.tail = offset();
assert_zero(code_offsets.prolog);
EmitFunctionInfo func_info = {};
func_info.code_size.total = offset();
func_info.code_size.prolog = code_offsets.body - code_offsets.prolog;
func_info.code_size.body = code_offsets.epilog - code_offsets.body;
func_info.code_size.epilog = code_offsets.tail - code_offsets.epilog;
func_info.code_size.tail = offset() - code_offsets.tail;
func_info.prolog_stack_alloc_offset =
code_offsets.prolog_stack_alloc - code_offsets.prolog;
func_info.stack_size = stack_size;
void* fn = Emplace(func_info);
return (GuestToHostThunk)fn;
}
// A64Emitter handles actually resolving functions.
uint64_t ResolveFunction(void* raw_context, uint64_t target_address);
ResolveFunctionThunk A64ThunkEmitter::EmitResolveFunctionThunk() {
// Entry:
// W17 = target PPC address
// X0 = context
struct _code_offsets {
size_t prolog;
size_t prolog_stack_alloc;
size_t body;
size_t epilog;
size_t tail;
} code_offsets = {};
const size_t stack_size = StackLayout::THUNK_STACK_SIZE;
code_offsets.prolog = offset();
// Preserve context register
STP(ZR, X0, SP, PRE_INDEXED, -16);
SUB(SP, SP, stack_size);
code_offsets.prolog_stack_alloc = offset();
code_offsets.body = offset();
EmitSaveVolatileRegs();
// mov(rcx, rsi); // context
// mov(rdx, rbx);
// mov(rax, reinterpret_cast<uint64_t>(&ResolveFunction));
// call(rax)
MOV(X0, GetContextReg()); // context
MOV(W1, W17);
MOV(X16, reinterpret_cast<uint64_t>(&ResolveFunction));
BLR(X16);
MOV(X16, X0);
EmitLoadVolatileRegs();
code_offsets.epilog = offset();
// add(rsp, stack_size);
// jmp(rax);
ADD(SP, SP, stack_size);
// Reload context register
LDP(ZR, X0, SP, POST_INDEXED, 16);
BR(X16);
code_offsets.tail = offset();
assert_zero(code_offsets.prolog);
EmitFunctionInfo func_info = {};
func_info.code_size.total = offset();
func_info.code_size.prolog = code_offsets.body - code_offsets.prolog;
func_info.code_size.body = code_offsets.epilog - code_offsets.body;
func_info.code_size.epilog = code_offsets.tail - code_offsets.epilog;
func_info.code_size.tail = offset() - code_offsets.tail;
func_info.prolog_stack_alloc_offset =
code_offsets.prolog_stack_alloc - code_offsets.prolog;
func_info.stack_size = stack_size;
void* fn = Emplace(func_info);
return (ResolveFunctionThunk)fn;
}
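// Net effect: an unresolved guest call lands here with the target PPC address
// in W17; the thunk forwards (context, address) to ResolveFunction, which
// returns the host code address for that function, and execution tail-branches
// straight into it through X16.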
void A64ThunkEmitter::EmitSaveVolatileRegs() {
// Save off volatile registers.
// X0 is deliberately skipped: it carries arg0 on entry and the return value on
// exit.
// STR(X0, SP, offsetof(StackLayout::Thunk, r[0]));
STP(X1, X2, SP, offsetof(StackLayout::Thunk, r[0]));
STP(X3, X4, SP, offsetof(StackLayout::Thunk, r[2]));
STP(X5, X6, SP, offsetof(StackLayout::Thunk, r[4]));
STP(X7, X8, SP, offsetof(StackLayout::Thunk, r[6]));
STP(X9, X10, SP, offsetof(StackLayout::Thunk, r[8]));
STP(X11, X12, SP, offsetof(StackLayout::Thunk, r[10]));
STP(X13, X14, SP, offsetof(StackLayout::Thunk, r[12]));
STP(X15, X30, SP, offsetof(StackLayout::Thunk, r[14]));
// Q0 is deliberately skipped: it carries a vector argument/return value.
// STR(Q0, SP, offsetof(StackLayout::Thunk, xmm[0]));
STP(Q1, Q2, SP, offsetof(StackLayout::Thunk, xmm[0]));
STP(Q3, Q4, SP, offsetof(StackLayout::Thunk, xmm[2]));
STP(Q5, Q6, SP, offsetof(StackLayout::Thunk, xmm[4]));
STP(Q7, Q16, SP, offsetof(StackLayout::Thunk, xmm[6]));
STP(Q17, Q18, SP, offsetof(StackLayout::Thunk, xmm[8]));
STP(Q19, Q20, SP, offsetof(StackLayout::Thunk, xmm[10]));
STP(Q21, Q22, SP, offsetof(StackLayout::Thunk, xmm[12]));
STP(Q23, Q24, SP, offsetof(StackLayout::Thunk, xmm[14]));
STP(Q25, Q26, SP, offsetof(StackLayout::Thunk, xmm[16]));
STP(Q27, Q28, SP, offsetof(StackLayout::Thunk, xmm[18]));
STP(Q29, Q30, SP, offsetof(StackLayout::Thunk, xmm[20]));
STR(Q31, SP, offsetof(StackLayout::Thunk, xmm[22]));
}
void A64ThunkEmitter::EmitLoadVolatileRegs() {
// X0 is deliberately skipped so the callee's return value survives the reload.
// LDR(X0, SP, offsetof(StackLayout::Thunk, r[0]));
LDP(X1, X2, SP, offsetof(StackLayout::Thunk, r[0]));
LDP(X3, X4, SP, offsetof(StackLayout::Thunk, r[2]));
LDP(X5, X6, SP, offsetof(StackLayout::Thunk, r[4]));
LDP(X7, X8, SP, offsetof(StackLayout::Thunk, r[6]));
LDP(X9, X10, SP, offsetof(StackLayout::Thunk, r[8]));
LDP(X11, X12, SP, offsetof(StackLayout::Thunk, r[10]));
LDP(X13, X14, SP, offsetof(StackLayout::Thunk, r[12]));
LDP(X15, X30, SP, offsetof(StackLayout::Thunk, r[14]));
// Q0 is deliberately skipped so a vector return value survives the reload.
// LDR(Q0, SP, offsetof(StackLayout::Thunk, xmm[0]));
LDP(Q1, Q2, SP, offsetof(StackLayout::Thunk, xmm[0]));
LDP(Q3, Q4, SP, offsetof(StackLayout::Thunk, xmm[2]));
LDP(Q5, Q6, SP, offsetof(StackLayout::Thunk, xmm[4]));
LDP(Q7, Q16, SP, offsetof(StackLayout::Thunk, xmm[6]));
LDP(Q17, Q18, SP, offsetof(StackLayout::Thunk, xmm[8]));
LDP(Q19, Q20, SP, offsetof(StackLayout::Thunk, xmm[10]));
LDP(Q21, Q22, SP, offsetof(StackLayout::Thunk, xmm[12]));
LDP(Q23, Q24, SP, offsetof(StackLayout::Thunk, xmm[14]));
LDP(Q25, Q26, SP, offsetof(StackLayout::Thunk, xmm[16]));
LDP(Q27, Q28, SP, offsetof(StackLayout::Thunk, xmm[18]));
LDP(Q29, Q30, SP, offsetof(StackLayout::Thunk, xmm[20]));
LDR(Q31, SP, offsetof(StackLayout::Thunk, xmm[22]));
}
void A64ThunkEmitter::EmitSaveNonvolatileRegs() {
STP(X19, X20, SP, offsetof(StackLayout::Thunk, r[0]));
STP(X21, X22, SP, offsetof(StackLayout::Thunk, r[2]));
STP(X23, X24, SP, offsetof(StackLayout::Thunk, r[4]));
STP(X25, X26, SP, offsetof(StackLayout::Thunk, r[6]));
STP(X27, X28, SP, offsetof(StackLayout::Thunk, r[8]));
STP(X29, X30, SP, offsetof(StackLayout::Thunk, r[10]));
STR(X17, SP, offsetof(StackLayout::Thunk, r[12]));
STP(D8, D9, SP, offsetof(StackLayout::Thunk, xmm[0]));
STP(D10, D11, SP, offsetof(StackLayout::Thunk, xmm[1]));
STP(D12, D13, SP, offsetof(StackLayout::Thunk, xmm[2]));
STP(D14, D15, SP, offsetof(StackLayout::Thunk, xmm[3]));
}
void A64ThunkEmitter::EmitLoadNonvolatileRegs() {
LDP(X19, X20, SP, offsetof(StackLayout::Thunk, r[0]));
LDP(X21, X22, SP, offsetof(StackLayout::Thunk, r[2]));
LDP(X23, X24, SP, offsetof(StackLayout::Thunk, r[4]));
LDP(X25, X26, SP, offsetof(StackLayout::Thunk, r[6]));
LDP(X27, X28, SP, offsetof(StackLayout::Thunk, r[8]));
LDP(X29, X30, SP, offsetof(StackLayout::Thunk, r[10]));
LDR(X17, SP, offsetof(StackLayout::Thunk, r[12]));
LDP(D8, D9, SP, offsetof(StackLayout::Thunk, xmm[0]));
LDP(D10, D11, SP, offsetof(StackLayout::Thunk, xmm[1]));
LDP(D12, D13, SP, offsetof(StackLayout::Thunk, xmm[2]));
LDP(D14, D15, SP, offsetof(StackLayout::Thunk, xmm[3]));
}
} // namespace a64
} // namespace backend
} // namespace cpu
} // namespace xe

View File

@ -0,0 +1,88 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2024 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#ifndef XENIA_CPU_BACKEND_A64_A64_BACKEND_H_
#define XENIA_CPU_BACKEND_A64_A64_BACKEND_H_
#include <memory>
#include "xenia/base/cvar.h"
#include "xenia/cpu/backend/backend.h"
DECLARE_int32(a64_extension_mask);
namespace xe {
class Exception;
} // namespace xe
namespace xe {
namespace cpu {
namespace backend {
namespace a64 {
class A64CodeCache;
typedef void* (*HostToGuestThunk)(void* target, void* arg0, void* arg1);
typedef void* (*GuestToHostThunk)(void* target, void* arg0, void* arg1);
typedef void (*ResolveFunctionThunk)();
class A64Backend : public Backend {
public:
static const uint32_t kForceReturnAddress = 0x9FFF0000u;
explicit A64Backend();
~A64Backend() override;
A64CodeCache* code_cache() const { return code_cache_.get(); }
uintptr_t emitter_data() const { return emitter_data_; }
// Call a generated function, saving all stack parameters.
HostToGuestThunk host_to_guest_thunk() const { return host_to_guest_thunk_; }
// Function that guest code can call to transition into host code.
GuestToHostThunk guest_to_host_thunk() const { return guest_to_host_thunk_; }
// Function that thunks to the ResolveFunction in A64Emitter.
ResolveFunctionThunk resolve_function_thunk() const {
return resolve_function_thunk_;
}
bool Initialize(Processor* processor) override;
void CommitExecutableRange(uint32_t guest_low, uint32_t guest_high) override;
std::unique_ptr<Assembler> CreateAssembler() override;
std::unique_ptr<GuestFunction> CreateGuestFunction(Module* module,
uint32_t address) override;
uint64_t CalculateNextHostInstruction(ThreadDebugInfo* thread_info,
uint64_t current_pc) override;
void InstallBreakpoint(Breakpoint* breakpoint) override;
void InstallBreakpoint(Breakpoint* breakpoint, Function* fn) override;
void UninstallBreakpoint(Breakpoint* breakpoint) override;
private:
static bool ExceptionCallbackThunk(Exception* ex, void* data);
bool ExceptionCallback(Exception* ex);
uintptr_t capstone_handle_ = 0;
std::unique_ptr<A64CodeCache> code_cache_;
uintptr_t emitter_data_ = 0;
HostToGuestThunk host_to_guest_thunk_;
GuestToHostThunk guest_to_host_thunk_;
ResolveFunctionThunk resolve_function_thunk_;
};
} // namespace a64
} // namespace backend
} // namespace cpu
} // namespace xe
#endif // XENIA_CPU_BACKEND_A64_A64_BACKEND_H_

View File

@ -0,0 +1,342 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2024 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#include "xenia/cpu/backend/a64/a64_code_cache.h"
#include <cstdlib>
#include <cstring>
#include "third_party/fmt/include/fmt/format.h"
#include "xenia/base/assert.h"
#include "xenia/base/clock.h"
#include "xenia/base/literals.h"
#include "xenia/base/logging.h"
#include "xenia/base/math.h"
#include "xenia/base/memory.h"
#include "xenia/cpu/function.h"
#include "xenia/cpu/module.h"
namespace xe {
namespace cpu {
namespace backend {
namespace a64 {
using namespace xe::literals;
A64CodeCache::A64CodeCache() = default;
A64CodeCache::~A64CodeCache() {
if (indirection_table_base_) {
xe::memory::DeallocFixed(indirection_table_base_, 0,
xe::memory::DeallocationType::kRelease);
}
// Unmap all views and close mapping.
if (mapping_ != xe::memory::kFileMappingHandleInvalid) {
if (generated_code_write_base_ &&
generated_code_write_base_ != generated_code_execute_base_) {
xe::memory::UnmapFileView(mapping_, generated_code_write_base_,
kGeneratedCodeSize);
}
if (generated_code_execute_base_) {
xe::memory::UnmapFileView(mapping_, generated_code_execute_base_,
kGeneratedCodeSize);
}
xe::memory::CloseFileMappingHandle(mapping_, file_name_);
mapping_ = xe::memory::kFileMappingHandleInvalid;
}
}
bool A64CodeCache::Initialize() {
indirection_table_base_ = reinterpret_cast<uint8_t*>(xe::memory::AllocFixed(
reinterpret_cast<void*>(kIndirectionTableBase), kIndirectionTableSize,
xe::memory::AllocationType::kReserve,
xe::memory::PageAccess::kReadWrite));
if (!indirection_table_base_) {
XELOGE("Unable to allocate code cache indirection table");
XELOGE(
"This is likely because the {:X}-{:X} range is in use by some other "
"system DLL",
static_cast<uint64_t>(kIndirectionTableBase),
kIndirectionTableBase + kIndirectionTableSize);
}
// Create mmap file. This allows us to share the code cache with the debugger.
file_name_ = fmt::format("xenia_code_cache_{}", Clock::QueryHostTickCount());
mapping_ = xe::memory::CreateFileMappingHandle(
file_name_, kGeneratedCodeSize, xe::memory::PageAccess::kExecuteReadWrite,
false);
if (mapping_ == xe::memory::kFileMappingHandleInvalid) {
XELOGE("Unable to create code cache mmap");
return false;
}
// Map generated code region into the file. Pages are committed as required.
if (xe::memory::IsWritableExecutableMemoryPreferred()) {
generated_code_execute_base_ =
reinterpret_cast<uint8_t*>(xe::memory::MapFileView(
mapping_, reinterpret_cast<void*>(kGeneratedCodeExecuteBase),
kGeneratedCodeSize, xe::memory::PageAccess::kExecuteReadWrite, 0));
generated_code_write_base_ = generated_code_execute_base_;
if (!generated_code_execute_base_ || !generated_code_write_base_) {
XELOGE("Unable to allocate code cache generated code storage");
XELOGE(
"This is likely because the {:X}-{:X} range is in use by some other "
"system DLL",
uint64_t(kGeneratedCodeExecuteBase),
uint64_t(kGeneratedCodeExecuteBase + kGeneratedCodeSize));
return false;
}
} else {
generated_code_execute_base_ =
reinterpret_cast<uint8_t*>(xe::memory::MapFileView(
mapping_, reinterpret_cast<void*>(kGeneratedCodeExecuteBase),
kGeneratedCodeSize, xe::memory::PageAccess::kExecuteReadOnly, 0));
generated_code_write_base_ =
reinterpret_cast<uint8_t*>(xe::memory::MapFileView(
mapping_, reinterpret_cast<void*>(kGeneratedCodeWriteBase),
kGeneratedCodeSize, xe::memory::PageAccess::kReadWrite, 0));
if (!generated_code_execute_base_ || !generated_code_write_base_) {
XELOGE("Unable to allocate code cache generated code storage");
XELOGE(
"This is likely because the {:X}-{:X} and {:X}-{:X} ranges are in "
"use by some other system DLL",
uint64_t(kGeneratedCodeExecuteBase),
uint64_t(kGeneratedCodeExecuteBase + kGeneratedCodeSize),
uint64_t(kGeneratedCodeWriteBase),
uint64_t(kGeneratedCodeWriteBase + kGeneratedCodeSize));
return false;
}
}
// Preallocate the function map to a large, reasonable size.
generated_code_map_.reserve(kMaximumFunctionCount);
return true;
}
void A64CodeCache::set_indirection_default(uint32_t default_value) {
indirection_default_value_ = default_value;
}
void A64CodeCache::AddIndirection(uint32_t guest_address,
uint32_t host_address) {
if (!indirection_table_base_) {
return;
}
uint32_t* indirection_slot = reinterpret_cast<uint32_t*>(
indirection_table_base_ + (guest_address - kIndirectionTableBase));
*indirection_slot = host_address;
}
void A64CodeCache::CommitExecutableRange(uint32_t guest_low,
uint32_t guest_high) {
if (!indirection_table_base_) {
return;
}
// Commit the memory.
xe::memory::AllocFixed(
indirection_table_base_ + (guest_low - kIndirectionTableBase),
guest_high - guest_low, xe::memory::AllocationType::kCommit,
xe::memory::PageAccess::kReadWrite);
// Fill memory with the default value.
uint32_t* p = reinterpret_cast<uint32_t*>(indirection_table_base_);
for (uint32_t address = guest_low; address < guest_high; ++address) {
p[(address - kIndirectionTableBase) / 4] = indirection_default_value_;
}
}
void A64CodeCache::PlaceHostCode(uint32_t guest_address, void* machine_code,
const EmitFunctionInfo& func_info,
void*& code_execute_address_out,
void*& code_write_address_out) {
// Same for now. We may use different pools or whatnot later on, like when
// we only want to place guest code in a serialized cache on disk.
PlaceGuestCode(guest_address, machine_code, func_info, nullptr,
code_execute_address_out, code_write_address_out);
}
void A64CodeCache::PlaceGuestCode(uint32_t guest_address, void* machine_code,
const EmitFunctionInfo& func_info,
GuestFunction* function_info,
void*& code_execute_address_out,
void*& code_write_address_out) {
// Hold a lock while we bump the pointers up. This is important as the
// unwind table requires entries AND code to be sorted in order.
size_t low_mark;
size_t high_mark;
uint8_t* code_execute_address;
UnwindReservation unwind_reservation;
{
auto global_lock = global_critical_region_.Acquire();
low_mark = generated_code_offset_;
// Reserve code.
// Always move the code to land on 16b alignment.
code_execute_address =
generated_code_execute_base_ + generated_code_offset_;
code_execute_address_out = code_execute_address;
uint8_t* code_write_address =
generated_code_write_base_ + generated_code_offset_;
code_write_address_out = code_write_address;
generated_code_offset_ += xe::round_up(func_info.code_size.total, 16);
auto tail_write_address =
generated_code_write_base_ + generated_code_offset_;
// Reserve unwind info.
// We reserve the worst-case unwind info size since we don't yet know how much
// we will need, and a few extra bytes of padding isn't the worst thing.
unwind_reservation = RequestUnwindReservation(generated_code_write_base_ +
generated_code_offset_);
generated_code_offset_ += xe::round_up(unwind_reservation.data_size, 16);
auto end_write_address =
generated_code_write_base_ + generated_code_offset_;
high_mark = generated_code_offset_;
// Store in map. It is maintained in sorted order of host PC dependent on
// us also being append-only.
generated_code_map_.emplace_back(
(uint64_t(code_execute_address - generated_code_execute_base_) << 32) |
generated_code_offset_,
function_info);
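// The packed key above is (start_offset << 32) | end_offset within the execute
// range; e.g. a function placed at offset 0x1000 whose reservation ends at
// 0x1240 is stored as 0x00001000'00001240, which LookupFunction's comparator
// splits back apart to test whether a host PC falls inside the function.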
// TODO(DrChat): The following code doesn't really need to be under the
// global lock except for PlaceCode (but it depends on the previous code
// having already run).
// If we are going above the high water mark of committed memory, commit
// some more. It's ok if multiple threads do this, as redundant commits
// aren't harmful.
size_t old_commit_mark, new_commit_mark;
do {
old_commit_mark = generated_code_commit_mark_;
if (high_mark <= old_commit_mark) break;
new_commit_mark = old_commit_mark + 16_MiB;
if (generated_code_execute_base_ == generated_code_write_base_) {
xe::memory::AllocFixed(generated_code_execute_base_, new_commit_mark,
xe::memory::AllocationType::kCommit,
xe::memory::PageAccess::kExecuteReadWrite);
} else {
xe::memory::AllocFixed(generated_code_execute_base_, new_commit_mark,
xe::memory::AllocationType::kCommit,
xe::memory::PageAccess::kExecuteReadOnly);
xe::memory::AllocFixed(generated_code_write_base_, new_commit_mark,
xe::memory::AllocationType::kCommit,
xe::memory::PageAccess::kReadWrite);
}
} while (generated_code_commit_mark_.compare_exchange_weak(
old_commit_mark, new_commit_mark));
// Copy code.
std::memcpy(code_write_address, machine_code, func_info.code_size.total);
// Fill unused slots with 0x00
std::memset(tail_write_address, 0x00,
static_cast<size_t>(end_write_address - tail_write_address));
// Notify subclasses of placed code.
PlaceCode(guest_address, machine_code, func_info, code_execute_address,
unwind_reservation);
}
// Now that everything is ready, fix up the indirection table.
// Note that we do support code that doesn't have an indirection fixup, so
// ignore those when we see them.
if (guest_address && indirection_table_base_) {
uint32_t* indirection_slot = reinterpret_cast<uint32_t*>(
indirection_table_base_ + (guest_address - kIndirectionTableBase));
*indirection_slot =
uint32_t(reinterpret_cast<uint64_t>(code_execute_address));
}
}
uint32_t A64CodeCache::PlaceData(const void* data, size_t length) {
// Hold a lock while we bump the pointers up.
size_t high_mark;
uint8_t* data_address = nullptr;
{
auto global_lock = global_critical_region_.Acquire();
// Reserve code.
// Always move the code to land on 16b alignment.
data_address = generated_code_write_base_ + generated_code_offset_;
generated_code_offset_ += xe::round_up(length, 16);
high_mark = generated_code_offset_;
}
// If we are going above the high water mark of committed memory, commit some
// more. It's ok if multiple threads do this, as redundant commits aren't
// harmful.
size_t old_commit_mark, new_commit_mark;
do {
old_commit_mark = generated_code_commit_mark_;
if (high_mark <= old_commit_mark) break;
new_commit_mark = old_commit_mark + 16_MiB;
if (generated_code_execute_base_ == generated_code_write_base_) {
xe::memory::AllocFixed(generated_code_execute_base_, new_commit_mark,
xe::memory::AllocationType::kCommit,
xe::memory::PageAccess::kExecuteReadWrite);
} else {
xe::memory::AllocFixed(generated_code_execute_base_, new_commit_mark,
xe::memory::AllocationType::kCommit,
xe::memory::PageAccess::kExecuteReadOnly);
xe::memory::AllocFixed(generated_code_write_base_, new_commit_mark,
xe::memory::AllocationType::kCommit,
xe::memory::PageAccess::kReadWrite);
}
} while (generated_code_commit_mark_.compare_exchange_weak(old_commit_mark,
new_commit_mark));
// Copy code.
std::memcpy(data_address, data, length);
return uint32_t(uintptr_t(data_address));
}
GuestFunction* A64CodeCache::LookupFunction(uint64_t host_pc) {
uint32_t key = uint32_t(host_pc - kGeneratedCodeExecuteBase);
void* fn_entry = std::bsearch(
&key, generated_code_map_.data(), generated_code_map_.size(),
sizeof(std::pair<uint64_t, GuestFunction*>),
[](const void* key_ptr, const void* element_ptr) {
auto key = *reinterpret_cast<const uint32_t*>(key_ptr);
auto element =
reinterpret_cast<const std::pair<uint64_t, GuestFunction*>*>(
element_ptr);
if (key < (element->first >> 32)) {
return -1;
} else if (key > uint32_t(element->first)) {
return 1;
} else {
return 0;
}
});
if (fn_entry) {
return reinterpret_cast<const std::pair<uint64_t, GuestFunction*>*>(
fn_entry)
->second;
} else {
return nullptr;
}
}
} // namespace a64
} // namespace backend
} // namespace cpu
} // namespace xe

View File

@ -0,0 +1,151 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2024 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#ifndef XENIA_CPU_BACKEND_A64_A64_CODE_CACHE_H_
#define XENIA_CPU_BACKEND_A64_A64_CODE_CACHE_H_
#include <atomic>
#include <cstddef>
#include <cstdint>
#include <filesystem>
#include <memory>
#include <string>
#include <utility>
#include <vector>
#include "xenia/base/memory.h"
#include "xenia/base/mutex.h"
#include "xenia/cpu/backend/code_cache.h"
namespace xe {
namespace cpu {
namespace backend {
namespace a64 {
struct EmitFunctionInfo {
struct _code_size {
size_t prolog;
size_t body;
size_t epilog;
size_t tail;
size_t total;
} code_size;
size_t prolog_stack_alloc_offset; // offset of instruction after stack alloc
size_t stack_size;
};
class A64CodeCache : public CodeCache {
public:
~A64CodeCache() override;
static std::unique_ptr<A64CodeCache> Create();
virtual bool Initialize();
const std::filesystem::path& file_name() const override { return file_name_; }
uintptr_t execute_base_address() const override {
return kGeneratedCodeExecuteBase;
}
size_t total_size() const override { return kGeneratedCodeSize; }
// TODO(benvanik): ELF serialization/etc
// TODO(benvanik): keep track of code blocks
// TODO(benvanik): padding/guards/etc
bool has_indirection_table() { return indirection_table_base_ != nullptr; }
void set_indirection_default(uint32_t default_value);
void AddIndirection(uint32_t guest_address, uint32_t host_address);
void CommitExecutableRange(uint32_t guest_low, uint32_t guest_high);
void PlaceHostCode(uint32_t guest_address, void* machine_code,
const EmitFunctionInfo& func_info,
void*& code_execute_address_out,
void*& code_write_address_out);
void PlaceGuestCode(uint32_t guest_address, void* machine_code,
const EmitFunctionInfo& func_info,
GuestFunction* function_info,
void*& code_execute_address_out,
void*& code_write_address_out);
uint32_t PlaceData(const void* data, size_t length);
GuestFunction* LookupFunction(uint64_t host_pc) override;
protected:
// All executable code falls within 0x80000000 to 0x9FFFFFFF, so we can
// only map enough for lookups within that range.
static const size_t kIndirectionTableSize = 0x1FFFFFFF;
static const uintptr_t kIndirectionTableBase = 0x80000000;
// The code range is 512MB, but we know the total code games will have is
// pretty small (dozens of MB at most) and our expansion is reasonable,
// so 256MB should be more than enough.
static const size_t kGeneratedCodeSize = 0x0FFFFFFF;
static const uintptr_t kGeneratedCodeExecuteBase = 0xA0000000;
// Used for writing when PageAccess::kExecuteReadWrite is not supported.
static const uintptr_t kGeneratedCodeWriteBase =
kGeneratedCodeExecuteBase + kGeneratedCodeSize + 1;
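// Resulting address layout: guest lookups in 0x80000000-0x9FFFFFFF hit the
// indirection table, generated code executes from 0xA0000000 upward, and when
// writable+executable pages are unavailable the writable alias of that code
// starts at 0xB0000000.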
// This is picked to be high enough to cover whatever we can reasonably
// expect. If we hit issues with this, it probably means some corner case
// in analysis is triggering.
static const size_t kMaximumFunctionCount = 100000;
struct UnwindReservation {
size_t data_size = 0;
size_t table_slot = 0;
uint8_t* entry_address = 0;
};
A64CodeCache();
virtual UnwindReservation RequestUnwindReservation(uint8_t* entry_address) {
return UnwindReservation();
}
virtual void PlaceCode(uint32_t guest_address, void* machine_code,
const EmitFunctionInfo& func_info,
void* code_execute_address,
UnwindReservation unwind_reservation) {}
std::filesystem::path file_name_;
xe::memory::FileMappingHandle mapping_ =
xe::memory::kFileMappingHandleInvalid;
// NOTE: the global critical region must be held when manipulating the offsets
// or counts of anything, to keep the tables consistent and ordered.
xe::global_critical_region global_critical_region_;
// Value that the indirection table will be initialized with upon commit.
uint32_t indirection_default_value_ = 0xFEEDF00D;
// Fixed at kIndirectionTableBase in host space, holding 4 byte pointers into
// the generated code table that correspond to the PPC functions in guest
// space.
uint8_t* indirection_table_base_ = nullptr;
// Fixed at kGeneratedCodeExecuteBase and holding all generated code, growing
// as needed.
uint8_t* generated_code_execute_base_ = nullptr;
// View of the memory that backs generated_code_execute_base_ when
// PageAccess::kExecuteReadWrite is not supported, for writing the generated
// code. Equals to generated_code_execute_base_ when it's supported.
uint8_t* generated_code_write_base_ = nullptr;
// Current offset to empty space in generated code.
size_t generated_code_offset_ = 0;
// Current high water mark of COMMITTED code.
std::atomic<size_t> generated_code_commit_mark_ = {0};
// Sorted map by host PC base offsets to source function info.
// This can be used to bsearch on host PC to find the guest function.
// The key is [start address | end address].
std::vector<std::pair<uint64_t, GuestFunction*>> generated_code_map_;
};
} // namespace a64
} // namespace backend
} // namespace cpu
} // namespace xe
#endif // XENIA_CPU_BACKEND_A64_A64_CODE_CACHE_H_

View File

@ -0,0 +1,319 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2024 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#include "xenia/cpu/backend/a64/a64_code_cache.h"
#include <cstdlib>
#include <cstring>
#include "xenia/base/assert.h"
#include "xenia/base/clock.h"
#include "xenia/base/logging.h"
#include "xenia/base/math.h"
#include "xenia/base/memory.h"
#include "xenia/base/platform_win.h"
#include "xenia/cpu/function.h"
// Function pointer definitions
using FnRtlAddGrowableFunctionTable = decltype(&RtlAddGrowableFunctionTable);
using FnRtlGrowFunctionTable = decltype(&RtlGrowFunctionTable);
using FnRtlDeleteGrowableFunctionTable =
decltype(&RtlDeleteGrowableFunctionTable);
namespace xe {
namespace cpu {
namespace backend {
namespace a64 {
// ARM64 unwind-op codes
// https://docs.microsoft.com/en-us/cpp/build/arm64-exception-handling#unwind-codes
// https://www.corsix.org/content/windows-arm64-unwind-codes
typedef enum _UNWIND_OP_CODES {
UWOP_NOP = 0xE3,
UWOP_ALLOC_S = 0x00, // sub sp, sp, i*16
UWOP_ALLOC_L = 0xE0'00'00'00, // sub sp, sp, i*16
UWOP_SAVE_FPLR = 0x40, // stp fp, lr, [sp+i*8]
UWOP_SAVE_FPLRX = 0x80, // stp fp, lr, [sp-(i+1)*8]!
UWOP_SET_FP = 0xE1, // mov fp, sp
UWOP_END = 0xE4,
} UNWIND_CODE_OPS;
using UNWIND_CODE = uint32_t;
static_assert(sizeof(UNWIND_CODE) == sizeof(uint32_t));
// UNWIND_INFO defines the static part (first 32-bit) of the .xdata record
typedef struct _UNWIND_INFO {
uint32_t FunctionLength : 18;
uint32_t Version : 2;
uint32_t X : 1;
uint32_t E : 1;
uint32_t EpilogCount : 5;
uint32_t CodeWords : 5;
UNWIND_CODE UnwindCodes[2];
} UNWIND_INFO, *PUNWIND_INFO;
static_assert(offsetof(UNWIND_INFO, UnwindCodes[0]) == 4);
static_assert(offsetof(UNWIND_INFO, UnwindCodes[1]) == 8);
// Size of unwind info per function.
static const uint32_t kUnwindInfoSize = sizeof(UNWIND_INFO);
class Win32A64CodeCache : public A64CodeCache {
public:
Win32A64CodeCache();
~Win32A64CodeCache() override;
bool Initialize() override;
void* LookupUnwindInfo(uint64_t host_pc) override;
private:
UnwindReservation RequestUnwindReservation(uint8_t* entry_address) override;
void PlaceCode(uint32_t guest_address, void* machine_code,
const EmitFunctionInfo& func_info, void* code_execute_address,
UnwindReservation unwind_reservation) override;
void InitializeUnwindEntry(uint8_t* unwind_entry_address,
size_t unwind_table_slot,
void* code_execute_address,
const EmitFunctionInfo& func_info);
// Growable function table system handle.
void* unwind_table_handle_ = nullptr;
// Actual unwind table entries.
std::vector<RUNTIME_FUNCTION> unwind_table_;
// Current number of entries in the table.
std::atomic<uint32_t> unwind_table_count_ = {0};
  // Does this version of Windows support growable function tables?
bool supports_growable_table_ = false;
FnRtlAddGrowableFunctionTable add_growable_table_ = nullptr;
FnRtlDeleteGrowableFunctionTable delete_growable_table_ = nullptr;
FnRtlGrowFunctionTable grow_table_ = nullptr;
};
std::unique_ptr<A64CodeCache> A64CodeCache::Create() {
return std::make_unique<Win32A64CodeCache>();
}
Win32A64CodeCache::Win32A64CodeCache() = default;
Win32A64CodeCache::~Win32A64CodeCache() {
if (supports_growable_table_) {
if (unwind_table_handle_) {
delete_growable_table_(unwind_table_handle_);
}
} else {
if (generated_code_execute_base_) {
RtlDeleteFunctionTable(reinterpret_cast<PRUNTIME_FUNCTION>(
reinterpret_cast<DWORD64>(generated_code_execute_base_) | 0x3));
}
}
}
bool Win32A64CodeCache::Initialize() {
if (!A64CodeCache::Initialize()) {
return false;
}
// Compute total number of unwind entries we should allocate.
// We don't support reallocing right now, so this should be high.
unwind_table_.resize(kMaximumFunctionCount);
// Check if this version of Windows supports growable function tables.
auto ntdll_handle = GetModuleHandleW(L"ntdll.dll");
if (!ntdll_handle) {
add_growable_table_ = nullptr;
delete_growable_table_ = nullptr;
grow_table_ = nullptr;
} else {
add_growable_table_ = (FnRtlAddGrowableFunctionTable)GetProcAddress(
ntdll_handle, "RtlAddGrowableFunctionTable");
delete_growable_table_ = (FnRtlDeleteGrowableFunctionTable)GetProcAddress(
ntdll_handle, "RtlDeleteGrowableFunctionTable");
grow_table_ = (FnRtlGrowFunctionTable)GetProcAddress(
ntdll_handle, "RtlGrowFunctionTable");
}
supports_growable_table_ =
add_growable_table_ && delete_growable_table_ && grow_table_;
// Create table and register with the system. It's empty now, but we'll grow
// it as functions are added.
if (supports_growable_table_) {
if (add_growable_table_(
&unwind_table_handle_, unwind_table_.data(), unwind_table_count_,
DWORD(unwind_table_.size()),
reinterpret_cast<ULONG_PTR>(generated_code_execute_base_),
reinterpret_cast<ULONG_PTR>(generated_code_execute_base_ +
kGeneratedCodeSize))) {
XELOGE("Unable to create unwind function table");
return false;
}
} else {
// Install a callback that the debugger will use to lookup unwind info on
// demand.
if (!RtlInstallFunctionTableCallback(
reinterpret_cast<DWORD64>(generated_code_execute_base_) | 0x3,
reinterpret_cast<DWORD64>(generated_code_execute_base_),
kGeneratedCodeSize,
[](DWORD64 control_pc, PVOID context) {
auto code_cache = reinterpret_cast<Win32A64CodeCache*>(context);
return reinterpret_cast<PRUNTIME_FUNCTION>(
code_cache->LookupUnwindInfo(control_pc));
},
this, nullptr)) {
XELOGE("Unable to install function table callback");
return false;
}
}
return true;
}
Win32A64CodeCache::UnwindReservation
Win32A64CodeCache::RequestUnwindReservation(uint8_t* entry_address) {
assert_false(unwind_table_count_ >= kMaximumFunctionCount);
UnwindReservation unwind_reservation;
unwind_reservation.data_size = xe::round_up(kUnwindInfoSize, 16);
unwind_reservation.table_slot = unwind_table_count_++;
unwind_reservation.entry_address = entry_address;
return unwind_reservation;
}
void Win32A64CodeCache::PlaceCode(uint32_t guest_address, void* machine_code,
const EmitFunctionInfo& func_info,
void* code_execute_address,
UnwindReservation unwind_reservation) {
// Add unwind info.
InitializeUnwindEntry(unwind_reservation.entry_address,
unwind_reservation.table_slot, code_execute_address,
func_info);
if (supports_growable_table_) {
// Notify that the unwind table has grown.
// We do this outside of the lock, but with the latest total count.
grow_table_(unwind_table_handle_, unwind_table_count_);
}
// https://docs.microsoft.com/en-us/uwp/win32-and-com/win32-apis
FlushInstructionCache(GetCurrentProcess(), code_execute_address,
func_info.code_size.total);
}
constexpr UNWIND_CODE UnwindOpWord(uint8_t code0 = UWOP_NOP,
uint8_t code1 = UWOP_NOP,
uint8_t code2 = UWOP_NOP,
uint8_t code3 = UWOP_NOP) {
return static_cast<uint32_t>(code0) | (static_cast<uint32_t>(code1) << 8) |
(static_cast<uint32_t>(code2) << 16) |
(static_cast<uint32_t>(code3) << 24);
}
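// e.g. UnwindOpWord(UWOP_SET_FP, OpSaveFpLrX(-16), UWOP_END) yields 0xE3E481E1,
// which the little-endian store lays out as the byte stream E1 81 E4 E3:
// set_fp, save_fplr_x #16, end, then a trailing nop that is never reached.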
// 8-bit unwind code for "stp fp, lr, [sp, #-16]!"
// https://docs.microsoft.com/en-us/cpp/build/arm64-exception-handling#unwind-codes
static uint8_t OpSaveFpLrX(int16_t pre_index_offset) {
assert_true(pre_index_offset <= -8);
assert_true(pre_index_offset >= -512);
  // Offset is encoded in 8-byte units.
constexpr int IndexShift = 3;
constexpr int IndexMask = (1 << IndexShift) - 1;
assert_true((pre_index_offset & IndexMask) == 0);
const uint32_t encoded_value = (-pre_index_offset >> IndexShift) - 1;
return UWOP_SAVE_FPLRX | encoded_value;
}
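// e.g. OpSaveFpLrX(-16) == 0x81: unwinds the prolog's
// "stp fp, lr, [sp, #-16]!" by restoring fp/lr and adding 16 back to sp.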
// Ensure a 16-byte aligned stack
static constexpr size_t StackAlignShift = 4; // n / 16
static constexpr size_t StackAlignMask = (1 << StackAlignShift) - 1; // n % 16
// 8-bit unwind code for an up-to-512-byte "sub sp, sp, #stack_space"
// https://docs.microsoft.com/en-us/cpp/build/arm64-exception-handling#unwind-codes
static uint8_t OpAllocS(int16_t stack_space) {
assert_true(stack_space >= 0);
assert_true(stack_space < 512);
assert_true((stack_space & StackAlignMask) == 0);
return UWOP_ALLOC_S | (stack_space >> StackAlignShift);
}
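// e.g. OpAllocS(0x1F0) == 0x1F: the unwinder adds 0x1F0 back to sp.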
// 4-byte unwind code for +256MiB "sub sp, sp, #stack_space"
// https://docs.microsoft.com/en-us/cpp/build/arm64-exception-handling#unwind-codes
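// The alloc_l opcode is the byte 0xE0 followed by the 24-bit size/16 in
// most-significant-byte-first order; the result below is byte-swapped so the
// little-endian uint32_t store produces exactly that byte sequence.
// e.g. OpAllocL(0x1000) == 0x000100E0, i.e. bytes E0 00 01 00 in the stream.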
uint32_t OpAllocL(int32_t stack_space) {
assert_true(stack_space >= 0);
assert_true(stack_space < (0xFFFFFF * 16));
assert_true((stack_space & StackAlignMask) == 0);
return xe::byte_swap(UWOP_ALLOC_L |
((stack_space >> StackAlignShift) & 0xFF'FF'FF));
}
void Win32A64CodeCache::InitializeUnwindEntry(
uint8_t* unwind_entry_address, size_t unwind_table_slot,
void* code_execute_address, const EmitFunctionInfo& func_info) {
auto unwind_info = reinterpret_cast<UNWIND_INFO*>(unwind_entry_address);
*unwind_info = {};
// ARM64 instructions are always multiples of 4 bytes
// Windows ignores the bottom 2 bits
unwind_info->FunctionLength = func_info.code_size.total / 4;
unwind_info->CodeWords = 2;
// https://learn.microsoft.com/en-us/cpp/build/arm64-exception-handling?view=msvc-170#unwind-codes
// The array of unwind codes is a pool of sequences that describe exactly how
// to undo the effects of the prolog. They're stored in the same order the
// operations need to be undone. The unwind codes can be thought of as a small
// instruction set, encoded as a string of bytes. When execution is complete,
// the return address to the calling function is in the lr register. And, all
// non-volatile registers are restored to their values at the time the
// function was called.
// Function frames are generally:
// STP(X29, X30, SP, PRE_INDEXED, -16);
// MOV(X29, XSP);
// SUB(XSP, XSP, stack_size);
// ... function body ...
// ADD(XSP, XSP, stack_size);
// MOV(XSP, X29);
// LDP(X29, X30, SP, POST_INDEXED, 16);
// These opcodes must undo the epilog and put the return address within lr
unwind_info->UnwindCodes[0] = OpAllocL(func_info.stack_size);
unwind_info->UnwindCodes[1] =
UnwindOpWord(UWOP_SET_FP, OpSaveFpLrX(-16), UWOP_END);
// Add entry.
RUNTIME_FUNCTION& fn_entry = unwind_table_[unwind_table_slot];
fn_entry.BeginAddress =
DWORD(reinterpret_cast<uint8_t*>(code_execute_address) -
generated_code_execute_base_);
fn_entry.UnwindData =
DWORD(unwind_entry_address - generated_code_execute_base_);
}
void* Win32A64CodeCache::LookupUnwindInfo(uint64_t host_pc) {
return std::bsearch(
&host_pc, unwind_table_.data(), unwind_table_count_,
sizeof(RUNTIME_FUNCTION),
[](const void* key_ptr, const void* element_ptr) {
auto key = *reinterpret_cast<const uintptr_t*>(key_ptr) -
kGeneratedCodeExecuteBase;
auto element = reinterpret_cast<const RUNTIME_FUNCTION*>(element_ptr);
if (key < element->BeginAddress) {
return -1;
} else if (key > (element->BeginAddress + element->FunctionLength)) {
return 1;
} else {
return 0;
}
});
}
} // namespace a64
} // namespace backend
} // namespace cpu
} // namespace xe

View File

@ -0,0 +1,995 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2024 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#include "xenia/cpu/backend/a64/a64_emitter.h"
#include "xenia/cpu/backend/a64/a64_util.h"
#include <cstddef>
#include <climits>
#include <cstring>
#include "third_party/fmt/include/fmt/format.h"
#include "xenia/base/assert.h"
#include "xenia/base/atomic.h"
#include "xenia/base/debugging.h"
#include "xenia/base/literals.h"
#include "xenia/base/logging.h"
#include "xenia/base/math.h"
#include "xenia/base/memory.h"
#include "xenia/base/profiling.h"
#include "xenia/base/vec128.h"
#include "xenia/cpu/backend/a64/a64_backend.h"
#include "xenia/cpu/backend/a64/a64_code_cache.h"
#include "xenia/cpu/backend/a64/a64_function.h"
#include "xenia/cpu/backend/a64/a64_sequences.h"
#include "xenia/cpu/backend/a64/a64_stack_layout.h"
#include "xenia/cpu/cpu_flags.h"
#include "xenia/cpu/function.h"
#include "xenia/cpu/function_debug_info.h"
#include "xenia/cpu/processor.h"
#include "xenia/cpu/symbol.h"
#include "xenia/cpu/thread_state.h"
#include "oaknut/feature_detection/cpu_feature.hpp"
#include "oaknut/feature_detection/feature_detection.hpp"
#include "oaknut/feature_detection/feature_detection_idregs.hpp"
DEFINE_bool(debugprint_trap_log, false,
"Log debugprint traps to the active debugger", "CPU");
DEFINE_bool(ignore_undefined_externs, true,
"Don't exit when an undefined extern is called.", "CPU");
DEFINE_bool(emit_source_annotations, false,
"Add extra movs and nops to make disassembly easier to read.",
"CPU");
namespace xe {
namespace cpu {
namespace backend {
namespace a64 {
using xe::cpu::hir::HIRBuilder;
using xe::cpu::hir::Instr;
using namespace xe::literals;
using namespace oaknut::util;
static const size_t kStashOffset = 32;
// static const size_t kStashOffsetHigh = 32 + 32;
// Register indices that the HIR is allowed to use for operands
const uint8_t A64Emitter::gpr_reg_map_[A64Emitter::GPR_COUNT] = {
19, 20, 21, 22, 23, 24, 25, 26,
};
const uint8_t A64Emitter::fpr_reg_map_[A64Emitter::FPR_COUNT] = {
8, 9, 10, 11, 12, 13, 14, 15,
};
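// Both maps pick AAPCS64 callee-saved registers (X19-X26, and V8-V15 whose
// low 64 bits are preserved across calls), matching the register notes in
// a64_emitter.h.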
A64Emitter::A64Emitter(A64Backend* backend)
: VectorCodeGenerator(assembly_buffer),
processor_(backend->processor()),
backend_(backend),
code_cache_(backend->code_cache()) {
oaknut::CpuFeatures cpu_ = oaknut::detect_features();
// Combine with id register detection
#if OAKNUT_SUPPORTS_READING_ID_REGISTERS > 0
#if OAKNUT_SUPPORTS_READING_ID_REGISTERS == 1
const std::optional<oaknut::id::IdRegisters> id_registers =
oaknut::read_id_registers();
#elif OAKNUT_SUPPORTS_READING_ID_REGISTERS == 2
const std::optional<oaknut::id::IdRegisters> id_registers =
oaknut::read_id_registers(0);
#endif
if (id_registers.has_value()) {
cpu_ = cpu_ | oaknut::detect_features_via_id_registers(*id_registers);
}
#endif
#define TEST_EMIT_FEATURE(emit, ext) \
if ((cvars::a64_extension_mask & emit) == emit) { \
feature_flags_ |= (cpu_.has(ext) ? emit : 0); \
}
TEST_EMIT_FEATURE(kA64EmitLSE, oaknut::CpuFeature::LSE);
TEST_EMIT_FEATURE(kA64EmitF16C, oaknut::CpuFeature::FP16Conv);
#undef TEST_EMIT_FEATURE
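  // feature_flags_ now records which optional AArch64 extensions (LSE atomics,
  // FP16 conversion) both the host CPU and the a64_extension_mask cvar allow;
  // instruction sequences query it via IsFeatureEnabled() when emitting.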
}
A64Emitter::~A64Emitter() = default;
bool A64Emitter::Emit(GuestFunction* function, HIRBuilder* builder,
uint32_t debug_info_flags, FunctionDebugInfo* debug_info,
void** out_code_address, size_t* out_code_size,
std::vector<SourceMapEntry>* out_source_map) {
SCOPE_profile_cpu_f("cpu");
// Reset.
debug_info_ = debug_info;
debug_info_flags_ = debug_info_flags;
trace_data_ = &function->trace_data();
source_map_arena_.Reset();
// Fill the generator with code.
EmitFunctionInfo func_info = {};
if (!Emit(builder, func_info)) {
return false;
}
// Copy the final code to the cache and relocate it.
*out_code_size = offset();
*out_code_address = Emplace(func_info, function);
// Stash source map.
source_map_arena_.CloneContents(out_source_map);
return true;
}
void* A64Emitter::Emplace(const EmitFunctionInfo& func_info,
GuestFunction* function) {
// Copy the current oaknut instruction-buffer into the code-cache
void* new_execute_address;
void* new_write_address;
assert_true(func_info.code_size.total == offset());
if (function) {
code_cache_->PlaceGuestCode(function->address(), assembly_buffer.data(),
func_info, function, new_execute_address,
new_write_address);
} else {
code_cache_->PlaceHostCode(0, assembly_buffer.data(), func_info,
new_execute_address, new_write_address);
}
// Reset the oaknut instruction-buffer
assembly_buffer.clear();
label_lookup_.clear();
return new_execute_address;
}
bool A64Emitter::Emit(HIRBuilder* builder, EmitFunctionInfo& func_info) {
oaknut::Label epilog_label;
epilog_label_ = &epilog_label;
// Calculate stack size. We need to align things to their natural sizes.
// This could be much better (sort by type/etc).
auto locals = builder->locals();
size_t stack_offset = StackLayout::GUEST_STACK_SIZE;
for (auto it = locals.begin(); it != locals.end(); ++it) {
auto slot = *it;
size_t type_size = GetTypeSize(slot->type);
// Align to natural size.
stack_offset = xe::align(stack_offset, type_size);
slot->set_constant((uint32_t)stack_offset);
stack_offset += type_size;
}
// Ensure 16b alignment.
stack_offset -= StackLayout::GUEST_STACK_SIZE;
stack_offset = xe::align(stack_offset, static_cast<size_t>(16));
struct _code_offsets {
size_t prolog;
size_t prolog_stack_alloc;
size_t body;
size_t epilog;
size_t tail;
} code_offsets = {};
code_offsets.prolog = offset();
// Function prolog.
// Must be 16b aligned.
// Windows is very strict about the form of this and the epilog:
// https://docs.microsoft.com/en-us/cpp/build/prolog-and-epilog?view=vs-2017
// IMPORTANT: any changes to the prolog must be kept in sync with
// A64CodeCache, which dynamically generates exception information.
// Adding or changing anything here must be matched!
size_t stack_size = StackLayout::GUEST_STACK_SIZE + stack_offset;
  // The SUB (immediate) instruction can only encode a 12-bit immediate,
  // optionally shifted left by 12 (i.e. values of the form 0xFFF or 0xFFF000).
  // If the stack size is greater than 0xFFF, align it up to 0x1000 so it still
  // fits in a single instruction.
if (stack_size > 0xFFF) {
stack_size = xe::align(stack_size, static_cast<size_t>(0x1000));
}
assert_true(stack_size % 16 == 0);
func_info.stack_size = stack_size;
stack_size_ = stack_size;
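  // Emitted prolog (the unwind codes in a64_code_cache_win.cc describe exactly
  // this sequence):
  //   stp x29, x30, [sp, #-16]!
  //   mov x29, sp
  //   sub sp, sp, #stack_size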
STP(X29, X30, SP, PRE_INDEXED, -16);
MOV(X29, SP);
SUB(SP, SP, (uint32_t)stack_size);
code_offsets.prolog_stack_alloc = offset();
code_offsets.body = offset();
STR(GetContextReg(), SP, StackLayout::GUEST_CTX_HOME);
STR(X0, SP, StackLayout::GUEST_RET_ADDR);
STR(XZR, SP, StackLayout::GUEST_CALL_RET_ADDR);
// Safe now to do some tracing.
if (debug_info_flags_ & DebugInfoFlags::kDebugInfoTraceFunctions) {
// We require 32-bit addresses.
assert_true(uint64_t(trace_data_->header()) < UINT_MAX);
auto trace_header = trace_data_->header();
// Call count.
MOV(W0, 1);
MOV(X5, reinterpret_cast<uintptr_t>(
low_address(&trace_header->function_call_count)));
LDADDAL(X0, X0, X5);
// Get call history slot.
static_assert(FunctionTraceData::kFunctionCallerHistoryCount == 4,
"bitmask depends on count");
LDR(X0, X5);
AND(W0, W0, 0b00000011);
// Record call history value into slot (guest addr in W1).
MOV(X5, reinterpret_cast<uintptr_t>(
low_address(&trace_header->function_caller_history)));
STR(W1, X5, X0, oaknut::IndexExt::LSL, 2);
// Calling thread. Load X0 with thread ID.
EmitGetCurrentThreadId();
MOV(W5, 1);
LSL(W0, W5, W0);
MOV(X5, reinterpret_cast<uintptr_t>(
low_address(&trace_header->function_thread_use)));
LDSET(W0, WZR, X5);
}
// Load membase.
LDR(GetMembaseReg(), GetContextReg(),
offsetof(ppc::PPCContext, virtual_membase));
// Body.
auto block = builder->first_block();
while (block) {
// Mark block labels.
auto label = block->label_head;
while (label) {
l(label_lookup_[label->name]);
label = label->next;
}
// Process instructions.
const Instr* instr = block->instr_head;
while (instr) {
const Instr* new_tail = instr;
if (!SelectSequence(this, instr, &new_tail)) {
// No sequence found!
// NOTE: If you encounter this after adding a new instruction, do a full
// rebuild!
assert_always();
XELOGE("Unable to process HIR opcode {}", instr->opcode->name);
break;
}
instr = new_tail;
}
block = block->next;
}
// Function epilog.
l(epilog_label);
epilog_label_ = nullptr;
EmitTraceUserCallReturn();
LDR(GetContextReg(), SP, StackLayout::GUEST_CTX_HOME);
code_offsets.epilog = offset();
ADD(SP, SP, (uint32_t)stack_size);
MOV(SP, X29);
LDP(X29, X30, SP, POST_INDEXED, 16);
RET();
code_offsets.tail = offset();
if (cvars::emit_source_annotations) {
NOP();
NOP();
NOP();
NOP();
NOP();
}
assert_zero(code_offsets.prolog);
func_info.code_size.total = offset();
func_info.code_size.prolog = code_offsets.body - code_offsets.prolog;
func_info.code_size.body = code_offsets.epilog - code_offsets.body;
func_info.code_size.epilog = code_offsets.tail - code_offsets.epilog;
func_info.code_size.tail = offset() - code_offsets.tail;
func_info.prolog_stack_alloc_offset =
code_offsets.prolog_stack_alloc - code_offsets.prolog;
return true;
}
void A64Emitter::MarkSourceOffset(const Instr* i) {
auto entry = source_map_arena_.Alloc<SourceMapEntry>();
entry->guest_address = static_cast<uint32_t>(i->src1.offset);
entry->hir_offset = uint32_t(i->block->ordinal << 16) | i->ordinal;
entry->code_offset = static_cast<uint32_t>(offset());
if (cvars::emit_source_annotations) {
NOP();
NOP();
MOV(X0, entry->guest_address);
NOP();
NOP();
}
if (debug_info_flags_ & DebugInfoFlags::kDebugInfoTraceFunctionCoverage) {
const uint32_t instruction_index =
(entry->guest_address - trace_data_->start_address()) / 4;
MOV(X0, 1);
MOV(X1, reinterpret_cast<uintptr_t>(
low_address(trace_data_->instruction_execute_counts() +
instruction_index * 8)));
LDADDAL(X0, ZR, X1);
}
}
void A64Emitter::EmitGetCurrentThreadId() {
// X27 must point to context. We could fetch from the stack if needed.
LDRH(W0, GetContextReg(), offsetof(ppc::PPCContext, thread_id));
}
void A64Emitter::EmitTraceUserCallReturn() {}
void A64Emitter::DebugBreak() { BRK(0xF000); }
uint64_t TrapDebugPrint(void* raw_context, uint64_t address) {
auto thread_state = *reinterpret_cast<ThreadState**>(raw_context);
uint32_t str_ptr = uint32_t(thread_state->context()->r[3]);
// uint16_t str_len = uint16_t(thread_state->context()->r[4]);
auto str = thread_state->memory()->TranslateVirtual<const char*>(str_ptr);
// TODO(benvanik): truncate to length?
XELOGD("(DebugPrint) {}", str);
if (cvars::debugprint_trap_log) {
debugging::DebugPrint("(DebugPrint) {}", str);
}
return 0;
}
uint64_t TrapDebugBreak(void* raw_context, uint64_t address) {
auto thread_state = *reinterpret_cast<ThreadState**>(raw_context);
XELOGE("tw/td forced trap hit! This should be a crash!");
if (cvars::break_on_debugbreak) {
xe::debugging::Break();
}
return 0;
}
void A64Emitter::Trap(uint16_t trap_type) {
switch (trap_type) {
case 20:
case 26:
// 0x0FE00014 is a 'debug print' where r3 = buffer r4 = length
CallNative(TrapDebugPrint, 0);
break;
case 0:
case 22:
// Always trap?
// TODO(benvanik): post software interrupt to debugger.
CallNative(TrapDebugBreak, 0);
break;
case 25:
// ?
break;
default:
XELOGW("Unknown trap type {}", trap_type);
BRK(0xF000);
break;
}
}
void A64Emitter::UnimplementedInstr(const hir::Instr* i) {
// TODO(benvanik): notify debugger.
BRK(0xF000);
assert_always();
}
// This is used by the A64ThunkEmitter's ResolveFunctionThunk.
uint64_t ResolveFunction(void* raw_context, uint64_t target_address) {
auto thread_state = *reinterpret_cast<ThreadState**>(raw_context);
// TODO(benvanik): required?
assert_not_zero(target_address);
auto fn = thread_state->processor()->ResolveFunction(
static_cast<uint32_t>(target_address));
assert_not_null(fn);
auto a64_fn = static_cast<A64Function*>(fn);
uint64_t addr = reinterpret_cast<uint64_t>(a64_fn->machine_code());
return addr;
}
void A64Emitter::Call(const hir::Instr* instr, GuestFunction* function) {
assert_not_null(function);
auto fn = static_cast<A64Function*>(function);
// Resolve address to the function to call and store in X16.
if (fn->machine_code()) {
// TODO(benvanik): is it worth it to do this? It removes the need for
// a ResolveFunction call, but makes the table less useful.
assert_zero(uint64_t(fn->machine_code()) & 0xFFFFFFFF00000000);
MOV(X16, uint32_t(uint64_t(fn->machine_code())));
} else if (code_cache_->has_indirection_table()) {
// Load the pointer to the indirection table maintained in A64CodeCache.
// The target dword will either contain the address of the generated code
// or a thunk to ResolveAddress.
MOV(W17, function->address());
LDR(W16, X17);
} else {
// Old-style resolve.
// Not too important because indirection table is almost always available.
// TODO: Overwrite the call-site with a straight call.
CallNative(&ResolveFunction, function->address());
MOV(X16, X0);
}
// Actually jump/call to X16.
if (instr->flags & hir::CALL_TAIL) {
// Since we skip the prolog we need to mark the return here.
EmitTraceUserCallReturn();
// Pass the callers return address over.
LDR(X0, SP, StackLayout::GUEST_RET_ADDR);
ADD(SP, SP, static_cast<uint32_t>(stack_size()));
MOV(SP, X29);
LDP(X29, X30, SP, POST_INDEXED, 16);
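    // X30 now holds our caller's return address, so the callee's RET returns
    // straight to it.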
BR(X16);
} else {
// Return address is from the previous SET_RETURN_ADDRESS.
LDR(X0, SP, StackLayout::GUEST_CALL_RET_ADDR);
BLR(X16);
}
}
void A64Emitter::CallIndirect(const hir::Instr* instr,
const oaknut::XReg& reg) {
// Check if return.
if (instr->flags & hir::CALL_POSSIBLE_RETURN) {
LDR(W16, SP, StackLayout::GUEST_RET_ADDR);
CMP(reg.toW(), W16);
B(oaknut::Cond::EQ, epilog_label());
}
// Load the pointer to the indirection table maintained in A64CodeCache.
// The target dword will either contain the address of the generated code
// or a thunk to ResolveAddress.
if (code_cache_->has_indirection_table()) {
if (reg.toW().index() != W17.index()) {
MOV(W17, reg.toW());
}
LDR(W16, X17);
} else {
// Old-style resolve.
// Not too important because indirection table is almost always available.
MOV(X0, GetContextReg());
MOV(W1, reg.toW());
MOV(X16, reinterpret_cast<uint64_t>(ResolveFunction));
BLR(X16);
MOV(X16, X0);
}
// Actually jump/call to X16.
if (instr->flags & hir::CALL_TAIL) {
// Since we skip the prolog we need to mark the return here.
EmitTraceUserCallReturn();
// Pass the callers return address over.
LDR(X0, SP, StackLayout::GUEST_RET_ADDR);
ADD(SP, SP, static_cast<uint32_t>(stack_size()));
MOV(SP, X29);
LDP(X29, X30, SP, POST_INDEXED, 16);
BR(X16);
} else {
// Return address is from the previous SET_RETURN_ADDRESS.
LDR(X0, SP, StackLayout::GUEST_CALL_RET_ADDR);
BLR(X16);
}
}
uint64_t UndefinedCallExtern(void* raw_context, uint64_t function_ptr) {
auto function = reinterpret_cast<Function*>(function_ptr);
if (!cvars::ignore_undefined_externs) {
xe::FatalError(fmt::format("undefined extern call to {:08X} {}",
function->address(), function->name().c_str()));
} else {
XELOGE("undefined extern call to {:08X} {}", function->address(),
function->name());
}
return 0;
}
void A64Emitter::CallExtern(const hir::Instr* instr, const Function* function) {
bool undefined = true;
if (function->behavior() == Function::Behavior::kBuiltin) {
auto builtin_function = static_cast<const BuiltinFunction*>(function);
if (builtin_function->handler()) {
undefined = false;
// x0 = target function
// x1 = arg0
// x2 = arg1
// x3 = arg2
MOV(X0, reinterpret_cast<uint64_t>(builtin_function->handler()));
MOV(X1, reinterpret_cast<uint64_t>(builtin_function->arg0()));
MOV(X2, reinterpret_cast<uint64_t>(builtin_function->arg1()));
auto thunk = backend()->guest_to_host_thunk();
MOV(X16, reinterpret_cast<uint64_t>(thunk));
BLR(X16);
// x0 = host return
}
} else if (function->behavior() == Function::Behavior::kExtern) {
auto extern_function = static_cast<const GuestFunction*>(function);
if (extern_function->extern_handler()) {
undefined = false;
// x0 = target function
// x1 = arg0
// x2 = arg1
// x3 = arg2
MOV(X0, reinterpret_cast<uint64_t>(extern_function->extern_handler()));
LDR(X1, GetContextReg(), offsetof(ppc::PPCContext, kernel_state));
auto thunk = backend()->guest_to_host_thunk();
MOV(X16, reinterpret_cast<uint64_t>(thunk));
BLR(X16);
// x0 = host return
}
}
if (undefined) {
CallNative(UndefinedCallExtern, reinterpret_cast<uint64_t>(function));
}
}
void A64Emitter::CallNative(void* fn) { CallNativeSafe(fn); }
void A64Emitter::CallNative(uint64_t (*fn)(void* raw_context)) {
CallNativeSafe(reinterpret_cast<void*>(fn));
}
void A64Emitter::CallNative(uint64_t (*fn)(void* raw_context, uint64_t arg0)) {
CallNativeSafe(reinterpret_cast<void*>(fn));
}
void A64Emitter::CallNative(uint64_t (*fn)(void* raw_context, uint64_t arg0),
uint64_t arg0) {
MOV(GetNativeParam(0), arg0);
CallNativeSafe(reinterpret_cast<void*>(fn));
}
void A64Emitter::CallNativeSafe(void* fn) {
// X0 = target function
// X1 = arg0
// X2 = arg1
// X3 = arg2
auto thunk = backend()->guest_to_host_thunk();
MOV(X0, reinterpret_cast<uint64_t>(fn));
MOV(X16, reinterpret_cast<uint64_t>(thunk));
BLR(X16);
// X0 = host return
}
void A64Emitter::SetReturnAddress(uint64_t value) {
MOV(X0, value);
STR(X0, SP, StackLayout::GUEST_CALL_RET_ADDR);
}
oaknut::XReg A64Emitter::GetNativeParam(uint32_t param) {
if (param == 0)
return X1;
else if (param == 1)
return X2;
else if (param == 2)
return X3;
assert_always();
return X3;
}
// Important: If you change these, you must update the thunks in a64_backend.cc!
oaknut::XReg A64Emitter::GetContextReg() { return X27; }
oaknut::XReg A64Emitter::GetMembaseReg() { return X28; }
void A64Emitter::ReloadContext() {
LDR(GetContextReg(), SP, StackLayout::GUEST_CTX_HOME);
}
void A64Emitter::ReloadMembase() {
LDR(GetMembaseReg(), GetContextReg(),
offsetof(ppc::PPCContext, virtual_membase));
}
bool A64Emitter::ConstantFitsIn32Reg(uint64_t v) {
if ((v & ~0x7FFFFFFF) == 0) {
// Fits under 31 bits, so just load using normal mov.
return true;
} else if ((v & ~0x7FFFFFFF) == ~0x7FFFFFFF) {
// Negative number that fits in 32bits.
return true;
}
return false;
}
void A64Emitter::MovMem64(const oaknut::XRegSp& addr, intptr_t offset,
uint64_t v) {
if (v == 0) {
STR(XZR, addr, offset);
} else if (!(v >> 32)) {
// All high bits are zero, 32-bit MOV
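    // Writing W0 zero-extends into X0, so the 64-bit store below still writes
    // the full (high-bits-clear) value.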
MOV(W0, static_cast<uint32_t>(v));
STR(X0, addr, offset);
} else {
// 64bit number that needs double movs.
MOV(X0, v);
STR(X0, addr, offset);
}
}
static const vec128_t v_consts[] = {
/* VZero */ vec128f(0.0f),
/* VOnePD */ vec128d(1.0),
/* VNegativeOne */ vec128f(-1.0f, -1.0f, -1.0f, -1.0f),
/* VFFFF */
vec128i(0xFFFFFFFFu, 0xFFFFFFFFu, 0xFFFFFFFFu, 0xFFFFFFFFu),
/* VMaskX16Y16 */
vec128i(0x0000FFFFu, 0xFFFF0000u, 0x00000000u, 0x00000000u),
/* VFlipX16Y16 */
vec128i(0x00008000u, 0x00000000u, 0x00000000u, 0x00000000u),
/* VFixX16Y16 */ vec128f(-32768.0f, 0.0f, 0.0f, 0.0f),
/* VNormalizeX16Y16 */
vec128f(1.0f / 32767.0f, 1.0f / (32767.0f * 65536.0f), 0.0f, 0.0f),
/* V0001 */ vec128f(0.0f, 0.0f, 0.0f, 1.0f),
/* V3301 */ vec128f(3.0f, 3.0f, 0.0f, 1.0f),
/* V3331 */ vec128f(3.0f, 3.0f, 3.0f, 1.0f),
/* V3333 */ vec128f(3.0f, 3.0f, 3.0f, 3.0f),
/* VSignMaskPS */
vec128i(0x80000000u, 0x80000000u, 0x80000000u, 0x80000000u),
/* VSignMaskPD */
vec128i(0x00000000u, 0x80000000u, 0x00000000u, 0x80000000u),
/* VAbsMaskPS */
vec128i(0x7FFFFFFFu, 0x7FFFFFFFu, 0x7FFFFFFFu, 0x7FFFFFFFu),
/* VAbsMaskPD */
vec128i(0xFFFFFFFFu, 0x7FFFFFFFu, 0xFFFFFFFFu, 0x7FFFFFFFu),
/* VByteSwapMask */
vec128i(0x00010203u, 0x04050607u, 0x08090A0Bu, 0x0C0D0E0Fu),
/* VByteOrderMask */
vec128i(0x01000302u, 0x05040706u, 0x09080B0Au, 0x0D0C0F0Eu),
/* VPermuteControl15 */ vec128b(15),
/* VPermuteByteMask */ vec128b(0x1F),
/* VPackD3DCOLORSat */ vec128i(0x404000FFu),
/* VPackD3DCOLOR */
vec128i(0xFFFFFFFFu, 0xFFFFFFFFu, 0xFFFFFFFFu, 0x0C000408u),
/* VUnpackD3DCOLOR */
vec128i(0xFFFFFF0Eu, 0xFFFFFF0Du, 0xFFFFFF0Cu, 0xFFFFFF0Fu),
/* VPackFLOAT16_2 */
vec128i(0xFFFFFFFFu, 0xFFFFFFFFu, 0xFFFFFFFFu, 0x01000302u),
/* VUnpackFLOAT16_2 */
vec128i(0x0D0C0F0Eu, 0xFFFFFFFFu, 0xFFFFFFFFu, 0xFFFFFFFFu),
/* VPackFLOAT16_4 */
vec128i(0xFFFFFFFFu, 0xFFFFFFFFu, 0x01000302u, 0x05040706u),
/* VUnpackFLOAT16_4 */
vec128i(0x09080B0Au, 0x0D0C0F0Eu, 0xFFFFFFFFu, 0xFFFFFFFFu),
/* VPackSHORT_Min */ vec128i(0x403F8001u),
/* VPackSHORT_Max */ vec128i(0x40407FFFu),
/* VPackSHORT_2 */
vec128i(0xFFFFFFFFu, 0xFFFFFFFFu, 0xFFFFFFFFu, 0x01000504u),
/* VPackSHORT_4 */
vec128i(0xFFFFFFFFu, 0xFFFFFFFFu, 0x01000504u, 0x09080D0Cu),
/* VUnpackSHORT_2 */
vec128i(0xFFFF0F0Eu, 0xFFFF0D0Cu, 0xFFFFFFFFu, 0xFFFFFFFFu),
/* VUnpackSHORT_4 */
vec128i(0xFFFF0B0Au, 0xFFFF0908u, 0xFFFF0F0Eu, 0xFFFF0D0Cu),
/* VUnpackSHORT_Overflow */ vec128i(0x403F8000u),
/* VPackUINT_2101010_MinUnpacked */
vec128i(0x403FFE01u, 0x403FFE01u, 0x403FFE01u, 0x40400000u),
/* VPackUINT_2101010_MaxUnpacked */
vec128i(0x404001FFu, 0x404001FFu, 0x404001FFu, 0x40400003u),
/* VPackUINT_2101010_MaskUnpacked */
vec128i(0x3FFu, 0x3FFu, 0x3FFu, 0x3u),
/* VPackUINT_2101010_MaskPacked */
vec128i(0x3FFu, 0x3FFu << 10, 0x3FFu << 20, 0x3u << 30),
/* VPackUINT_2101010_Shift */ vec128i(0, 10, 20, 30),
/* VUnpackUINT_2101010_Overflow */ vec128i(0x403FFE00u),
/* VPackULONG_4202020_MinUnpacked */
vec128i(0x40380001u, 0x40380001u, 0x40380001u, 0x40400000u),
/* VPackULONG_4202020_MaxUnpacked */
vec128i(0x4047FFFFu, 0x4047FFFFu, 0x4047FFFFu, 0x4040000Fu),
/* VPackULONG_4202020_MaskUnpacked */
vec128i(0xFFFFFu, 0xFFFFFu, 0xFFFFFu, 0xFu),
/* VPackULONG_4202020_PermuteXZ */
vec128i(0xFFFFFFFFu, 0xFFFFFFFFu, 0x0A0908FFu, 0xFF020100u),
/* VPackULONG_4202020_PermuteYW */
vec128i(0xFFFFFFFFu, 0xFFFFFFFFu, 0x0CFFFF06u, 0x0504FFFFu),
/* VUnpackULONG_4202020_Permute */
vec128i(0xFF0E0D0Cu, 0xFF0B0A09u, 0xFF080F0Eu, 0xFFFFFF0Bu),
/* VUnpackULONG_4202020_Overflow */ vec128i(0x40380000u),
/* VOneOver255 */ vec128f(1.0f / 255.0f),
/* VMaskEvenPI16 */
vec128i(0x0000FFFFu, 0x0000FFFFu, 0x0000FFFFu, 0x0000FFFFu),
/* VShiftMaskEvenPI16 */
vec128i(0x0000000Fu, 0x0000000Fu, 0x0000000Fu, 0x0000000Fu),
/* VShiftMaskPS */
vec128i(0x0000001Fu, 0x0000001Fu, 0x0000001Fu, 0x0000001Fu),
/* VShiftByteMask */
vec128i(0x000000FFu, 0x000000FFu, 0x000000FFu, 0x000000FFu),
/* VSwapWordMask */
vec128i(0x03030303u, 0x03030303u, 0x03030303u, 0x03030303u),
/* VUnsignedDwordMax */
vec128i(0xFFFFFFFFu, 0x00000000u, 0xFFFFFFFFu, 0x00000000u),
/* V255 */ vec128f(255.0f),
/* VPI32 */ vec128i(32),
/* VSignMaskI8 */
vec128i(0x80808080u, 0x80808080u, 0x80808080u, 0x80808080u),
/* VSignMaskI16 */
vec128i(0x80008000u, 0x80008000u, 0x80008000u, 0x80008000u),
/* VSignMaskI32 */
vec128i(0x80000000u, 0x80000000u, 0x80000000u, 0x80000000u),
/* VSignMaskF32 */
vec128i(0x80000000u, 0x80000000u, 0x80000000u, 0x80000000u),
/* VShortMinPS */ vec128f(SHRT_MIN),
/* VShortMaxPS */ vec128f(SHRT_MAX),
/* VIntMin */ vec128i(INT_MIN),
/* VIntMax */ vec128i(INT_MAX),
/* VIntMaxPD */ vec128d(INT_MAX),
/* VPosIntMinPS */ vec128f((float)0x80000000u),
/* VQNaN */ vec128i(0x7FC00000u),
/* VInt127 */ vec128i(0x7Fu),
/* V2To32 */ vec128f(0x1.0p32f),
};
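// NOTE: the entries above must stay in the exact order of the VConst enum in
// a64_emitter.h; GetVConstOffset() indexes this table as id * sizeof(vec128_t).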
// First location to try and place constants.
static const uintptr_t kConstDataLocation = 0x20000000;
static const uintptr_t kConstDataSize = sizeof(v_consts);
// Increment the location by this amount for every allocation failure.
static const uintptr_t kConstDataIncrement = 0x00001000;
// This function places constant data that is used by the emitter later on.
// Only called once and used by multiple instances of the emitter.
//
// TODO(DrChat): This should be placed in the code cache with the code, but
// doing so requires RIP-relative addressing, which is difficult to support
// given the current setup.
uintptr_t A64Emitter::PlaceConstData() {
uint8_t* ptr = reinterpret_cast<uint8_t*>(kConstDataLocation);
void* mem = nullptr;
while (!mem) {
mem = memory::AllocFixed(
ptr, xe::round_up(kConstDataSize, memory::page_size()),
memory::AllocationType::kReserveCommit, memory::PageAccess::kReadWrite);
ptr += kConstDataIncrement;
}
  // The resulting pointer must fit within 31 bits.
assert_zero(reinterpret_cast<uintptr_t>(mem) & ~0x7FFFFFFF);
std::memcpy(mem, v_consts, sizeof(v_consts));
memory::Protect(mem, kConstDataSize, memory::PageAccess::kReadOnly, nullptr);
return reinterpret_cast<uintptr_t>(mem);
}
void A64Emitter::FreeConstData(uintptr_t data) {
memory::DeallocFixed(reinterpret_cast<void*>(data), 0,
memory::DeallocationType::kRelease);
}
uintptr_t A64Emitter::GetVConstPtr() const { return backend_->emitter_data(); }
uintptr_t A64Emitter::GetVConstPtr(VConst id) const {
  // Load through the fixed constant table set up by PlaceConstData.
// It's important that the pointer is not signed, as it will be sign-extended.
return GetVConstPtr() + GetVConstOffset(id);
}
// Implies possible StashV(0, ...)!
void A64Emitter::LoadConstantV(oaknut::QReg dest, const vec128_t& v) {
if (!v.low && !v.high) {
// 0000...
// MOVI is implemented as a register-rename while EOR(x, x, x) is not
// https://dougallj.github.io/applecpu/firestorm.html
MOVI(dest.B16(), 0);
} else if (v.low == ~uint64_t(0) && v.high == ~uint64_t(0)) {
// 1111...
MOVI(dest.B16(), 0xFF);
} else {
// Try to figure out some common splat-patterns to utilize MOVI rather than
// stashing to memory.
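    // e.g. a vector with every lane equal to 0x80000000 becomes a single
    // "movi vN.4s, #0x80, lsl #24" instead of two stack stores and a reload.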
const bool all_same_u8 =
std::adjacent_find(std::cbegin(v.u8), std::cend(v.u8),
std::not_equal_to<>()) == std::cend(v.u8);
if (all_same_u8) {
// 0xXX, 0xXX, 0xXX...
MOVI(dest.B16(), v.u8[0]);
return;
}
const bool all_same_u16 =
std::adjacent_find(std::cbegin(v.u16), std::cend(v.u16),
std::not_equal_to<>()) == std::cend(v.u16);
if (all_same_u16) {
if ((v.u16[0] & 0xFF00) == 0) {
// 0x00XX, 0x00XX, 0x00XX...
MOVI(dest.H8(), uint8_t(v.u16[0]));
return;
} else if ((v.u16[0] & 0x00FF) == 0) {
// 0xXX00, 0xXX00, 0xXX00...
MOVI(dest.H8(), uint8_t(v.u16[0] >> 8), oaknut::util::LSL, 8);
return;
}
}
const bool all_same_u32 =
std::adjacent_find(std::cbegin(v.u32), std::cend(v.u32),
std::not_equal_to<>()) == std::cend(v.u32);
if (all_same_u32) {
if ((v.u32[0] & 0x00FFFFFF) == 0) {
// This is used a lot for certain float-splats and should be checked
// first before the others
// 0xXX000000, 0xXX000000, 0xXX000000...
MOVI(dest.S4(), uint8_t(v.u32[0] >> 24), oaknut::util::LSL, 24);
return;
} else if ((v.u32[0] & 0xFFFFFF00) == 0) {
// 0x000000XX, 0x000000XX, 0x000000XX...
MOVI(dest.S4(), uint8_t(v.u32[0]));
return;
} else if ((v.u32[0] & 0xFFFF00FF) == 0) {
// 0x0000XX00, 0x0000XX00, 0x0000XX00...
MOVI(dest.S4(), uint8_t(v.u32[0] >> 8), oaknut::util::LSL, 8);
return;
} else if ((v.u32[0] & 0xFF00FFFF) == 0) {
// 0x00XX0000, 0x00XX0000, 0x00XX0000...
MOVI(dest.S4(), uint8_t(v.u32[0] >> 16), oaknut::util::LSL, 16);
return;
}
// Try to utilize FMOV if possible
oaknut::FImm8 fp8(0);
if (f32_to_fimm8(v.u32[0], fp8)) {
FMOV(dest.S4(), fp8);
return;
}
}
// TODO(benvanik): see what other common values are.
// TODO(benvanik): build constant table - 99% are reused.
MovMem64(SP, kStashOffset, v.low);
MovMem64(SP, kStashOffset + 8, v.high);
LDR(dest, SP, kStashOffset);
}
}
void A64Emitter::LoadConstantV(oaknut::QReg dest, float v) {
union {
float f;
uint32_t i;
} x = {v};
if (!x.i) {
// +0.0f (but not -0.0f because it may be used to flip the sign via xor).
MOVI(dest.B16(), 0);
} else if (x.i == ~uint32_t(0)) {
// 1111...
MOVI(dest.B16(), 0xFF);
} else {
// TODO(benvanik): see what other common values are.
// TODO(benvanik): build constant table - 99% are reused.
// Try to utilize FMOV if possible
oaknut::FImm8 fp8(0);
if (f32_to_fimm8(x.i, fp8)) {
FMOV(dest.toS(), fp8);
return;
}
MOV(W0, x.i);
FMOV(dest.toS(), W0);
}
}
void A64Emitter::LoadConstantV(oaknut::QReg dest, double v) {
union {
double d;
uint64_t i;
} x = {v};
if (!x.i) {
// +0.0 (but not -0.0 because it may be used to flip the sign via xor).
MOVI(dest.toD(), oaknut::RepImm(0));
} else if (x.i == ~uint64_t(0)) {
// 1111...
MOVI(dest.toD(), oaknut::RepImm(0xFF));
} else {
// TODO(benvanik): see what other common values are.
// TODO(benvanik): build constant table - 99% are reused.
// Try to utilize FMOV if possible
oaknut::FImm8 fp8(0);
if (f64_to_fimm8(x.i, fp8)) {
FMOV(dest.toD(), fp8);
return;
}
MOV(X0, x.i);
FMOV(dest.toD(), X0);
}
}
uintptr_t A64Emitter::StashV(int index, const oaknut::QReg& r) {
// auto addr = ptr[rsp + kStashOffset + (index * 16)];
// vmovups(addr, r);
const auto addr = kStashOffset + (index * 16);
STR(r, SP, addr);
return addr;
}
uintptr_t A64Emitter::StashConstantV(int index, float v) {
union {
float f;
uint32_t i;
} x = {v};
const auto addr = kStashOffset + (index * 16);
MovMem64(SP, addr, x.i);
MovMem64(SP, addr + 8, 0);
return addr;
}
uintptr_t A64Emitter::StashConstantV(int index, double v) {
union {
double d;
uint64_t i;
} x = {v};
const auto addr = kStashOffset + (index * 16);
MovMem64(SP, addr, x.i);
MovMem64(SP, addr + 8, 0);
return addr;
}
uintptr_t A64Emitter::StashConstantV(int index, const vec128_t& v) {
const auto addr = kStashOffset + (index * 16);
MovMem64(SP, addr, v.low);
MovMem64(SP, addr + 8, v.high);
return addr;
}
} // namespace a64
} // namespace backend
} // namespace cpu
} // namespace xe

View File

@ -0,0 +1,267 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2024 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#ifndef XENIA_CPU_BACKEND_A64_A64_EMITTER_H_
#define XENIA_CPU_BACKEND_A64_A64_EMITTER_H_
#include <unordered_map>
#include <vector>
#include "xenia/base/arena.h"
#include "xenia/cpu/function.h"
#include "xenia/cpu/function_trace_data.h"
#include "xenia/cpu/hir/hir_builder.h"
#include "xenia/cpu/hir/instr.h"
#include "xenia/cpu/hir/value.h"
#include "xenia/memory.h"
#include "oaknut/code_block.hpp"
#include "oaknut/oaknut.hpp"
namespace xe {
namespace cpu {
class Processor;
} // namespace cpu
} // namespace xe
namespace xe {
namespace cpu {
namespace backend {
namespace a64 {
class A64Backend;
class A64CodeCache;
struct EmitFunctionInfo;
enum RegisterFlags {
REG_DEST = (1 << 0),
REG_ABCD = (1 << 1),
};
enum VConst {
VZero = 0,
VOnePD,
VNegativeOne,
VFFFF,
VMaskX16Y16,
VFlipX16Y16,
VFixX16Y16,
VNormalizeX16Y16,
V0001,
V3301,
V3331,
V3333,
VSignMaskPS,
VSignMaskPD,
VAbsMaskPS,
VAbsMaskPD,
VByteSwapMask,
VByteOrderMask,
VPermuteControl15,
VPermuteByteMask,
VPackD3DCOLORSat,
VPackD3DCOLOR,
VUnpackD3DCOLOR,
VPackFLOAT16_2,
VUnpackFLOAT16_2,
VPackFLOAT16_4,
VUnpackFLOAT16_4,
VPackSHORT_Min,
VPackSHORT_Max,
VPackSHORT_2,
VPackSHORT_4,
VUnpackSHORT_2,
VUnpackSHORT_4,
VUnpackSHORT_Overflow,
VPackUINT_2101010_MinUnpacked,
VPackUINT_2101010_MaxUnpacked,
VPackUINT_2101010_MaskUnpacked,
VPackUINT_2101010_MaskPacked,
VPackUINT_2101010_Shift,
VUnpackUINT_2101010_Overflow,
VPackULONG_4202020_MinUnpacked,
VPackULONG_4202020_MaxUnpacked,
VPackULONG_4202020_MaskUnpacked,
VPackULONG_4202020_PermuteXZ,
VPackULONG_4202020_PermuteYW,
VUnpackULONG_4202020_Permute,
VUnpackULONG_4202020_Overflow,
VOneOver255,
VMaskEvenPI16,
VShiftMaskEvenPI16,
VShiftMaskPS,
VShiftByteMask,
VSwapWordMask,
VUnsignedDwordMax,
V255,
VPI32,
VSignMaskI8,
VSignMaskI16,
VSignMaskI32,
VSignMaskF32,
VShortMinPS,
VShortMaxPS,
VIntMin,
VIntMax,
VIntMaxPD,
VPosIntMinPS,
VQNaN,
VInt127,
V2To32,
};
enum A64EmitterFeatureFlags {
kA64EmitLSE = 1 << 0,
kA64EmitF16C = 1 << 1,
};
class A64Emitter : public oaknut::VectorCodeGenerator {
public:
A64Emitter(A64Backend* backend);
virtual ~A64Emitter();
Processor* processor() const { return processor_; }
A64Backend* backend() const { return backend_; }
static uintptr_t PlaceConstData();
static void FreeConstData(uintptr_t data);
bool Emit(GuestFunction* function, hir::HIRBuilder* builder,
uint32_t debug_info_flags, FunctionDebugInfo* debug_info,
void** out_code_address, size_t* out_code_size,
std::vector<SourceMapEntry>* out_source_map);
public:
  // Reserved:  XSP, X27 (context), X28 (membase)
  // Scratch:   X1-X15, X30 | V0-V7 and V16-V31
  // Available: X19-X26 (gpr_reg_map_)
  //            V8-V15  (fpr_reg_map_)
static const size_t GPR_COUNT = 8;
static const size_t FPR_COUNT = 8;
static void SetupReg(const hir::Value* v, oaknut::WReg& r) {
const auto idx = gpr_reg_map_[v->reg.index];
r = oaknut::WReg(idx);
}
static void SetupReg(const hir::Value* v, oaknut::XReg& r) {
const auto idx = gpr_reg_map_[v->reg.index];
r = oaknut::XReg(idx);
}
static void SetupReg(const hir::Value* v, oaknut::SReg& r) {
const auto idx = fpr_reg_map_[v->reg.index];
r = oaknut::SReg(idx);
}
static void SetupReg(const hir::Value* v, oaknut::DReg& r) {
const auto idx = fpr_reg_map_[v->reg.index];
r = oaknut::DReg(idx);
}
static void SetupReg(const hir::Value* v, oaknut::QReg& r) {
const auto idx = fpr_reg_map_[v->reg.index];
r = oaknut::QReg(idx);
}
  // Gets (and possibly creates) the oaknut label associated with an HIR label
  // name.
oaknut::Label* lookup_label(const char* label_name) {
return &label_lookup_[label_name];
}
oaknut::Label& epilog_label() { return *epilog_label_; }
void MarkSourceOffset(const hir::Instr* i);
void DebugBreak();
void Trap(uint16_t trap_type = 0);
void UnimplementedInstr(const hir::Instr* i);
void Call(const hir::Instr* instr, GuestFunction* function);
void CallIndirect(const hir::Instr* instr, const oaknut::XReg& reg);
void CallExtern(const hir::Instr* instr, const Function* function);
void CallNative(void* fn);
void CallNative(uint64_t (*fn)(void* raw_context));
void CallNative(uint64_t (*fn)(void* raw_context, uint64_t arg0));
void CallNative(uint64_t (*fn)(void* raw_context, uint64_t arg0),
uint64_t arg0);
void CallNativeSafe(void* fn);
void SetReturnAddress(uint64_t value);
static oaknut::XReg GetNativeParam(uint32_t param);
static oaknut::XReg GetContextReg();
static oaknut::XReg GetMembaseReg();
void ReloadContext();
void ReloadMembase();
// Moves a 64bit immediate into memory.
static bool ConstantFitsIn32Reg(uint64_t v);
void MovMem64(const oaknut::XRegSp& addr, intptr_t offset, uint64_t v);
uintptr_t GetVConstPtr() const;
uintptr_t GetVConstPtr(VConst id) const;
static constexpr uintptr_t GetVConstOffset(VConst id) {
return sizeof(vec128_t) * id;
}
void LoadConstantV(oaknut::QReg dest, float v);
void LoadConstantV(oaknut::QReg dest, double v);
void LoadConstantV(oaknut::QReg dest, const vec128_t& v);
// Returned addresses are relative to XSP
uintptr_t StashV(int index, const oaknut::QReg& r);
uintptr_t StashConstantV(int index, float v);
uintptr_t StashConstantV(int index, double v);
uintptr_t StashConstantV(int index, const vec128_t& v);
bool IsFeatureEnabled(uint32_t feature_flag) const {
return (feature_flags_ & feature_flag) == feature_flag;
}
FunctionDebugInfo* debug_info() const { return debug_info_; }
size_t stack_size() const { return stack_size_; }
protected:
void* Emplace(const EmitFunctionInfo& func_info,
GuestFunction* function = nullptr);
bool Emit(hir::HIRBuilder* builder, EmitFunctionInfo& func_info);
void EmitGetCurrentThreadId();
void EmitTraceUserCallReturn();
protected:
Processor* processor_ = nullptr;
A64Backend* backend_ = nullptr;
A64CodeCache* code_cache_ = nullptr;
uint32_t feature_flags_ = 0;
std::vector<std::uint32_t> assembly_buffer;
oaknut::Label* epilog_label_ = nullptr;
// Convert from plain-text label-names into oaknut-labels
std::unordered_map<std::string, oaknut::Label> label_lookup_;
hir::Instr* current_instr_ = nullptr;
FunctionDebugInfo* debug_info_ = nullptr;
uint32_t debug_info_flags_ = 0;
FunctionTraceData* trace_data_ = nullptr;
Arena source_map_arena_;
size_t stack_size_ = 0;
static const uint8_t gpr_reg_map_[GPR_COUNT];
static const uint8_t fpr_reg_map_[FPR_COUNT];
};
} // namespace a64
} // namespace backend
} // namespace cpu
} // namespace xe
#endif // XENIA_CPU_BACKEND_A64_A64_EMITTER_H_

View File

@ -0,0 +1,45 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2024 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#include "xenia/cpu/backend/a64/a64_function.h"
#include "xenia/cpu/backend/a64/a64_backend.h"
#include "xenia/cpu/processor.h"
#include "xenia/cpu/thread_state.h"
namespace xe {
namespace cpu {
namespace backend {
namespace a64 {
A64Function::A64Function(Module* module, uint32_t address)
: GuestFunction(module, address) {}
A64Function::~A64Function() {
// machine_code_ is freed by code cache.
}
void A64Function::Setup(uint8_t* machine_code, size_t machine_code_length) {
machine_code_ = machine_code;
machine_code_length_ = machine_code_length;
}
bool A64Function::CallImpl(ThreadState* thread_state, uint32_t return_address) {
auto backend =
reinterpret_cast<A64Backend*>(thread_state->processor()->backend());
auto thunk = backend->host_to_guest_thunk();
thunk(machine_code_, thread_state->context(),
reinterpret_cast<void*>(uintptr_t(return_address)));
return true;
}
} // namespace a64
} // namespace backend
} // namespace cpu
} // namespace xe

View File

@ -0,0 +1,44 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2024 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#ifndef XENIA_CPU_BACKEND_A64_A64_FUNCTION_H_
#define XENIA_CPU_BACKEND_A64_A64_FUNCTION_H_
#include "xenia/cpu/function.h"
#include "xenia/cpu/thread_state.h"
namespace xe {
namespace cpu {
namespace backend {
namespace a64 {
class A64Function : public GuestFunction {
public:
A64Function(Module* module, uint32_t address);
~A64Function() override;
uint8_t* machine_code() const override { return machine_code_; }
size_t machine_code_length() const override { return machine_code_length_; }
void Setup(uint8_t* machine_code, size_t machine_code_length);
protected:
bool CallImpl(ThreadState* thread_state, uint32_t return_address) override;
private:
uint8_t* machine_code_ = nullptr;
size_t machine_code_length_ = 0;
};
} // namespace a64
} // namespace backend
} // namespace cpu
} // namespace xe
#endif // XENIA_CPU_BACKEND_A64_A64_FUNCTION_H_

View File

@ -0,0 +1,618 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2024 Xenia Developers. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#ifndef XENIA_CPU_BACKEND_A64_A64_OP_H_
#define XENIA_CPU_BACKEND_A64_A64_OP_H_
#include "xenia/cpu/backend/a64/a64_emitter.h"
#include "xenia/cpu/hir/instr.h"
namespace xe {
namespace cpu {
namespace backend {
namespace a64 {
// TODO(benvanik): direct usings.
using namespace xe::cpu;
using namespace xe::cpu::hir;
using namespace oaknut;
using namespace oaknut::util;
// Selects the right byte/word/etc from a vector. We need to flip logical
// indices (0,1,2,3,4,5,6,7,...) = (3,2,1,0,7,6,5,4,...)
#define VEC128_B(n) ((n) ^ 0x3)
#define VEC128_W(n) ((n) ^ 0x1)
#define VEC128_D(n) (n)
#define VEC128_F(n) (n)
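// e.g. logical byte 0 of a big-endian PPC vector element is byte 3 of the
// corresponding little-endian host 32-bit lane, hence the ^ 0x3; 16-bit
// halfwords swap in pairs (^ 0x1) and 32-bit words map straight through.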
enum KeyType {
KEY_TYPE_X = OPCODE_SIG_TYPE_X,
KEY_TYPE_L = OPCODE_SIG_TYPE_L,
KEY_TYPE_O = OPCODE_SIG_TYPE_O,
KEY_TYPE_S = OPCODE_SIG_TYPE_S,
KEY_TYPE_V_I8 = OPCODE_SIG_TYPE_V + INT8_TYPE,
KEY_TYPE_V_I16 = OPCODE_SIG_TYPE_V + INT16_TYPE,
KEY_TYPE_V_I32 = OPCODE_SIG_TYPE_V + INT32_TYPE,
KEY_TYPE_V_I64 = OPCODE_SIG_TYPE_V + INT64_TYPE,
KEY_TYPE_V_F32 = OPCODE_SIG_TYPE_V + FLOAT32_TYPE,
KEY_TYPE_V_F64 = OPCODE_SIG_TYPE_V + FLOAT64_TYPE,
KEY_TYPE_V_V128 = OPCODE_SIG_TYPE_V + VEC128_TYPE,
};
#pragma pack(push, 1)
union InstrKey {
uint32_t value;
struct {
uint32_t opcode : 8;
uint32_t dest : 5;
uint32_t src1 : 5;
uint32_t src2 : 5;
uint32_t src3 : 5;
uint32_t reserved : 4;
};
operator uint32_t() const { return value; }
InstrKey() : value(0) { static_assert_size(*this, sizeof(value)); }
InstrKey(uint32_t v) : value(v) {}
InstrKey(const Instr* i) : value(0) {
opcode = i->opcode->num;
uint32_t sig = i->opcode->signature;
dest =
GET_OPCODE_SIG_TYPE_DEST(sig) ? OPCODE_SIG_TYPE_V + i->dest->type : 0;
src1 = GET_OPCODE_SIG_TYPE_SRC1(sig);
if (src1 == OPCODE_SIG_TYPE_V) {
src1 += i->src1.value->type;
}
src2 = GET_OPCODE_SIG_TYPE_SRC2(sig);
if (src2 == OPCODE_SIG_TYPE_V) {
src2 += i->src2.value->type;
}
src3 = GET_OPCODE_SIG_TYPE_SRC3(sig);
if (src3 == OPCODE_SIG_TYPE_V) {
src3 += i->src3.value->type;
}
}
template <Opcode OPCODE, KeyType DEST = KEY_TYPE_X, KeyType SRC1 = KEY_TYPE_X,
KeyType SRC2 = KEY_TYPE_X, KeyType SRC3 = KEY_TYPE_X>
struct Construct {
static const uint32_t value =
(OPCODE) | (DEST << 8) | (SRC1 << 13) | (SRC2 << 18) | (SRC3 << 23);
};
};
#pragma pack(pop)
static_assert(sizeof(InstrKey) <= 4, "Key must be 4 bytes");
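// e.g. (illustrative) I<OPCODE_ADD, I32Op, I32Op, I32Op>, defined below, has
// key == InstrKey::Construct<OPCODE_ADD, KEY_TYPE_V_I32, KEY_TYPE_V_I32,
//                            KEY_TYPE_V_I32>::value,
// so its Load() only matches 32-bit integer ADD instructions.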
template <typename... Ts>
struct CombinedStruct;
template <>
struct CombinedStruct<> {};
template <typename T, typename... Ts>
struct CombinedStruct<T, Ts...> : T, CombinedStruct<Ts...> {};
struct OpBase {};
template <typename T, KeyType KEY_TYPE>
struct Op : OpBase {
static const KeyType key_type = KEY_TYPE;
};
struct VoidOp : Op<VoidOp, KEY_TYPE_X> {
protected:
friend struct Op<VoidOp, KEY_TYPE_X>;
template <hir::Opcode OPCODE, typename... Ts>
friend struct I;
void Load(const Instr::Op& op) {}
};
struct OffsetOp : Op<OffsetOp, KEY_TYPE_O> {
uint64_t value;
protected:
friend struct Op<OffsetOp, KEY_TYPE_O>;
template <hir::Opcode OPCODE, typename... Ts>
friend struct I;
void Load(const Instr::Op& op) { this->value = op.offset; }
};
struct SymbolOp : Op<SymbolOp, KEY_TYPE_S> {
Function* value;
protected:
friend struct Op<SymbolOp, KEY_TYPE_S>;
template <hir::Opcode OPCODE, typename... Ts>
friend struct I;
bool Load(const Instr::Op& op) {
this->value = op.symbol;
return true;
}
};
struct LabelOp : Op<LabelOp, KEY_TYPE_L> {
hir::Label* value;
protected:
friend struct Op<LabelOp, KEY_TYPE_L>;
template <hir::Opcode OPCODE, typename... Ts>
friend struct I;
void Load(const Instr::Op& op) { this->value = op.label; }
};
template <typename T, KeyType KEY_TYPE, typename REG_TYPE, typename CONST_TYPE>
struct ValueOp : Op<ValueOp<T, KEY_TYPE, REG_TYPE, CONST_TYPE>, KEY_TYPE> {
typedef REG_TYPE reg_type;
const Value* value;
bool is_constant;
virtual bool ConstantFitsIn32Reg() const { return true; }
const REG_TYPE& reg() const {
assert_true(!is_constant);
return reg_;
}
operator const REG_TYPE&() const { return reg(); }
bool IsEqual(const T& b) const {
if (is_constant && b.is_constant) {
return reinterpret_cast<const T*>(this)->constant() == b.constant();
} else if (!is_constant && !b.is_constant) {
return reg_.index() == b.reg_.index();
} else {
return false;
}
}
bool IsEqual(const oaknut::Reg& b) const {
if (is_constant) {
return false;
} else if (!is_constant) {
return reg_.index() == b.index();
} else {
return false;
}
}
bool operator==(const T& b) const { return IsEqual(b); }
bool operator!=(const T& b) const { return !IsEqual(b); }
bool operator==(const oaknut::Reg& b) const { return IsEqual(b); }
bool operator!=(const oaknut::Reg& b) const { return !IsEqual(b); }
void Load(const Instr::Op& op) {
value = op.value;
is_constant = value->IsConstant();
if (!is_constant) {
A64Emitter::SetupReg(value, reg_);
}
}
protected:
REG_TYPE reg_ = REG_TYPE(0);
};
struct I8Op : ValueOp<I8Op, KEY_TYPE_V_I8, WReg, int8_t> {
typedef ValueOp<I8Op, KEY_TYPE_V_I8, WReg, int8_t> BASE;
const int8_t constant() const {
assert_true(BASE::is_constant);
return BASE::value->constant.i8;
}
};
struct I16Op : ValueOp<I16Op, KEY_TYPE_V_I16, WReg, int16_t> {
typedef ValueOp<I16Op, KEY_TYPE_V_I16, WReg, int16_t> BASE;
const int16_t constant() const {
assert_true(BASE::is_constant);
return BASE::value->constant.i16;
}
};
struct I32Op : ValueOp<I32Op, KEY_TYPE_V_I32, WReg, int32_t> {
typedef ValueOp<I32Op, KEY_TYPE_V_I32, WReg, int32_t> BASE;
const int32_t constant() const {
assert_true(BASE::is_constant);
return BASE::value->constant.i32;
}
};
struct I64Op : ValueOp<I64Op, KEY_TYPE_V_I64, XReg, int64_t> {
typedef ValueOp<I64Op, KEY_TYPE_V_I64, XReg, int64_t> BASE;
const int64_t constant() const {
assert_true(BASE::is_constant);
return BASE::value->constant.i64;
}
bool ConstantFitsIn32Reg() const override {
int64_t v = BASE::value->constant.i64;
if ((v & ~0x7FFFFFFF) == 0) {
// Fits under 31 bits, so just load using normal mov.
return true;
} else if ((v & ~0x7FFFFFFF) == ~0x7FFFFFFF) {
// Negative number that fits in 32bits.
return true;
}
return false;
}
};
struct F32Op : ValueOp<F32Op, KEY_TYPE_V_F32, SReg, float> {
typedef ValueOp<F32Op, KEY_TYPE_V_F32, SReg, float> BASE;
const float constant() const {
assert_true(BASE::is_constant);
return BASE::value->constant.f32;
}
};
struct F64Op : ValueOp<F64Op, KEY_TYPE_V_F64, DReg, double> {
typedef ValueOp<F64Op, KEY_TYPE_V_F64, DReg, double> BASE;
const double constant() const {
assert_true(BASE::is_constant);
return BASE::value->constant.f64;
}
};
struct V128Op : ValueOp<V128Op, KEY_TYPE_V_V128, QReg, vec128_t> {
typedef ValueOp<V128Op, KEY_TYPE_V_V128, QReg, vec128_t> BASE;
const vec128_t& constant() const {
assert_true(BASE::is_constant);
return BASE::value->constant.v128;
}
};
template <typename DEST, typename... Tf>
struct DestField;
template <typename DEST>
struct DestField<DEST> {
DEST dest;
protected:
bool LoadDest(const Instr* i) {
Instr::Op op;
op.value = i->dest;
dest.Load(op);
return true;
}
};
template <>
struct DestField<VoidOp> {
protected:
bool LoadDest(const Instr* i) { return true; }
};
template <hir::Opcode OPCODE, typename... Ts>
struct I;
template <hir::Opcode OPCODE, typename DEST>
struct I<OPCODE, DEST> : DestField<DEST> {
typedef DestField<DEST> BASE;
static const hir::Opcode opcode = OPCODE;
static const uint32_t key =
InstrKey::Construct<OPCODE, DEST::key_type>::value;
static const KeyType dest_type = DEST::key_type;
const Instr* instr;
protected:
template <typename SEQ, typename T>
friend struct Sequence;
bool Load(const Instr* i) {
if (InstrKey(i).value == key && BASE::LoadDest(i)) {
instr = i;
return true;
}
return false;
}
};
template <hir::Opcode OPCODE, typename DEST, typename SRC1>
struct I<OPCODE, DEST, SRC1> : DestField<DEST> {
typedef DestField<DEST> BASE;
static const hir::Opcode opcode = OPCODE;
static const uint32_t key =
InstrKey::Construct<OPCODE, DEST::key_type, SRC1::key_type>::value;
static const KeyType dest_type = DEST::key_type;
static const KeyType src1_type = SRC1::key_type;
const Instr* instr;
SRC1 src1 = {};
protected:
template <typename SEQ, typename T>
friend struct Sequence;
bool Load(const Instr* i) {
if (InstrKey(i).value == key && BASE::LoadDest(i)) {
instr = i;
src1.Load(i->src1);
return true;
}
return false;
}
};
template <hir::Opcode OPCODE, typename DEST, typename SRC1, typename SRC2>
struct I<OPCODE, DEST, SRC1, SRC2> : DestField<DEST> {
typedef DestField<DEST> BASE;
static const hir::Opcode opcode = OPCODE;
static const uint32_t key =
InstrKey::Construct<OPCODE, DEST::key_type, SRC1::key_type,
SRC2::key_type>::value;
static const KeyType dest_type = DEST::key_type;
static const KeyType src1_type = SRC1::key_type;
static const KeyType src2_type = SRC2::key_type;
const Instr* instr;
SRC1 src1;
SRC2 src2;
protected:
template <typename SEQ, typename T>
friend struct Sequence;
bool Load(const Instr* i) {
if (InstrKey(i).value == key && BASE::LoadDest(i)) {
instr = i;
src1.Load(i->src1);
src2.Load(i->src2);
return true;
}
return false;
}
};
template <hir::Opcode OPCODE, typename DEST, typename SRC1, typename SRC2,
typename SRC3>
struct I<OPCODE, DEST, SRC1, SRC2, SRC3> : DestField<DEST> {
typedef DestField<DEST> BASE;
static const hir::Opcode opcode = OPCODE;
static const uint32_t key =
InstrKey::Construct<OPCODE, DEST::key_type, SRC1::key_type,
SRC2::key_type, SRC3::key_type>::value;
static const KeyType dest_type = DEST::key_type;
static const KeyType src1_type = SRC1::key_type;
static const KeyType src2_type = SRC2::key_type;
static const KeyType src3_type = SRC3::key_type;
const Instr* instr;
SRC1 src1;
SRC2 src2;
SRC3 src3;
protected:
template <typename SEQ, typename T>
friend struct Sequence;
bool Load(const Instr* i) {
if (InstrKey(i).value == key && BASE::LoadDest(i)) {
instr = i;
src1.Load(i->src1);
src2.Load(i->src2);
src3.Load(i->src3);
return true;
}
return false;
}
};
template <typename T>
static const T GetTempReg(A64Emitter& e);
template <>
const WReg GetTempReg<WReg>(A64Emitter& e) {
return W0;
}
template <>
const XReg GetTempReg<XReg>(A64Emitter& e) {
return X0;
}
template <typename SEQ, typename T>
struct Sequence {
typedef T EmitArgType;
static constexpr uint32_t head_key() { return T::key; }
static bool Select(A64Emitter& e, const Instr* i) {
T args;
if (!args.Load(i)) {
return false;
}
SEQ::Emit(e, args);
return true;
}
template <typename REG_FN>
static void EmitUnaryOp(A64Emitter& e, const EmitArgType& i,
const REG_FN& reg_fn) {
if (i.src1.is_constant) {
e.MOV(i.dest, i.src1.constant());
reg_fn(e, i.dest);
} else {
if (i.dest != i.src1) {
e.MOV(i.dest, i.src1);
}
reg_fn(e, i.dest);
}
}
template <typename REG_REG_FN, typename REG_CONST_FN>
static void EmitCommutativeBinaryOp(A64Emitter& e, const EmitArgType& i,
const REG_REG_FN& reg_reg_fn,
const REG_CONST_FN& reg_const_fn) {
if (i.src1.is_constant) {
if (i.src2.is_constant) {
// Both constants.
if (i.src1.ConstantFitsIn32Reg()) {
e.MOV(i.dest, i.src2.constant());
reg_const_fn(e, i.dest, static_cast<int32_t>(i.src1.constant()));
} else if (i.src2.ConstantFitsIn32Reg()) {
e.MOV(i.dest, i.src1.constant());
reg_const_fn(e, i.dest, static_cast<int32_t>(i.src2.constant()));
} else {
e.MOV(i.dest, i.src1.constant());
auto temp = GetTempReg<typename decltype(i.src2)::reg_type>(e);
e.MOV(temp, i.src2.constant());
reg_reg_fn(e, i.dest, temp);
}
} else {
// src1 constant.
if (i.dest == i.src2) {
if (i.src1.ConstantFitsIn32Reg()) {
reg_const_fn(e, i.dest, static_cast<int32_t>(i.src1.constant()));
} else {
auto temp = GetTempReg<typename decltype(i.src1)::reg_type>(e);
e.MOV(temp, i.src1.constant());
reg_reg_fn(e, i.dest, temp);
}
} else {
e.MOV(i.dest, i.src1.constant());
reg_reg_fn(e, i.dest, i.src2);
}
}
} else if (i.src2.is_constant) {
if (i.dest == i.src1) {
if (i.src2.ConstantFitsIn32Reg()) {
reg_const_fn(e, i.dest, static_cast<int32_t>(i.src2.constant()));
} else {
auto temp = GetTempReg<typename decltype(i.src2)::reg_type>(e);
e.MOV(temp, i.src2.constant());
reg_reg_fn(e, i.dest, temp);
}
} else {
e.MOV(i.dest, i.src2.constant());
reg_reg_fn(e, i.dest, i.src1);
}
} else {
if (i.dest == i.src1) {
reg_reg_fn(e, i.dest, i.src2);
} else if (i.dest == i.src2) {
reg_reg_fn(e, i.dest, i.src1);
} else {
e.MOV(i.dest, i.src1);
reg_reg_fn(e, i.dest, i.src2);
}
}
}
template <typename REG_REG_FN, typename REG_CONST_FN>
static void EmitAssociativeBinaryOp(A64Emitter& e, const EmitArgType& i,
const REG_REG_FN& reg_reg_fn,
const REG_CONST_FN& reg_const_fn) {
if (i.src1.is_constant) {
assert_true(!i.src2.is_constant);
if (i.dest == i.src2) {
auto temp = GetTempReg<typename decltype(i.src2)::reg_type>(e);
e.MOV(temp, i.src2);
e.MOV(i.dest, i.src1.constant());
reg_reg_fn(e, i.dest, temp);
} else {
e.MOV(i.dest, i.src1.constant());
reg_reg_fn(e, i.dest, i.src2);
}
} else if (i.src2.is_constant) {
if (i.dest == i.src1) {
if (i.src2.ConstantFitsIn32Reg()) {
reg_const_fn(e, i.dest, static_cast<int32_t>(i.src2.constant()));
} else {
auto temp = GetTempReg<typename decltype(i.src2)::reg_type>(e);
e.MOV(temp, i.src2.constant());
reg_reg_fn(e, i.dest, temp);
}
} else {
e.MOV(i.dest, i.src1);
if (i.src2.ConstantFitsIn32Reg()) {
reg_const_fn(e, i.dest, static_cast<int32_t>(i.src2.constant()));
} else {
auto temp = GetTempReg<typename decltype(i.src2)::reg_type>(e);
e.MOV(temp, i.src2.constant());
reg_reg_fn(e, i.dest, temp);
}
}
} else {
if (i.dest == i.src1) {
reg_reg_fn(e, i.dest, i.src2);
} else if (i.dest == i.src2) {
auto temp = GetTempReg<typename decltype(i.src2)::reg_type>(e);
e.MOV(temp, i.src2);
e.MOV(i.dest, i.src1);
reg_reg_fn(e, i.dest, temp);
} else {
e.MOV(i.dest, i.src1);
reg_reg_fn(e, i.dest, i.src2);
}
}
}
template <typename REG = QReg, typename FN>
static void EmitCommutativeBinaryVOp(A64Emitter& e, const EmitArgType& i,
const FN& fn) {
if (i.src1.is_constant) {
assert_true(!i.src2.is_constant);
e.LoadConstantV(Q0, i.src1.constant());
fn(e, i.dest, REG(0), i.src2);
} else if (i.src2.is_constant) {
assert_true(!i.src1.is_constant);
e.LoadConstantV(Q0, i.src2.constant());
fn(e, i.dest, i.src1, REG(0));
} else {
fn(e, i.dest, i.src1, i.src2);
}
}
template <typename REG = QReg, typename FN>
static void EmitAssociativeBinaryVOp(A64Emitter& e, const EmitArgType& i,
const FN& fn) {
if (i.src1.is_constant) {
assert_true(!i.src2.is_constant);
e.LoadConstantV(Q0, i.src1.constant());
fn(e, i.dest, REG(0), i.src2);
} else if (i.src2.is_constant) {
assert_true(!i.src1.is_constant);
e.LoadConstantV(Q0, i.src2.constant());
fn(e, i.dest, i.src1, REG(0));
} else {
fn(e, i.dest, i.src1, i.src2);
}
}
template <typename REG_REG_FN, typename REG_CONST_FN>
static void EmitCommutativeCompareOp(A64Emitter& e, const EmitArgType& i,
const REG_REG_FN& reg_reg_fn,
const REG_CONST_FN& reg_const_fn) {
if (i.src1.is_constant) {
assert_true(!i.src2.is_constant);
if (i.src1.ConstantFitsIn32Reg()) {
reg_const_fn(e, i.src2, static_cast<int32_t>(i.src1.constant()));
} else {
auto temp = GetTempReg<typename decltype(i.src1)::reg_type>(e);
e.MOV(temp, i.src1.constant());
reg_reg_fn(e, i.src2, temp);
}
} else if (i.src2.is_constant) {
assert_true(!i.src1.is_constant);
if (i.src2.ConstantFitsIn32Reg()) {
reg_const_fn(e, i.src1, static_cast<int32_t>(i.src2.constant()));
} else {
auto temp = GetTempReg<typename decltype(i.src2)::reg_type>(e);
e.MOV(temp, i.src2.constant());
reg_reg_fn(e, i.src1, temp);
}
} else {
reg_reg_fn(e, i.src1, i.src2);
}
}
template <typename REG_REG_FN, typename REG_CONST_FN>
static void EmitAssociativeCompareOp(A64Emitter& e, const EmitArgType& i,
const REG_REG_FN& reg_reg_fn,
const REG_CONST_FN& reg_const_fn) {
if (i.src1.is_constant) {
assert_true(!i.src2.is_constant);
if (i.src1.ConstantFitsIn32Reg()) {
reg_const_fn(e, i.dest, i.src2, static_cast<int32_t>(i.src1.constant()),
true);
} else {
auto temp = GetTempReg<typename decltype(i.src1)::reg_type>(e);
e.MOV(temp, i.src1.constant());
reg_reg_fn(e, i.dest, i.src2, temp, true);
}
} else if (i.src2.is_constant) {
assert_true(!i.src1.is_constant);
if (i.src2.ConstantFitsIn32Reg()) {
reg_const_fn(e, i.dest, i.src1, static_cast<int32_t>(i.src2.constant()),
false);
} else {
auto temp = GetTempReg<typename decltype(i.src2)::reg_type>(e);
e.MOV(temp, i.src2.constant());
reg_reg_fn(e, i.dest, i.src1, temp, false);
}
} else {
reg_reg_fn(e, i.dest, i.src1, i.src2, false);
}
}
};
} // namespace a64
} // namespace backend
} // namespace cpu
} // namespace xe
#endif // XENIA_CPU_BACKEND_A64_A64_OP_H_
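
For orientation, the pieces above combine like this: an I<OPCODE, DEST, SRCn...> type gives a sequence its match key and typed operand accessors, and the Emit*Op helpers in Sequence handle every constant/register permutation. Below is a minimal sketch of a commutative integer op written against this DSL, as it would appear in one of the a64_seq_*.cc files; EXAMPLE_ADD_I32, the use of W0 as a scratch register, and the lambda bodies are illustrative assumptions, not the commit's actual ADD sequence.

// Illustrative sketch only -- not part of this commit.
struct EXAMPLE_ADD_I32
    : Sequence<EXAMPLE_ADD_I32, I<OPCODE_ADD, I32Op, I32Op, I32Op>> {
  static void Emit(A64Emitter& e, const EmitArgType& i) {
    EmitCommutativeBinaryOp(
        e, i,
        // Register/register form.
        [](A64Emitter& e, WReg dest, WReg src) { e.ADD(dest, dest, src); },
        // Register/constant form: materialize the constant in a scratch
        // register first, mirroring the GetTempReg pattern above, so we do
        // not have to worry about ADD-immediate range limits.
        [](A64Emitter& e, WReg dest, int32_t constant) {
          e.MOV(W0, constant);
          e.ADD(dest, dest, W0);
        });
  }
};
// EMITTER_OPCODE_TABLE(OPCODE_ADD, EXAMPLE_ADD_I32);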

View File

@ -0,0 +1,551 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2024 Xenia Developers. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#include "xenia/cpu/backend/a64/a64_sequences.h"
#include <algorithm>
#include <cstring>
#include "xenia/cpu/backend/a64/a64_op.h"
namespace xe {
namespace cpu {
namespace backend {
namespace a64 {
volatile int anchor_control = 0;
// ============================================================================
// OPCODE_DEBUG_BREAK
// ============================================================================
struct DEBUG_BREAK : Sequence<DEBUG_BREAK, I<OPCODE_DEBUG_BREAK, VoidOp>> {
static void Emit(A64Emitter& e, const EmitArgType& i) { e.DebugBreak(); }
};
EMITTER_OPCODE_TABLE(OPCODE_DEBUG_BREAK, DEBUG_BREAK);
// ============================================================================
// OPCODE_DEBUG_BREAK_TRUE
// ============================================================================
struct DEBUG_BREAK_TRUE_I8
: Sequence<DEBUG_BREAK_TRUE_I8, I<OPCODE_DEBUG_BREAK_TRUE, VoidOp, I8Op>> {
static void Emit(A64Emitter& e, const EmitArgType& i) {
oaknut::Label skip;
e.CBZ(i.src1, skip);
e.DebugBreak();
e.l(skip);
}
};
struct DEBUG_BREAK_TRUE_I16
: Sequence<DEBUG_BREAK_TRUE_I16,
I<OPCODE_DEBUG_BREAK_TRUE, VoidOp, I16Op>> {
static void Emit(A64Emitter& e, const EmitArgType& i) {
oaknut::Label skip;
e.CBZ(i.src1, skip);
e.DebugBreak();
e.l(skip);
}
};
struct DEBUG_BREAK_TRUE_I32
: Sequence<DEBUG_BREAK_TRUE_I32,
I<OPCODE_DEBUG_BREAK_TRUE, VoidOp, I32Op>> {
static void Emit(A64Emitter& e, const EmitArgType& i) {
oaknut::Label skip;
e.CBZ(i.src1, skip);
e.DebugBreak();
e.l(skip);
}
};
struct DEBUG_BREAK_TRUE_I64
: Sequence<DEBUG_BREAK_TRUE_I64,
I<OPCODE_DEBUG_BREAK_TRUE, VoidOp, I64Op>> {
static void Emit(A64Emitter& e, const EmitArgType& i) {
oaknut::Label skip;
e.CBZ(i.src1, skip);
e.DebugBreak();
e.l(skip);
}
};
struct DEBUG_BREAK_TRUE_F32
: Sequence<DEBUG_BREAK_TRUE_F32,
I<OPCODE_DEBUG_BREAK_TRUE, VoidOp, F32Op>> {
static void Emit(A64Emitter& e, const EmitArgType& i) {
oaknut::Label skip;
e.FCMP(i.src1, 0);
e.B(Cond::EQ, skip);
e.DebugBreak();
e.l(skip);
}
};
struct DEBUG_BREAK_TRUE_F64
: Sequence<DEBUG_BREAK_TRUE_F64,
I<OPCODE_DEBUG_BREAK_TRUE, VoidOp, F64Op>> {
static void Emit(A64Emitter& e, const EmitArgType& i) {
oaknut::Label skip;
e.FCMP(i.src1, 0);
e.B(Cond::EQ, skip);
e.DebugBreak();
e.l(skip);
}
};
EMITTER_OPCODE_TABLE(OPCODE_DEBUG_BREAK_TRUE, DEBUG_BREAK_TRUE_I8,
DEBUG_BREAK_TRUE_I16, DEBUG_BREAK_TRUE_I32,
DEBUG_BREAK_TRUE_I64, DEBUG_BREAK_TRUE_F32,
DEBUG_BREAK_TRUE_F64);
// ============================================================================
// OPCODE_TRAP
// ============================================================================
struct TRAP : Sequence<TRAP, I<OPCODE_TRAP, VoidOp>> {
static void Emit(A64Emitter& e, const EmitArgType& i) {
e.Trap(i.instr->flags);
}
};
EMITTER_OPCODE_TABLE(OPCODE_TRAP, TRAP);
// ============================================================================
// OPCODE_TRAP_TRUE
// ============================================================================
struct TRAP_TRUE_I8
: Sequence<TRAP_TRUE_I8, I<OPCODE_TRAP_TRUE, VoidOp, I8Op>> {
static void Emit(A64Emitter& e, const EmitArgType& i) {
oaknut::Label skip;
e.CBZ(i.src1, skip);
e.Trap(i.instr->flags);
e.l(skip);
}
};
struct TRAP_TRUE_I16
: Sequence<TRAP_TRUE_I16, I<OPCODE_TRAP_TRUE, VoidOp, I16Op>> {
static void Emit(A64Emitter& e, const EmitArgType& i) {
oaknut::Label skip;
e.CBZ(i.src1, skip);
e.Trap(i.instr->flags);
e.l(skip);
}
};
struct TRAP_TRUE_I32
: Sequence<TRAP_TRUE_I32, I<OPCODE_TRAP_TRUE, VoidOp, I32Op>> {
static void Emit(A64Emitter& e, const EmitArgType& i) {
oaknut::Label skip;
e.CBZ(i.src1, skip);
e.Trap(i.instr->flags);
e.l(skip);
}
};
struct TRAP_TRUE_I64
: Sequence<TRAP_TRUE_I64, I<OPCODE_TRAP_TRUE, VoidOp, I64Op>> {
static void Emit(A64Emitter& e, const EmitArgType& i) {
oaknut::Label skip;
e.CBZ(i.src1, skip);
e.Trap(i.instr->flags);
e.l(skip);
}
};
struct TRAP_TRUE_F32
: Sequence<TRAP_TRUE_F32, I<OPCODE_TRAP_TRUE, VoidOp, F32Op>> {
static void Emit(A64Emitter& e, const EmitArgType& i) {
oaknut::Label skip;
e.FCMP(i.src1, 0);
e.B(Cond::EQ, skip);
e.Trap(i.instr->flags);
e.l(skip);
}
};
struct TRAP_TRUE_F64
: Sequence<TRAP_TRUE_F64, I<OPCODE_TRAP_TRUE, VoidOp, F64Op>> {
static void Emit(A64Emitter& e, const EmitArgType& i) {
oaknut::Label skip;
e.FCMP(i.src1, 0);
e.B(Cond::EQ, skip);
e.Trap(i.instr->flags);
e.l(skip);
}
};
EMITTER_OPCODE_TABLE(OPCODE_TRAP_TRUE, TRAP_TRUE_I8, TRAP_TRUE_I16,
TRAP_TRUE_I32, TRAP_TRUE_I64, TRAP_TRUE_F32,
TRAP_TRUE_F64);
// ============================================================================
// OPCODE_CALL
// ============================================================================
struct CALL : Sequence<CALL, I<OPCODE_CALL, VoidOp, SymbolOp>> {
static void Emit(A64Emitter& e, const EmitArgType& i) {
assert_true(i.src1.value->is_guest());
e.Call(i.instr, static_cast<GuestFunction*>(i.src1.value));
}
};
EMITTER_OPCODE_TABLE(OPCODE_CALL, CALL);
// ============================================================================
// OPCODE_CALL_TRUE
// ============================================================================
struct CALL_TRUE_I8
: Sequence<CALL_TRUE_I8, I<OPCODE_CALL_TRUE, VoidOp, I8Op, SymbolOp>> {
static void Emit(A64Emitter& e, const EmitArgType& i) {
assert_true(i.src2.value->is_guest());
oaknut::Label skip;
e.CBZ(i.src1, skip);
e.Call(i.instr, static_cast<GuestFunction*>(i.src2.value));
e.l(skip);
}
};
struct CALL_TRUE_I16
: Sequence<CALL_TRUE_I16, I<OPCODE_CALL_TRUE, VoidOp, I16Op, SymbolOp>> {
static void Emit(A64Emitter& e, const EmitArgType& i) {
assert_true(i.src2.value->is_guest());
oaknut::Label skip;
e.CBZ(i.src1, skip);
e.Call(i.instr, static_cast<GuestFunction*>(i.src2.value));
e.l(skip);
}
};
struct CALL_TRUE_I32
: Sequence<CALL_TRUE_I32, I<OPCODE_CALL_TRUE, VoidOp, I32Op, SymbolOp>> {
static void Emit(A64Emitter& e, const EmitArgType& i) {
assert_true(i.src2.value->is_guest());
oaknut::Label skip;
e.CBZ(i.src1, skip);
e.Call(i.instr, static_cast<GuestFunction*>(i.src2.value));
e.l(skip);
}
};
struct CALL_TRUE_I64
: Sequence<CALL_TRUE_I64, I<OPCODE_CALL_TRUE, VoidOp, I64Op, SymbolOp>> {
static void Emit(A64Emitter& e, const EmitArgType& i) {
assert_true(i.src2.value->is_guest());
oaknut::Label skip;
e.CBZ(i.src1, skip);
e.Call(i.instr, static_cast<GuestFunction*>(i.src2.value));
e.l(skip);
}
};
struct CALL_TRUE_F32
: Sequence<CALL_TRUE_F32, I<OPCODE_CALL_TRUE, VoidOp, F32Op, SymbolOp>> {
static void Emit(A64Emitter& e, const EmitArgType& i) {
assert_true(i.src2.value->is_guest());
oaknut::Label skip;
e.FCMP(i.src1, 0);
e.B(Cond::EQ, skip);
e.Call(i.instr, static_cast<GuestFunction*>(i.src2.value));
e.l(skip);
}
};
struct CALL_TRUE_F64
: Sequence<CALL_TRUE_F64, I<OPCODE_CALL_TRUE, VoidOp, F64Op, SymbolOp>> {
static void Emit(A64Emitter& e, const EmitArgType& i) {
assert_true(i.src2.value->is_guest());
oaknut::Label skip;
e.FCMP(i.src1, 0);
e.B(Cond::EQ, skip);
e.Call(i.instr, static_cast<GuestFunction*>(i.src2.value));
e.l(skip);
}
};
EMITTER_OPCODE_TABLE(OPCODE_CALL_TRUE, CALL_TRUE_I8, CALL_TRUE_I16,
CALL_TRUE_I32, CALL_TRUE_I64, CALL_TRUE_F32,
CALL_TRUE_F64);
// ============================================================================
// OPCODE_CALL_INDIRECT
// ============================================================================
struct CALL_INDIRECT
: Sequence<CALL_INDIRECT, I<OPCODE_CALL_INDIRECT, VoidOp, I64Op>> {
static void Emit(A64Emitter& e, const EmitArgType& i) {
e.CallIndirect(i.instr, i.src1);
}
};
EMITTER_OPCODE_TABLE(OPCODE_CALL_INDIRECT, CALL_INDIRECT);
// ============================================================================
// OPCODE_CALL_INDIRECT_TRUE
// ============================================================================
struct CALL_INDIRECT_TRUE_I8
: Sequence<CALL_INDIRECT_TRUE_I8,
I<OPCODE_CALL_INDIRECT_TRUE, VoidOp, I8Op, I64Op>> {
static void Emit(A64Emitter& e, const EmitArgType& i) {
oaknut::Label skip;
e.CBZ(i.src1, skip);
e.CallIndirect(i.instr, i.src2);
e.l(skip);
}
};
struct CALL_INDIRECT_TRUE_I16
: Sequence<CALL_INDIRECT_TRUE_I16,
I<OPCODE_CALL_INDIRECT_TRUE, VoidOp, I16Op, I64Op>> {
static void Emit(A64Emitter& e, const EmitArgType& i) {
oaknut::Label skip;
e.CBZ(i.src1, skip);
e.CallIndirect(i.instr, i.src2);
e.l(skip);
}
};
struct CALL_INDIRECT_TRUE_I32
: Sequence<CALL_INDIRECT_TRUE_I32,
I<OPCODE_CALL_INDIRECT_TRUE, VoidOp, I32Op, I64Op>> {
static void Emit(A64Emitter& e, const EmitArgType& i) {
oaknut::Label skip;
e.CBZ(i.src1, skip);
e.CallIndirect(i.instr, i.src2);
e.l(skip);
}
};
struct CALL_INDIRECT_TRUE_I64
: Sequence<CALL_INDIRECT_TRUE_I64,
I<OPCODE_CALL_INDIRECT_TRUE, VoidOp, I64Op, I64Op>> {
static void Emit(A64Emitter& e, const EmitArgType& i) {
oaknut::Label skip;
e.CBZ(i.src1, skip);
e.CallIndirect(i.instr, i.src2);
e.l(skip);
}
};
struct CALL_INDIRECT_TRUE_F32
: Sequence<CALL_INDIRECT_TRUE_F32,
I<OPCODE_CALL_INDIRECT_TRUE, VoidOp, F32Op, I64Op>> {
static void Emit(A64Emitter& e, const EmitArgType& i) {
oaknut::Label skip;
e.FCMP(i.src1, 0);
e.B(Cond::EQ, skip);
e.CallIndirect(i.instr, i.src2);
e.l(skip);
}
};
struct CALL_INDIRECT_TRUE_F64
: Sequence<CALL_INDIRECT_TRUE_F64,
I<OPCODE_CALL_INDIRECT_TRUE, VoidOp, F64Op, I64Op>> {
static void Emit(A64Emitter& e, const EmitArgType& i) {
oaknut::Label skip;
e.FCMP(i.src1, 0);
e.B(Cond::EQ, skip);
e.CallIndirect(i.instr, i.src2);
e.l(skip);
}
};
EMITTER_OPCODE_TABLE(OPCODE_CALL_INDIRECT_TRUE, CALL_INDIRECT_TRUE_I8,
CALL_INDIRECT_TRUE_I16, CALL_INDIRECT_TRUE_I32,
CALL_INDIRECT_TRUE_I64, CALL_INDIRECT_TRUE_F32,
CALL_INDIRECT_TRUE_F64);
// ============================================================================
// OPCODE_CALL_EXTERN
// ============================================================================
struct CALL_EXTERN
: Sequence<CALL_EXTERN, I<OPCODE_CALL_EXTERN, VoidOp, SymbolOp>> {
static void Emit(A64Emitter& e, const EmitArgType& i) {
e.CallExtern(i.instr, i.src1.value);
}
};
EMITTER_OPCODE_TABLE(OPCODE_CALL_EXTERN, CALL_EXTERN);
// ============================================================================
// OPCODE_RETURN
// ============================================================================
struct RETURN : Sequence<RETURN, I<OPCODE_RETURN, VoidOp>> {
static void Emit(A64Emitter& e, const EmitArgType& i) {
// If this is the last instruction in the last block, just let us
// fall through.
if (i.instr->next || i.instr->block->next) {
e.B(e.epilog_label());
}
}
};
EMITTER_OPCODE_TABLE(OPCODE_RETURN, RETURN);
// ============================================================================
// OPCODE_RETURN_TRUE
// ============================================================================
struct RETURN_TRUE_I8
: Sequence<RETURN_TRUE_I8, I<OPCODE_RETURN_TRUE, VoidOp, I8Op>> {
static void Emit(A64Emitter& e, const EmitArgType& i) {
e.CBNZ(i.src1, e.epilog_label());
}
};
struct RETURN_TRUE_I16
: Sequence<RETURN_TRUE_I16, I<OPCODE_RETURN_TRUE, VoidOp, I16Op>> {
static void Emit(A64Emitter& e, const EmitArgType& i) {
e.CBNZ(i.src1, e.epilog_label());
}
};
struct RETURN_TRUE_I32
: Sequence<RETURN_TRUE_I32, I<OPCODE_RETURN_TRUE, VoidOp, I32Op>> {
static void Emit(A64Emitter& e, const EmitArgType& i) {
e.CBNZ(i.src1, e.epilog_label());
}
};
struct RETURN_TRUE_I64
: Sequence<RETURN_TRUE_I64, I<OPCODE_RETURN_TRUE, VoidOp, I64Op>> {
static void Emit(A64Emitter& e, const EmitArgType& i) {
e.CBNZ(i.src1, e.epilog_label());
}
};
struct RETURN_TRUE_F32
: Sequence<RETURN_TRUE_F32, I<OPCODE_RETURN_TRUE, VoidOp, F32Op>> {
static void Emit(A64Emitter& e, const EmitArgType& i) {
e.FCMP(i.src1, 0);
e.B(Cond::NE, e.epilog_label());
}
};
struct RETURN_TRUE_F64
: Sequence<RETURN_TRUE_F64, I<OPCODE_RETURN_TRUE, VoidOp, F64Op>> {
static void Emit(A64Emitter& e, const EmitArgType& i) {
e.FCMP(i.src1, 0);
e.B(Cond::NE, e.epilog_label());
}
};
EMITTER_OPCODE_TABLE(OPCODE_RETURN_TRUE, RETURN_TRUE_I8, RETURN_TRUE_I16,
RETURN_TRUE_I32, RETURN_TRUE_I64, RETURN_TRUE_F32,
RETURN_TRUE_F64);
// ============================================================================
// OPCODE_SET_RETURN_ADDRESS
// ============================================================================
struct SET_RETURN_ADDRESS
: Sequence<SET_RETURN_ADDRESS,
I<OPCODE_SET_RETURN_ADDRESS, VoidOp, I64Op>> {
static void Emit(A64Emitter& e, const EmitArgType& i) {
e.SetReturnAddress(i.src1.constant());
}
};
EMITTER_OPCODE_TABLE(OPCODE_SET_RETURN_ADDRESS, SET_RETURN_ADDRESS);
// ============================================================================
// OPCODE_BRANCH
// ============================================================================
struct BRANCH : Sequence<BRANCH, I<OPCODE_BRANCH, VoidOp, LabelOp>> {
static void Emit(A64Emitter& e, const EmitArgType& i) {
oaknut::Label* label = e.lookup_label(i.src1.value->name);
assert_not_null(label);
e.B(*label);
}
};
EMITTER_OPCODE_TABLE(OPCODE_BRANCH, BRANCH);
// ============================================================================
// OPCODE_BRANCH_TRUE
// ============================================================================
struct BRANCH_TRUE_I8
: Sequence<BRANCH_TRUE_I8, I<OPCODE_BRANCH_TRUE, VoidOp, I8Op, LabelOp>> {
static void Emit(A64Emitter& e, const EmitArgType& i) {
oaknut::Label* label = e.lookup_label(i.src2.value->name);
assert_not_null(label);
e.CBNZ(i.src1, *label);
}
};
struct BRANCH_TRUE_I16
: Sequence<BRANCH_TRUE_I16, I<OPCODE_BRANCH_TRUE, VoidOp, I16Op, LabelOp>> {
static void Emit(A64Emitter& e, const EmitArgType& i) {
oaknut::Label* label = e.lookup_label(i.src2.value->name);
assert_not_null(label);
e.CBNZ(i.src1, *label);
}
};
struct BRANCH_TRUE_I32
: Sequence<BRANCH_TRUE_I32, I<OPCODE_BRANCH_TRUE, VoidOp, I32Op, LabelOp>> {
static void Emit(A64Emitter& e, const EmitArgType& i) {
oaknut::Label* label = e.lookup_label(i.src2.value->name);
assert_not_null(label);
e.CBNZ(i.src1, *label);
}
};
struct BRANCH_TRUE_I64
: Sequence<BRANCH_TRUE_I64, I<OPCODE_BRANCH_TRUE, VoidOp, I64Op, LabelOp>> {
static void Emit(A64Emitter& e, const EmitArgType& i) {
oaknut::Label* label = e.lookup_label(i.src2.value->name);
assert_not_null(label);
e.CBNZ(i.src1, *label);
}
};
struct BRANCH_TRUE_F32
: Sequence<BRANCH_TRUE_F32, I<OPCODE_BRANCH_TRUE, VoidOp, F32Op, LabelOp>> {
static void Emit(A64Emitter& e, const EmitArgType& i) {
oaknut::Label* label = e.lookup_label(i.src2.value->name);
assert_not_null(label);
e.FCMP(i.src1, 0);
e.B(Cond::NE, *label);
}
};
struct BRANCH_TRUE_F64
: Sequence<BRANCH_TRUE_F64, I<OPCODE_BRANCH_TRUE, VoidOp, F64Op, LabelOp>> {
static void Emit(A64Emitter& e, const EmitArgType& i) {
oaknut::Label* label = e.lookup_label(i.src2.value->name);
assert_not_null(label);
e.FCMP(i.src1, 0);
e.B(Cond::NE, *label);
}
};
EMITTER_OPCODE_TABLE(OPCODE_BRANCH_TRUE, BRANCH_TRUE_I8, BRANCH_TRUE_I16,
BRANCH_TRUE_I32, BRANCH_TRUE_I64, BRANCH_TRUE_F32,
BRANCH_TRUE_F64);
// ============================================================================
// OPCODE_BRANCH_FALSE
// ============================================================================
struct BRANCH_FALSE_I8
: Sequence<BRANCH_FALSE_I8, I<OPCODE_BRANCH_FALSE, VoidOp, I8Op, LabelOp>> {
static void Emit(A64Emitter& e, const EmitArgType& i) {
oaknut::Label* label = e.lookup_label(i.src2.value->name);
assert_not_null(label);
e.CBZ(i.src1, *label);
}
};
struct BRANCH_FALSE_I16
: Sequence<BRANCH_FALSE_I16,
I<OPCODE_BRANCH_FALSE, VoidOp, I16Op, LabelOp>> {
static void Emit(A64Emitter& e, const EmitArgType& i) {
oaknut::Label* label = e.lookup_label(i.src2.value->name);
assert_not_null(label);
e.CBZ(i.src1, *label);
}
};
struct BRANCH_FALSE_I32
: Sequence<BRANCH_FALSE_I32,
I<OPCODE_BRANCH_FALSE, VoidOp, I32Op, LabelOp>> {
static void Emit(A64Emitter& e, const EmitArgType& i) {
oaknut::Label* label = e.lookup_label(i.src2.value->name);
assert_not_null(label);
e.CBZ(i.src1, *label);
}
};
struct BRANCH_FALSE_I64
: Sequence<BRANCH_FALSE_I64,
I<OPCODE_BRANCH_FALSE, VoidOp, I64Op, LabelOp>> {
static void Emit(A64Emitter& e, const EmitArgType& i) {
oaknut::Label* label = e.lookup_label(i.src2.value->name);
assert_not_null(label);
e.CBZ(i.src1, *label);
}
};
struct BRANCH_FALSE_F32
: Sequence<BRANCH_FALSE_F32,
I<OPCODE_BRANCH_FALSE, VoidOp, F32Op, LabelOp>> {
static void Emit(A64Emitter& e, const EmitArgType& i) {
oaknut::Label* label = e.lookup_label(i.src2.value->name);
assert_not_null(label);
e.FCMP(i.src1, 0);
e.B(Cond::EQ, *label);
}
};
struct BRANCH_FALSE_F64
: Sequence<BRANCH_FALSE_F64,
I<OPCODE_BRANCH_FALSE, VoidOp, F64Op, LabelOp>> {
static void Emit(A64Emitter& e, const EmitArgType& i) {
oaknut::Label* label = e.lookup_label(i.src2.value->name);
assert_not_null(label);
e.FCMP(i.src1, 0);
e.B(Cond::EQ, *label);
}
};
EMITTER_OPCODE_TABLE(OPCODE_BRANCH_FALSE, BRANCH_FALSE_I8, BRANCH_FALSE_I16,
BRANCH_FALSE_I32, BRANCH_FALSE_I64, BRANCH_FALSE_F32,
BRANCH_FALSE_F64);
} // namespace a64
} // namespace backend
} // namespace cpu
} // namespace xe
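
Every conditional variant in this file follows the same small pattern: integer conditions test the source register directly with CBZ/CBNZ, while f32/f64 conditions compare against zero with FCMP and then branch on EQ/NE. As a rough picture, DEBUG_BREAK_TRUE_I32 lowers to something like the lines below; the register number and the BRK immediate are assumptions for illustration, since the exact expansion of DebugBreak() lives in the emitter, whose diff is collapsed above.

//   cbz  w1, skip       ; i.src1 is zero -> skip the breakpoint
//   brk  #0xf000        ; e.DebugBreak()
// skip:                  ; e.l(skip) binds the label here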

File diff suppressed because it is too large.

File diff suppressed because it is too large.

File diff suppressed because it is too large.

View File

@ -0,0 +1,51 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2024 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#ifndef XENIA_CPU_BACKEND_A64_A64_SEQUENCES_H_
#define XENIA_CPU_BACKEND_A64_A64_SEQUENCES_H_
#include "xenia/cpu/hir/instr.h"
#include <unordered_map>
namespace xe {
namespace cpu {
namespace backend {
namespace a64 {
class A64Emitter;
typedef bool (*SequenceSelectFn)(A64Emitter&, const hir::Instr*);
extern std::unordered_map<uint32_t, SequenceSelectFn> sequence_table;
template <typename T>
bool Register() {
sequence_table.insert({T::head_key(), T::Select});
return true;
}
template <typename T, typename Tn, typename... Ts>
static bool Register() {
bool b = true;
b = b && Register<T>();  // Register the first sequence type.
b = b && Register<Tn, Ts...>();  // Recurse to register the remaining types.
return b;
}
#define EMITTER_OPCODE_TABLE(name, ...) \
const auto A64_INSTR_##name = Register<__VA_ARGS__>();
bool SelectSequence(A64Emitter* e, const hir::Instr* i,
const hir::Instr** new_tail);
} // namespace a64
} // namespace backend
} // namespace cpu
} // namespace xe
#endif // XENIA_CPU_BACKEND_A64_A64_SEQUENCES_H_
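
To make the registration machinery concrete, the sketch below first spells out what one EMITTER_OPCODE_TABLE line expands to, then shows what a dispatcher over sequence_table can look like. Only the SelectSequence signature comes from this header; the body is an assumption mirroring the x64 backend, and the real definition lives in a64_sequences.cc, whose diff is collapsed elsewhere in this commit.

// EMITTER_OPCODE_TABLE(OPCODE_DEBUG_BREAK, DEBUG_BREAK) expands to:
//   const auto A64_INSTR_OPCODE_DEBUG_BREAK = Register<DEBUG_BREAK>();
// which inserts {DEBUG_BREAK::head_key(), DEBUG_BREAK::Select} into
// sequence_table during static initialization.

// Sketch only: dispatch one HIR instruction to its registered sequence.
bool SelectSequence(A64Emitter* e, const hir::Instr* i,
                    const hir::Instr** new_tail) {
  // The key encodes the opcode plus dest/src operand kinds; Select() still
  // re-checks the concrete operands and may refuse the match.
  const auto it = sequence_table.find(InstrKey(i).value);
  if (it != sequence_table.end() && it->second(*e, i)) {
    *new_tail = i->next;
    return true;
  }
  return false;  // No emitter registered for this opcode/operand shape.
}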

View File

@ -0,0 +1,129 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2024 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#ifndef XENIA_CPU_BACKEND_A64_A64_STACK_LAYOUT_H_
#define XENIA_CPU_BACKEND_A64_A64_STACK_LAYOUT_H_
#include "xenia/base/vec128.h"
#include "xenia/cpu/backend/a64/a64_backend.h"
#include "xenia/cpu/backend/a64/a64_emitter.h"
namespace xe {
namespace cpu {
namespace backend {
namespace a64 {
class StackLayout {
public:
/**
* Stack Layout
* ----------------------------
* NOTE: stack must always be 16b aligned.
*
* Thunk stack:
* Non-Volatile Volatile
* +------------------+------------------+
* | arg temp, 3 * 8 | arg temp, 3 * 8 | sp + 0x000
* | | |
* | | |
* +------------------+------------------+
* | rbx | (unused) | sp + 0x018
* +------------------+------------------+
* | rbp | X1 | sp + 0x020
* +------------------+------------------+
* | rcx (Win32) | X2 | sp + 0x028
* +------------------+------------------+
* | rsi (Win32) | X3 | sp + 0x030
* +------------------+------------------+
* | rdi (Win32) | X4 | sp + 0x038
* +------------------+------------------+
* | r12 | X5 | sp + 0x040
* +------------------+------------------+
* | r13 | X6 | sp + 0x048
* +------------------+------------------+
* | r14 | X7 | sp + 0x050
* +------------------+------------------+
* | r15 | X8 | sp + 0x058
* +------------------+------------------+
* | xmm6 (Win32) | X9 | sp + 0x060
* | | |
* +------------------+------------------+
* | xmm7 (Win32) | X10 | sp + 0x070
* | | |
* +------------------+------------------+
* | xmm8 (Win32) | X11 | sp + 0x080
* | | |
* +------------------+------------------+
* | xmm9 (Win32) | X12 | sp + 0x090
* | | |
* +------------------+------------------+
* | xmm10 (Win32) | X13 | sp + 0x0A0
* | | |
* +------------------+------------------+
* | xmm11 (Win32) | X14 | sp + 0x0B0
* | | |
* +------------------+------------------+
* | xmm12 (Win32) | X15 | sp + 0x0C0
* | | |
* +------------------+------------------+
* | xmm13 (Win32) | X16 | sp + 0x0D0
* | | |
* +------------------+------------------+
* | xmm14 (Win32) | X17 | sp + 0x0E0
* | | |
* +------------------+------------------+
* | xmm15 (Win32) | X18 | sp + 0x0F0
* | | |
* +------------------+------------------+
*/
XEPACKEDSTRUCT(Thunk, {
uint64_t arg_temp[3];
uint64_t r[17];
vec128_t xmm[22];
});
static_assert(sizeof(Thunk) % 16 == 0,
"sizeof(Thunk) must be a multiple of 16!");
static const size_t THUNK_STACK_SIZE = sizeof(Thunk);
/**
*
*
* Guest stack:
* +------------------+
* | arg temp, 3 * 8 | sp + 0
* | |
* | |
* +------------------+
* | scratch, 48b | sp + 32(kStashOffset)
* | |
* +------------------+
* | X0 / context | sp + 80
* +------------------+
* | guest ret addr | sp + 88
* +------------------+
* | call ret addr | sp + 96
* +------------------+
* ... locals ...
* +------------------+
* | (return address) |
* +------------------+
*
*/
static const size_t GUEST_STACK_SIZE = 96 + 16;
static const size_t GUEST_CTX_HOME = 80;
static const size_t GUEST_RET_ADDR = 88;
static const size_t GUEST_CALL_RET_ADDR = 96;
};
} // namespace a64
} // namespace backend
} // namespace cpu
} // namespace xe
#endif // XENIA_CPU_BACKEND_A64_A64_STACK_LAYOUT_H_
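
The constants above are just the offsets from the second diagram; an illustrative compile-time check (not part of the commit) makes the relationships explicit and confirms the 16-byte alignment requirement called out at the top of the layout comment.

// Illustrative only -- not part of this commit.
static_assert(StackLayout::GUEST_RET_ADDR ==
                  StackLayout::GUEST_CTX_HOME + 8,
              "guest return address sits right above the context home slot");
static_assert(StackLayout::GUEST_CALL_RET_ADDR ==
                  StackLayout::GUEST_RET_ADDR + 8,
              "call return address sits right above the guest return address");
static_assert(StackLayout::GUEST_STACK_SIZE % 16 == 0,
              "guest frames must keep the stack 16-byte aligned");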

View File

@ -0,0 +1,225 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2024 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#include "xenia/cpu/backend/a64/a64_tracers.h"
#include <cinttypes>
#include "xenia/base/logging.h"
#include "xenia/base/vec128.h"
#include "xenia/cpu/backend/a64/a64_emitter.h"
#include "xenia/cpu/processor.h"
#include "xenia/cpu/thread_state.h"
namespace xe {
namespace cpu {
namespace backend {
namespace a64 {
#define ITRACE 0
#define DTRACE 0
#define TARGET_THREAD 0
bool trace_enabled = true;
#define THREAD_MATCH \
(!TARGET_THREAD || thread_state->thread_id() == TARGET_THREAD)
#define IFLUSH()
#define IPRINT(s) \
if (trace_enabled && THREAD_MATCH) \
xe::logging::AppendLogLine(xe::LogLevel::Debug, 't', s)
#define DFLUSH()
#define DPRINT(...) \
if (trace_enabled && THREAD_MATCH) \
xe::logging::AppendLogLineFormat(xe::LogLevel::Debug, 't', __VA_ARGS__)
uint32_t GetTracingMode() {
uint32_t mode = 0;
#if ITRACE
mode |= TRACING_INSTR;
#endif // ITRACE
#if DTRACE
mode |= TRACING_DATA;
#endif // DTRACE
return mode;
}
void TraceString(void* raw_context, const char* str) {
auto thread_state = *reinterpret_cast<ThreadState**>(raw_context);
IPRINT(str);
IFLUSH();
}
void TraceContextLoadI8(void* raw_context, uint64_t offset, uint8_t value) {
auto thread_state = *reinterpret_cast<ThreadState**>(raw_context);
DPRINT("{} ({:X}) = ctx i8 +{}\n", (int8_t)value, value, offset);
}
void TraceContextLoadI16(void* raw_context, uint64_t offset, uint16_t value) {
auto thread_state = *reinterpret_cast<ThreadState**>(raw_context);
DPRINT("{} ({:X}) = ctx i16 +{}\n", (int16_t)value, value, offset);
}
void TraceContextLoadI32(void* raw_context, uint64_t offset, uint32_t value) {
auto thread_state = *reinterpret_cast<ThreadState**>(raw_context);
DPRINT("{} ({:X}) = ctx i32 +{}\n", (int32_t)value, value, offset);
}
void TraceContextLoadI64(void* raw_context, uint64_t offset, uint64_t value) {
auto thread_state = *reinterpret_cast<ThreadState**>(raw_context);
DPRINT("{} ({:X}) = ctx i64 +{}\n", (int64_t)value, value, offset);
}
void TraceContextLoadF32(void* raw_context, uint64_t offset,
float32x4_t value) {
auto thread_state = *reinterpret_cast<ThreadState**>(raw_context);
DPRINT("{} ({:X}) = ctx f32 +{}\n", xe::m128_f32<0>(value),
xe::m128_i32<0>(value), offset);
}
void TraceContextLoadF64(void* raw_context, uint64_t offset,
const double* value) {
auto thread_state = *reinterpret_cast<ThreadState**>(raw_context);
// auto v = _mm_loadu_pd(value);
auto v = vld1q_f64(value);
DPRINT("{} ({:X}) = ctx f64 +{}\n", xe::m128_f64<0>(v), xe::m128_i64<0>(v),
offset);
}
void TraceContextLoadV128(void* raw_context, uint64_t offset,
float32x4_t value) {
auto thread_state = *reinterpret_cast<ThreadState**>(raw_context);
DPRINT("[{}, {}, {}, {}] [{:08X}, {:08X}, {:08X}, {:08X}] = ctx v128 +{}\n",
xe::m128_f32<0>(value), xe::m128_f32<1>(value), xe::m128_f32<2>(value),
xe::m128_f32<3>(value), xe::m128_i32<0>(value), xe::m128_i32<1>(value),
xe::m128_i32<2>(value), xe::m128_i32<3>(value), offset);
}
void TraceContextStoreI8(void* raw_context, uint64_t offset, uint8_t value) {
auto thread_state = *reinterpret_cast<ThreadState**>(raw_context);
DPRINT("ctx i8 +{} = {} ({:X})\n", offset, (int8_t)value, value);
}
void TraceContextStoreI16(void* raw_context, uint64_t offset, uint16_t value) {
auto thread_state = *reinterpret_cast<ThreadState**>(raw_context);
DPRINT("ctx i16 +{} = {} ({:X})\n", offset, (int16_t)value, value);
}
void TraceContextStoreI32(void* raw_context, uint64_t offset, uint32_t value) {
auto thread_state = *reinterpret_cast<ThreadState**>(raw_context);
DPRINT("ctx i32 +{} = {} ({:X})\n", offset, (int32_t)value, value);
}
void TraceContextStoreI64(void* raw_context, uint64_t offset, uint64_t value) {
auto thread_state = *reinterpret_cast<ThreadState**>(raw_context);
DPRINT("ctx i64 +{} = {} ({:X})\n", offset, (int64_t)value, value);
}
void TraceContextStoreF32(void* raw_context, uint64_t offset,
float32x4_t value) {
auto thread_state = *reinterpret_cast<ThreadState**>(raw_context);
DPRINT("ctx f32 +{} = {} ({:X})\n", offset, xe::m128_f32<0>(value),
xe::m128_i32<0>(value));
}
void TraceContextStoreF64(void* raw_context, uint64_t offset,
const double* value) {
auto thread_state = *reinterpret_cast<ThreadState**>(raw_context);
// auto v = _mm_loadu_pd(value);
auto v = vld1q_f64(value);
DPRINT("ctx f64 +{} = {} ({:X})\n", offset, xe::m128_f64<0>(v),
xe::m128_i64<0>(v));
}
void TraceContextStoreV128(void* raw_context, uint64_t offset,
float32x4_t value) {
auto thread_state = *reinterpret_cast<ThreadState**>(raw_context);
DPRINT("ctx v128 +{} = [{}, {}, {}, {}] [{:08X}, {:08X}, {:08X}, {:08X}]\n",
offset, xe::m128_f32<0>(value), xe::m128_f32<1>(value),
xe::m128_f32<2>(value), xe::m128_f32<3>(value), xe::m128_i32<0>(value),
xe::m128_i32<1>(value), xe::m128_i32<2>(value),
xe::m128_i32<3>(value));
}
void TraceMemoryLoadI8(void* raw_context, uint32_t address, uint8_t value) {
auto thread_state = *reinterpret_cast<ThreadState**>(raw_context);
DPRINT("{} ({:X}) = load.i8 {:08X}\n", (int8_t)value, value, address);
}
void TraceMemoryLoadI16(void* raw_context, uint32_t address, uint16_t value) {
auto thread_state = *reinterpret_cast<ThreadState**>(raw_context);
DPRINT("{} ({:X}) = load.i16 {:08X}\n", (int16_t)value, value, address);
}
void TraceMemoryLoadI32(void* raw_context, uint32_t address, uint32_t value) {
auto thread_state = *reinterpret_cast<ThreadState**>(raw_context);
DPRINT("{} ({:X}) = load.i32 {:08X}\n", (int32_t)value, value, address);
}
void TraceMemoryLoadI64(void* raw_context, uint32_t address, uint64_t value) {
auto thread_state = *reinterpret_cast<ThreadState**>(raw_context);
DPRINT("{} ({:X}) = load.i64 {:08X}\n", (int64_t)value, value, address);
}
void TraceMemoryLoadF32(void* raw_context, uint32_t address,
float32x4_t value) {
auto thread_state = *reinterpret_cast<ThreadState**>(raw_context);
DPRINT("{} ({:X}) = load.f32 {:08X}\n", xe::m128_f32<0>(value),
xe::m128_i32<0>(value), address);
}
void TraceMemoryLoadF64(void* raw_context, uint32_t address,
float64x2_t value) {
auto thread_state = *reinterpret_cast<ThreadState**>(raw_context);
DPRINT("{} ({:X}) = load.f64 {:08X}\n", xe::m128_f64<0>(value),
xe::m128_i64<0>(value), address);
}
void TraceMemoryLoadV128(void* raw_context, uint32_t address,
float32x4_t value) {
auto thread_state = *reinterpret_cast<ThreadState**>(raw_context);
DPRINT(
"[{}, {}, {}, {}] [{:08X}, {:08X}, {:08X}, {:08X}] = load.v128 {:08X}\n",
xe::m128_f32<0>(value), xe::m128_f32<1>(value), xe::m128_f32<2>(value),
xe::m128_f32<3>(value), xe::m128_i32<0>(value), xe::m128_i32<1>(value),
xe::m128_i32<2>(value), xe::m128_i32<3>(value), address);
}
void TraceMemoryStoreI8(void* raw_context, uint32_t address, uint8_t value) {
auto thread_state = *reinterpret_cast<ThreadState**>(raw_context);
DPRINT("store.i8 {:08X} = {} ({:X})\n", address, (int8_t)value, value);
}
void TraceMemoryStoreI16(void* raw_context, uint32_t address, uint16_t value) {
auto thread_state = *reinterpret_cast<ThreadState**>(raw_context);
DPRINT("store.i16 {:08X} = {} ({:X})\n", address, (int16_t)value, value);
}
void TraceMemoryStoreI32(void* raw_context, uint32_t address, uint32_t value) {
auto thread_state = *reinterpret_cast<ThreadState**>(raw_context);
DPRINT("store.i32 {:08X} = {} ({:X})\n", address, (int32_t)value, value);
}
void TraceMemoryStoreI64(void* raw_context, uint32_t address, uint64_t value) {
auto thread_state = *reinterpret_cast<ThreadState**>(raw_context);
DPRINT("store.i64 {:08X} = {} ({:X})\n", address, (int64_t)value, value);
}
void TraceMemoryStoreF32(void* raw_context, uint32_t address,
float32x4_t value) {
auto thread_state = *reinterpret_cast<ThreadState**>(raw_context);
DPRINT("store.f32 {:08X} = {} ({:X})\n", address, xe::m128_f32<0>(value),
xe::m128_i32<0>(value));
}
void TraceMemoryStoreF64(void* raw_context, uint32_t address,
float64x2_t value) {
auto thread_state = *reinterpret_cast<ThreadState**>(raw_context);
DPRINT("store.f64 {:08X} = {} ({:X})\n", address, xe::m128_f64<0>(value),
xe::m128_i64<0>(value));
}
void TraceMemoryStoreV128(void* raw_context, uint32_t address,
float32x4_t value) {
auto thread_state = *reinterpret_cast<ThreadState**>(raw_context);
DPRINT(
"store.v128 {:08X} = [{}, {}, {}, {}] [{:08X}, {:08X}, {:08X}, {:08X}]\n",
address, xe::m128_f32<0>(value), xe::m128_f32<1>(value),
xe::m128_f32<2>(value), xe::m128_f32<3>(value), xe::m128_i32<0>(value),
xe::m128_i32<1>(value), xe::m128_i32<2>(value), xe::m128_i32<3>(value));
}
void TraceMemset(void* raw_context, uint32_t address, uint8_t value,
uint32_t length) {
auto thread_state = *reinterpret_cast<ThreadState**>(raw_context);
DPRINT("memset {:08X}-{:08X} ({}) = {:02X}", address, address + length,
length, value);
}
} // namespace a64
} // namespace backend
} // namespace cpu
} // namespace xe
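
The ITRACE/DTRACE switches and the TARGET_THREAD filter at the top of this file decide which of the hooks above actually print. As a worked example of the output format (the ctx argument and the operand values here are made up for illustration), a guest 32-bit load traced with DTRACE enabled produces a line built from the DPRINT format strings above:

//   TraceMemoryLoadI32(ctx, 0x82000000u, 0x1234u);
//   // -> "4660 (1234) = load.i32 82000000"  (signed value, hex value, address)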

View File

@ -0,0 +1,82 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2024 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#ifndef XENIA_CPU_BACKEND_A64_A64_TRACERS_H_
#define XENIA_CPU_BACKEND_A64_A64_TRACERS_H_
#include <arm64_neon.h>
#include <cstdint>
namespace xe {
namespace cpu {
namespace backend {
namespace a64 {
class A64Emitter;
enum TracingMode {
TRACING_INSTR = (1 << 1),
TRACING_DATA = (1 << 2),
};
uint32_t GetTracingMode();
inline bool IsTracingInstr() { return (GetTracingMode() & TRACING_INSTR) != 0; }
inline bool IsTracingData() { return (GetTracingMode() & TRACING_DATA) != 0; }
void TraceString(void* raw_context, const char* str);
void TraceContextLoadI8(void* raw_context, uint64_t offset, uint8_t value);
void TraceContextLoadI16(void* raw_context, uint64_t offset, uint16_t value);
void TraceContextLoadI32(void* raw_context, uint64_t offset, uint32_t value);
void TraceContextLoadI64(void* raw_context, uint64_t offset, uint64_t value);
void TraceContextLoadF32(void* raw_context, uint64_t offset, float32x4_t value);
void TraceContextLoadF64(void* raw_context, uint64_t offset,
const double* value);
void TraceContextLoadV128(void* raw_context, uint64_t offset,
float32x4_t value);
void TraceContextStoreI8(void* raw_context, uint64_t offset, uint8_t value);
void TraceContextStoreI16(void* raw_context, uint64_t offset, uint16_t value);
void TraceContextStoreI32(void* raw_context, uint64_t offset, uint32_t value);
void TraceContextStoreI64(void* raw_context, uint64_t offset, uint64_t value);
void TraceContextStoreF32(void* raw_context, uint64_t offset,
float32x4_t value);
void TraceContextStoreF64(void* raw_context, uint64_t offset,
const double* value);
void TraceContextStoreV128(void* raw_context, uint64_t offset,
float32x4_t value);
void TraceMemoryLoadI8(void* raw_context, uint32_t address, uint8_t value);
void TraceMemoryLoadI16(void* raw_context, uint32_t address, uint16_t value);
void TraceMemoryLoadI32(void* raw_context, uint32_t address, uint32_t value);
void TraceMemoryLoadI64(void* raw_context, uint32_t address, uint64_t value);
void TraceMemoryLoadF32(void* raw_context, uint32_t address, float32x4_t value);
void TraceMemoryLoadF64(void* raw_context, uint32_t address, float64x2_t value);
void TraceMemoryLoadV128(void* raw_context, uint32_t address,
float32x4_t value);
void TraceMemoryStoreI8(void* raw_context, uint32_t address, uint8_t value);
void TraceMemoryStoreI16(void* raw_context, uint32_t address, uint16_t value);
void TraceMemoryStoreI32(void* raw_context, uint32_t address, uint32_t value);
void TraceMemoryStoreI64(void* raw_context, uint32_t address, uint64_t value);
void TraceMemoryStoreF32(void* raw_context, uint32_t address,
float32x4_t value);
void TraceMemoryStoreF64(void* raw_context, uint32_t address,
float64x2_t value);
void TraceMemoryStoreV128(void* raw_context, uint32_t address,
float32x4_t value);
void TraceMemset(void* raw_context, uint32_t address, uint8_t value,
uint32_t length);
} // namespace a64
} // namespace backend
} // namespace cpu
} // namespace xe
#endif // XENIA_CPU_BACKEND_A64_A64_TRACERS_H_

View File

@ -0,0 +1,95 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2024 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#ifndef XENIA_CPU_BACKEND_A64_A64_UTIL_H_
#define XENIA_CPU_BACKEND_A64_A64_UTIL_H_
#include "xenia/base/vec128.h"
#include "xenia/cpu/backend/a64/a64_backend.h"
#include "xenia/cpu/backend/a64/a64_emitter.h"
namespace xe {
namespace cpu {
namespace backend {
namespace a64 {
// Attempts to convert an fp32 bit-value into an fp8-immediate value for FMOV
// returns false if the value cannot be represented
// C2.2.3 Modified immediate constants in A64 floating-point instructions
// abcdefgh
// V
// aBbbbbbc defgh000 00000000 00000000
// B = NOT(b)
constexpr bool f32_to_fimm8(uint32_t u32, oaknut::FImm8& fp8) {
const uint32_t sign = (u32 >> 31) & 1;
int32_t exp = ((u32 >> 23) & 0xff) - 127;
int64_t mantissa = u32 & 0x7fffff;
// Too many mantissa bits
if (mantissa & 0x7ffff) {
return false;
}
// Too many exp bits
if (exp < -3 || exp > 4) {
return false;
}
// mantissa = (16 + e:f:g:h) / 16.
mantissa >>= 19;
if ((mantissa & 0b1111) != mantissa) {
return false;
}
// exp = (NOT(b):c:d) - 3
exp = ((exp + 3) & 0b111) ^ 0b100;
fp8 = oaknut::FImm8(sign, exp, uint8_t(mantissa));
return true;
}
// Attempts to convert an fp64 bit-value into an fp8-immediate value for FMOV
// returns false if the value cannot be represented
// C2.2.3 Modified immediate constants in A64 floating-point instructions
// abcdefgh
// V
// aBbbbbbb bbcdefgh 00000000 00000000 00000000 00000000 00000000 00000000
// B = NOT(b)
constexpr bool f64_to_fimm8(uint64_t u64, oaknut::FImm8& fp8) {
const uint32_t sign = (u64 >> 63) & 1;
int32_t exp = ((u64 >> 52) & 0x7ff) - 1023;
int64_t mantissa = u64 & 0xfffffffffffffULL;
// Too many mantissa bits
if (mantissa & 0xffffffffffffULL) {
return false;
}
// Too many exp bits
if (exp < -3 || exp > 4) {
return false;
}
// mantissa = (16 + e:f:g:h) / 16.
mantissa >>= 48;
if ((mantissa & 0b1111) != mantissa) {
return false;
}
// exp = (NOT(b):c:d) - 3
exp = ((exp + 3) & 0b111) ^ 0b100;
fp8 = oaknut::FImm8(sign, exp, uint8_t(mantissa));
return true;
}
} // namespace a64
} // namespace backend
} // namespace cpu
} // namespace xe
#endif // XENIA_CPU_BACKEND_A64_A64_UTIL_H_
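
As a worked example of the encoding rule above (encodable values are +/- (16..31)/16 * 2^e with e in [-3, 4]), the checks below show a few accepted and rejected fp32 bit patterns. The snippet is illustrative only; the example_fimm8_checks function and its use of <cassert> are not part of the commit.

#include <cassert>
#include "xenia/cpu/backend/a64/a64_util.h"

void example_fimm8_checks() {
  using xe::cpu::backend::a64::f32_to_fimm8;
  oaknut::FImm8 imm(false, 0, 0);
  assert(f32_to_fimm8(0x40000000u, imm));   // 2.0f   = 16/16 * 2^1  -> ok
  assert(f32_to_fimm8(0x3E000000u, imm));   // 0.125f = 16/16 * 2^-3 -> ok
  assert(!f32_to_fimm8(0x42000000u, imm));  // 32.0f: exponent 5 is out of range
  assert(!f32_to_fimm8(0x3DCCCCCDu, imm));  // 0.1f: needs more mantissa bits
}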

View File

@ -0,0 +1,31 @@
project_root = "../../../../.."
include(project_root.."/tools/build")
group("src")
project("xenia-cpu-backend-a64")
uuid("495f3f3e-f5e8-489a-bd0f-289d0495bc08")
filter("architecture:ARM64")
kind("StaticLib")
filter("architecture:not ARM64")
kind("None")
filter({})
language("C++")
cppdialect("C++20")
links({
"fmt",
"xenia-base",
"xenia-cpu",
})
defines({
})
disablewarnings({
-- Silence errors in oaknut
"4146", -- unary minus operator applied to unsigned type, result still unsigned
"4267" -- 'initializing': conversion from 'size_t' to 'uint32_t', possible loss of data
})
includedirs({
project_root.."/third_party/oaknut/include",
})
local_platform_files()

View File

@ -4,7 +4,11 @@ include(project_root.."/tools/build")
group("src")
project("xenia-cpu-backend-x64")
uuid("7d8d5dce-4696-4197-952a-09506f725afe")
kind("StaticLib")
filter("architecture:x86_64")
kind("StaticLib")
filter("architecture:not x86_64")
kind("None")
filter({})
language("C++")
links({
"capstone",

View File

@ -48,7 +48,8 @@ std::string Breakpoint::to_string() const {
str += " " + functions[0]->name();
return str;
} else {
return std::string("x64 ") + xe::string_util::to_hex_string(host_address());
return std::string(XE_HOST_ARCH_NAME " ") +
xe::string_util::to_hex_string(host_address());
}
}

View File

@ -9,7 +9,7 @@
#include "xenia/cpu/cpu_flags.h"
DEFINE_string(cpu, "any", "CPU backend [any, x64].", "CPU");
DEFINE_string(cpu, "any", "CPU backend [any, x64, a64].", "CPU");
DEFINE_string(
load_module_map, "",

View File

@ -23,6 +23,8 @@
#if XE_ARCH_AMD64
#include "xenia/cpu/backend/x64/x64_backend.h"
#elif XE_ARCH_ARM64
#include "xenia/cpu/backend/a64/a64_backend.h"
#endif // XE_ARCH
#if XE_COMPILER_MSVC
@ -203,11 +205,17 @@ class TestRunner {
if (cvars::cpu == "x64") {
backend.reset(new xe::cpu::backend::x64::X64Backend());
}
#elif XE_ARCH_ARM64
if (cvars::cpu == "a64") {
backend.reset(new xe::cpu::backend::a64::A64Backend());
}
#endif // XE_ARCH
if (cvars::cpu == "any") {
if (!backend) {
#if XE_ARCH_AMD64
backend.reset(new xe::cpu::backend::x64::X64Backend());
#elif XE_ARCH_ARM64
backend.reset(new xe::cpu::backend::a64::A64Backend());
#endif // XE_ARCH
}
}

View File

@ -27,7 +27,11 @@ project("xenia-cpu-ppc-tests")
links({
"xenia-cpu-backend-x64",
})
filter("platforms:Windows")
filter("architecture:ARM64")
links({
"xenia-cpu-backend-a64",
})
filter("platforms:Windows-*")
debugdir(project_root)
debugargs({
"2>&1",

View File

@ -34,7 +34,11 @@
#include "xenia/cpu/xex_module.h"
// TODO(benvanik): based on compiler support
#ifdef XE_ARCH_AMD64
#include "xenia/cpu/backend/x64/x64_backend.h"
#elif XE_ARCH_ARM64
#include "xenia/cpu/backend/a64/a64_backend.h"
#endif // XE_ARCH
#if 0 && DEBUG
#define DEFAULT_DEBUG_FLAG true

View File

@ -162,7 +162,7 @@ class Processor {
// This will cancel any active step operations and resume all threads.
void Continue();
// Steps the given thread a single x64 host instruction.
// Steps the given thread a single host instruction.
// If the step is over a branch the branch will be followed.
void StepHostInstruction(uint32_t thread_id);

View File

@ -58,6 +58,12 @@ LPSYMFUNCTIONTABLEACCESS64 sym_function_table_access_64_ = nullptr;
LPSYMGETMODULEBASE64 sym_get_module_base_64_ = nullptr;
LPSYMGETSYMFROMADDR64 sym_get_sym_from_addr_64_ = nullptr;
#if XE_ARCH_AMD64
static const DWORD kMachineType = IMAGE_FILE_MACHINE_AMD64;
#elif XE_ARCH_ARM64
static const DWORD kMachineType = IMAGE_FILE_MACHINE_ARM64;
#endif
namespace xe {
namespace cpu {
@ -173,40 +179,70 @@ class Win32StackWalker : public StackWalker {
} else {
// Copy thread context local. We will be modifying it during stack
// walking, so we don't want to mess with the incoming copy.
#if XE_ARCH_AMD64
thread_context.Rip = in_host_context->rip;
thread_context.EFlags = in_host_context->eflags;
std::memcpy(&thread_context.Rax, in_host_context->int_registers,
sizeof(in_host_context->int_registers));
std::memcpy(&thread_context.Xmm0, in_host_context->xmm_registers,
sizeof(in_host_context->xmm_registers));
#elif XE_ARCH_ARM64
thread_context.Pc = in_host_context->pc;
thread_context.Cpsr = in_host_context->cpsr;
std::memcpy(thread_context.X, in_host_context->x,
sizeof(in_host_context->x));
std::memcpy(&thread_context.V, in_host_context->v,
sizeof(in_host_context->v));
#endif
}
if (out_host_context) {
// Write out the captured thread context if the caller asked for it.
#if XE_ARCH_AMD64
out_host_context->rip = thread_context.Rip;
out_host_context->eflags = thread_context.EFlags;
std::memcpy(out_host_context->int_registers, &thread_context.Rax,
sizeof(out_host_context->int_registers));
std::memcpy(out_host_context->xmm_registers, &thread_context.Xmm0,
sizeof(out_host_context->xmm_registers));
#elif XE_ARCH_ARM64
out_host_context->pc = thread_context.Pc;
out_host_context->cpsr = thread_context.Cpsr;
std::memcpy(out_host_context->x, &thread_context.X,
sizeof(out_host_context->x));
std::memcpy(out_host_context->v, &thread_context.V,
sizeof(out_host_context->v));
#endif
}
// Setup the frame for walking.
STACKFRAME64 stack_frame = {0};
stack_frame.AddrPC.Mode = AddrModeFlat;
#if XE_ARCH_AMD64
stack_frame.AddrPC.Offset = thread_context.Rip;
#elif XE_ARCH_ARM64
stack_frame.AddrPC.Offset = thread_context.Pc;
#endif
stack_frame.AddrFrame.Mode = AddrModeFlat;
#if XE_ARCH_AMD64
stack_frame.AddrFrame.Offset = thread_context.Rbp;
#elif XE_ARCH_ARM64
stack_frame.AddrFrame.Offset = thread_context.Fp;
#endif
stack_frame.AddrStack.Mode = AddrModeFlat;
#if XE_ARCH_AMD64
stack_frame.AddrStack.Offset = thread_context.Rsp;
#elif XE_ARCH_ARM64
stack_frame.AddrStack.Offset = thread_context.Sp;
#endif
// Walk the stack.
// Note that StackWalk64 is thread safe, though other dbghelp functions are
// not.
size_t frame_index = 0;
while (frame_index < frame_count &&
stack_walk_64_(IMAGE_FILE_MACHINE_AMD64, GetCurrentProcess(),
thread_handle, &stack_frame, &thread_context, nullptr,
stack_walk_64_(kMachineType, GetCurrentProcess(), thread_handle,
&stack_frame, &thread_context, nullptr,
XSymFunctionTableAccess64, XSymGetModuleBase64,
nullptr) == TRUE) {
if (frame_index >= frame_offset) {
@ -237,7 +273,7 @@ class Win32StackWalker : public StackWalker {
if (function) {
frame.guest_symbol.function = function;
// Figure out where in guest code we are by looking up the
// displacement in x64 from the JIT'ed code start to the PC.
// displacement in bytes from the JIT'ed code start to the PC.
if (function->is_guest()) {
auto guest_function = static_cast<GuestFunction*>(function);
// Adjust the host PC by -1 so that we will go back into whatever

View File

@ -19,6 +19,12 @@ test_suite("xenia-cpu-tests", project_root, ".", {
links = {
"xenia-cpu-backend-x64",
},
}
},
{
filter = 'architecture:ARM64',
links = {
"xenia-cpu-backend-a64",
},
},
},
})

View File

@ -13,7 +13,12 @@
#include <vector>
#include "xenia/base/platform.h"
#if XE_ARCH_AMD64
#include "xenia/cpu/backend/x64/x64_backend.h"
#elif XE_ARCH_ARM64
#include "xenia/cpu/backend/a64/a64_backend.h"
#endif // XE_ARCH
#include "xenia/cpu/hir/hir_builder.h"
#include "xenia/cpu/ppc/ppc_context.h"
#include "xenia/cpu/ppc/ppc_frontend.h"
@ -39,6 +44,8 @@ class TestFunction {
std::unique_ptr<xe::cpu::backend::Backend> backend;
#if XE_ARCH_AMD64
backend.reset(new xe::cpu::backend::x64::X64Backend());
#elif XE_ARCH_ARM64
backend.reset(new xe::cpu::backend::a64::A64Backend());
#endif // XE_ARCH
if (backend) {
auto processor = std::make_unique<Processor>(memory.get(), nullptr);
@ -74,7 +81,7 @@ class TestFunction {
uint32_t stack_address = memory_size - stack_size;
uint32_t thread_state_address = stack_address - 0x1000;
auto thread_state = std::make_unique<ThreadState>(processor.get(), 0x100);
assert_always(); // TODO: Allocate a thread stack!!!
// assert_always(); // TODO: Allocate a thread stack!!!
auto ctx = thread_state->context();
ctx->lr = 0xBCBCBCBC;

View File

@ -63,7 +63,13 @@ DebugWindow::DebugWindow(Emulator* emulator,
processor_(emulator->processor()),
app_context_(app_context),
window_(xe::ui::Window::Create(app_context_, kBaseTitle, 1500, 1000)) {
if (cs_open(CS_ARCH_X86, CS_MODE_64, &capstone_handle_) != CS_ERR_OK) {
if (
#ifdef XE_ARCH_AMD64
cs_open(CS_ARCH_X86, CS_MODE_64, &capstone_handle_)
#elif XE_ARCH_ARM64
cs_open(CS_ARCH_ARM64, CS_MODE_LITTLE_ENDIAN, &capstone_handle_)
#endif
!= CS_ERR_OK) {
assert_always("Failed to initialize capstone");
}
cs_option(capstone_handle_, CS_OPT_SYNTAX, CS_OPT_SYNTAX_INTEL);
@ -338,7 +344,7 @@ void DebugWindow::DrawSourcePane() {
// copy button
// address start - end
// name text box (editable)
// combo for interleaved + [ppc, hir, opt hir, x64 + byte with sizes]
// combo for interleaved + [ppc, hir, opt hir, asm + byte with sizes]
ImGui::AlignTextToFramePadding();
ImGui::Text("%s", function->module()->name().c_str());
ImGui::SameLine();
@ -383,11 +389,11 @@ void DebugWindow::DrawSourcePane() {
}
ImGui::SameLine();
if (state_.source_display_mode > 0) {
// Only show x64 step button if we have x64 visible.
// Only show asm step button if we have asm visible.
ImGui::Dummy(ImVec2(4, 0));
ImGui::SameLine();
ImGui::PushButtonRepeat(true);
if (ImGui::ButtonEx("Step x64", ImVec2(0, 0),
if (ImGui::ButtonEx("Step " XE_HOST_ARCH_NAME, ImVec2(0, 0),
can_step ? 0 : ImGuiItemFlags_Disabled)) {
// By enabling the button when stepping we allow repeat behavior.
if (processor_->execution_state() != cpu::ExecutionState::kStepping) {
@ -396,8 +402,8 @@ void DebugWindow::DrawSourcePane() {
}
ImGui::PopButtonRepeat();
if (ImGui::IsItemHovered()) {
ImGui::SetTooltip(
"Step one x64 instruction on the current thread (hold for many).");
ImGui::SetTooltip("Step one " XE_HOST_ARCH_NAME
" instruction on the current thread (hold for many).");
}
ImGui::SameLine();
}
@ -412,9 +418,9 @@ void DebugWindow::DrawSourcePane() {
if (function->is_guest()) {
const char* kSourceDisplayModes[] = {
"PPC",
"PPC+HIR+x64",
"PPC+HIR (opt)+x64",
"PPC+x64",
"PPC+HIR+" XE_HOST_ARCH_NAME,
"PPC+HIR (opt)+" XE_HOST_ARCH_NAME,
"PPC+" XE_HOST_ARCH_NAME,
};
ImGui::PushItemWidth(90);
ImGui::Combo("##display_mode", &state_.source_display_mode,
@ -459,7 +465,7 @@ void DebugWindow::DrawGuestFunctionSource() {
// labels get their own line with duped addresses
// show xrefs to labels?
// hir greyed and offset (background color change?)
// x64 greyed and offset with native address
// asm greyed and offset with native address
// hover on registers/etc for tooltip/highlight others
// click register to go to location of last write
// click code address to jump to code
@ -472,18 +478,18 @@ void DebugWindow::DrawGuestFunctionSource() {
bool draw_hir = false;
bool draw_hir_opt = false;
bool draw_x64 = false;
bool draw_asm = false;
switch (state_.source_display_mode) {
case 1:
draw_hir = true;
draw_x64 = true;
draw_asm = true;
break;
case 2:
draw_hir_opt = true;
draw_x64 = true;
draw_asm = true;
break;
case 3:
draw_x64 = true;
draw_asm = true;
break;
}
@ -498,8 +504,8 @@ void DebugWindow::DrawGuestFunctionSource() {
if (draw_hir_opt) {
// TODO(benvanik): get HIR and draw preamble.
}
if (draw_x64) {
// x64 preamble.
if (draw_asm) {
// asm preamble.
DrawMachineCodeSource(function->machine_code(), source_map[0].code_offset);
}
@ -512,7 +518,7 @@ void DebugWindow::DrawGuestFunctionSource() {
bool is_current_instr = address == guest_pc;
if (is_current_instr) {
ImGui::PushStyleColor(ImGuiCol_Text, ImVec4(0.0f, 1.0f, 0.0f, 1.0f));
if (!draw_x64) {
if (!draw_asm) {
ScrollToSourceIfPcChanged();
}
}
@ -548,7 +554,7 @@ void DebugWindow::DrawGuestFunctionSource() {
if (draw_hir_opt) {
// TODO(benvanik): get HIR and draw for this PPC function.
}
if (draw_x64) {
if (draw_asm) {
const uint8_t* machine_code_start =
function->machine_code() + source_map[source_map_index].code_offset;
const size_t machine_code_length =
@ -851,10 +857,10 @@ void DebugWindow::DrawRegistersPane() {
if (state_.register_group == RegisterGroup::kHostGeneral) {
ImGui::PushStyleColor(ImGuiCol_Button,
ImGui::GetStyle().Colors[ImGuiCol_ButtonActive]);
ImGui::Button("x64");
ImGui::Button(XE_HOST_ARCH_NAME);
ImGui::PopStyleColor();
} else {
if (ImGui::Button("x64")) {
if (ImGui::Button(XE_HOST_ARCH_NAME)) {
state_.register_group = RegisterGroup::kHostGeneral;
}
}
@ -862,10 +868,10 @@ void DebugWindow::DrawRegistersPane() {
if (state_.register_group == RegisterGroup::kHostVector) {
ImGui::PushStyleColor(ImGuiCol_Button,
ImGui::GetStyle().Colors[ImGuiCol_ButtonActive]);
ImGui::Button("XMM");
ImGui::Button(XE_HOST_ARCH_NAME "-vec");
ImGui::PopStyleColor();
} else {
if (ImGui::Button("XMM")) {
if (ImGui::Button(XE_HOST_ARCH_NAME "-vec")) {
state_.register_group = RegisterGroup::kHostVector;
}
}
@ -958,6 +964,7 @@ void DebugWindow::DrawRegistersPane() {
} break;
case RegisterGroup::kHostGeneral: {
ImGui::BeginChild("##host_general");
#if XE_ARCH_AMD64
for (int i = 0; i < 18; ++i) {
auto reg = static_cast<X64Register>(i);
ImGui::BeginGroup();
@ -995,6 +1002,46 @@ void DebugWindow::DrawRegistersPane() {
i, thread_info->host_context.xmm_registers[i].f32);
ImGui::EndGroup();
}
#elif XE_ARCH_ARM64
// TODO(wunkolo): print ARM64 registers
for (int i = 0; i < 34; ++i) {
auto reg = static_cast<Arm64Register>(i);
ImGui::BeginGroup();
ImGui::AlignTextToFramePadding();
ImGui::Text("%3s", HostThreadContext::GetRegisterName(reg));
ImGui::SameLine();
ImGui::Dummy(ImVec2(4, 0));
ImGui::SameLine();
if (reg == Arm64Register::kPc) {
dirty_guest_context |=
DrawRegisterTextBox(i, &thread_info->host_context.pc);
} else if (reg == Arm64Register::kPstate) {
dirty_guest_context |=
DrawRegisterTextBox(i, &thread_info->host_context.cpsr);
} else {
dirty_guest_context |=
DrawRegisterTextBox(i, &thread_info->host_context.x[i]);
}
ImGui::EndGroup();
}
ImGui::EndChild();
} break;
case RegisterGroup::kHostVector: {
ImGui::BeginChild("##host_vector");
for (int i = 0; i < 32; ++i) {
auto reg = static_cast<Arm64Register>(
static_cast<int>(Arm64Register::kV0) + i);
ImGui::BeginGroup();
ImGui::AlignTextToFramePadding();
ImGui::Text("%5s", HostThreadContext::GetRegisterName(reg));
ImGui::SameLine();
ImGui::Dummy(ImVec2(4, 0));
ImGui::SameLine();
dirty_host_context |=
DrawRegisterTextBoxes(i, thread_info->host_context.v[i].f32);
ImGui::EndGroup();
}
#endif
ImGui::EndChild();
}
}
@ -1144,7 +1191,8 @@ void DebugWindow::DrawBreakpointsPane() {
ImGui::OpenPopup("##add_code_breakpoint");
}
if (ImGui::IsItemHovered()) {
ImGui::SetTooltip("Add a code breakpoint for either PPC or x64.");
ImGui::SetTooltip(
"Add a code breakpoint for either PPC or " XE_HOST_ARCH_NAME ".");
}
// TODO(benvanik): remove this set focus workaround when imgui is fixed:
// https://github.com/ocornut/imgui/issues/343
@ -1178,15 +1226,15 @@ void DebugWindow::DrawBreakpointsPane() {
ImGui::Dummy(ImVec2(0, 2));
ImGui::AlignTextToFramePadding();
ImGui::Text("x64");
ImGui::Text(XE_HOST_ARCH_NAME);
ImGui::SameLine();
ImGui::Dummy(ImVec2(2, 0));
ImGui::SameLine();
static char x64_buffer[64] = {0};
static char asm_buffer[64] = {0};
ImGui::PushItemWidth(100);
if (ImGui::InputText("##host_address", x64_buffer, 17, input_flags)) {
uint64_t address = string_util::from_string<uint64_t>(x64_buffer, true);
x64_buffer[0] = 0;
if (ImGui::InputText("##host_address", asm_buffer, 17, input_flags)) {
uint64_t address = string_util::from_string<uint64_t>(asm_buffer, true);
asm_buffer[0] = 0;
CreateCodeBreakpoint(Breakpoint::AddressType::kHost, address);
ImGui::CloseCurrentPopup();
}

View File

@ -53,6 +53,8 @@
#if XE_ARCH_AMD64
#include "xenia/cpu/backend/x64/x64_backend.h"
#elif XE_ARCH_ARM64
#include "xenia/cpu/backend/a64/a64_backend.h"
#endif // XE_ARCH
DECLARE_int32(user_language);
@ -172,11 +174,18 @@ X_STATUS Emulator::Setup(
if (cvars::cpu == "x64") {
backend.reset(new xe::cpu::backend::x64::X64Backend());
}
#elif XE_ARCH_ARM64
if (cvars::cpu == "a64") {
backend.reset(new xe::cpu::backend::a64::A64Backend());
}
#endif // XE_ARCH
if (cvars::cpu == "any") {
if (!backend) {
#if XE_ARCH_AMD64
backend.reset(new xe::cpu::backend::x64::X64Backend());
#elif XE_ARCH_ARM64
// TODO(wunkolo): Arm64 backend
backend.reset(new xe::cpu::backend::a64::A64Backend());
#endif // XE_ARCH
}
}
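// Usage sketch (hedged; the "--cpu" values are taken from the code above, the
// binary name is illustrative only): on an ARM64 host either invocation would
// select the new backend, while x86-64 hosts keep the existing x64 JIT:
//   xenia.exe --cpu=a64 game.xex   // force the A64 backend explicitly
//   xenia.exe --cpu=any game.xex   // "any" falls back to the native backend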

View File

@ -70,6 +70,11 @@ project("xenia-gpu-d3d12-trace-viewer")
"xenia-cpu-backend-x64",
})
filter("architecture:ARM64")
links({
"xenia-cpu-backend-a64",
})
group("src")
project("xenia-gpu-d3d12-trace-dump")
uuid("686b859c-0046-44c4-a02c-41fc3fb75698")
@ -120,3 +125,8 @@ project("xenia-gpu-d3d12-trace-dump")
links({
"xenia-cpu-backend-x64",
})
filter("architecture:ARM64")
links({
"xenia-cpu-backend-a64",
})

View File

@ -43,7 +43,7 @@ project("xenia-gpu-shader-compiler")
"../base/console_app_main_"..platform_suffix..".cc",
})
filter("platforms:Windows")
filter("platforms:Windows-*")
-- Only create the .user file if it doesn't already exist.
local user_file = project_root.."/build/xenia-gpu-shader-compiler.vcxproj.user"
if not os.isfile(user_file) then

View File

@ -68,6 +68,11 @@ project("xenia-gpu-vulkan-trace-viewer")
"xenia-cpu-backend-x64",
})
filter("architecture:ARM64")
links({
"xenia-cpu-backend-a64",
})
filter("platforms:Linux")
links({
"X11",
@ -75,7 +80,7 @@ project("xenia-gpu-vulkan-trace-viewer")
"X11-xcb",
})
filter("platforms:Windows")
filter("platforms:Windows-*")
-- Only create the .user file if it doesn't already exist.
local user_file = project_root.."/build/xenia-gpu-vulkan-trace-viewer.vcxproj.user"
if not os.isfile(user_file) then
@ -131,6 +136,11 @@ project("xenia-gpu-vulkan-trace-dump")
"xenia-cpu-backend-x64",
})
filter("architecture:ARM64")
links({
"xenia-cpu-backend-a64",
})
filter("platforms:Linux")
links({
"X11",
@ -138,7 +148,7 @@ project("xenia-gpu-vulkan-trace-dump")
"X11-xcb",
})
filter("platforms:Windows")
filter("platforms:Windows-*")
-- Only create the .user file if it doesn't already exist.
local user_file = project_root.."/build/xenia-gpu-vulkan-trace-dump.vcxproj.user"
if not os.isfile(user_file) then

View File

@ -53,7 +53,7 @@ project("xenia-hid-demo")
"X11-xcb",
})
filter("platforms:Windows")
filter("platforms:Windows-*")
links({
"xenia-hid-winkey",
"xenia-hid-xinput",

View File

@ -19,7 +19,7 @@ project("xenia-ui")
-- Exports JNI functions.
wholelib("On")
filter("platforms:Windows")
filter("platforms:Windows-*")
links({
"dwmapi",
"dxgi",

View File

@ -26,7 +26,7 @@ end
-- Call this function in project scope to include the SDL2 headers.
--
function sdl2_include()
filter("platforms:Windows")
filter("platforms:Windows-*")
includedirs({
path.getrelative(".", third_party_path) .. "/SDL2/include",
})

View File

@ -4,13 +4,37 @@ project("capstone")
kind("StaticLib")
language("C")
defines({
"CAPSTONE_X86_ATT_DISABLE",
"CAPSTONE_DIET_NO",
"CAPSTONE_X86_REDUCE_NO",
"CAPSTONE_HAS_X86",
"CAPSTONE_USE_SYS_DYN_MEM",
"_LIB",
})
filter("architecture:x86_64")
defines({
"CAPSTONE_HAS_X86",
"CAPSTONE_X86_ATT_DISABLE",
"CAPSTONE_X86_REDUCE_NO",
})
files({
"capstone/arch/X86/*.c",
"capstone/arch/X86/*.h",
"capstone/arch/X86/*.inc",
})
force_compile_as_c({
"capstone/arch/X86/**.c",
})
filter("architecture:ARM64")
defines({
"CAPSTONE_HAS_ARM64",
})
files({
"capstone/arch/AArch64/*.c",
"capstone/arch/AArch64/*.h",
"capstone/arch/AArch64/*.inc",
})
force_compile_as_c({
"capstone/arch/AArch64/**.c",
})
filter({})
includedirs({
"capstone",
"capstone/include",
@ -32,12 +56,7 @@ project("capstone")
"capstone/SStream.h",
"capstone/utils.c",
"capstone/utils.h",
"capstone/arch/X86/*.c",
"capstone/arch/X86/*.h",
"capstone/arch/X86/*.inc",
})
force_compile_as_c({
"capstone/**.c",
"capstone/arch/X86/**.c",
})
"capstone/**.c",
})

View File

@ -30,7 +30,7 @@ project("discord-rpc")
files({
"discord-rpc/src/discord_register_osx.m"
})
filter("platforms:Windows")
filter("platforms:Windows-*")
files({
"discord-rpc/src/connection_win.cpp",
"discord-rpc/src/discord_register_win.cpp"

View File

@ -3252,7 +3252,7 @@ void MicroProfileDraw(uint32_t nWidth, uint32_t nHeight)
#if MICROPROFILE_CONTEXT_SWITCH_TRACE
MicroProfileStringArrayAddLiteral(&Debug, "Context Switch");
MicroProfileStringArrayFormat(&Debug, "%9d [%7d]", S.nContextSwitchUsage, MICROPROFILE_CONTEXT_SWITCH_BUFFER_SIZE / S.nContextSwitchUsage );
MicroProfileStringArrayFormat(&Debug, "%9d [%7d]", S.nContextSwitchUsage, S.nContextSwitchUsage ? MICROPROFILE_CONTEXT_SWITCH_BUFFER_SIZE / S.nContextSwitchUsage : 0 );
#endif
for(int i = 0; i < MICROPROFILE_MAX_THREADS; ++i)

View File

@ -28,7 +28,7 @@ project("mspack")
"mspack/system.h",
})
filter("platforms:Windows")
filter("platforms:Windows-*")
defines({
})
filter("platforms:Linux")

1
third_party/oaknut vendored Submodule

@ -0,0 +1 @@
Subproject commit 94c726ce0338b054eb8cb5ea91de8fe6c19f4392

View File

@ -18,5 +18,5 @@ project("snappy")
"snappy/snappy.h",
})
filter("platforms:Windows")
filter("platforms:Windows-*")
warnings("Off") -- Too many warnings.

View File

@ -20,7 +20,7 @@ local function match_platform_files(base_path, base_match)
removefiles({base_path.."/".."**_android.h", base_path.."/".."**_android.cc"})
removefiles({base_path.."/".."**_mac.h", base_path.."/".."**_mac.cc"})
removefiles({base_path.."/".."**_win.h", base_path.."/".."**_win.cc"})
filter("platforms:Windows")
filter("platforms:Windows-*")
files({
base_path.."/"..base_match.."_win.h",
base_path.."/"..base_match.."_win.cc",

View File

@ -781,6 +781,8 @@ class BaseBuildCommand(Command):
self.parser.add_argument(
'--target', action='append', default=[],
help='Builds only the given target(s).')
self.parser.add_argument(
'--arch', default='x86_64', help='Builds only the given architecture')
self.parser.add_argument(
'--force', action='store_true',
help='Forces a full rebuild.')
@ -823,6 +825,7 @@ class BaseBuildCommand(Command):
'/m',
'/v:m',
'/p:Configuration=' + args['config'],
'/p:Platform=' + "Windows-" + args['arch'],
] + ([targets] if targets is not None else []) + pass_args,
shell=False)
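# Hypothetical invocation of the new flag (flag names assumed from the argument
# definition above); on Windows this maps to /p:Platform=Windows-ARM64:
#   xb build --config=Release --arch=ARM64 --target=xenia-app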
elif sys.platform == 'darwin':