Merge a8b9cd8e65 into 3d30b2eec3
Commit 4a008c624e
.gitmodules
@@ -85,3 +85,6 @@
 [submodule "third_party/VulkanMemoryAllocator"]
 	path = third_party/VulkanMemoryAllocator
 	url = https://github.com/GPUOpen-LibrariesAndSDKs/VulkanMemoryAllocator.git
+[submodule "third_party/oaknut"]
+	path = third_party/oaknut
+	url = https://github.com/merryhime/oaknut.git
premake5.lua (27 changes)
@@ -54,7 +54,7 @@ filter("configurations:Checked")
   defines({
     "DEBUG",
   })
-filter({"configurations:Checked", "platforms:Windows"})
+filter({"configurations:Checked", "platforms:Windows-*"})
   buildoptions({
     "/RTCsu", -- Full Run-Time Checks.
   })
@@ -153,7 +153,7 @@ filter("platforms:Android-*")
     "log",
   })

-filter("platforms:Windows")
+filter("platforms:Windows-*")
   system("windows")
   toolset("msc")
   buildoptions({
@@ -179,8 +179,12 @@ filter("platforms:Windows")
     "_CRT_SECURE_NO_WARNINGS",
     "WIN32",
     "_WIN64=1",
-    "_AMD64=1",
   })
+  filter("architecture:x86_64")
+    defines({
+      "_AMD64=1",
+    })
+  filter({})
   linkoptions({
     "/ignore:4006", -- Ignores complaints about empty obj files.
     "/ignore:4221",
@@ -198,7 +202,7 @@ filter("platforms:Windows")
   })

 -- Embed the manifest for things like dependencies and DPI awareness.
-filter({"platforms:Windows", "kind:ConsoleApp or WindowedApp"})
+filter({"platforms:Windows-*", "kind:ConsoleApp or WindowedApp"})
   files({
     "src/xenia/base/app_win32.manifest"
   })
@@ -228,7 +232,12 @@ workspace("xenia")
       ["ARCHS"] = "x86_64"
     })
   elseif os.istarget("windows") then
-    platforms({"Windows"})
+    platforms({"Windows-ARM64", "Windows-x86_64"})
+    filter("platforms:Windows-ARM64")
+      architecture("ARM64")
+    filter("platforms:Windows-x86_64")
+      architecture("x86_64")
+    filter({})
     -- 10.0.15063.0: ID3D12GraphicsCommandList1::SetSamplePositions.
     -- 10.0.19041.0: D3D12_HEAP_FLAG_CREATE_NOT_ZEROED.
     -- 10.0.22000.0: DWMWA_WINDOW_CORNER_PREFERENCE.
@@ -284,7 +293,13 @@ workspace("xenia")
   include("src/xenia/apu/nop")
   include("src/xenia/base")
   include("src/xenia/cpu")
-  include("src/xenia/cpu/backend/x64")
+
+  filter("architecture:x86_64")
+    include("src/xenia/cpu/backend/x64")
+  filter("architecture:ARM64")
+    include("src/xenia/cpu/backend/a64")
+  filter({})
+
   include("src/xenia/debug/ui")
   include("src/xenia/gpu")
   include("src/xenia/gpu/null")
src/xenia/app/premake5.lua
@@ -32,6 +32,7 @@ project("xenia-app")
     "libavcodec",
     "libavutil",
     "mspack",
     "SDL2",
     "snappy",
     "xxhash",
   })
@@ -72,13 +73,18 @@ project("xenia-app")
     "xenia-cpu-backend-x64",
   })

+  filter("architecture:ARM64")
+    links({
+      "xenia-cpu-backend-a64",
+    })
+
   -- TODO(Triang3l): The emulator itself on Android.
   filter("platforms:not Android-*")
     files({
       "xenia_main.cc",
     })

-  filter("platforms:Windows")
+  filter("platforms:Windows-*")
     files({
       "main_resources.rc",
     })
@@ -104,7 +110,7 @@ project("xenia-app")
     "SDL2",
   })

-  filter("platforms:Windows")
+  filter("platforms:Windows-*")
     links({
       "xenia-apu-xaudio2",
       "xenia-gpu-d3d12",
@@ -113,13 +119,13 @@ project("xenia-app")
       "xenia-ui-d3d12",
     })

-  filter({"platforms:Windows", SINGLE_LIBRARY_FILTER})
+  filter({"platforms:Windows-*", SINGLE_LIBRARY_FILTER})
     links({
       "xenia-gpu-d3d12-trace-viewer",
       "xenia-ui-window-d3d12-demo",
     })

-  filter("platforms:Windows")
+  filter("platforms:Windows-*")
     -- Only create the .user file if it doesn't already exist.
     local user_file = project_root.."/build/xenia-app.vcxproj.user"
     if not os.isfile(user_file) then
src/xenia/base/clock.cc
@@ -21,8 +21,9 @@ DEFINE_bool(clock_no_scaling, false,
             "Guest system time is directly pulled from host.",
             "CPU");
 DEFINE_bool(clock_source_raw, false,
-            "Use the RDTSC instruction as the time source. "
-            "Host CPU must support invariant TSC.",
+            "On x64, use the RDTSC instruction as the time source. Requires "
+            "an invariant TSC. "
+            "On a64, use the CNTVCT_EL0 register as the time source.",
             "CPU");

 namespace xe {
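For context, DEFINE_bool above surfaces this flag as cvars::clock_source_raw. A minimal sketch (not part of the diff; the helper name is hypothetical) of how a caller might pick the raw hardware counter when the flag is set, under the XE_CLOCK_RAW_AVAILABLE guard from clock.h:

#if XE_CLOCK_RAW_AVAILABLE
// Hypothetical helper: prefer the raw counter when --clock_source_raw is set.
inline uint64_t QueryTicks() {
  return cvars::clock_source_raw ? xe::Clock::host_tick_count_raw()
                                 : xe::Clock::QueryHostTickCount();
}
#endif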
src/xenia/base/clock.h
@@ -18,6 +18,8 @@

 #if XE_ARCH_AMD64
 #define XE_CLOCK_RAW_AVAILABLE 1
+#elif XE_ARCH_ARM64
+#define XE_CLOCK_RAW_AVAILABLE 1
 #endif

 DECLARE_bool(clock_no_scaling);
src/xenia/base/clock_arm64.cc (new file)
@@ -0,0 +1,50 @@
/**
 ******************************************************************************
 * Xenia : Xbox 360 Emulator Research Project                                 *
 ******************************************************************************
 * Copyright 2024 Ben Vanik. All rights reserved.                             *
 * Released under the BSD license - see LICENSE in the root for more details. *
 ******************************************************************************
 */

#include "xenia/base/clock.h"
#include "xenia/base/platform.h"

#if XE_ARCH_ARM64 && XE_CLOCK_RAW_AVAILABLE

#include "xenia/base/logging.h"

#ifdef _MSC_VER
#include <arm64_neon.h>
#include <intrin.h>
#else
#include <arm_neon.h>
#endif

// Wrap the compiler-specific intrinsics for reading a system register.
#if XE_COMPILER_MSVC
constexpr int32_t CNTFRQ_EL0 = ARM64_SYSREG(3, 3, 14, 0, 0);
constexpr int32_t CNTVCT_EL0 = ARM64_SYSREG(3, 3, 14, 0, 2);
#define xe_cpu_mrs(reg) _ReadStatusReg(reg)
#elif XE_COMPILER_CLANG || XE_COMPILER_GNUC
// The register name must be a compile-time token in the `mrs` instruction, so
// this has to be a macro that stringizes its argument; a plain function
// cannot do that (the `#reg` operator only works inside a macro).
#define xe_cpu_mrs(reg)                               \
  ([] {                                               \
    uint64_t result;                                  \
    __asm__ volatile("mrs %0, " #reg : "=r"(result)); \
    return result;                                    \
  }())
#else
#error \
    "No xe_cpu_mrs() system-register wrapper implemented for the current compiler."
#endif

namespace xe {

uint64_t Clock::host_tick_frequency_raw() { return xe_cpu_mrs(CNTFRQ_EL0); }
uint64_t Clock::host_tick_count_raw() { return xe_cpu_mrs(CNTVCT_EL0); }

}  // namespace xe

#endif
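Since CNTFRQ_EL0 reports the generic timer's fixed frequency, converting raw CNTVCT_EL0 ticks to wall time is a straight scale. A minimal illustration (not part of the diff; the helper name is hypothetical):

#include <cstdint>

// Hypothetical helper: convert raw ticks to nanoseconds. Splitting into
// whole seconds plus remainder keeps the intermediate products in 64 bits.
inline uint64_t RawTicksToNanoseconds(uint64_t ticks, uint64_t frequency) {
  const uint64_t seconds = ticks / frequency;
  const uint64_t remainder = ticks % frequency;
  return seconds * UINT64_C(1000000000) +
         remainder * UINT64_C(1000000000) / frequency;
}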
src/xenia/base/exception_handler_win.cc
@@ -36,12 +36,22 @@ LONG CALLBACK ExceptionHandlerCallback(PEXCEPTION_POINTERS ex_info) {
   }

   HostThreadContext thread_context;

+#if XE_ARCH_AMD64
   thread_context.rip = ex_info->ContextRecord->Rip;
   thread_context.eflags = ex_info->ContextRecord->EFlags;
   std::memcpy(thread_context.int_registers, &ex_info->ContextRecord->Rax,
               sizeof(thread_context.int_registers));
   std::memcpy(thread_context.xmm_registers, &ex_info->ContextRecord->Xmm0,
               sizeof(thread_context.xmm_registers));
+#elif XE_ARCH_ARM64
+  thread_context.pc = ex_info->ContextRecord->Pc;
+  thread_context.cpsr = ex_info->ContextRecord->Cpsr;
+  std::memcpy(thread_context.x, &ex_info->ContextRecord->X,
+              sizeof(thread_context.x));
+  std::memcpy(thread_context.v, &ex_info->ContextRecord->V,
+              sizeof(thread_context.v));
+#endif

   // https://msdn.microsoft.com/en-us/library/ms679331(v=vs.85).aspx
   // https://msdn.microsoft.com/en-us/library/aa363082(v=vs.85).aspx
@@ -78,6 +88,7 @@ LONG CALLBACK ExceptionHandlerCallback(PEXCEPTION_POINTERS ex_info) {
   for (size_t i = 0; i < xe::countof(handlers_) && handlers_[i].first; ++i) {
     if (handlers_[i].first(&ex, handlers_[i].second)) {
       // Exception handled.
+#if XE_ARCH_AMD64
       ex_info->ContextRecord->Rip = thread_context.rip;
       ex_info->ContextRecord->EFlags = thread_context.eflags;
       uint32_t modified_register_index;
@@ -98,6 +109,28 @@ LONG CALLBACK ExceptionHandlerCallback(PEXCEPTION_POINTERS ex_info) {
                     &thread_context.xmm_registers[modified_register_index],
                     sizeof(vec128_t));
       }
+#elif XE_ARCH_ARM64
+      ex_info->ContextRecord->Pc = thread_context.pc;
+      ex_info->ContextRecord->Cpsr = thread_context.cpsr;
+      uint32_t modified_register_index;
+      uint16_t modified_int_registers_remaining = ex.modified_x_registers();
+      while (xe::bit_scan_forward(modified_int_registers_remaining,
+                                  &modified_register_index)) {
+        modified_int_registers_remaining &=
+            ~(UINT16_C(1) << modified_register_index);
+        ex_info->ContextRecord->X[modified_register_index] =
+            thread_context.x[modified_register_index];
+      }
+      uint16_t modified_vec_registers_remaining = ex.modified_v_registers();
+      while (xe::bit_scan_forward(modified_vec_registers_remaining,
+                                  &modified_register_index)) {
+        modified_vec_registers_remaining &=
+            ~(UINT16_C(1) << modified_register_index);
+        // Index into the V array itself; offsetting the array's address
+        // would step in whole-array strides.
+        std::memcpy(&ex_info->ContextRecord->V[modified_register_index],
+                    &thread_context.v[modified_register_index],
+                    sizeof(vec128_t));
+      }
+#endif
       return EXCEPTION_CONTINUE_EXECUTION;
     }
   }
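The restore loops above walk a "modified registers" bitmask, clearing one set bit per iteration. The same pattern in isolation (illustrative only; __builtin_ctz stands in for xe::bit_scan_forward and requires GCC/Clang):

#include <cstdint>
#include <cstdio>

int main() {
  uint16_t remaining = 0b0000'0000'0010'0101;  // say x0, x2, x5 were modified
  while (remaining) {
    const unsigned index = __builtin_ctz(remaining);  // lowest set bit
    remaining &= ~(UINT16_C(1) << index);             // clear it
    std::printf("restore x%u\n", index);              // prints x0, x2, x5
  }
  return 0;
}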
src/xenia/base/host_thread_context.cc
@@ -67,7 +67,7 @@ std::string HostThreadContext::GetStringFromValue(HostRegister reg,
     case Arm64Register::kPc:
       return hex ? string_util::to_hex_string(pc) : std::to_string(pc);
     case Arm64Register::kPstate:
-      return hex ? string_util::to_hex_string(pstate) : std::to_string(pstate);
+      return hex ? string_util::to_hex_string(cpsr) : std::to_string(cpsr);
     case Arm64Register::kFpsr:
       return hex ? string_util::to_hex_string(fpsr) : std::to_string(fpsr);
     case Arm64Register::kFpcr:
src/xenia/base/host_thread_context.h
@@ -202,7 +202,7 @@ class HostThreadContext {
   uint64_t x[31];
   uint64_t sp;
   uint64_t pc;
-  uint64_t pstate;
+  uint32_t cpsr;
   uint32_t fpsr;
   uint32_t fpcr;
   vec128_t v[32];
@@ -11,6 +11,8 @@

 #include <cstdlib>

+#if XE_ARCH_AMD64
+
 // Includes Windows headers, so it goes after platform_win.h.
 #include "third_party/xbyak/xbyak/xbyak_util.h"

@@ -39,3 +41,5 @@ class StartupAvxCheck {
 #pragma warning(suppress : 4073)
 #pragma init_seg(lib)
 static StartupAvxCheck gStartupAvxCheck;
+
+#endif  // XE_ARCH_AMD64
src/xenia/base/math.h
@@ -31,6 +31,8 @@

 #if XE_ARCH_AMD64
 #include <xmmintrin.h>
+#elif XE_ARCH_ARM64
+#include <arm64_neon.h>
 #endif

 namespace xe {
@@ -135,10 +137,17 @@ constexpr inline uint32_t bit_count(T v) {
 }
 #else
 #if XE_COMPILER_MSVC || XE_COMPILER_INTEL
+#if XE_ARCH_AMD64
 inline uint32_t bit_count(uint32_t v) { return __popcnt(v); }
 inline uint32_t bit_count(uint64_t v) {
   return static_cast<uint32_t>(__popcnt64(v));
 }
+#elif XE_ARCH_ARM64
+inline uint32_t bit_count(uint32_t v) { return _CountOneBits(v); }
+inline uint32_t bit_count(uint64_t v) {
+  return static_cast<uint32_t>(_CountOneBits64(v));
+}
+#endif
 #elif XE_COMPILER_GCC || XE_COMPILER_CLANG
 static_assert(sizeof(unsigned int) == sizeof(uint32_t));
 static_assert(sizeof(unsigned long long) == sizeof(uint64_t));
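Both intrinsic paths (__popcnt on x64, _CountOneBits on MSVC ARM64) compute a population count. A portable cross-check (illustrative, not part of the diff):

#include <cassert>
#include <cstdint>

// Kernighan's trick: each iteration clears the lowest set bit.
inline uint32_t portable_bit_count(uint32_t v) {
  uint32_t count = 0;
  for (; v; v &= v - 1) ++count;
  return count;
}

int main() {
  assert(portable_bit_count(0xF0F0u) == 8);  // must match bit_count(0xF0F0u)
  return 0;
}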
@@ -372,6 +381,24 @@ template <int N>
 int64_t m128_i64(const __m128& v) {
   return m128_i64<N>(_mm_castps_pd(v));
 }
+#elif XE_ARCH_ARM64
+// Utilities for NEON values.
+template <int N>
+float m128_f32(const float32x4_t& v) {
+  return vgetq_lane_f32(v, N);
+}
+template <int N>
+int32_t m128_i32(const int32x4_t& v) {
+  return vgetq_lane_s32(v, N);
+}
+template <int N>
+double m128_f64(const float64x2_t& v) {
+  return vgetq_lane_f64(v, N);
+}
+template <int N>
+int64_t m128_i64(const int64x2_t& v) {
+  return vgetq_lane_s64(v, N);
+}
 #endif

 // Similar to the C++ implementation of XMConvertFloatToHalf and
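The NEON overloads keep call sites architecture-neutral: the same m128_f32<N>(v) spelling resolves against __m128 on x64 and float32x4_t on a64. Illustrative usage (not part of the diff; assumes xenia/base/math.h on an a64 build):

#include <arm_neon.h>

float third_lane_example() {
  const float32x4_t v = vdupq_n_f32(3.0f);  // {3, 3, 3, 3}
  return xe::m128_f32<2>(v);                // 3.0f, via vgetq_lane_f32
}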
src/xenia/base/platform.h
@@ -66,6 +66,14 @@
 #define XE_ARCH_PPC 1
 #endif

+#ifdef XE_ARCH_AMD64
+#define XE_HOST_ARCH_NAME "x64"
+#elif XE_ARCH_ARM64
+#define XE_HOST_ARCH_NAME "a64"
+#elif XE_ARCH_PPC
+#define XE_HOST_ARCH_NAME "ppc"
+#endif
+
 #if XE_PLATFORM_WIN32
 #define WIN32_LEAN_AND_MEAN
 #define NOMINMAX  // Don't want windows.h including min/max macros.
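XE_HOST_ARCH_NAME gives logging and UI a stable per-architecture tag. Something like the following (illustrative only) prints "x64", "a64", or "ppc" depending on the host:

#include <cstdio>

void print_backend_name() {
  std::printf("CPU backend: %s\n", XE_HOST_ARCH_NAME);
}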
src/xenia/cpu/backend/a64/a64_assembler.cc (new file)
@@ -0,0 +1,146 @@
/**
 ******************************************************************************
 * Xenia : Xbox 360 Emulator Research Project                                 *
 ******************************************************************************
 * Copyright 2024 Ben Vanik. All rights reserved.                             *
 * Released under the BSD license - see LICENSE in the root for more details. *
 ******************************************************************************
 */

#include "xenia/cpu/backend/a64/a64_assembler.h"

#include <climits>

#include "third_party/capstone/include/capstone/arm64.h"
#include "third_party/capstone/include/capstone/capstone.h"
#include "xenia/base/profiling.h"
#include "xenia/base/reset_scope.h"
#include "xenia/base/string.h"
#include "xenia/cpu/backend/a64/a64_backend.h"
#include "xenia/cpu/backend/a64/a64_code_cache.h"
#include "xenia/cpu/backend/a64/a64_emitter.h"
#include "xenia/cpu/backend/a64/a64_function.h"
#include "xenia/cpu/cpu_flags.h"
#include "xenia/cpu/hir/hir_builder.h"
#include "xenia/cpu/hir/label.h"
#include "xenia/cpu/processor.h"

namespace xe {
namespace cpu {
namespace backend {
namespace a64 {

using xe::cpu::hir::HIRBuilder;

A64Assembler::A64Assembler(A64Backend* backend)
    : Assembler(backend), a64_backend_(backend), capstone_handle_(0) {
  if (cs_open(CS_ARCH_ARM64, CS_MODE_LITTLE_ENDIAN, &capstone_handle_) !=
      CS_ERR_OK) {
    assert_always("Failed to initialize capstone");
  }
  cs_option(capstone_handle_, CS_OPT_SYNTAX, CS_OPT_SYNTAX_INTEL);
  cs_option(capstone_handle_, CS_OPT_DETAIL, CS_OPT_OFF);
}

A64Assembler::~A64Assembler() {
  // Emitter must be freed before the allocator.
  emitter_.reset();

  if (capstone_handle_) {
    cs_close(&capstone_handle_);
  }
}

bool A64Assembler::Initialize() {
  if (!Assembler::Initialize()) {
    return false;
  }

  emitter_.reset(new A64Emitter(a64_backend_));

  return true;
}

void A64Assembler::Reset() {
  string_buffer_.Reset();
  Assembler::Reset();
}

bool A64Assembler::Assemble(GuestFunction* function, HIRBuilder* builder,
                            uint32_t debug_info_flags,
                            std::unique_ptr<FunctionDebugInfo> debug_info) {
  SCOPE_profile_cpu_f("cpu");

  // Reset when we leave. The scope object must be named; an unnamed
  // temporary would run its destructor (and thus Reset) immediately.
  auto reset_scope = xe::make_reset_scope(this);

  // Lower HIR -> a64.
  void* machine_code = nullptr;
  size_t code_size = 0;
  if (!emitter_->Emit(function, builder, debug_info_flags, debug_info.get(),
                      &machine_code, &code_size, &function->source_map())) {
    return false;
  }

  // Stash generated machine code.
  if (debug_info_flags & DebugInfoFlags::kDebugInfoDisasmMachineCode) {
    DumpMachineCode(machine_code, code_size, function->source_map(),
                    &string_buffer_);
    debug_info->set_machine_code_disasm(xe_strdup(string_buffer_.buffer()));
    string_buffer_.Reset();
  }

  function->set_debug_info(std::move(debug_info));
  static_cast<A64Function*>(function)->Setup(
      reinterpret_cast<uint8_t*>(machine_code), code_size);

  // Install into indirection table.
  const uint64_t host_address = reinterpret_cast<uint64_t>(machine_code);
  assert_true((host_address >> 32) == 0);
  reinterpret_cast<A64CodeCache*>(backend_->code_cache())
      ->AddIndirection(function->address(),
                       static_cast<uint32_t>(host_address));

  return true;
}

void A64Assembler::DumpMachineCode(
    void* machine_code, size_t code_size,
    const std::vector<SourceMapEntry>& source_map, StringBuffer* str) {
  if (source_map.empty()) {
    return;
  }
  size_t source_map_index = 0;
  uint32_t next_code_offset = source_map[0].code_offset;

  const uint8_t* code_ptr = reinterpret_cast<uint8_t*>(machine_code);
  size_t remaining_code_size = code_size;
  uint64_t address = uint64_t(machine_code);
  cs_insn insn = {0};
  while (remaining_code_size &&
         cs_disasm_iter(capstone_handle_, &code_ptr, &remaining_code_size,
                        &address, &insn)) {
    // Look up source offset.
    auto code_offset =
        uint32_t(code_ptr - reinterpret_cast<uint8_t*>(machine_code));
    if (code_offset >= next_code_offset &&
        source_map_index < source_map.size()) {
      auto& source_map_entry = source_map[source_map_index];
      str->AppendFormat("{:08X} ", source_map_entry.guest_address);
      ++source_map_index;
      next_code_offset = source_map_index < source_map.size()
                             ? source_map[source_map_index].code_offset
                             : UINT_MAX;
    } else {
      str->Append("         ");
    }

    str->AppendFormat("{:08X} {:<6} {}\n", uint32_t(insn.address),
                      insn.mnemonic, insn.op_str);
  }
}

}  // namespace a64
}  // namespace backend
}  // namespace cpu
}  // namespace xe
src/xenia/cpu/backend/a64/a64_assembler.h (new file)
@@ -0,0 +1,59 @@
/**
 ******************************************************************************
 * Xenia : Xbox 360 Emulator Research Project                                 *
 ******************************************************************************
 * Copyright 2024 Ben Vanik. All rights reserved.                             *
 * Released under the BSD license - see LICENSE in the root for more details. *
 ******************************************************************************
 */

#ifndef XENIA_CPU_BACKEND_A64_A64_ASSEMBLER_H_
#define XENIA_CPU_BACKEND_A64_A64_ASSEMBLER_H_

#include <memory>
#include <vector>

#include "xenia/base/string_buffer.h"
#include "xenia/cpu/backend/assembler.h"
#include "xenia/cpu/function.h"

namespace xe {
namespace cpu {
namespace backend {
namespace a64 {

class A64Backend;
class A64Emitter;

class A64Assembler : public Assembler {
 public:
  explicit A64Assembler(A64Backend* backend);
  ~A64Assembler() override;

  bool Initialize() override;

  void Reset() override;

  bool Assemble(GuestFunction* function, hir::HIRBuilder* builder,
                uint32_t debug_info_flags,
                std::unique_ptr<FunctionDebugInfo> debug_info) override;

 private:
  void DumpMachineCode(void* machine_code, size_t code_size,
                       const std::vector<SourceMapEntry>& source_map,
                       StringBuffer* str);

 private:
  A64Backend* a64_backend_;
  std::unique_ptr<A64Emitter> emitter_;
  uintptr_t capstone_handle_;

  StringBuffer string_buffer_;
};

}  // namespace a64
}  // namespace backend
}  // namespace cpu
}  // namespace xe

#endif  // XENIA_CPU_BACKEND_A64_A64_ASSEMBLER_H_
src/xenia/cpu/backend/a64/a64_backend.cc (new file)
@@ -0,0 +1,735 @@
/**
 ******************************************************************************
 * Xenia : Xbox 360 Emulator Research Project                                 *
 ******************************************************************************
 * Copyright 2024 Ben Vanik. All rights reserved.                             *
 * Released under the BSD license - see LICENSE in the root for more details. *
 ******************************************************************************
 */

#include "xenia/cpu/backend/a64/a64_backend.h"

#include <cstddef>

#include "third_party/capstone/include/capstone/arm64.h"
#include "third_party/capstone/include/capstone/capstone.h"

#include "xenia/base/exception_handler.h"
#include "xenia/base/logging.h"
#include "xenia/cpu/backend/a64/a64_assembler.h"
#include "xenia/cpu/backend/a64/a64_code_cache.h"
#include "xenia/cpu/backend/a64/a64_emitter.h"
#include "xenia/cpu/backend/a64/a64_function.h"
#include "xenia/cpu/backend/a64/a64_sequences.h"
#include "xenia/cpu/backend/a64/a64_stack_layout.h"
#include "xenia/cpu/breakpoint.h"
#include "xenia/cpu/processor.h"
#include "xenia/cpu/stack_walker.h"

DEFINE_int32(a64_extension_mask, -1,
             "Allow the detection and utilization of specific instruction set "
             "features.\n"
             " 0 = armv8.0\n"
             " 1 = LSE\n"
             " 2 = F16C\n"
             " -1 = Detect and utilize all possible processor features\n",
             "a64");

namespace xe {
namespace cpu {
namespace backend {
namespace a64 {

using namespace oaknut::util;

class A64ThunkEmitter : public A64Emitter {
 public:
  A64ThunkEmitter(A64Backend* backend);
  ~A64ThunkEmitter() override;
  HostToGuestThunk EmitHostToGuestThunk();
  GuestToHostThunk EmitGuestToHostThunk();
  ResolveFunctionThunk EmitResolveFunctionThunk();

 private:
  // The following four functions provide save/load functionality for
  // registers. They assume at least StackLayout::THUNK_STACK_SIZE bytes have
  // been allocated on the stack.

  // Caller-saved:
  // Don't assume these registers will survive a subroutine call.
  // x0 and v0 are not saved, as they carry arg0 and the return value.
  // x1-x15, x30 | v0-v7 and v16-v31
  void EmitSaveVolatileRegs();
  void EmitLoadVolatileRegs();

  // Callee-saved:
  // Subroutines must preserve these registers if they intend to use them.
  // x19-x30 | d8-d15
  void EmitSaveNonvolatileRegs();
  void EmitLoadNonvolatileRegs();
};

A64Backend::A64Backend() : Backend(), code_cache_(nullptr) {
  if (cs_open(CS_ARCH_ARM64, CS_MODE_LITTLE_ENDIAN, &capstone_handle_) !=
      CS_ERR_OK) {
    assert_always("Failed to initialize capstone");
  }
  cs_option(capstone_handle_, CS_OPT_SYNTAX, CS_OPT_SYNTAX_INTEL);
  cs_option(capstone_handle_, CS_OPT_DETAIL, CS_OPT_ON);
  cs_option(capstone_handle_, CS_OPT_SKIPDATA, CS_OPT_OFF);
}

A64Backend::~A64Backend() {
  if (capstone_handle_) {
    cs_close(&capstone_handle_);
  }

  A64Emitter::FreeConstData(emitter_data_);
  ExceptionHandler::Uninstall(&ExceptionCallbackThunk, this);
}

bool A64Backend::Initialize(Processor* processor) {
  if (!Backend::Initialize(processor)) {
    return false;
  }

  auto& gprs = machine_info_.register_sets[0];
  gprs.id = 0;
  std::strcpy(gprs.name, "x");
  gprs.types = MachineInfo::RegisterSet::INT_TYPES;
  gprs.count = A64Emitter::GPR_COUNT;

  auto& fprs = machine_info_.register_sets[1];
  fprs.id = 1;
  std::strcpy(fprs.name, "v");
  fprs.types = MachineInfo::RegisterSet::FLOAT_TYPES |
               MachineInfo::RegisterSet::VEC_TYPES;
  fprs.count = A64Emitter::FPR_COUNT;

  code_cache_ = A64CodeCache::Create();
  Backend::code_cache_ = code_cache_.get();
  if (!code_cache_->Initialize()) {
    return false;
  }

  // Generate thunks used to transition between jitted code and host code.
  A64ThunkEmitter thunk_emitter(this);
  host_to_guest_thunk_ = thunk_emitter.EmitHostToGuestThunk();
  guest_to_host_thunk_ = thunk_emitter.EmitGuestToHostThunk();
  resolve_function_thunk_ = thunk_emitter.EmitResolveFunctionThunk();

  // Set the code cache to use the ResolveFunction thunk for default
  // indirections.
  assert_zero(uint64_t(resolve_function_thunk_) & 0xFFFFFFFF00000000ull);
  code_cache_->set_indirection_default(
      uint32_t(uint64_t(resolve_function_thunk_)));

  // Allocate some special indirections.
  code_cache_->CommitExecutableRange(0x9FFF0000, 0x9FFFFFFF);

  // Allocate emitter constant data.
  emitter_data_ = A64Emitter::PlaceConstData();

  // Set up the exception callback.
  ExceptionHandler::Install(&ExceptionCallbackThunk, this);

  return true;
}

void A64Backend::CommitExecutableRange(uint32_t guest_low,
                                       uint32_t guest_high) {
  code_cache_->CommitExecutableRange(guest_low, guest_high);
}

std::unique_ptr<Assembler> A64Backend::CreateAssembler() {
  return std::make_unique<A64Assembler>(this);
}

std::unique_ptr<GuestFunction> A64Backend::CreateGuestFunction(
    Module* module, uint32_t address) {
  return std::make_unique<A64Function>(module, address);
}

uint64_t ReadCapstoneReg(HostThreadContext* context, arm64_reg reg) {
  switch (reg) {
    case ARM64_REG_X0:
      return context->x[0];
    case ARM64_REG_X1:
      return context->x[1];
    case ARM64_REG_X2:
      return context->x[2];
    case ARM64_REG_X3:
      return context->x[3];
    case ARM64_REG_X4:
      return context->x[4];
    case ARM64_REG_X5:
      return context->x[5];
    case ARM64_REG_X6:
      return context->x[6];
    case ARM64_REG_X7:
      return context->x[7];
    case ARM64_REG_X8:
      return context->x[8];
    case ARM64_REG_X9:
      return context->x[9];
    case ARM64_REG_X10:
      return context->x[10];
    case ARM64_REG_X11:
      return context->x[11];
    case ARM64_REG_X12:
      return context->x[12];
    case ARM64_REG_X13:
      return context->x[13];
    case ARM64_REG_X14:
      return context->x[14];
    case ARM64_REG_X15:
      return context->x[15];
    case ARM64_REG_X16:
      return context->x[16];
    case ARM64_REG_X17:
      return context->x[17];
    case ARM64_REG_X18:
      return context->x[18];
    case ARM64_REG_X19:
      return context->x[19];
    case ARM64_REG_X20:
      return context->x[20];
    case ARM64_REG_X21:
      return context->x[21];
    case ARM64_REG_X22:
      return context->x[22];
    case ARM64_REG_X23:
      return context->x[23];
    case ARM64_REG_X24:
      return context->x[24];
    case ARM64_REG_X25:
      return context->x[25];
    case ARM64_REG_X26:
      return context->x[26];
    case ARM64_REG_X27:
      return context->x[27];
    case ARM64_REG_X28:
      return context->x[28];
    case ARM64_REG_X29:
      return context->x[29];
    case ARM64_REG_X30:
      return context->x[30];
    case ARM64_REG_W0:
      return uint32_t(context->x[0]);
    case ARM64_REG_W1:
      return uint32_t(context->x[1]);
    case ARM64_REG_W2:
      return uint32_t(context->x[2]);
    case ARM64_REG_W3:
      return uint32_t(context->x[3]);
    case ARM64_REG_W4:
      return uint32_t(context->x[4]);
    case ARM64_REG_W5:
      return uint32_t(context->x[5]);
    case ARM64_REG_W6:
      return uint32_t(context->x[6]);
    case ARM64_REG_W7:
      return uint32_t(context->x[7]);
    case ARM64_REG_W8:
      return uint32_t(context->x[8]);
    case ARM64_REG_W9:
      return uint32_t(context->x[9]);
    case ARM64_REG_W10:
      return uint32_t(context->x[10]);
    case ARM64_REG_W11:
      return uint32_t(context->x[11]);
    case ARM64_REG_W12:
      return uint32_t(context->x[12]);
    case ARM64_REG_W13:
      return uint32_t(context->x[13]);
    case ARM64_REG_W14:
      return uint32_t(context->x[14]);
    case ARM64_REG_W15:
      return uint32_t(context->x[15]);
    case ARM64_REG_W16:
      return uint32_t(context->x[16]);
    case ARM64_REG_W17:
      return uint32_t(context->x[17]);
    case ARM64_REG_W18:
      return uint32_t(context->x[18]);
    case ARM64_REG_W19:
      return uint32_t(context->x[19]);
    case ARM64_REG_W20:
      return uint32_t(context->x[20]);
    case ARM64_REG_W21:
      return uint32_t(context->x[21]);
    case ARM64_REG_W22:
      return uint32_t(context->x[22]);
    case ARM64_REG_W23:
      return uint32_t(context->x[23]);
    case ARM64_REG_W24:
      return uint32_t(context->x[24]);
    case ARM64_REG_W25:
      return uint32_t(context->x[25]);
    case ARM64_REG_W26:
      return uint32_t(context->x[26]);
    case ARM64_REG_W27:
      return uint32_t(context->x[27]);
    case ARM64_REG_W28:
      return uint32_t(context->x[28]);
    case ARM64_REG_W29:
      return uint32_t(context->x[29]);
    case ARM64_REG_W30:
      return uint32_t(context->x[30]);
    default:
      assert_unhandled_case(reg);
      return 0;
  }
}

bool TestCapstonePstate(arm64_cc cond, uint32_t pstate) {
  // https://devblogs.microsoft.com/oldnewthing/20220815-00/?p=106975
  // The upper four bits of pstate are NZCV.
  const bool N = !!(pstate & 0x80000000);
  const bool Z = !!(pstate & 0x40000000);
  const bool C = !!(pstate & 0x20000000);
  const bool V = !!(pstate & 0x10000000);
  switch (cond) {
    case ARM64_CC_EQ:
      return (Z == true);
    case ARM64_CC_NE:
      return (Z == false);
    case ARM64_CC_HS:
      return (C == true);
    case ARM64_CC_LO:
      return (C == false);
    case ARM64_CC_MI:
      return (N == true);
    case ARM64_CC_PL:
      return (N == false);
    case ARM64_CC_VS:
      return (V == true);
    case ARM64_CC_VC:
      return (V == false);
    case ARM64_CC_HI:
      return ((C == true) && (Z == false));
    case ARM64_CC_LS:
      return ((C == false) || (Z == true));
    case ARM64_CC_GE:
      return (N == V);
    case ARM64_CC_LT:
      return (N != V);
    case ARM64_CC_GT:
      return ((Z == false) && (N == V));
    case ARM64_CC_LE:
      return ((Z == true) || (N != V));
    case ARM64_CC_AL:
      return true;
    case ARM64_CC_NV:
      return false;
    default:
      assert_unhandled_case(cond);
      return false;
  }
}

uint64_t A64Backend::CalculateNextHostInstruction(ThreadDebugInfo* thread_info,
                                                  uint64_t current_pc) {
  auto machine_code_ptr = reinterpret_cast<const uint8_t*>(current_pc);
  size_t remaining_machine_code_size = 64;
  uint64_t host_address = current_pc;
  cs_insn insn = {0};
  cs_detail all_detail = {0};
  insn.detail = &all_detail;
  cs_disasm_iter(capstone_handle_, &machine_code_ptr,
                 &remaining_machine_code_size, &host_address, &insn);
  const auto& detail = all_detail.arm64;
  switch (insn.id) {
    case ARM64_INS_B:
    case ARM64_INS_BL: {
      assert_true(detail.operands[0].type == ARM64_OP_IMM);
      const int64_t pc_offset = static_cast<int64_t>(detail.operands[0].imm);
      const bool test_passed =
          TestCapstonePstate(detail.cc, thread_info->host_context.cpsr);
      if (test_passed) {
        return current_pc + pc_offset;
      } else {
        return current_pc + insn.size;
      }
    } break;
    case ARM64_INS_BR:
    case ARM64_INS_BLR: {
      assert_true(detail.operands[0].type == ARM64_OP_REG);
      const uint64_t target_pc =
          ReadCapstoneReg(&thread_info->host_context, detail.operands[0].reg);
      return target_pc;
    } break;
    case ARM64_INS_RET: {
      assert_true(detail.operands[0].type == ARM64_OP_REG);
      const uint64_t target_pc =
          ReadCapstoneReg(&thread_info->host_context, detail.operands[0].reg);
      return target_pc;
    } break;
    case ARM64_INS_CBNZ: {
      assert_true(detail.operands[0].type == ARM64_OP_REG);
      assert_true(detail.operands[1].type == ARM64_OP_IMM);
      const int64_t pc_offset = static_cast<int64_t>(detail.operands[1].imm);
      const bool test_passed = (0 != ReadCapstoneReg(&thread_info->host_context,
                                                     detail.operands[0].reg));
      if (test_passed) {
        return current_pc + pc_offset;
      } else {
        return current_pc + insn.size;
      }
    } break;
    case ARM64_INS_CBZ: {
      assert_true(detail.operands[0].type == ARM64_OP_REG);
      assert_true(detail.operands[1].type == ARM64_OP_IMM);
      const int64_t pc_offset = static_cast<int64_t>(detail.operands[1].imm);
      const bool test_passed = (0 == ReadCapstoneReg(&thread_info->host_context,
                                                     detail.operands[0].reg));
      if (test_passed) {
        return current_pc + pc_offset;
      } else {
        return current_pc + insn.size;
      }
    } break;
    default: {
      // Not a branching instruction - just move over it.
      return current_pc + insn.size;
    } break;
  }
}

void A64Backend::InstallBreakpoint(Breakpoint* breakpoint) {
  breakpoint->ForEachHostAddress([breakpoint](uint64_t host_address) {
    auto ptr = reinterpret_cast<void*>(host_address);
    auto original_bytes = xe::load_and_swap<uint32_t>(ptr);
    assert_true(original_bytes != 0x0000'dead);
    xe::store_and_swap<uint32_t>(ptr, 0x0000'dead);
    breakpoint->backend_data().emplace_back(host_address, original_bytes);
  });
}

void A64Backend::InstallBreakpoint(Breakpoint* breakpoint, Function* fn) {
  assert_true(breakpoint->address_type() == Breakpoint::AddressType::kGuest);
  assert_true(fn->is_guest());
  auto guest_function = reinterpret_cast<cpu::GuestFunction*>(fn);
  auto host_address =
      guest_function->MapGuestAddressToMachineCode(breakpoint->guest_address());
  if (!host_address) {
    assert_always();
    return;
  }

  // Assume we haven't already installed a breakpoint in this spot.
  auto ptr = reinterpret_cast<void*>(host_address);
  auto original_bytes = xe::load_and_swap<uint32_t>(ptr);
  assert_true(original_bytes != 0x0000'dead);
  xe::store_and_swap<uint32_t>(ptr, 0x0000'dead);
  breakpoint->backend_data().emplace_back(host_address, original_bytes);
}

void A64Backend::UninstallBreakpoint(Breakpoint* breakpoint) {
  for (auto& pair : breakpoint->backend_data()) {
    auto ptr = reinterpret_cast<uint8_t*>(pair.first);
    auto instruction_bytes = xe::load_and_swap<uint32_t>(ptr);
    assert_true(instruction_bytes == 0x0000'dead);
    xe::store_and_swap<uint32_t>(ptr, static_cast<uint32_t>(pair.second));
  }
  breakpoint->backend_data().clear();
}

bool A64Backend::ExceptionCallbackThunk(Exception* ex, void* data) {
  auto backend = reinterpret_cast<A64Backend*>(data);
  return backend->ExceptionCallback(ex);
}

bool A64Backend::ExceptionCallback(Exception* ex) {
  if (ex->code() != Exception::Code::kIllegalInstruction) {
    // We only care about illegal instructions. Other things will be handled
    // by other handlers (probably). If nothing else picks it up we'll be
    // called with OnUnhandledException to do real crash handling.
    return false;
  }

  // Verify an expected illegal instruction.
  auto instruction_bytes =
      xe::load_and_swap<uint32_t>(reinterpret_cast<void*>(ex->pc()));
  if (instruction_bytes != 0x0000'dead) {
    // Not our `udf #0xdead` - not us.
    return false;
  }

  // Let the processor handle things.
  return processor()->OnThreadBreakpointHit(ex);
}

A64ThunkEmitter::A64ThunkEmitter(A64Backend* backend) : A64Emitter(backend) {}

A64ThunkEmitter::~A64ThunkEmitter() {}

HostToGuestThunk A64ThunkEmitter::EmitHostToGuestThunk() {
  // X0 = target
  // X1 = arg0 (context)
  // X2 = arg1 (guest return address)

  struct _code_offsets {
    size_t prolog;
    size_t prolog_stack_alloc;
    size_t body;
    size_t epilog;
    size_t tail;
  } code_offsets = {};

  const size_t stack_size = StackLayout::THUNK_STACK_SIZE;

  code_offsets.prolog = offset();

  SUB(SP, SP, stack_size);

  code_offsets.prolog_stack_alloc = offset();
  code_offsets.body = offset();

  EmitSaveNonvolatileRegs();

  MOV(X16, X0);
  MOV(GetContextReg(), X1);  // context
  MOV(X0, X2);               // return address
  BLR(X16);

  EmitLoadNonvolatileRegs();

  code_offsets.epilog = offset();

  ADD(SP, SP, stack_size);

  RET();

  code_offsets.tail = offset();

  assert_zero(code_offsets.prolog);
  EmitFunctionInfo func_info = {};
  func_info.code_size.total = offset();
  func_info.code_size.prolog = code_offsets.body - code_offsets.prolog;
  func_info.code_size.body = code_offsets.epilog - code_offsets.body;
  func_info.code_size.epilog = code_offsets.tail - code_offsets.epilog;
  func_info.code_size.tail = offset() - code_offsets.tail;
  func_info.prolog_stack_alloc_offset =
      code_offsets.prolog_stack_alloc - code_offsets.prolog;
  func_info.stack_size = stack_size;

  void* fn = Emplace(func_info);
  return (HostToGuestThunk)fn;
}

GuestToHostThunk A64ThunkEmitter::EmitGuestToHostThunk() {
  // X0 = target function
  // X1 = arg0
  // X2 = arg1
  // X3 = arg2

  struct _code_offsets {
    size_t prolog;
    size_t prolog_stack_alloc;
    size_t body;
    size_t epilog;
    size_t tail;
  } code_offsets = {};

  const size_t stack_size = StackLayout::THUNK_STACK_SIZE;

  code_offsets.prolog = offset();

  SUB(SP, SP, stack_size);

  code_offsets.prolog_stack_alloc = offset();
  code_offsets.body = offset();

  EmitSaveVolatileRegs();

  MOV(X16, X0);              // function
  MOV(X0, GetContextReg());  // context
  BLR(X16);

  EmitLoadVolatileRegs();

  code_offsets.epilog = offset();

  ADD(SP, SP, stack_size);
  RET();

  code_offsets.tail = offset();

  assert_zero(code_offsets.prolog);
  EmitFunctionInfo func_info = {};
  func_info.code_size.total = offset();
  func_info.code_size.prolog = code_offsets.body - code_offsets.prolog;
  func_info.code_size.body = code_offsets.epilog - code_offsets.body;
  func_info.code_size.epilog = code_offsets.tail - code_offsets.epilog;
  func_info.code_size.tail = offset() - code_offsets.tail;
  func_info.prolog_stack_alloc_offset =
      code_offsets.prolog_stack_alloc - code_offsets.prolog;
  func_info.stack_size = stack_size;

  void* fn = Emplace(func_info);
  return (GuestToHostThunk)fn;
}

// A64Emitter handles actually resolving functions.
uint64_t ResolveFunction(void* raw_context, uint64_t target_address);

ResolveFunctionThunk A64ThunkEmitter::EmitResolveFunctionThunk() {
  // Entry:
  // W17 = target PPC address
  // X0 = context

  struct _code_offsets {
    size_t prolog;
    size_t prolog_stack_alloc;
    size_t body;
    size_t epilog;
    size_t tail;
  } code_offsets = {};

  const size_t stack_size = StackLayout::THUNK_STACK_SIZE;

  code_offsets.prolog = offset();

  // Preserve the context register.
  STP(ZR, X0, SP, PRE_INDEXED, -16);

  SUB(SP, SP, stack_size);

  code_offsets.prolog_stack_alloc = offset();
  code_offsets.body = offset();

  EmitSaveVolatileRegs();

  // mov(rcx, rsi);  // context
  // mov(rdx, rbx);
  // mov(rax, reinterpret_cast<uint64_t>(&ResolveFunction));
  // call(rax)
  MOV(X0, GetContextReg());  // context
  MOV(W1, W17);
  MOV(X16, reinterpret_cast<uint64_t>(&ResolveFunction));
  BLR(X16);
  MOV(X16, X0);

  EmitLoadVolatileRegs();

  code_offsets.epilog = offset();

  // add(rsp, stack_size);
  // jmp(rax);
  ADD(SP, SP, stack_size);

  // Reload the context register.
  LDP(ZR, X0, SP, POST_INDEXED, 16);
  BR(X16);

  code_offsets.tail = offset();

  assert_zero(code_offsets.prolog);
  EmitFunctionInfo func_info = {};
  func_info.code_size.total = offset();
  func_info.code_size.prolog = code_offsets.body - code_offsets.prolog;
  func_info.code_size.body = code_offsets.epilog - code_offsets.body;
  func_info.code_size.epilog = code_offsets.tail - code_offsets.epilog;
  func_info.code_size.tail = offset() - code_offsets.tail;
  func_info.prolog_stack_alloc_offset =
      code_offsets.prolog_stack_alloc - code_offsets.prolog;
  func_info.stack_size = stack_size;

  void* fn = Emplace(func_info);
  return (ResolveFunctionThunk)fn;
}

void A64ThunkEmitter::EmitSaveVolatileRegs() {
  // Save off volatile registers.
  // Preserve arguments passed to and returned from a subroutine.
  // STR(X0, SP, offsetof(StackLayout::Thunk, r[0]));
  STP(X1, X2, SP, offsetof(StackLayout::Thunk, r[0]));
  STP(X3, X4, SP, offsetof(StackLayout::Thunk, r[2]));
  STP(X5, X6, SP, offsetof(StackLayout::Thunk, r[4]));
  STP(X7, X8, SP, offsetof(StackLayout::Thunk, r[6]));
  STP(X9, X10, SP, offsetof(StackLayout::Thunk, r[8]));
  STP(X11, X12, SP, offsetof(StackLayout::Thunk, r[10]));
  STP(X13, X14, SP, offsetof(StackLayout::Thunk, r[12]));
  STP(X15, X30, SP, offsetof(StackLayout::Thunk, r[14]));

  // Preserve arguments passed to and returned from a subroutine.
  // STR(Q0, SP, offsetof(StackLayout::Thunk, xmm[0]));
  STP(Q1, Q2, SP, offsetof(StackLayout::Thunk, xmm[0]));
  STP(Q3, Q4, SP, offsetof(StackLayout::Thunk, xmm[2]));
  STP(Q5, Q6, SP, offsetof(StackLayout::Thunk, xmm[4]));
  STP(Q7, Q16, SP, offsetof(StackLayout::Thunk, xmm[6]));
  STP(Q17, Q18, SP, offsetof(StackLayout::Thunk, xmm[8]));
  STP(Q19, Q20, SP, offsetof(StackLayout::Thunk, xmm[10]));
  STP(Q21, Q22, SP, offsetof(StackLayout::Thunk, xmm[12]));
  STP(Q23, Q24, SP, offsetof(StackLayout::Thunk, xmm[14]));
  STP(Q25, Q26, SP, offsetof(StackLayout::Thunk, xmm[16]));
  STP(Q27, Q28, SP, offsetof(StackLayout::Thunk, xmm[18]));
  STP(Q29, Q30, SP, offsetof(StackLayout::Thunk, xmm[20]));
  // Each Q pair spans two 16-byte slots, so Q29/Q30 occupy slots 20 and 21;
  // Q31 goes in slot 22 to avoid overlapping Q30 (assumes StackLayout::Thunk
  // reserves a slot for it).
  STR(Q31, SP, offsetof(StackLayout::Thunk, xmm[22]));
}

void A64ThunkEmitter::EmitLoadVolatileRegs() {
  // Preserve arguments passed to and returned from a subroutine.
  // LDR(X0, SP, offsetof(StackLayout::Thunk, r[0]));
  LDP(X1, X2, SP, offsetof(StackLayout::Thunk, r[0]));
  LDP(X3, X4, SP, offsetof(StackLayout::Thunk, r[2]));
  LDP(X5, X6, SP, offsetof(StackLayout::Thunk, r[4]));
  LDP(X7, X8, SP, offsetof(StackLayout::Thunk, r[6]));
  LDP(X9, X10, SP, offsetof(StackLayout::Thunk, r[8]));
  LDP(X11, X12, SP, offsetof(StackLayout::Thunk, r[10]));
  LDP(X13, X14, SP, offsetof(StackLayout::Thunk, r[12]));
  LDP(X15, X30, SP, offsetof(StackLayout::Thunk, r[14]));

  // Preserve arguments passed to and returned from a subroutine.
  // LDR(Q0, SP, offsetof(StackLayout::Thunk, xmm[0]));
  LDP(Q1, Q2, SP, offsetof(StackLayout::Thunk, xmm[0]));
  LDP(Q3, Q4, SP, offsetof(StackLayout::Thunk, xmm[2]));
  LDP(Q5, Q6, SP, offsetof(StackLayout::Thunk, xmm[4]));
  LDP(Q7, Q16, SP, offsetof(StackLayout::Thunk, xmm[6]));
  LDP(Q17, Q18, SP, offsetof(StackLayout::Thunk, xmm[8]));
  LDP(Q19, Q20, SP, offsetof(StackLayout::Thunk, xmm[10]));
  LDP(Q21, Q22, SP, offsetof(StackLayout::Thunk, xmm[12]));
  LDP(Q23, Q24, SP, offsetof(StackLayout::Thunk, xmm[14]));
  LDP(Q25, Q26, SP, offsetof(StackLayout::Thunk, xmm[16]));
  LDP(Q27, Q28, SP, offsetof(StackLayout::Thunk, xmm[18]));
  LDP(Q29, Q30, SP, offsetof(StackLayout::Thunk, xmm[20]));
  // Mirrors the store above: Q31 lives in slot 22, after the Q29/Q30 pair.
  LDR(Q31, SP, offsetof(StackLayout::Thunk, xmm[22]));
}

void A64ThunkEmitter::EmitSaveNonvolatileRegs() {
  STP(X19, X20, SP, offsetof(StackLayout::Thunk, r[0]));
  STP(X21, X22, SP, offsetof(StackLayout::Thunk, r[2]));
  STP(X23, X24, SP, offsetof(StackLayout::Thunk, r[4]));
  STP(X25, X26, SP, offsetof(StackLayout::Thunk, r[6]));
  STP(X27, X28, SP, offsetof(StackLayout::Thunk, r[8]));
  STP(X29, X30, SP, offsetof(StackLayout::Thunk, r[10]));

  STR(X17, SP, offsetof(StackLayout::Thunk, r[12]));

  STP(D8, D9, SP, offsetof(StackLayout::Thunk, xmm[0]));
  STP(D10, D11, SP, offsetof(StackLayout::Thunk, xmm[1]));
  STP(D12, D13, SP, offsetof(StackLayout::Thunk, xmm[2]));
  STP(D14, D15, SP, offsetof(StackLayout::Thunk, xmm[3]));
}

void A64ThunkEmitter::EmitLoadNonvolatileRegs() {
  LDP(X19, X20, SP, offsetof(StackLayout::Thunk, r[0]));
  LDP(X21, X22, SP, offsetof(StackLayout::Thunk, r[2]));
  LDP(X23, X24, SP, offsetof(StackLayout::Thunk, r[4]));
  LDP(X25, X26, SP, offsetof(StackLayout::Thunk, r[6]));
  LDP(X27, X28, SP, offsetof(StackLayout::Thunk, r[8]));
  LDP(X29, X30, SP, offsetof(StackLayout::Thunk, r[10]));

  LDR(X17, SP, offsetof(StackLayout::Thunk, r[12]));

  LDP(D8, D9, SP, offsetof(StackLayout::Thunk, xmm[0]));
  LDP(D10, D11, SP, offsetof(StackLayout::Thunk, xmm[1]));
  LDP(D12, D13, SP, offsetof(StackLayout::Thunk, xmm[2]));
  LDP(D14, D15, SP, offsetof(StackLayout::Thunk, xmm[3]));
}

}  // namespace a64
}  // namespace backend
}  // namespace cpu
}  // namespace xe
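TestCapstonePstate above decodes NZCV from the top four bits of pstate. A worked example (illustrative, standalone; not part of the diff):

#include <cstdint>
#include <cstdio>

int main() {
  const uint32_t pstate = 0x20000000;  // N=0 Z=0 C=1 V=0
  const bool N = !!(pstate & 0x80000000);
  const bool Z = !!(pstate & 0x40000000);
  const bool C = !!(pstate & 0x20000000);
  const bool V = !!(pstate & 0x10000000);
  // ARM64_CC_HI (unsigned higher): C set and Z clear -> taken.
  std::printf("B.HI taken: %d\n", C && !Z);         // prints 1
  // ARM64_CC_GT (signed greater): Z clear and N == V -> taken.
  std::printf("B.GT taken: %d\n", !Z && (N == V));  // prints 1
  return 0;
}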
src/xenia/cpu/backend/a64/a64_backend.h (new file)
@@ -0,0 +1,88 @@
/**
 ******************************************************************************
 * Xenia : Xbox 360 Emulator Research Project                                 *
 ******************************************************************************
 * Copyright 2024 Ben Vanik. All rights reserved.                             *
 * Released under the BSD license - see LICENSE in the root for more details. *
 ******************************************************************************
 */

#ifndef XENIA_CPU_BACKEND_A64_A64_BACKEND_H_
#define XENIA_CPU_BACKEND_A64_A64_BACKEND_H_

#include <memory>

#include "xenia/base/cvar.h"
#include "xenia/cpu/backend/backend.h"

DECLARE_int32(a64_extension_mask);

namespace xe {
class Exception;
}  // namespace xe
namespace xe {
namespace cpu {
namespace backend {
namespace a64 {

class A64CodeCache;

typedef void* (*HostToGuestThunk)(void* target, void* arg0, void* arg1);
typedef void* (*GuestToHostThunk)(void* target, void* arg0, void* arg1);
typedef void (*ResolveFunctionThunk)();

class A64Backend : public Backend {
 public:
  static const uint32_t kForceReturnAddress = 0x9FFF0000u;

  explicit A64Backend();
  ~A64Backend() override;

  A64CodeCache* code_cache() const { return code_cache_.get(); }
  uintptr_t emitter_data() const { return emitter_data_; }

  // Calls a generated function, saving all stack parameters.
  HostToGuestThunk host_to_guest_thunk() const { return host_to_guest_thunk_; }
  // Function that guest code can call to transition into host code.
  GuestToHostThunk guest_to_host_thunk() const { return guest_to_host_thunk_; }
  // Function that thunks to the ResolveFunction in A64Emitter.
  ResolveFunctionThunk resolve_function_thunk() const {
    return resolve_function_thunk_;
  }

  bool Initialize(Processor* processor) override;

  void CommitExecutableRange(uint32_t guest_low, uint32_t guest_high) override;

  std::unique_ptr<Assembler> CreateAssembler() override;

  std::unique_ptr<GuestFunction> CreateGuestFunction(Module* module,
                                                     uint32_t address) override;

  uint64_t CalculateNextHostInstruction(ThreadDebugInfo* thread_info,
                                        uint64_t current_pc) override;

  void InstallBreakpoint(Breakpoint* breakpoint) override;
  void InstallBreakpoint(Breakpoint* breakpoint, Function* fn) override;
  void UninstallBreakpoint(Breakpoint* breakpoint) override;

 private:
  static bool ExceptionCallbackThunk(Exception* ex, void* data);
  bool ExceptionCallback(Exception* ex);

  uintptr_t capstone_handle_ = 0;

  std::unique_ptr<A64CodeCache> code_cache_;
  uintptr_t emitter_data_ = 0;

  HostToGuestThunk host_to_guest_thunk_;
  GuestToHostThunk guest_to_host_thunk_;
  ResolveFunctionThunk resolve_function_thunk_;
};

}  // namespace a64
}  // namespace backend
}  // namespace cpu
}  // namespace xe

#endif  // XENIA_CPU_BACKEND_A64_A64_BACKEND_H_
@ -0,0 +1,342 @@
|
|||
/**
|
||||
******************************************************************************
|
||||
* Xenia : Xbox 360 Emulator Research Project *
|
||||
******************************************************************************
|
||||
* Copyright 2024 Ben Vanik. All rights reserved. *
|
||||
* Released under the BSD license - see LICENSE in the root for more details. *
|
||||
******************************************************************************
|
||||
*/
|
||||
|
||||
#include "xenia/cpu/backend/a64/a64_code_cache.h"
|
||||
|
||||
#include <cstdlib>
|
||||
#include <cstring>
|
||||
|
||||
#include "third_party/fmt/include/fmt/format.h"
|
||||
#include "xenia/base/assert.h"
|
||||
#include "xenia/base/clock.h"
|
||||
#include "xenia/base/literals.h"
|
||||
#include "xenia/base/logging.h"
|
||||
#include "xenia/base/math.h"
|
||||
#include "xenia/base/memory.h"
|
||||
#include "xenia/cpu/function.h"
|
||||
#include "xenia/cpu/module.h"
|
||||
|
||||
namespace xe {
|
||||
namespace cpu {
|
||||
namespace backend {
|
||||
namespace a64 {
|
||||
|
||||
using namespace xe::literals;
|
||||
|
||||
A64CodeCache::A64CodeCache() = default;
|
||||
|
||||
A64CodeCache::~A64CodeCache() {
|
||||
if (indirection_table_base_) {
|
||||
xe::memory::DeallocFixed(indirection_table_base_, 0,
|
||||
xe::memory::DeallocationType::kRelease);
|
||||
}
|
||||
|
||||
// Unmap all views and close mapping.
|
||||
if (mapping_ != xe::memory::kFileMappingHandleInvalid) {
|
||||
if (generated_code_write_base_ &&
|
||||
generated_code_write_base_ != generated_code_execute_base_) {
|
||||
xe::memory::UnmapFileView(mapping_, generated_code_write_base_,
|
||||
kGeneratedCodeSize);
|
||||
}
|
||||
if (generated_code_execute_base_) {
|
||||
xe::memory::UnmapFileView(mapping_, generated_code_execute_base_,
|
||||
kGeneratedCodeSize);
|
||||
}
|
||||
xe::memory::CloseFileMappingHandle(mapping_, file_name_);
|
||||
mapping_ = xe::memory::kFileMappingHandleInvalid;
|
||||
}
|
||||
}
|
||||
|
||||
bool A64CodeCache::Initialize() {
|
||||
indirection_table_base_ = reinterpret_cast<uint8_t*>(xe::memory::AllocFixed(
|
||||
reinterpret_cast<void*>(kIndirectionTableBase), kIndirectionTableSize,
|
||||
xe::memory::AllocationType::kReserve,
|
||||
xe::memory::PageAccess::kReadWrite));
|
||||
if (!indirection_table_base_) {
|
||||
XELOGE("Unable to allocate code cache indirection table");
|
||||
XELOGE(
|
||||
"This is likely because the {:X}-{:X} range is in use by some other "
|
||||
"system DLL",
|
||||
static_cast<uint64_t>(kIndirectionTableBase),
|
||||
kIndirectionTableBase + kIndirectionTableSize);
|
||||
}
|
||||
|
||||
// Create mmap file. This allows us to share the code cache with the debugger.
|
||||
file_name_ = fmt::format("xenia_code_cache_{}", Clock::QueryHostTickCount());
|
||||
mapping_ = xe::memory::CreateFileMappingHandle(
|
||||
file_name_, kGeneratedCodeSize, xe::memory::PageAccess::kExecuteReadWrite,
|
||||
false);
|
||||
if (mapping_ == xe::memory::kFileMappingHandleInvalid) {
|
||||
XELOGE("Unable to create code cache mmap");
|
||||
return false;
|
||||
}
|
||||
|
||||
// Map generated code region into the file. Pages are committed as required.
|
||||
if (xe::memory::IsWritableExecutableMemoryPreferred()) {
|
||||
generated_code_execute_base_ =
|
||||
reinterpret_cast<uint8_t*>(xe::memory::MapFileView(
|
||||
mapping_, reinterpret_cast<void*>(kGeneratedCodeExecuteBase),
|
||||
kGeneratedCodeSize, xe::memory::PageAccess::kExecuteReadWrite, 0));
|
||||
generated_code_write_base_ = generated_code_execute_base_;
|
||||
if (!generated_code_execute_base_ || !generated_code_write_base_) {
|
||||
XELOGE("Unable to allocate code cache generated code storage");
|
||||
XELOGE(
|
||||
"This is likely because the {:X}-{:X} range is in use by some other "
|
||||
"system DLL",
|
||||
uint64_t(kGeneratedCodeExecuteBase),
|
||||
uint64_t(kGeneratedCodeExecuteBase + kGeneratedCodeSize));
|
||||
return false;
|
||||
}
|
||||
} else {
|
||||
generated_code_execute_base_ =
|
||||
reinterpret_cast<uint8_t*>(xe::memory::MapFileView(
|
||||
mapping_, reinterpret_cast<void*>(kGeneratedCodeExecuteBase),
|
||||
kGeneratedCodeSize, xe::memory::PageAccess::kExecuteReadOnly, 0));
|
||||
generated_code_write_base_ =
|
||||
reinterpret_cast<uint8_t*>(xe::memory::MapFileView(
|
||||
mapping_, reinterpret_cast<void*>(kGeneratedCodeWriteBase),
|
||||
kGeneratedCodeSize, xe::memory::PageAccess::kReadWrite, 0));
|
||||
if (!generated_code_execute_base_ || !generated_code_write_base_) {
|
||||
XELOGE("Unable to allocate code cache generated code storage");
|
||||
XELOGE(
|
||||
"This is likely because the {:X}-{:X} and {:X}-{:X} ranges are in "
|
||||
"use by some other system DLL",
|
||||
uint64_t(kGeneratedCodeExecuteBase),
|
||||
uint64_t(kGeneratedCodeExecuteBase + kGeneratedCodeSize),
|
||||
uint64_t(kGeneratedCodeWriteBase),
|
||||
uint64_t(kGeneratedCodeWriteBase + kGeneratedCodeSize));
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
// Preallocate the function map to a large, reasonable size.
|
||||
generated_code_map_.reserve(kMaximumFunctionCount);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
void A64CodeCache::set_indirection_default(uint32_t default_value) {
|
||||
indirection_default_value_ = default_value;
|
||||
}
|
||||
|
||||
void A64CodeCache::AddIndirection(uint32_t guest_address,
|
||||
uint32_t host_address) {
|
||||
if (!indirection_table_base_) {
|
||||
return;
|
||||
}
|
||||
|
||||
uint32_t* indirection_slot = reinterpret_cast<uint32_t*>(
|
||||
indirection_table_base_ + (guest_address - kIndirectionTableBase));
|
||||
*indirection_slot = host_address;
|
||||
}
|
||||
|
||||
void A64CodeCache::CommitExecutableRange(uint32_t guest_low,
|
||||
uint32_t guest_high) {
|
||||
if (!indirection_table_base_) {
|
||||
return;
|
||||
}
|
||||
|
||||
// Commit the memory.
|
||||
xe::memory::AllocFixed(
|
||||
indirection_table_base_ + (guest_low - kIndirectionTableBase),
|
||||
guest_high - guest_low, xe::memory::AllocationType::kCommit,
|
||||
xe::memory::PageAccess::kReadWrite);
|
||||
|
||||
// Fill memory with the default value.
|
||||
uint32_t* p = reinterpret_cast<uint32_t*>(indirection_table_base_);
|
||||
for (uint32_t address = guest_low; address < guest_high; ++address) {
|
||||
p[(address - kIndirectionTableBase) / 4] = indirection_default_value_;
|
||||
}
|
||||
}
|
||||

void A64CodeCache::PlaceHostCode(uint32_t guest_address, void* machine_code,
                                 const EmitFunctionInfo& func_info,
                                 void*& code_execute_address_out,
                                 void*& code_write_address_out) {
  // Same for now. We may use different pools or whatnot later on, like when
  // we only want to place guest code in a serialized cache on disk.
  PlaceGuestCode(guest_address, machine_code, func_info, nullptr,
                 code_execute_address_out, code_write_address_out);
}

void A64CodeCache::PlaceGuestCode(uint32_t guest_address, void* machine_code,
                                  const EmitFunctionInfo& func_info,
                                  GuestFunction* function_info,
                                  void*& code_execute_address_out,
                                  void*& code_write_address_out) {
  // Hold a lock while we bump the pointers up. This is important as the
  // unwind table requires entries AND code to be sorted in order.
  size_t low_mark;
  size_t high_mark;
  uint8_t* code_execute_address;
  UnwindReservation unwind_reservation;
  {
    auto global_lock = global_critical_region_.Acquire();

    low_mark = generated_code_offset_;

    // Reserve code.
    // Always move the code to land on 16b alignment.
    code_execute_address =
        generated_code_execute_base_ + generated_code_offset_;
    code_execute_address_out = code_execute_address;
    uint8_t* code_write_address =
        generated_code_write_base_ + generated_code_offset_;
    code_write_address_out = code_write_address;
    generated_code_offset_ += xe::round_up(func_info.code_size.total, 16);

    auto tail_write_address =
        generated_code_write_base_ + generated_code_offset_;

    // Reserve unwind info.
    // We go on the high size of the unwind info as we don't know how big we
    // need it, and a few extra bytes of padding isn't the worst thing.
    unwind_reservation = RequestUnwindReservation(generated_code_write_base_ +
                                                  generated_code_offset_);
    generated_code_offset_ += xe::round_up(unwind_reservation.data_size, 16);

    auto end_write_address =
        generated_code_write_base_ + generated_code_offset_;

    high_mark = generated_code_offset_;

    // Store in map. It is maintained in sorted order of host PC dependent on
    // us also being append-only.
    generated_code_map_.emplace_back(
        (uint64_t(code_execute_address - generated_code_execute_base_) << 32) |
            generated_code_offset_,
        function_info);

    // TODO(DrChat): The following code doesn't really need to be under the
    // global lock except for PlaceCode (but it depends on the previous code
    // already being run).

    // If we are going above the high water mark of committed memory, commit
    // some more. It's ok if multiple threads do this, as redundant commits
    // aren't harmful.
    size_t old_commit_mark, new_commit_mark;
    do {
      old_commit_mark = generated_code_commit_mark_;
      if (high_mark <= old_commit_mark) break;

      new_commit_mark = old_commit_mark + 16_MiB;
      if (generated_code_execute_base_ == generated_code_write_base_) {
        xe::memory::AllocFixed(generated_code_execute_base_, new_commit_mark,
                               xe::memory::AllocationType::kCommit,
                               xe::memory::PageAccess::kExecuteReadWrite);
      } else {
        xe::memory::AllocFixed(generated_code_execute_base_, new_commit_mark,
                               xe::memory::AllocationType::kCommit,
                               xe::memory::PageAccess::kExecuteReadOnly);
        xe::memory::AllocFixed(generated_code_write_base_, new_commit_mark,
                               xe::memory::AllocationType::kCommit,
                               xe::memory::PageAccess::kReadWrite);
      }
    } while (generated_code_commit_mark_.compare_exchange_weak(
        old_commit_mark, new_commit_mark));

    // Copy code.
    std::memcpy(code_write_address, machine_code, func_info.code_size.total);

    // Fill unused slots with 0x00.
    std::memset(tail_write_address, 0x00,
                static_cast<size_t>(end_write_address - tail_write_address));

    // Notify subclasses of placed code.
    PlaceCode(guest_address, machine_code, func_info, code_execute_address,
              unwind_reservation);
  }

  // Now that everything is ready, fix up the indirection table.
  // Note that we do support code that doesn't have an indirection fixup, so
  // ignore those when we see them.
  if (guest_address && indirection_table_base_) {
    uint32_t* indirection_slot = reinterpret_cast<uint32_t*>(
        indirection_table_base_ + (guest_address - kIndirectionTableBase));
    *indirection_slot =
        uint32_t(reinterpret_cast<uint64_t>(code_execute_address));
  }
}

uint32_t A64CodeCache::PlaceData(const void* data, size_t length) {
  // Hold a lock while we bump the pointers up.
  size_t high_mark;
  uint8_t* data_address = nullptr;
  {
    auto global_lock = global_critical_region_.Acquire();

    // Reserve code.
    // Always move the code to land on 16b alignment.
    data_address = generated_code_write_base_ + generated_code_offset_;
    generated_code_offset_ += xe::round_up(length, 16);

    high_mark = generated_code_offset_;
  }

  // If we are going above the high water mark of committed memory, commit some
  // more. It's ok if multiple threads do this, as redundant commits aren't
  // harmful.
  size_t old_commit_mark, new_commit_mark;
  do {
    old_commit_mark = generated_code_commit_mark_;
    if (high_mark <= old_commit_mark) break;

    new_commit_mark = old_commit_mark + 16_MiB;
    if (generated_code_execute_base_ == generated_code_write_base_) {
      xe::memory::AllocFixed(generated_code_execute_base_, new_commit_mark,
                             xe::memory::AllocationType::kCommit,
                             xe::memory::PageAccess::kExecuteReadWrite);
    } else {
      xe::memory::AllocFixed(generated_code_execute_base_, new_commit_mark,
                             xe::memory::AllocationType::kCommit,
                             xe::memory::PageAccess::kExecuteReadOnly);
      xe::memory::AllocFixed(generated_code_write_base_, new_commit_mark,
                             xe::memory::AllocationType::kCommit,
                             xe::memory::PageAccess::kReadWrite);
    }
  } while (generated_code_commit_mark_.compare_exchange_weak(old_commit_mark,
                                                             new_commit_mark));

  // Copy code.
  std::memcpy(data_address, data, length);

  return uint32_t(uintptr_t(data_address));
}

GuestFunction* A64CodeCache::LookupFunction(uint64_t host_pc) {
  uint32_t key = uint32_t(host_pc - kGeneratedCodeExecuteBase);
  void* fn_entry = std::bsearch(
      &key, generated_code_map_.data(), generated_code_map_.size(),
      sizeof(std::pair<uint64_t, GuestFunction*>),
      [](const void* key_ptr, const void* element_ptr) {
        auto key = *reinterpret_cast<const uint32_t*>(key_ptr);
        auto element =
            reinterpret_cast<const std::pair<uint64_t, GuestFunction*>*>(
                element_ptr);
        if (key < (element->first >> 32)) {
          return -1;
        } else if (key > uint32_t(element->first)) {
          return 1;
        } else {
          return 0;
        }
      });
  if (fn_entry) {
    return reinterpret_cast<const std::pair<uint64_t, GuestFunction*>*>(
               fn_entry)
        ->second;
  } else {
    return nullptr;
  }
}
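
// Illustrative lookup: a function whose reserved span covers host offsets
// 0x100 through 0x180 is stored under the packed key (0x100 << 32) | 0x180.
// A host PC of kGeneratedCodeExecuteBase + 0x140 produces the search key
// 0x140, which compares inside [0x100, 0x180] and returns that entry's
// GuestFunction.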

}  // namespace a64
}  // namespace backend
}  // namespace cpu
}  // namespace xe
@ -0,0 +1,151 @@
/**
 ******************************************************************************
 * Xenia : Xbox 360 Emulator Research Project                                 *
 ******************************************************************************
 * Copyright 2024 Ben Vanik. All rights reserved.                             *
 * Released under the BSD license - see LICENSE in the root for more details. *
 ******************************************************************************
 */

#ifndef XENIA_CPU_BACKEND_A64_A64_CODE_CACHE_H_
#define XENIA_CPU_BACKEND_A64_A64_CODE_CACHE_H_

#include <atomic>
#include <cstddef>
#include <cstdint>
#include <filesystem>
#include <memory>
#include <string>
#include <utility>
#include <vector>

#include "xenia/base/memory.h"
#include "xenia/base/mutex.h"
#include "xenia/cpu/backend/code_cache.h"

namespace xe {
namespace cpu {
namespace backend {
namespace a64 {

struct EmitFunctionInfo {
  struct _code_size {
    size_t prolog;
    size_t body;
    size_t epilog;
    size_t tail;
    size_t total;
  } code_size;
  size_t prolog_stack_alloc_offset;  // offset of instruction after stack alloc
  size_t stack_size;
};

class A64CodeCache : public CodeCache {
 public:
  ~A64CodeCache() override;

  static std::unique_ptr<A64CodeCache> Create();

  virtual bool Initialize();

  const std::filesystem::path& file_name() const override {
    return file_name_;
  }
  uintptr_t execute_base_address() const override {
    return kGeneratedCodeExecuteBase;
  }
  size_t total_size() const override { return kGeneratedCodeSize; }

  // TODO(benvanik): ELF serialization/etc
  // TODO(benvanik): keep track of code blocks
  // TODO(benvanik): padding/guards/etc

  bool has_indirection_table() { return indirection_table_base_ != nullptr; }
  void set_indirection_default(uint32_t default_value);
  void AddIndirection(uint32_t guest_address, uint32_t host_address);

  void CommitExecutableRange(uint32_t guest_low, uint32_t guest_high);

  void PlaceHostCode(uint32_t guest_address, void* machine_code,
                     const EmitFunctionInfo& func_info,
                     void*& code_execute_address_out,
                     void*& code_write_address_out);
  void PlaceGuestCode(uint32_t guest_address, void* machine_code,
                      const EmitFunctionInfo& func_info,
                      GuestFunction* function_info,
                      void*& code_execute_address_out,
                      void*& code_write_address_out);
  uint32_t PlaceData(const void* data, size_t length);

  GuestFunction* LookupFunction(uint64_t host_pc) override;

 protected:
  // All executable code falls within 0x80000000 to 0x9FFFFFFF, so we can
  // only map enough for lookups within that range.
  static const size_t kIndirectionTableSize = 0x1FFFFFFF;
  static const uintptr_t kIndirectionTableBase = 0x80000000;
  // The code range is 512MB, but we know the total code games will have is
  // pretty small (dozens of MB at most) and our expansion is reasonable, so
  // 256MB should be more than enough.
  static const size_t kGeneratedCodeSize = 0x0FFFFFFF;
  static const uintptr_t kGeneratedCodeExecuteBase = 0xA0000000;
  // Used for writing when PageAccess::kExecuteReadWrite is not supported.
  static const uintptr_t kGeneratedCodeWriteBase =
      kGeneratedCodeExecuteBase + kGeneratedCodeSize + 1;
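
  // Resulting host address layout (illustrative, derived from the constants
  // above):
  //   0x80000000-0x9FFFFFFF  indirection table (4-byte slot per guest PC)
  //   0xA0000000-0xAFFFFFFE  generated code, execute view
  //   0xB0000000-0xBFFFFFFE  generated code, write view (only mapped when
  //                          kExecuteReadWrite pages are unavailable)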

  // This is picked to be high enough to cover whatever we can reasonably
  // expect. If we hit issues with this it probably means some corner case
  // in analysis triggering.
  static const size_t kMaximumFunctionCount = 100000;

  struct UnwindReservation {
    size_t data_size = 0;
    size_t table_slot = 0;
    uint8_t* entry_address = 0;
  };

  A64CodeCache();

  virtual UnwindReservation RequestUnwindReservation(uint8_t* entry_address) {
    return UnwindReservation();
  }
  virtual void PlaceCode(uint32_t guest_address, void* machine_code,
                         const EmitFunctionInfo& func_info,
                         void* code_execute_address,
                         UnwindReservation unwind_reservation) {}

  std::filesystem::path file_name_;
  xe::memory::FileMappingHandle mapping_ =
      xe::memory::kFileMappingHandleInvalid;

  // NOTE: the global critical region must be held when manipulating the
  // offsets or counts of anything, to keep the tables consistent and ordered.
  xe::global_critical_region global_critical_region_;

  // Value that the indirection table will be initialized with upon commit.
  uint32_t indirection_default_value_ = 0xFEEDF00D;

  // Fixed at kIndirectionTableBase in host space, holding 4 byte pointers into
  // the generated code table that correspond to the PPC functions in guest
  // space.
  uint8_t* indirection_table_base_ = nullptr;
  // Fixed at kGeneratedCodeExecuteBase and holding all generated code, growing
  // as needed.
  uint8_t* generated_code_execute_base_ = nullptr;
  // View of the memory that backs generated_code_execute_base_ when
  // PageAccess::kExecuteReadWrite is not supported, for writing the generated
  // code. Equals generated_code_execute_base_ when it's supported.
  uint8_t* generated_code_write_base_ = nullptr;
  // Current offset to empty space in generated code.
  size_t generated_code_offset_ = 0;
  // Current high water mark of COMMITTED code.
  std::atomic<size_t> generated_code_commit_mark_ = {0};
  // Sorted map by host PC base offsets to source function info.
  // This can be used to bsearch on host PC to find the guest function.
  // The key is [start address | end address].
  std::vector<std::pair<uint64_t, GuestFunction*>> generated_code_map_;
};

}  // namespace a64
}  // namespace backend
}  // namespace cpu
}  // namespace xe

#endif  // XENIA_CPU_BACKEND_A64_A64_CODE_CACHE_H_
@ -0,0 +1,319 @@
/**
 ******************************************************************************
 * Xenia : Xbox 360 Emulator Research Project                                 *
 ******************************************************************************
 * Copyright 2024 Ben Vanik. All rights reserved.                             *
 * Released under the BSD license - see LICENSE in the root for more details. *
 ******************************************************************************
 */

#include "xenia/cpu/backend/a64/a64_code_cache.h"

#include <cstdlib>
#include <cstring>

#include "xenia/base/assert.h"
#include "xenia/base/clock.h"
#include "xenia/base/logging.h"
#include "xenia/base/math.h"
#include "xenia/base/memory.h"
#include "xenia/base/platform_win.h"
#include "xenia/cpu/function.h"

// Function pointer definitions
using FnRtlAddGrowableFunctionTable = decltype(&RtlAddGrowableFunctionTable);
using FnRtlGrowFunctionTable = decltype(&RtlGrowFunctionTable);
using FnRtlDeleteGrowableFunctionTable =
    decltype(&RtlDeleteGrowableFunctionTable);

namespace xe {
namespace cpu {
namespace backend {
namespace a64 {

// ARM64 unwind-op codes
// https://docs.microsoft.com/en-us/cpp/build/arm64-exception-handling#unwind-codes
// https://www.corsix.org/content/windows-arm64-unwind-codes
typedef enum _UNWIND_OP_CODES {
  UWOP_NOP = 0xE3,
  UWOP_ALLOC_S = 0x00,           // sub sp, sp, i*16
  UWOP_ALLOC_L = 0xE0'00'00'00,  // sub sp, sp, i*16
  UWOP_SAVE_FPLR = 0x40,         // stp fp, lr, [sp+i*8]
  UWOP_SAVE_FPLRX = 0x80,        // stp fp, lr, [sp-(i+1)*8]!
  UWOP_SET_FP = 0xE1,            // mov fp, sp
  UWOP_END = 0xE4,
} UNWIND_CODE_OPS;

using UNWIND_CODE = uint32_t;

static_assert(sizeof(UNWIND_CODE) == sizeof(uint32_t));

// UNWIND_INFO defines the static part (first 32-bit) of the .xdata record
typedef struct _UNWIND_INFO {
  uint32_t FunctionLength : 18;
  uint32_t Version : 2;
  uint32_t X : 1;
  uint32_t E : 1;
  uint32_t EpilogCount : 5;
  uint32_t CodeWords : 5;
  UNWIND_CODE UnwindCodes[2];
} UNWIND_INFO, *PUNWIND_INFO;

static_assert(offsetof(UNWIND_INFO, UnwindCodes[0]) == 4);
static_assert(offsetof(UNWIND_INFO, UnwindCodes[1]) == 8);

// Size of unwind info per function.
static const uint32_t kUnwindInfoSize = sizeof(UNWIND_INFO);

class Win32A64CodeCache : public A64CodeCache {
 public:
  Win32A64CodeCache();
  ~Win32A64CodeCache() override;

  bool Initialize() override;

  void* LookupUnwindInfo(uint64_t host_pc) override;

 private:
  UnwindReservation RequestUnwindReservation(uint8_t* entry_address) override;
  void PlaceCode(uint32_t guest_address, void* machine_code,
                 const EmitFunctionInfo& func_info, void* code_execute_address,
                 UnwindReservation unwind_reservation) override;

  void InitializeUnwindEntry(uint8_t* unwind_entry_address,
                             size_t unwind_table_slot,
                             void* code_execute_address,
                             const EmitFunctionInfo& func_info);

  // Growable function table system handle.
  void* unwind_table_handle_ = nullptr;
  // Actual unwind table entries.
  std::vector<RUNTIME_FUNCTION> unwind_table_;
  // Current number of entries in the table.
  std::atomic<uint32_t> unwind_table_count_ = {0};
  // Does this version of Windows support growable function tables?
  bool supports_growable_table_ = false;

  FnRtlAddGrowableFunctionTable add_growable_table_ = nullptr;
  FnRtlDeleteGrowableFunctionTable delete_growable_table_ = nullptr;
  FnRtlGrowFunctionTable grow_table_ = nullptr;
};

std::unique_ptr<A64CodeCache> A64CodeCache::Create() {
  return std::make_unique<Win32A64CodeCache>();
}

Win32A64CodeCache::Win32A64CodeCache() = default;

Win32A64CodeCache::~Win32A64CodeCache() {
  if (supports_growable_table_) {
    if (unwind_table_handle_) {
      delete_growable_table_(unwind_table_handle_);
    }
  } else {
    if (generated_code_execute_base_) {
      RtlDeleteFunctionTable(reinterpret_cast<PRUNTIME_FUNCTION>(
          reinterpret_cast<DWORD64>(generated_code_execute_base_) | 0x3));
    }
  }
}

bool Win32A64CodeCache::Initialize() {
  if (!A64CodeCache::Initialize()) {
    return false;
  }

  // Compute total number of unwind entries we should allocate.
  // We don't support reallocing right now, so this should be high.
  unwind_table_.resize(kMaximumFunctionCount);

  // Check if this version of Windows supports growable function tables.
  auto ntdll_handle = GetModuleHandleW(L"ntdll.dll");
  if (!ntdll_handle) {
    add_growable_table_ = nullptr;
    delete_growable_table_ = nullptr;
    grow_table_ = nullptr;
  } else {
    add_growable_table_ = (FnRtlAddGrowableFunctionTable)GetProcAddress(
        ntdll_handle, "RtlAddGrowableFunctionTable");
    delete_growable_table_ = (FnRtlDeleteGrowableFunctionTable)GetProcAddress(
        ntdll_handle, "RtlDeleteGrowableFunctionTable");
    grow_table_ = (FnRtlGrowFunctionTable)GetProcAddress(
        ntdll_handle, "RtlGrowFunctionTable");
  }
  supports_growable_table_ =
      add_growable_table_ && delete_growable_table_ && grow_table_;

  // Create table and register with the system. It's empty now, but we'll grow
  // it as functions are added.
  if (supports_growable_table_) {
    if (add_growable_table_(
            &unwind_table_handle_, unwind_table_.data(), unwind_table_count_,
            DWORD(unwind_table_.size()),
            reinterpret_cast<ULONG_PTR>(generated_code_execute_base_),
            reinterpret_cast<ULONG_PTR>(generated_code_execute_base_ +
                                        kGeneratedCodeSize))) {
      XELOGE("Unable to create unwind function table");
      return false;
    }
  } else {
    // Install a callback that the debugger will use to lookup unwind info on
    // demand.
    if (!RtlInstallFunctionTableCallback(
            reinterpret_cast<DWORD64>(generated_code_execute_base_) | 0x3,
            reinterpret_cast<DWORD64>(generated_code_execute_base_),
            kGeneratedCodeSize,
            [](DWORD64 control_pc, PVOID context) {
              auto code_cache = reinterpret_cast<Win32A64CodeCache*>(context);
              return reinterpret_cast<PRUNTIME_FUNCTION>(
                  code_cache->LookupUnwindInfo(control_pc));
            },
            this, nullptr)) {
      XELOGE("Unable to install function table callback");
      return false;
    }
  }

  return true;
}

Win32A64CodeCache::UnwindReservation
Win32A64CodeCache::RequestUnwindReservation(uint8_t* entry_address) {
  assert_false(unwind_table_count_ >= kMaximumFunctionCount);
  UnwindReservation unwind_reservation;
  unwind_reservation.data_size = xe::round_up(kUnwindInfoSize, 16);
  unwind_reservation.table_slot = unwind_table_count_++;
  unwind_reservation.entry_address = entry_address;
  return unwind_reservation;
}

void Win32A64CodeCache::PlaceCode(uint32_t guest_address, void* machine_code,
                                  const EmitFunctionInfo& func_info,
                                  void* code_execute_address,
                                  UnwindReservation unwind_reservation) {
  // Add unwind info.
  InitializeUnwindEntry(unwind_reservation.entry_address,
                        unwind_reservation.table_slot, code_execute_address,
                        func_info);

  if (supports_growable_table_) {
    // Notify that the unwind table has grown.
    // We do this outside of the lock, but with the latest total count.
    grow_table_(unwind_table_handle_, unwind_table_count_);
  }

  // https://docs.microsoft.com/en-us/uwp/win32-and-com/win32-apis
  FlushInstructionCache(GetCurrentProcess(), code_execute_address,
                        func_info.code_size.total);
}

constexpr UNWIND_CODE UnwindOpWord(uint8_t code0 = UWOP_NOP,
                                   uint8_t code1 = UWOP_NOP,
                                   uint8_t code2 = UWOP_NOP,
                                   uint8_t code3 = UWOP_NOP) {
  return static_cast<uint32_t>(code0) | (static_cast<uint32_t>(code1) << 8) |
         (static_cast<uint32_t>(code2) << 16) |
         (static_cast<uint32_t>(code3) << 24);
}

// 1-byte unwind code for "stp fp, lr, [sp, #-16]!"
// https://docs.microsoft.com/en-us/cpp/build/arm64-exception-handling#unwind-codes
static uint8_t OpSaveFpLrX(int16_t pre_index_offset) {
  assert_true(pre_index_offset <= -8);
  assert_true(pre_index_offset >= -512);
  // Offsets are encoded in 8-byte increments.
  constexpr int IndexShift = 3;
  constexpr int IndexMask = (1 << IndexShift) - 1;
  assert_true((pre_index_offset & IndexMask) == 0);
  const uint32_t encoded_value = (-pre_index_offset >> IndexShift) - 1;
  return UWOP_SAVE_FPLRX | encoded_value;
}

// Ensure a 16-byte aligned stack
static constexpr size_t StackAlignShift = 4;                          // n / 16
static constexpr size_t StackAlignMask = (1 << StackAlignShift) - 1;  // n % 16

// 1-byte unwind code for up to +512-byte "sub sp, sp, #stack_space"
// https://docs.microsoft.com/en-us/cpp/build/arm64-exception-handling#unwind-codes
static uint8_t OpAllocS(int16_t stack_space) {
  assert_true(stack_space >= 0);
  assert_true(stack_space < 512);
  assert_true((stack_space & StackAlignMask) == 0);
  return UWOP_ALLOC_S | (stack_space >> StackAlignShift);
}

// 4-byte unwind code for up to +256MiB "sub sp, sp, #stack_space"
// https://docs.microsoft.com/en-us/cpp/build/arm64-exception-handling#unwind-codes
uint32_t OpAllocL(int32_t stack_space) {
  assert_true(stack_space >= 0);
  assert_true(stack_space < (0xFFFFFF * 16));
  assert_true((stack_space & StackAlignMask) == 0);
  return xe::byte_swap(UWOP_ALLOC_L |
                       ((stack_space >> StackAlignShift) & 0xFF'FF'FF));
}
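
// Illustrative encodings produced by the helpers above:
//   OpSaveFpLrX(-16) -> 0x81: UWOP_SAVE_FPLRX with (16 >> 3) - 1 = 1.
//   OpAllocS(0x1F0)  -> 0x1F: UWOP_ALLOC_S with 0x1F0 >> 4.
//   OpAllocL(0x2000) -> bytes E0 00 02 00 in the .xdata stream; the
//   byte_swap puts the 0xE0 opcode first, followed by the 24-bit size
//   (0x2000 >> 4) in big-endian order.
// UnwindOpWord packs code0 into the lowest byte, so it becomes the first
// byte of the little-endian UNWIND_CODE word in memory.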

void Win32A64CodeCache::InitializeUnwindEntry(
    uint8_t* unwind_entry_address, size_t unwind_table_slot,
    void* code_execute_address, const EmitFunctionInfo& func_info) {
  auto unwind_info = reinterpret_cast<UNWIND_INFO*>(unwind_entry_address);

  *unwind_info = {};
  // ARM64 instructions are always multiples of 4 bytes.
  // Windows ignores the bottom 2 bits.
  unwind_info->FunctionLength = func_info.code_size.total / 4;
  unwind_info->CodeWords = 2;

  // https://learn.microsoft.com/en-us/cpp/build/arm64-exception-handling?view=msvc-170#unwind-codes
  // The array of unwind codes is a pool of sequences that describe exactly
  // how to undo the effects of the prolog. They're stored in the same order
  // the operations need to be undone. The unwind codes can be thought of as a
  // small instruction set, encoded as a string of bytes. When execution is
  // complete, the return address to the calling function is in the lr
  // register, and all non-volatile registers are restored to their values at
  // the time the function was called.

  // Function frames are generally:
  // STP(X29, X30, SP, PRE_INDEXED, -16);
  // MOV(X29, XSP);
  // SUB(XSP, XSP, stack_size);
  // ... function body ...
  // ADD(XSP, XSP, stack_size);
  // MOV(XSP, X29);
  // LDP(X29, X30, SP, POST_INDEXED, 16);

  // These opcodes must undo the prolog and leave the return address in lr.
  unwind_info->UnwindCodes[0] = OpAllocL(func_info.stack_size);
  unwind_info->UnwindCodes[1] =
      UnwindOpWord(UWOP_SET_FP, OpSaveFpLrX(-16), UWOP_END);

  // Add entry.
  RUNTIME_FUNCTION& fn_entry = unwind_table_[unwind_table_slot];
  fn_entry.BeginAddress =
      DWORD(reinterpret_cast<uint8_t*>(code_execute_address) -
            generated_code_execute_base_);
  fn_entry.UnwindData =
      DWORD(unwind_entry_address - generated_code_execute_base_);
}

void* Win32A64CodeCache::LookupUnwindInfo(uint64_t host_pc) {
  return std::bsearch(
      &host_pc, unwind_table_.data(), unwind_table_count_,
      sizeof(RUNTIME_FUNCTION),
      [](const void* key_ptr, const void* element_ptr) {
        auto key = *reinterpret_cast<const uintptr_t*>(key_ptr) -
                   kGeneratedCodeExecuteBase;
        auto element = reinterpret_cast<const RUNTIME_FUNCTION*>(element_ptr);
        if (key < element->BeginAddress) {
          return -1;
        } else if (key > (element->BeginAddress + element->FunctionLength)) {
          return 1;
        } else {
          return 0;
        }
      });
}

}  // namespace a64
}  // namespace backend
}  // namespace cpu
}  // namespace xe
@ -0,0 +1,995 @@
/**
 ******************************************************************************
 * Xenia : Xbox 360 Emulator Research Project                                 *
 ******************************************************************************
 * Copyright 2024 Ben Vanik. All rights reserved.                             *
 * Released under the BSD license - see LICENSE in the root for more details. *
 ******************************************************************************
 */

#include "xenia/cpu/backend/a64/a64_emitter.h"
#include "xenia/cpu/backend/a64/a64_util.h"

#include <algorithm>
#include <climits>
#include <cstddef>
#include <cstring>
#include <functional>

#include "third_party/fmt/include/fmt/format.h"
#include "xenia/base/assert.h"
#include "xenia/base/atomic.h"
#include "xenia/base/debugging.h"
#include "xenia/base/literals.h"
#include "xenia/base/logging.h"
#include "xenia/base/math.h"
#include "xenia/base/memory.h"
#include "xenia/base/profiling.h"
#include "xenia/base/vec128.h"
#include "xenia/cpu/backend/a64/a64_backend.h"
#include "xenia/cpu/backend/a64/a64_code_cache.h"
#include "xenia/cpu/backend/a64/a64_function.h"
#include "xenia/cpu/backend/a64/a64_sequences.h"
#include "xenia/cpu/backend/a64/a64_stack_layout.h"
#include "xenia/cpu/cpu_flags.h"
#include "xenia/cpu/function.h"
#include "xenia/cpu/function_debug_info.h"
#include "xenia/cpu/processor.h"
#include "xenia/cpu/symbol.h"
#include "xenia/cpu/thread_state.h"

#include "oaknut/feature_detection/cpu_feature.hpp"
#include "oaknut/feature_detection/feature_detection.hpp"
#include "oaknut/feature_detection/feature_detection_idregs.hpp"

DEFINE_bool(debugprint_trap_log, false,
            "Log debugprint traps to the active debugger", "CPU");
DEFINE_bool(ignore_undefined_externs, true,
            "Don't exit when an undefined extern is called.", "CPU");
DEFINE_bool(emit_source_annotations, false,
            "Add extra movs and nops to make disassembly easier to read.",
            "CPU");

namespace xe {
namespace cpu {
namespace backend {
namespace a64 {

using xe::cpu::hir::HIRBuilder;
using xe::cpu::hir::Instr;
using namespace xe::literals;
using namespace oaknut::util;

static const size_t kStashOffset = 32;
// static const size_t kStashOffsetHigh = 32 + 32;

// Register indices that the HIR is allowed to use for operands
const uint8_t A64Emitter::gpr_reg_map_[A64Emitter::GPR_COUNT] = {
    19, 20, 21, 22, 23, 24, 25, 26,
};

const uint8_t A64Emitter::fpr_reg_map_[A64Emitter::FPR_COUNT] = {
    8, 9, 10, 11, 12, 13, 14, 15,
};
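
// Note: X19-X26 are callee-saved under AAPCS64, and V8-V15 have callee-saved
// low halves, so values the JIT keeps in these registers survive calls out to
// host code.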

A64Emitter::A64Emitter(A64Backend* backend)
    : VectorCodeGenerator(assembly_buffer),
      processor_(backend->processor()),
      backend_(backend),
      code_cache_(backend->code_cache()) {
  oaknut::CpuFeatures cpu_ = oaknut::detect_features();

  // Combine with id register detection
#if OAKNUT_SUPPORTS_READING_ID_REGISTERS > 0
#if OAKNUT_SUPPORTS_READING_ID_REGISTERS == 1
  const std::optional<oaknut::id::IdRegisters> id_registers =
      oaknut::read_id_registers();
#elif OAKNUT_SUPPORTS_READING_ID_REGISTERS == 2
  const std::optional<oaknut::id::IdRegisters> id_registers =
      oaknut::read_id_registers(0);
#endif
  if (id_registers.has_value()) {
    cpu_ = cpu_ | oaknut::detect_features_via_id_registers(*id_registers);
  }
#endif

#define TEST_EMIT_FEATURE(emit, ext)                \
  if ((cvars::a64_extension_mask & emit) == emit) { \
    feature_flags_ |= (cpu_.has(ext) ? emit : 0);   \
  }

  TEST_EMIT_FEATURE(kA64EmitLSE, oaknut::CpuFeature::LSE);
  TEST_EMIT_FEATURE(kA64EmitF16C, oaknut::CpuFeature::FP16Conv);

#undef TEST_EMIT_FEATURE
}
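
// For example, TEST_EMIT_FEATURE(kA64EmitLSE, oaknut::CpuFeature::LSE) only
// sets kA64EmitLSE in feature_flags_ when the extension is both enabled in
// cvars::a64_extension_mask and reported by the detected CPU features.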

A64Emitter::~A64Emitter() = default;

bool A64Emitter::Emit(GuestFunction* function, HIRBuilder* builder,
                      uint32_t debug_info_flags, FunctionDebugInfo* debug_info,
                      void** out_code_address, size_t* out_code_size,
                      std::vector<SourceMapEntry>* out_source_map) {
  SCOPE_profile_cpu_f("cpu");

  // Reset.
  debug_info_ = debug_info;
  debug_info_flags_ = debug_info_flags;
  trace_data_ = &function->trace_data();
  source_map_arena_.Reset();

  // Fill the generator with code.
  EmitFunctionInfo func_info = {};
  if (!Emit(builder, func_info)) {
    return false;
  }

  // Copy the final code to the cache and relocate it.
  *out_code_size = offset();
  *out_code_address = Emplace(func_info, function);

  // Stash source map.
  source_map_arena_.CloneContents(out_source_map);

  return true;
}

void* A64Emitter::Emplace(const EmitFunctionInfo& func_info,
                          GuestFunction* function) {
  // Copy the current oaknut instruction-buffer into the code-cache
  void* new_execute_address;
  void* new_write_address;

  assert_true(func_info.code_size.total == offset());

  if (function) {
    code_cache_->PlaceGuestCode(function->address(), assembly_buffer.data(),
                                func_info, function, new_execute_address,
                                new_write_address);
  } else {
    code_cache_->PlaceHostCode(0, assembly_buffer.data(), func_info,
                               new_execute_address, new_write_address);
  }

  // Reset the oaknut instruction-buffer
  assembly_buffer.clear();
  label_lookup_.clear();

  return new_execute_address;
}

bool A64Emitter::Emit(HIRBuilder* builder, EmitFunctionInfo& func_info) {
  oaknut::Label epilog_label;
  epilog_label_ = &epilog_label;

  // Calculate stack size. We need to align things to their natural sizes.
  // This could be much better (sort by type/etc).
  auto locals = builder->locals();
  size_t stack_offset = StackLayout::GUEST_STACK_SIZE;
  for (auto it = locals.begin(); it != locals.end(); ++it) {
    auto slot = *it;
    size_t type_size = GetTypeSize(slot->type);

    // Align to natural size.
    stack_offset = xe::align(stack_offset, type_size);
    slot->set_constant((uint32_t)stack_offset);
    stack_offset += type_size;
  }

  // Ensure 16b alignment.
  stack_offset -= StackLayout::GUEST_STACK_SIZE;
  stack_offset = xe::align(stack_offset, static_cast<size_t>(16));

  struct _code_offsets {
    size_t prolog;
    size_t prolog_stack_alloc;
    size_t body;
    size_t epilog;
    size_t tail;
  } code_offsets = {};

  code_offsets.prolog = offset();

  // Function prolog.
  // Must be 16b aligned.
  // Windows is very strict about the form of this and the epilog:
  // https://docs.microsoft.com/en-us/cpp/build/prolog-and-epilog?view=vs-2017
  // IMPORTANT: any changes to the prolog must be kept in sync with
  // A64CodeCache, which dynamically generates exception information.
  // Adding or changing anything here must be matched!
  size_t stack_size = StackLayout::GUEST_STACK_SIZE + stack_offset;

  // The SUB instruction can only encode immediates within 0xFFF or 0xFFF000.
  // If the stack size is greater than 0xFFF, then just align it to 0x1000.
  if (stack_size > 0xFFF) {
    stack_size = xe::align(stack_size, static_cast<size_t>(0x1000));
  }
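
  // e.g. (illustrative) a stack_size of 0x1460 exceeds 0xFFF, so it is
  // rounded up to 0x2000, which the shifted-immediate form encodes as
  // 0x2 << 12.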

  assert_true(stack_size % 16 == 0);
  func_info.stack_size = stack_size;
  stack_size_ = stack_size;

  STP(X29, X30, SP, PRE_INDEXED, -16);
  MOV(X29, SP);

  SUB(SP, SP, (uint32_t)stack_size);

  code_offsets.prolog_stack_alloc = offset();
  code_offsets.body = offset();

  STR(GetContextReg(), SP, StackLayout::GUEST_CTX_HOME);
  STR(X0, SP, StackLayout::GUEST_RET_ADDR);
  STR(XZR, SP, StackLayout::GUEST_CALL_RET_ADDR);

  // Safe now to do some tracing.
  if (debug_info_flags_ & DebugInfoFlags::kDebugInfoTraceFunctions) {
    // We require 32-bit addresses.
    assert_true(uint64_t(trace_data_->header()) < UINT_MAX);
    auto trace_header = trace_data_->header();

    // Call count.
    MOV(W0, 1);
    MOV(X5, reinterpret_cast<uintptr_t>(
                low_address(&trace_header->function_call_count)));
    LDADDAL(X0, X0, X5);

    // Get call history slot.
    static_assert(FunctionTraceData::kFunctionCallerHistoryCount == 4,
                  "bitmask depends on count");
    LDR(X0, X5);
    AND(W0, W0, 0b00000011);

    // Record call history value into slot (guest addr in W1).
    MOV(X5, reinterpret_cast<uintptr_t>(
                low_address(&trace_header->function_caller_history)));
    STR(W1, X5, X0, oaknut::IndexExt::LSL, 2);

    // Calling thread. Load X0 with thread ID.
    EmitGetCurrentThreadId();
    MOV(W5, 1);
    LSL(W0, W5, W0);

    MOV(X5, reinterpret_cast<uintptr_t>(
                low_address(&trace_header->function_thread_use)));
    LDSET(W0, WZR, X5);
  }

  // Load membase.
  LDR(GetMembaseReg(), GetContextReg(),
      offsetof(ppc::PPCContext, virtual_membase));

  // Body.
  auto block = builder->first_block();
  while (block) {
    // Mark block labels.
    auto label = block->label_head;
    while (label) {
      l(label_lookup_[label->name]);
      label = label->next;
    }

    // Process instructions.
    const Instr* instr = block->instr_head;
    while (instr) {
      const Instr* new_tail = instr;
      if (!SelectSequence(this, instr, &new_tail)) {
        // No sequence found!
        // NOTE: If you encounter this after adding a new instruction, do a
        // full rebuild!
        assert_always();
        XELOGE("Unable to process HIR opcode {}", instr->opcode->name);
        break;
      }
      instr = new_tail;
    }

    block = block->next;
  }

  // Function epilog.
  l(epilog_label);
  epilog_label_ = nullptr;
  EmitTraceUserCallReturn();
  LDR(GetContextReg(), SP, StackLayout::GUEST_CTX_HOME);

  code_offsets.epilog = offset();

  ADD(SP, SP, (uint32_t)stack_size);

  MOV(SP, X29);
  LDP(X29, X30, SP, POST_INDEXED, 16);

  RET();

  code_offsets.tail = offset();

  if (cvars::emit_source_annotations) {
    NOP();
    NOP();
    NOP();
    NOP();
    NOP();
  }

  assert_zero(code_offsets.prolog);
  func_info.code_size.total = offset();
  func_info.code_size.prolog = code_offsets.body - code_offsets.prolog;
  func_info.code_size.body = code_offsets.epilog - code_offsets.body;
  func_info.code_size.epilog = code_offsets.tail - code_offsets.epilog;
  func_info.code_size.tail = offset() - code_offsets.tail;
  func_info.prolog_stack_alloc_offset =
      code_offsets.prolog_stack_alloc - code_offsets.prolog;

  return true;
}

void A64Emitter::MarkSourceOffset(const Instr* i) {
  auto entry = source_map_arena_.Alloc<SourceMapEntry>();
  entry->guest_address = static_cast<uint32_t>(i->src1.offset);
  entry->hir_offset = uint32_t(i->block->ordinal << 16) | i->ordinal;
  entry->code_offset = static_cast<uint32_t>(offset());

  if (cvars::emit_source_annotations) {
    NOP();
    NOP();
    MOV(X0, entry->guest_address);
    NOP();
    NOP();
  }

  if (debug_info_flags_ & DebugInfoFlags::kDebugInfoTraceFunctionCoverage) {
    const uint32_t instruction_index =
        (entry->guest_address - trace_data_->start_address()) / 4;
    MOV(X0, 1);
    MOV(X1, reinterpret_cast<uintptr_t>(
                low_address(trace_data_->instruction_execute_counts() +
                            instruction_index * 8)));
    LDADDAL(X0, ZR, X1);
  }
}

void A64Emitter::EmitGetCurrentThreadId() {
  // X27 must point to context. We could fetch from the stack if needed.
  LDRH(W0, GetContextReg(), offsetof(ppc::PPCContext, thread_id));
}

void A64Emitter::EmitTraceUserCallReturn() {}

void A64Emitter::DebugBreak() { BRK(0xF000); }

uint64_t TrapDebugPrint(void* raw_context, uint64_t address) {
  auto thread_state = *reinterpret_cast<ThreadState**>(raw_context);
  uint32_t str_ptr = uint32_t(thread_state->context()->r[3]);
  // uint16_t str_len = uint16_t(thread_state->context()->r[4]);
  auto str = thread_state->memory()->TranslateVirtual<const char*>(str_ptr);
  // TODO(benvanik): truncate to length?
  XELOGD("(DebugPrint) {}", str);

  if (cvars::debugprint_trap_log) {
    debugging::DebugPrint("(DebugPrint) {}", str);
  }

  return 0;
}

uint64_t TrapDebugBreak(void* raw_context, uint64_t address) {
  auto thread_state = *reinterpret_cast<ThreadState**>(raw_context);
  XELOGE("tw/td forced trap hit! This should be a crash!");
  if (cvars::break_on_debugbreak) {
    xe::debugging::Break();
  }
  return 0;
}

void A64Emitter::Trap(uint16_t trap_type) {
  switch (trap_type) {
    case 20:
    case 26:
      // 0x0FE00014 is a 'debug print' where r3 = buffer r4 = length
      CallNative(TrapDebugPrint, 0);
      break;
    case 0:
    case 22:
      // Always trap?
      // TODO(benvanik): post software interrupt to debugger.
      CallNative(TrapDebugBreak, 0);
      break;
    case 25:
      // ?
      break;
    default:
      XELOGW("Unknown trap type {}", trap_type);
      BRK(0xF000);
      break;
  }
}

void A64Emitter::UnimplementedInstr(const hir::Instr* i) {
  // TODO(benvanik): notify debugger.
  BRK(0xF000);
  assert_always();
}

// This is used by the A64ThunkEmitter's ResolveFunctionThunk.
uint64_t ResolveFunction(void* raw_context, uint64_t target_address) {
  auto thread_state = *reinterpret_cast<ThreadState**>(raw_context);

  // TODO(benvanik): required?
  assert_not_zero(target_address);

  auto fn = thread_state->processor()->ResolveFunction(
      static_cast<uint32_t>(target_address));
  assert_not_null(fn);
  auto a64_fn = static_cast<A64Function*>(fn);
  uint64_t addr = reinterpret_cast<uint64_t>(a64_fn->machine_code());

  return addr;
}

void A64Emitter::Call(const hir::Instr* instr, GuestFunction* function) {
  assert_not_null(function);
  auto fn = static_cast<A64Function*>(function);
  // Resolve address to the function to call and store in X16.
  if (fn->machine_code()) {
    // TODO(benvanik): is it worth it to do this? It removes the need for
    // a ResolveFunction call, but makes the table less useful.
    assert_zero(uint64_t(fn->machine_code()) & 0xFFFFFFFF00000000);
    MOV(X16, uint32_t(uint64_t(fn->machine_code())));
  } else if (code_cache_->has_indirection_table()) {
    // Load the pointer to the indirection table maintained in A64CodeCache.
    // The target dword will either contain the address of the generated code
    // or a thunk to ResolveAddress.
    MOV(W17, function->address());
    LDR(W16, X17);
  } else {
    // Old-style resolve.
    // Not too important because indirection table is almost always available.
    // TODO: Overwrite the call-site with a straight call.
    CallNative(&ResolveFunction, function->address());
    MOV(X16, X0);
  }

  // Actually jump/call to X16.
  if (instr->flags & hir::CALL_TAIL) {
    // Since we skip the prolog we need to mark the return here.
    EmitTraceUserCallReturn();

    // Pass the caller's return address over.
    LDR(X0, SP, StackLayout::GUEST_RET_ADDR);

    ADD(SP, SP, static_cast<uint32_t>(stack_size()));

    MOV(SP, X29);
    LDP(X29, X30, SP, POST_INDEXED, 16);

    BR(X16);
  } else {
    // Return address is from the previous SET_RETURN_ADDRESS.
    LDR(X0, SP, StackLayout::GUEST_CALL_RET_ADDR);

    BLR(X16);
  }
}

void A64Emitter::CallIndirect(const hir::Instr* instr,
                              const oaknut::XReg& reg) {
  // Check if return.
  if (instr->flags & hir::CALL_POSSIBLE_RETURN) {
    LDR(W16, SP, StackLayout::GUEST_RET_ADDR);
    CMP(reg.toW(), W16);
    B(oaknut::Cond::EQ, epilog_label());
  }

  // Load the pointer to the indirection table maintained in A64CodeCache.
  // The target dword will either contain the address of the generated code
  // or a thunk to ResolveAddress.
  if (code_cache_->has_indirection_table()) {
    if (reg.toW().index() != W17.index()) {
      MOV(W17, reg.toW());
    }
    LDR(W16, X17);
  } else {
    // Old-style resolve.
    // Not too important because indirection table is almost always available.
    MOV(X0, GetContextReg());
    MOV(W1, reg.toW());

    MOV(X16, reinterpret_cast<uint64_t>(ResolveFunction));
    BLR(X16);
    MOV(X16, X0);
  }

  // Actually jump/call to X16.
  if (instr->flags & hir::CALL_TAIL) {
    // Since we skip the prolog we need to mark the return here.
    EmitTraceUserCallReturn();

    // Pass the caller's return address over.
    LDR(X0, SP, StackLayout::GUEST_RET_ADDR);

    ADD(SP, SP, static_cast<uint32_t>(stack_size()));

    MOV(SP, X29);
    LDP(X29, X30, SP, POST_INDEXED, 16);

    BR(X16);
  } else {
    // Return address is from the previous SET_RETURN_ADDRESS.
    LDR(X0, SP, StackLayout::GUEST_CALL_RET_ADDR);

    BLR(X16);
  }
}

uint64_t UndefinedCallExtern(void* raw_context, uint64_t function_ptr) {
  auto function = reinterpret_cast<Function*>(function_ptr);
  if (!cvars::ignore_undefined_externs) {
    xe::FatalError(fmt::format("undefined extern call to {:08X} {}",
                               function->address(), function->name().c_str()));
  } else {
    XELOGE("undefined extern call to {:08X} {}", function->address(),
           function->name());
  }
  return 0;
}

void A64Emitter::CallExtern(const hir::Instr* instr, const Function* function) {
  bool undefined = true;
  if (function->behavior() == Function::Behavior::kBuiltin) {
    auto builtin_function = static_cast<const BuiltinFunction*>(function);
    if (builtin_function->handler()) {
      undefined = false;
      // x0 = target function
      // x1 = arg0
      // x2 = arg1
      // x3 = arg2
      MOV(X0, reinterpret_cast<uint64_t>(builtin_function->handler()));
      MOV(X1, reinterpret_cast<uint64_t>(builtin_function->arg0()));
      MOV(X2, reinterpret_cast<uint64_t>(builtin_function->arg1()));

      auto thunk = backend()->guest_to_host_thunk();
      MOV(X16, reinterpret_cast<uint64_t>(thunk));
      BLR(X16);

      // x0 = host return
    }
  } else if (function->behavior() == Function::Behavior::kExtern) {
    auto extern_function = static_cast<const GuestFunction*>(function);
    if (extern_function->extern_handler()) {
      undefined = false;
      // x0 = target function
      // x1 = arg0
      // x2 = arg1
      // x3 = arg2
      MOV(X0, reinterpret_cast<uint64_t>(extern_function->extern_handler()));
      LDR(X1, GetContextReg(), offsetof(ppc::PPCContext, kernel_state));

      auto thunk = backend()->guest_to_host_thunk();
      MOV(X16, reinterpret_cast<uint64_t>(thunk));
      BLR(X16);

      // x0 = host return
    }
  }
  if (undefined) {
    CallNative(UndefinedCallExtern, reinterpret_cast<uint64_t>(function));
  }
}

void A64Emitter::CallNative(void* fn) { CallNativeSafe(fn); }

void A64Emitter::CallNative(uint64_t (*fn)(void* raw_context)) {
  CallNativeSafe(reinterpret_cast<void*>(fn));
}

void A64Emitter::CallNative(uint64_t (*fn)(void* raw_context, uint64_t arg0)) {
  CallNativeSafe(reinterpret_cast<void*>(fn));
}

void A64Emitter::CallNative(uint64_t (*fn)(void* raw_context, uint64_t arg0),
                            uint64_t arg0) {
  MOV(GetNativeParam(0), arg0);
  CallNativeSafe(reinterpret_cast<void*>(fn));
}

void A64Emitter::CallNativeSafe(void* fn) {
  // X0 = target function
  // X1 = arg0
  // X2 = arg1
  // X3 = arg2
  auto thunk = backend()->guest_to_host_thunk();

  MOV(X0, reinterpret_cast<uint64_t>(fn));

  MOV(X16, reinterpret_cast<uint64_t>(thunk));
  BLR(X16);

  // X0 = host return
}

void A64Emitter::SetReturnAddress(uint64_t value) {
  MOV(X0, value);
  STR(X0, SP, StackLayout::GUEST_CALL_RET_ADDR);
}

oaknut::XReg A64Emitter::GetNativeParam(uint32_t param) {
  if (param == 0)
    return X1;
  else if (param == 1)
    return X2;
  else if (param == 2)
    return X3;

  assert_always();
  return X3;
}

// Important: If you change these, you must update the thunks in a64_backend.cc!
oaknut::XReg A64Emitter::GetContextReg() { return X27; }
oaknut::XReg A64Emitter::GetMembaseReg() { return X28; }

void A64Emitter::ReloadContext() {
  LDR(GetContextReg(), SP, StackLayout::GUEST_CTX_HOME);
}

void A64Emitter::ReloadMembase() {
  LDR(GetMembaseReg(), GetContextReg(),
      offsetof(ppc::PPCContext, virtual_membase));
}

bool A64Emitter::ConstantFitsIn32Reg(uint64_t v) {
  if ((v & ~0x7FFFFFFF) == 0) {
    // Fits under 31 bits, so just load using normal mov.
    return true;
  } else if ((v & ~0x7FFFFFFF) == ~0x7FFFFFFF) {
    // Negative number that fits in 32bits.
    return true;
  }
  return false;
}
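
// Illustrative values: 0x000000007FFFFFFF -> true (zero-extends from 32
// bits), 0xFFFFFFFF80000000 -> true (sign-extends from a negative 32-bit
// value), 0x0000000080000000 -> false (too large for a sign-extended 32-bit
// immediate).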
|
||||
void A64Emitter::MovMem64(const oaknut::XRegSp& addr, intptr_t offset,
|
||||
uint64_t v) {
|
||||
if (v == 0) {
|
||||
STR(XZR, addr, offset);
|
||||
} else if (!(v >> 32)) {
|
||||
// All high bits are zero, 32-bit MOV
|
||||
MOV(W0, static_cast<uint32_t>(v));
|
||||
STR(X0, addr, offset);
|
||||
} else {
|
||||
// 64bit number that needs double movs.
|
||||
MOV(X0, v);
|
||||
STR(X0, addr, offset);
|
||||
}
|
||||
}
|
||||
|
||||
static const vec128_t v_consts[] = {
|
||||
/* VZero */ vec128f(0.0f),
|
||||
/* VOnePD */ vec128d(1.0),
|
||||
/* VNegativeOne */ vec128f(-1.0f, -1.0f, -1.0f, -1.0f),
|
||||
/* VFFFF */
|
||||
vec128i(0xFFFFFFFFu, 0xFFFFFFFFu, 0xFFFFFFFFu, 0xFFFFFFFFu),
|
||||
/* VMaskX16Y16 */
|
||||
vec128i(0x0000FFFFu, 0xFFFF0000u, 0x00000000u, 0x00000000u),
|
||||
/* VFlipX16Y16 */
|
||||
vec128i(0x00008000u, 0x00000000u, 0x00000000u, 0x00000000u),
|
||||
/* VFixX16Y16 */ vec128f(-32768.0f, 0.0f, 0.0f, 0.0f),
|
||||
/* VNormalizeX16Y16 */
|
||||
vec128f(1.0f / 32767.0f, 1.0f / (32767.0f * 65536.0f), 0.0f, 0.0f),
|
||||
/* V0001 */ vec128f(0.0f, 0.0f, 0.0f, 1.0f),
|
||||
/* V3301 */ vec128f(3.0f, 3.0f, 0.0f, 1.0f),
|
||||
/* V3331 */ vec128f(3.0f, 3.0f, 3.0f, 1.0f),
|
||||
/* V3333 */ vec128f(3.0f, 3.0f, 3.0f, 3.0f),
|
||||
/* VSignMaskPS */
|
||||
vec128i(0x80000000u, 0x80000000u, 0x80000000u, 0x80000000u),
|
||||
/* VSignMaskPD */
|
||||
vec128i(0x00000000u, 0x80000000u, 0x00000000u, 0x80000000u),
|
||||
/* VAbsMaskPS */
|
||||
vec128i(0x7FFFFFFFu, 0x7FFFFFFFu, 0x7FFFFFFFu, 0x7FFFFFFFu),
|
||||
/* VAbsMaskPD */
|
||||
vec128i(0xFFFFFFFFu, 0x7FFFFFFFu, 0xFFFFFFFFu, 0x7FFFFFFFu),
|
||||
/* VByteSwapMask */
|
||||
vec128i(0x00010203u, 0x04050607u, 0x08090A0Bu, 0x0C0D0E0Fu),
|
||||
/* VByteOrderMask */
|
||||
vec128i(0x01000302u, 0x05040706u, 0x09080B0Au, 0x0D0C0F0Eu),
|
||||
/* VPermuteControl15 */ vec128b(15),
|
||||
/* VPermuteByteMask */ vec128b(0x1F),
|
||||
/* VPackD3DCOLORSat */ vec128i(0x404000FFu),
|
||||
/* VPackD3DCOLOR */
|
||||
vec128i(0xFFFFFFFFu, 0xFFFFFFFFu, 0xFFFFFFFFu, 0x0C000408u),
|
||||
/* VUnpackD3DCOLOR */
|
||||
vec128i(0xFFFFFF0Eu, 0xFFFFFF0Du, 0xFFFFFF0Cu, 0xFFFFFF0Fu),
|
||||
/* VPackFLOAT16_2 */
|
||||
vec128i(0xFFFFFFFFu, 0xFFFFFFFFu, 0xFFFFFFFFu, 0x01000302u),
|
||||
/* VUnpackFLOAT16_2 */
|
||||
vec128i(0x0D0C0F0Eu, 0xFFFFFFFFu, 0xFFFFFFFFu, 0xFFFFFFFFu),
|
||||
/* VPackFLOAT16_4 */
|
||||
vec128i(0xFFFFFFFFu, 0xFFFFFFFFu, 0x01000302u, 0x05040706u),
|
||||
/* VUnpackFLOAT16_4 */
|
||||
vec128i(0x09080B0Au, 0x0D0C0F0Eu, 0xFFFFFFFFu, 0xFFFFFFFFu),
|
||||
/* VPackSHORT_Min */ vec128i(0x403F8001u),
|
||||
/* VPackSHORT_Max */ vec128i(0x40407FFFu),
|
||||
/* VPackSHORT_2 */
|
||||
vec128i(0xFFFFFFFFu, 0xFFFFFFFFu, 0xFFFFFFFFu, 0x01000504u),
|
||||
/* VPackSHORT_4 */
|
||||
vec128i(0xFFFFFFFFu, 0xFFFFFFFFu, 0x01000504u, 0x09080D0Cu),
|
||||
/* VUnpackSHORT_2 */
|
||||
vec128i(0xFFFF0F0Eu, 0xFFFF0D0Cu, 0xFFFFFFFFu, 0xFFFFFFFFu),
|
||||
/* VUnpackSHORT_4 */
|
||||
vec128i(0xFFFF0B0Au, 0xFFFF0908u, 0xFFFF0F0Eu, 0xFFFF0D0Cu),
|
||||
/* VUnpackSHORT_Overflow */ vec128i(0x403F8000u),
|
||||
/* VPackUINT_2101010_MinUnpacked */
|
||||
vec128i(0x403FFE01u, 0x403FFE01u, 0x403FFE01u, 0x40400000u),
|
||||
/* VPackUINT_2101010_MaxUnpacked */
|
||||
vec128i(0x404001FFu, 0x404001FFu, 0x404001FFu, 0x40400003u),
|
||||
/* VPackUINT_2101010_MaskUnpacked */
|
||||
vec128i(0x3FFu, 0x3FFu, 0x3FFu, 0x3u),
|
||||
/* VPackUINT_2101010_MaskPacked */
|
||||
vec128i(0x3FFu, 0x3FFu << 10, 0x3FFu << 20, 0x3u << 30),
|
||||
/* VPackUINT_2101010_Shift */ vec128i(0, 10, 20, 30),
|
||||
/* VUnpackUINT_2101010_Overflow */ vec128i(0x403FFE00u),
|
||||
/* VPackULONG_4202020_MinUnpacked */
|
||||
vec128i(0x40380001u, 0x40380001u, 0x40380001u, 0x40400000u),
|
||||
/* VPackULONG_4202020_MaxUnpacked */
|
||||
vec128i(0x4047FFFFu, 0x4047FFFFu, 0x4047FFFFu, 0x4040000Fu),
|
||||
/* VPackULONG_4202020_MaskUnpacked */
|
||||
vec128i(0xFFFFFu, 0xFFFFFu, 0xFFFFFu, 0xFu),
|
||||
/* VPackULONG_4202020_PermuteXZ */
|
||||
vec128i(0xFFFFFFFFu, 0xFFFFFFFFu, 0x0A0908FFu, 0xFF020100u),
|
||||
/* VPackULONG_4202020_PermuteYW */
|
||||
vec128i(0xFFFFFFFFu, 0xFFFFFFFFu, 0x0CFFFF06u, 0x0504FFFFu),
|
||||
/* VUnpackULONG_4202020_Permute */
|
||||
vec128i(0xFF0E0D0Cu, 0xFF0B0A09u, 0xFF080F0Eu, 0xFFFFFF0Bu),
|
||||
/* VUnpackULONG_4202020_Overflow */ vec128i(0x40380000u),
|
||||
/* VOneOver255 */ vec128f(1.0f / 255.0f),
|
||||
/* VMaskEvenPI16 */
|
||||
vec128i(0x0000FFFFu, 0x0000FFFFu, 0x0000FFFFu, 0x0000FFFFu),
|
||||
/* VShiftMaskEvenPI16 */
|
||||
vec128i(0x0000000Fu, 0x0000000Fu, 0x0000000Fu, 0x0000000Fu),
|
||||
/* VShiftMaskPS */
|
||||
vec128i(0x0000001Fu, 0x0000001Fu, 0x0000001Fu, 0x0000001Fu),
|
||||
/* VShiftByteMask */
|
||||
vec128i(0x000000FFu, 0x000000FFu, 0x000000FFu, 0x000000FFu),
|
||||
/* VSwapWordMask */
|
||||
vec128i(0x03030303u, 0x03030303u, 0x03030303u, 0x03030303u),
|
||||
/* VUnsignedDwordMax */
|
||||
vec128i(0xFFFFFFFFu, 0x00000000u, 0xFFFFFFFFu, 0x00000000u),
|
||||
/* V255 */ vec128f(255.0f),
|
||||
/* VPI32 */ vec128i(32),
|
||||
/* VSignMaskI8 */
|
||||
vec128i(0x80808080u, 0x80808080u, 0x80808080u, 0x80808080u),
|
||||
/* VSignMaskI16 */
|
||||
vec128i(0x80008000u, 0x80008000u, 0x80008000u, 0x80008000u),
|
||||
/* VSignMaskI32 */
|
||||
vec128i(0x80000000u, 0x80000000u, 0x80000000u, 0x80000000u),
|
||||
/* VSignMaskF32 */
|
||||
vec128i(0x80000000u, 0x80000000u, 0x80000000u, 0x80000000u),
|
||||
/* VShortMinPS */ vec128f(SHRT_MIN),
|
||||
/* VShortMaxPS */ vec128f(SHRT_MAX),
|
||||
/* VIntMin */ vec128i(INT_MIN),
|
||||
/* VIntMax */ vec128i(INT_MAX),
|
||||
/* VIntMaxPD */ vec128d(INT_MAX),
|
||||
/* VPosIntMinPS */ vec128f((float)0x80000000u),
|
||||
/* VQNaN */ vec128i(0x7FC00000u),
|
||||
/* VInt127 */ vec128i(0x7Fu),
|
||||
/* V2To32 */ vec128f(0x1.0p32f),
|
||||
};
|
||||
|
||||
// First location to try and place constants.
|
||||
static const uintptr_t kConstDataLocation = 0x20000000;
|
||||
static const uintptr_t kConstDataSize = sizeof(v_consts);
|
||||
|
||||
// Increment the location by this amount for every allocation failure.
|
||||
static const uintptr_t kConstDataIncrement = 0x00001000;
|
||||
|
||||
// This function places constant data that is used by the emitter later on.
|
||||
// Only called once and used by multiple instances of the emitter.
|
||||
//
|
||||
// TODO(DrChat): This should be placed in the code cache with the code, but
|
||||
// doing so requires RIP-relative addressing, which is difficult to support
|
||||
// given the current setup.
|
||||
uintptr_t A64Emitter::PlaceConstData() {
|
||||
uint8_t* ptr = reinterpret_cast<uint8_t*>(kConstDataLocation);
|
||||
void* mem = nullptr;
|
||||
while (!mem) {
|
||||
mem = memory::AllocFixed(
|
||||
ptr, xe::round_up(kConstDataSize, memory::page_size()),
|
||||
memory::AllocationType::kReserveCommit, memory::PageAccess::kReadWrite);
|
||||
|
||||
ptr += kConstDataIncrement;
|
||||
}
|
||||
|
||||
// The pointer must not be greater than 31 bits.
|
||||
assert_zero(reinterpret_cast<uintptr_t>(mem) & ~0x7FFFFFFF);
|
||||
std::memcpy(mem, v_consts, sizeof(v_consts));
|
||||
memory::Protect(mem, kConstDataSize, memory::PageAccess::kReadOnly, nullptr);
|
||||
|
||||
return reinterpret_cast<uintptr_t>(mem);
|
||||
}
|
||||
|
||||
void A64Emitter::FreeConstData(uintptr_t data) {
|
||||
memory::DeallocFixed(reinterpret_cast<void*>(data), 0,
|
||||
memory::DeallocationType::kRelease);
|
||||
}
|
||||
|
||||
uintptr_t A64Emitter::GetVConstPtr() const { return backend_->emitter_data(); }
|
||||
|
||||
uintptr_t A64Emitter::GetVConstPtr(VConst id) const {
|
||||
// Load through the fixed constant table set up by PlaceConstData.
|
||||
// It's important that the pointer is not signed, as it will be sign-extended.
|
||||
return GetVConstPtr() + GetVConstOffset(id);
|
||||
}
|
||||
|
||||
// Implies possible StashV(0, ...)!
|
||||
void A64Emitter::LoadConstantV(oaknut::QReg dest, const vec128_t& v) {
|
||||
if (!v.low && !v.high) {
|
||||
// 0000...
|
||||
// MOVI is implemented as a register-rename while EOR(x, x, x) is not
|
||||
// https://dougallj.github.io/applecpu/firestorm.html
|
||||
MOVI(dest.B16(), 0);
|
||||
} else if (v.low == ~uint64_t(0) && v.high == ~uint64_t(0)) {
|
||||
// 1111...
|
||||
MOVI(dest.B16(), 0xFF);
|
||||
} else {
|
||||
// Try to figure out some common splat-patterns to utilize MOVI rather than
|
||||
// stashing to memory.
|
||||
const bool all_same_u8 =
|
||||
std::adjacent_find(std::cbegin(v.u8), std::cend(v.u8),
|
||||
std::not_equal_to<>()) == std::cend(v.u8);
|
||||
|
||||
if (all_same_u8) {
|
||||
// 0xXX, 0xXX, 0xXX...
|
||||
MOVI(dest.B16(), v.u8[0]);
|
||||
return;
|
||||
}
|
||||
|
||||
const bool all_same_u16 =
|
||||
std::adjacent_find(std::cbegin(v.u16), std::cend(v.u16),
|
||||
std::not_equal_to<>()) == std::cend(v.u16);
|
||||
|
||||
if (all_same_u16) {
|
||||
if ((v.u16[0] & 0xFF00) == 0) {
|
||||
// 0x00XX, 0x00XX, 0x00XX...
|
||||
MOVI(dest.H8(), uint8_t(v.u16[0]));
|
||||
return;
|
||||
} else if ((v.u16[0] & 0x00FF) == 0) {
|
||||
// 0xXX00, 0xXX00, 0xXX00...
|
||||
MOVI(dest.H8(), uint8_t(v.u16[0] >> 8), oaknut::util::LSL, 8);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
const bool all_same_u32 =
|
||||
std::adjacent_find(std::cbegin(v.u32), std::cend(v.u32),
|
||||
std::not_equal_to<>()) == std::cend(v.u32);
|
||||
|
||||
if (all_same_u32) {
|
||||
if ((v.u32[0] & 0x00FFFFFF) == 0) {
|
||||
// This pattern is common for certain float splats and should be checked
|
||||
// before the other byte positions.
|
||||
// 0xXX000000, 0xXX000000, 0xXX000000...
|
||||
MOVI(dest.S4(), uint8_t(v.u32[0] >> 24), oaknut::util::LSL, 24);
|
||||
return;
|
||||
} else if ((v.u32[0] & 0xFFFFFF00) == 0) {
|
||||
// 0x000000XX, 0x000000XX, 0x000000XX...
|
||||
MOVI(dest.S4(), uint8_t(v.u32[0]));
|
||||
return;
|
||||
} else if ((v.u32[0] & 0xFFFF00FF) == 0) {
|
||||
// 0x0000XX00, 0x0000XX00, 0x0000XX00...
|
||||
MOVI(dest.S4(), uint8_t(v.u32[0] >> 8), oaknut::util::LSL, 8);
|
||||
return;
|
||||
} else if ((v.u32[0] & 0xFF00FFFF) == 0) {
|
||||
// 0x00XX0000, 0x00XX0000, 0x00XX0000...
|
||||
MOVI(dest.S4(), uint8_t(v.u32[0] >> 16), oaknut::util::LSL, 16);
|
||||
return;
|
||||
}
|
||||
|
||||
// Try to utilize FMOV if possible
|
||||
oaknut::FImm8 fp8(0);
|
||||
if (f32_to_fimm8(v.u32[0], fp8)) {
|
||||
FMOV(dest.S4(), fp8);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
// TODO(benvanik): see what other common values are.
|
||||
// TODO(benvanik): build constant table - 99% are reused.
|
||||
MovMem64(SP, kStashOffset, v.low);
|
||||
MovMem64(SP, kStashOffset + 8, v.high);
|
||||
LDR(dest, SP, kStashOffset);
|
||||
}
|
||||
}
|
||||
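// Illustrative helper, an assumption rather than code in the tree: the three
// splat checks above are instances of this single pattern.
template <typename T, size_t N>
static bool IsSplat(const T (&arr)[N]) {
  // A range is a splat iff no adjacent pair of elements differs.
  return std::adjacent_find(std::cbegin(arr), std::cend(arr),
                            std::not_equal_to<>()) == std::cend(arr);
}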
|
||||
void A64Emitter::LoadConstantV(oaknut::QReg dest, float v) {
|
||||
union {
|
||||
float f;
|
||||
uint32_t i;
|
||||
} x = {v};
|
||||
if (!x.i) {
|
||||
// +0.0f (but not -0.0f because it may be used to flip the sign via xor).
|
||||
MOVI(dest.B16(), 0);
|
||||
} else if (x.i == ~uint32_t(0)) {
|
||||
// 1111...
|
||||
MOVI(dest.B16(), 0xFF);
|
||||
} else {
|
||||
// TODO(benvanik): see what other common values are.
|
||||
// TODO(benvanik): build constant table - 99% are reused.
|
||||
|
||||
// Try to utilize FMOV if possible
|
||||
oaknut::FImm8 fp8(0);
|
||||
if (f32_to_fimm8(x.i, fp8)) {
|
||||
FMOV(dest.toS(), fp8);
|
||||
return;
|
||||
}
|
||||
|
||||
MOV(W0, x.i);
|
||||
FMOV(dest.toS(), W0);
|
||||
}
|
||||
}
|
||||
|
||||
void A64Emitter::LoadConstantV(oaknut::QReg dest, double v) {
|
||||
union {
|
||||
double d;
|
||||
uint64_t i;
|
||||
} x = {v};
|
||||
if (!x.i) {
|
||||
// +0.0 (but not -0.0 because it may be used to flip the sign via xor).
|
||||
MOVI(dest.toD(), oaknut::RepImm(0));
|
||||
} else if (x.i == ~uint64_t(0)) {
|
||||
// 1111...
|
||||
MOVI(dest.toD(), oaknut::RepImm(0xFF));
|
||||
} else {
|
||||
// TODO(benvanik): see what other common values are.
|
||||
// TODO(benvanik): build constant table - 99% are reused.
|
||||
|
||||
// Try to utilize FMOV if possible
|
||||
oaknut::FImm8 fp8(0);
|
||||
if (f64_to_fimm8(x.i, fp8)) {
|
||||
FMOV(dest.toD(), fp8);
|
||||
return;
|
||||
}
|
||||
|
||||
MOV(X0, x.i);
|
||||
FMOV(dest.toD(), X0);
|
||||
}
|
||||
}
|
||||
|
||||
uintptr_t A64Emitter::StashV(int index, const oaknut::QReg& r) {
|
||||
// x64 equivalent: vmovups(ptr[rsp + kStashOffset + (index * 16)], r).
|
||||
const auto addr = kStashOffset + (index * 16);
|
||||
STR(r, SP, addr);
|
||||
return addr;
|
||||
}
|
||||
|
||||
uintptr_t A64Emitter::StashConstantV(int index, float v) {
|
||||
union {
|
||||
float f;
|
||||
uint32_t i;
|
||||
} x = {v};
|
||||
const auto addr = kStashOffset + (index * 16);
|
||||
MovMem64(SP, addr, x.i);
|
||||
MovMem64(SP, addr + 8, 0);
|
||||
return addr;
|
||||
}
|
||||
|
||||
uintptr_t A64Emitter::StashConstantV(int index, double v) {
|
||||
union {
|
||||
double d;
|
||||
uint64_t i;
|
||||
} x = {v};
|
||||
const auto addr = kStashOffset + (index * 16);
|
||||
MovMem64(SP, addr, x.i);
|
||||
MovMem64(SP, addr + 8, 0);
|
||||
return addr;
|
||||
}
|
||||
|
||||
uintptr_t A64Emitter::StashConstantV(int index, const vec128_t& v) {
|
||||
const auto addr = kStashOffset + (index * 16);
|
||||
MovMem64(SP, addr, v.low);
|
||||
MovMem64(SP, addr + 8, v.high);
|
||||
return addr;
|
||||
}
|
||||
|
||||
} // namespace a64
|
||||
} // namespace backend
|
||||
} // namespace cpu
|
||||
} // namespace xe
|
|
@@ -0,0 +1,267 @@
|
|||
/**
|
||||
******************************************************************************
|
||||
* Xenia : Xbox 360 Emulator Research Project *
|
||||
******************************************************************************
|
||||
* Copyright 2024 Ben Vanik. All rights reserved. *
|
||||
* Released under the BSD license - see LICENSE in the root for more details. *
|
||||
******************************************************************************
|
||||
*/
|
||||
|
||||
#ifndef XENIA_CPU_BACKEND_A64_A64_EMITTER_H_
|
||||
#define XENIA_CPU_BACKEND_A64_A64_EMITTER_H_
|
||||
|
||||
#include <unordered_map>
|
||||
#include <vector>
|
||||
|
||||
#include "xenia/base/arena.h"
|
||||
#include "xenia/cpu/function.h"
|
||||
#include "xenia/cpu/function_trace_data.h"
|
||||
#include "xenia/cpu/hir/hir_builder.h"
|
||||
#include "xenia/cpu/hir/instr.h"
|
||||
#include "xenia/cpu/hir/value.h"
|
||||
#include "xenia/memory.h"
|
||||
|
||||
#include "oaknut/code_block.hpp"
|
||||
#include "oaknut/oaknut.hpp"
|
||||
|
||||
namespace xe {
|
||||
namespace cpu {
|
||||
class Processor;
|
||||
} // namespace cpu
|
||||
} // namespace xe
|
||||
|
||||
namespace xe {
|
||||
namespace cpu {
|
||||
namespace backend {
|
||||
namespace a64 {
|
||||
|
||||
class A64Backend;
|
||||
class A64CodeCache;
|
||||
|
||||
struct EmitFunctionInfo;
|
||||
|
||||
enum RegisterFlags {
|
||||
REG_DEST = (1 << 0),
|
||||
REG_ABCD = (1 << 1),
|
||||
};
|
||||
|
||||
enum VConst {
|
||||
VZero = 0,
|
||||
VOnePD,
|
||||
VNegativeOne,
|
||||
VFFFF,
|
||||
VMaskX16Y16,
|
||||
VFlipX16Y16,
|
||||
VFixX16Y16,
|
||||
VNormalizeX16Y16,
|
||||
V0001,
|
||||
V3301,
|
||||
V3331,
|
||||
V3333,
|
||||
VSignMaskPS,
|
||||
VSignMaskPD,
|
||||
VAbsMaskPS,
|
||||
VAbsMaskPD,
|
||||
VByteSwapMask,
|
||||
VByteOrderMask,
|
||||
VPermuteControl15,
|
||||
VPermuteByteMask,
|
||||
VPackD3DCOLORSat,
|
||||
VPackD3DCOLOR,
|
||||
VUnpackD3DCOLOR,
|
||||
VPackFLOAT16_2,
|
||||
VUnpackFLOAT16_2,
|
||||
VPackFLOAT16_4,
|
||||
VUnpackFLOAT16_4,
|
||||
VPackSHORT_Min,
|
||||
VPackSHORT_Max,
|
||||
VPackSHORT_2,
|
||||
VPackSHORT_4,
|
||||
VUnpackSHORT_2,
|
||||
VUnpackSHORT_4,
|
||||
VUnpackSHORT_Overflow,
|
||||
VPackUINT_2101010_MinUnpacked,
|
||||
VPackUINT_2101010_MaxUnpacked,
|
||||
VPackUINT_2101010_MaskUnpacked,
|
||||
VPackUINT_2101010_MaskPacked,
|
||||
VPackUINT_2101010_Shift,
|
||||
VUnpackUINT_2101010_Overflow,
|
||||
VPackULONG_4202020_MinUnpacked,
|
||||
VPackULONG_4202020_MaxUnpacked,
|
||||
VPackULONG_4202020_MaskUnpacked,
|
||||
VPackULONG_4202020_PermuteXZ,
|
||||
VPackULONG_4202020_PermuteYW,
|
||||
VUnpackULONG_4202020_Permute,
|
||||
VUnpackULONG_4202020_Overflow,
|
||||
VOneOver255,
|
||||
VMaskEvenPI16,
|
||||
VShiftMaskEvenPI16,
|
||||
VShiftMaskPS,
|
||||
VShiftByteMask,
|
||||
VSwapWordMask,
|
||||
VUnsignedDwordMax,
|
||||
V255,
|
||||
VPI32,
|
||||
VSignMaskI8,
|
||||
VSignMaskI16,
|
||||
VSignMaskI32,
|
||||
VSignMaskF32,
|
||||
VShortMinPS,
|
||||
VShortMaxPS,
|
||||
VIntMin,
|
||||
VIntMax,
|
||||
VIntMaxPD,
|
||||
VPosIntMinPS,
|
||||
VQNaN,
|
||||
VInt127,
|
||||
V2To32,
|
||||
};
|
||||
|
||||
enum A64EmitterFeatureFlags {
|
||||
kA64EmitLSE = 1 << 0,
|
||||
kA64EmitF16C = 1 << 1,
|
||||
};
|
||||
|
||||
class A64Emitter : public oaknut::VectorCodeGenerator {
|
||||
public:
|
||||
A64Emitter(A64Backend* backend);
|
||||
virtual ~A64Emitter();
|
||||
|
||||
Processor* processor() const { return processor_; }
|
||||
A64Backend* backend() const { return backend_; }
|
||||
|
||||
static uintptr_t PlaceConstData();
|
||||
static void FreeConstData(uintptr_t data);
|
||||
|
||||
bool Emit(GuestFunction* function, hir::HIRBuilder* builder,
|
||||
uint32_t debug_info_flags, FunctionDebugInfo* debug_info,
|
||||
void** out_code_address, size_t* out_code_size,
|
||||
std::vector<SourceMapEntry>* out_source_map);
|
||||
|
||||
public:
|
||||
// Reserved: XSP, X27, X28
|
||||
// Scratch: X1-X15, X30 | V0-V7 and V16-V31
|
||||
// V0-V2
|
||||
// Available: X19-X26
|
||||
// V4-V15 (save to get V3)
|
||||
static const size_t GPR_COUNT = 8;
|
||||
static const size_t FPR_COUNT = 8;
|
||||
|
||||
static void SetupReg(const hir::Value* v, oaknut::WReg& r) {
|
||||
const auto idx = gpr_reg_map_[v->reg.index];
|
||||
r = oaknut::WReg(idx);
|
||||
}
|
||||
static void SetupReg(const hir::Value* v, oaknut::XReg& r) {
|
||||
const auto idx = gpr_reg_map_[v->reg.index];
|
||||
r = oaknut::XReg(idx);
|
||||
}
|
||||
static void SetupReg(const hir::Value* v, oaknut::SReg& r) {
|
||||
const auto idx = fpr_reg_map_[v->reg.index];
|
||||
r = oaknut::SReg(idx);
|
||||
}
|
||||
static void SetupReg(const hir::Value* v, oaknut::DReg& r) {
|
||||
const auto idx = fpr_reg_map_[v->reg.index];
|
||||
r = oaknut::DReg(idx);
|
||||
}
|
||||
static void SetupReg(const hir::Value* v, oaknut::QReg& r) {
|
||||
const auto idx = fpr_reg_map_[v->reg.index];
|
||||
r = oaknut::QReg(idx);
|
||||
}
|
||||
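// Illustrative use (assumed call shape): sequences resolve an HIR value's
// allocated physical register through these overloads, e.g.
//   oaknut::QReg q(0);
//   A64Emitter::SetupReg(hir_value, q);  // q now names fpr_reg_map_[index]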
|
||||
// Gets (and possibly creates) the oaknut label for the given HIR label name.
|
||||
oaknut::Label* lookup_label(const char* label_name) {
|
||||
return &label_lookup_[label_name];
|
||||
}
|
||||
|
||||
oaknut::Label& epilog_label() { return *epilog_label_; }
|
||||
|
||||
void MarkSourceOffset(const hir::Instr* i);
|
||||
|
||||
void DebugBreak();
|
||||
void Trap(uint16_t trap_type = 0);
|
||||
void UnimplementedInstr(const hir::Instr* i);
|
||||
|
||||
void Call(const hir::Instr* instr, GuestFunction* function);
|
||||
void CallIndirect(const hir::Instr* instr, const oaknut::XReg& reg);
|
||||
void CallExtern(const hir::Instr* instr, const Function* function);
|
||||
void CallNative(void* fn);
|
||||
void CallNative(uint64_t (*fn)(void* raw_context));
|
||||
void CallNative(uint64_t (*fn)(void* raw_context, uint64_t arg0));
|
||||
void CallNative(uint64_t (*fn)(void* raw_context, uint64_t arg0),
|
||||
uint64_t arg0);
|
||||
void CallNativeSafe(void* fn);
|
||||
void SetReturnAddress(uint64_t value);
|
||||
|
||||
static oaknut::XReg GetNativeParam(uint32_t param);
|
||||
|
||||
static oaknut::XReg GetContextReg();
|
||||
static oaknut::XReg GetMembaseReg();
|
||||
void ReloadContext();
|
||||
void ReloadMembase();
|
||||
|
||||
// Moves a 64-bit immediate into memory.
|
||||
static bool ConstantFitsIn32Reg(uint64_t v);
|
||||
void MovMem64(const oaknut::XRegSp& addr, intptr_t offset, uint64_t v);
|
||||
|
||||
uintptr_t GetVConstPtr() const;
|
||||
uintptr_t GetVConstPtr(VConst id) const;
|
||||
static constexpr uintptr_t GetVConstOffset(VConst id) {
|
||||
return sizeof(vec128_t) * id;
|
||||
}
|
||||
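// Example (illustrative): constants live in 16-byte slots indexed by the
// VConst enum, so, e.g.,
//   GetVConstOffset(VZero)  == 0
//   GetVConstOffset(VOnePD) == sizeof(vec128_t)
// and GetVConstPtr(id) == GetVConstPtr() + sizeof(vec128_t) * id.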
void LoadConstantV(oaknut::QReg dest, float v);
|
||||
void LoadConstantV(oaknut::QReg dest, double v);
|
||||
void LoadConstantV(oaknut::QReg dest, const vec128_t& v);
|
||||
|
||||
// Returned addresses are relative to XSP
|
||||
uintptr_t StashV(int index, const oaknut::QReg& r);
|
||||
uintptr_t StashConstantV(int index, float v);
|
||||
uintptr_t StashConstantV(int index, double v);
|
||||
uintptr_t StashConstantV(int index, const vec128_t& v);
|
||||
|
||||
bool IsFeatureEnabled(uint32_t feature_flag) const {
|
||||
return (feature_flags_ & feature_flag) == feature_flag;
|
||||
}
|
||||
|
||||
FunctionDebugInfo* debug_info() const { return debug_info_; }
|
||||
|
||||
size_t stack_size() const { return stack_size_; }
|
||||
|
||||
protected:
|
||||
void* Emplace(const EmitFunctionInfo& func_info,
|
||||
GuestFunction* function = nullptr);
|
||||
bool Emit(hir::HIRBuilder* builder, EmitFunctionInfo& func_info);
|
||||
void EmitGetCurrentThreadId();
|
||||
void EmitTraceUserCallReturn();
|
||||
|
||||
protected:
|
||||
Processor* processor_ = nullptr;
|
||||
A64Backend* backend_ = nullptr;
|
||||
A64CodeCache* code_cache_ = nullptr;
|
||||
uint32_t feature_flags_ = 0;
|
||||
|
||||
std::vector<std::uint32_t> assembly_buffer;
|
||||
|
||||
oaknut::Label* epilog_label_ = nullptr;
|
||||
|
||||
// Convert from plain-text label-names into oaknut-labels
|
||||
std::unordered_map<std::string, oaknut::Label> label_lookup_;
|
||||
|
||||
hir::Instr* current_instr_ = nullptr;
|
||||
|
||||
FunctionDebugInfo* debug_info_ = nullptr;
|
||||
uint32_t debug_info_flags_ = 0;
|
||||
FunctionTraceData* trace_data_ = nullptr;
|
||||
Arena source_map_arena_;
|
||||
|
||||
size_t stack_size_ = 0;
|
||||
|
||||
static const uint8_t gpr_reg_map_[GPR_COUNT];
|
||||
static const uint8_t fpr_reg_map_[FPR_COUNT];
|
||||
};
|
||||
|
||||
} // namespace a64
|
||||
} // namespace backend
|
||||
} // namespace cpu
|
||||
} // namespace xe
|
||||
|
||||
#endif // XENIA_CPU_BACKEND_A64_A64_EMITTER_H_
|
|
@@ -0,0 +1,45 @@
|
|||
/**
|
||||
******************************************************************************
|
||||
* Xenia : Xbox 360 Emulator Research Project *
|
||||
******************************************************************************
|
||||
* Copyright 2024 Ben Vanik. All rights reserved. *
|
||||
* Released under the BSD license - see LICENSE in the root for more details. *
|
||||
******************************************************************************
|
||||
*/
|
||||
|
||||
#include "xenia/cpu/backend/a64/a64_function.h"
|
||||
|
||||
#include "xenia/cpu/backend/a64/a64_backend.h"
|
||||
#include "xenia/cpu/processor.h"
|
||||
#include "xenia/cpu/thread_state.h"
|
||||
|
||||
namespace xe {
|
||||
namespace cpu {
|
||||
namespace backend {
|
||||
namespace a64 {
|
||||
|
||||
A64Function::A64Function(Module* module, uint32_t address)
|
||||
: GuestFunction(module, address) {}
|
||||
|
||||
A64Function::~A64Function() {
|
||||
// machine_code_ is freed by code cache.
|
||||
}
|
||||
|
||||
void A64Function::Setup(uint8_t* machine_code, size_t machine_code_length) {
|
||||
machine_code_ = machine_code;
|
||||
machine_code_length_ = machine_code_length;
|
||||
}
|
||||
|
||||
bool A64Function::CallImpl(ThreadState* thread_state, uint32_t return_address) {
|
||||
auto backend =
|
||||
reinterpret_cast<A64Backend*>(thread_state->processor()->backend());
|
||||
auto thunk = backend->host_to_guest_thunk();
|
||||
thunk(machine_code_, thread_state->context(),
|
||||
reinterpret_cast<void*>(uintptr_t(return_address)));
|
||||
return true;
|
||||
}
|
||||
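// Illustrative note (assumption, mirroring the x64 backend): the
// host-to-guest thunk saves host callee-saved state, installs the context and
// membase registers, then jumps into machine_code_; returning true only
// signals that the call was dispatched.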
|
||||
} // namespace a64
|
||||
} // namespace backend
|
||||
} // namespace cpu
|
||||
} // namespace xe
|
|
@@ -0,0 +1,44 @@
|
|||
/**
|
||||
******************************************************************************
|
||||
* Xenia : Xbox 360 Emulator Research Project *
|
||||
******************************************************************************
|
||||
* Copyright 2024 Ben Vanik. All rights reserved. *
|
||||
* Released under the BSD license - see LICENSE in the root for more details. *
|
||||
******************************************************************************
|
||||
*/
|
||||
|
||||
#ifndef XENIA_CPU_BACKEND_A64_A64_FUNCTION_H_
|
||||
#define XENIA_CPU_BACKEND_A64_A64_FUNCTION_H_
|
||||
|
||||
#include "xenia/cpu/function.h"
|
||||
#include "xenia/cpu/thread_state.h"
|
||||
|
||||
namespace xe {
|
||||
namespace cpu {
|
||||
namespace backend {
|
||||
namespace a64 {
|
||||
|
||||
class A64Function : public GuestFunction {
|
||||
public:
|
||||
A64Function(Module* module, uint32_t address);
|
||||
~A64Function() override;
|
||||
|
||||
uint8_t* machine_code() const override { return machine_code_; }
|
||||
size_t machine_code_length() const override { return machine_code_length_; }
|
||||
|
||||
void Setup(uint8_t* machine_code, size_t machine_code_length);
|
||||
|
||||
protected:
|
||||
bool CallImpl(ThreadState* thread_state, uint32_t return_address) override;
|
||||
|
||||
private:
|
||||
uint8_t* machine_code_ = nullptr;
|
||||
size_t machine_code_length_ = 0;
|
||||
};
|
||||
|
||||
} // namespace a64
|
||||
} // namespace backend
|
||||
} // namespace cpu
|
||||
} // namespace xe
|
||||
|
||||
#endif // XENIA_CPU_BACKEND_A64_A64_FUNCTION_H_
|
|
@@ -0,0 +1,618 @@
|
|||
/**
|
||||
******************************************************************************
|
||||
* Xenia : Xbox 360 Emulator Research Project *
|
||||
******************************************************************************
|
||||
* Copyright 2024 Xenia Developers. All rights reserved. *
|
||||
* Released under the BSD license - see LICENSE in the root for more details. *
|
||||
******************************************************************************
|
||||
*/
|
||||
#ifndef XENIA_CPU_BACKEND_A64_A64_OP_H_
|
||||
#define XENIA_CPU_BACKEND_A64_A64_OP_H_
|
||||
|
||||
#include "xenia/cpu/backend/a64/a64_emitter.h"
|
||||
|
||||
#include "xenia/cpu/hir/instr.h"
|
||||
|
||||
namespace xe {
|
||||
namespace cpu {
|
||||
namespace backend {
|
||||
namespace a64 {
|
||||
|
||||
// TODO(benvanik): direct usings.
|
||||
using namespace xe::cpu;
|
||||
using namespace xe::cpu::hir;
|
||||
using namespace oaknut;
|
||||
using namespace oaknut::util;
|
||||
|
||||
// Selects the right byte/word/etc from a vector. We need to flip logical
|
||||
// indices (0,1,2,3,4,5,6,7,...) = (3,2,1,0,7,6,5,4,...)
|
||||
#define VEC128_B(n) ((n) ^ 0x3)
|
||||
#define VEC128_W(n) ((n) ^ 0x1)
|
||||
#define VEC128_D(n) (n)
|
||||
#define VEC128_F(n) (n)
|
||||
|
||||
enum KeyType {
|
||||
KEY_TYPE_X = OPCODE_SIG_TYPE_X,
|
||||
KEY_TYPE_L = OPCODE_SIG_TYPE_L,
|
||||
KEY_TYPE_O = OPCODE_SIG_TYPE_O,
|
||||
KEY_TYPE_S = OPCODE_SIG_TYPE_S,
|
||||
KEY_TYPE_V_I8 = OPCODE_SIG_TYPE_V + INT8_TYPE,
|
||||
KEY_TYPE_V_I16 = OPCODE_SIG_TYPE_V + INT16_TYPE,
|
||||
KEY_TYPE_V_I32 = OPCODE_SIG_TYPE_V + INT32_TYPE,
|
||||
KEY_TYPE_V_I64 = OPCODE_SIG_TYPE_V + INT64_TYPE,
|
||||
KEY_TYPE_V_F32 = OPCODE_SIG_TYPE_V + FLOAT32_TYPE,
|
||||
KEY_TYPE_V_F64 = OPCODE_SIG_TYPE_V + FLOAT64_TYPE,
|
||||
KEY_TYPE_V_V128 = OPCODE_SIG_TYPE_V + VEC128_TYPE,
|
||||
};
|
||||
|
||||
#pragma pack(push, 1)
|
||||
union InstrKey {
|
||||
uint32_t value;
|
||||
struct {
|
||||
uint32_t opcode : 8;
|
||||
uint32_t dest : 5;
|
||||
uint32_t src1 : 5;
|
||||
uint32_t src2 : 5;
|
||||
uint32_t src3 : 5;
|
||||
uint32_t reserved : 4;
|
||||
};
|
||||
|
||||
operator uint32_t() const { return value; }
|
||||
|
||||
InstrKey() : value(0) { static_assert_size(*this, sizeof(value)); }
|
||||
InstrKey(uint32_t v) : value(v) {}
|
||||
InstrKey(const Instr* i) : value(0) {
|
||||
opcode = i->opcode->num;
|
||||
uint32_t sig = i->opcode->signature;
|
||||
dest =
|
||||
GET_OPCODE_SIG_TYPE_DEST(sig) ? OPCODE_SIG_TYPE_V + i->dest->type : 0;
|
||||
src1 = GET_OPCODE_SIG_TYPE_SRC1(sig);
|
||||
if (src1 == OPCODE_SIG_TYPE_V) {
|
||||
src1 += i->src1.value->type;
|
||||
}
|
||||
src2 = GET_OPCODE_SIG_TYPE_SRC2(sig);
|
||||
if (src2 == OPCODE_SIG_TYPE_V) {
|
||||
src2 += i->src2.value->type;
|
||||
}
|
||||
src3 = GET_OPCODE_SIG_TYPE_SRC3(sig);
|
||||
if (src3 == OPCODE_SIG_TYPE_V) {
|
||||
src3 += i->src3.value->type;
|
||||
}
|
||||
}
|
||||
|
||||
template <Opcode OPCODE, KeyType DEST = KEY_TYPE_X, KeyType SRC1 = KEY_TYPE_X,
|
||||
KeyType SRC2 = KEY_TYPE_X, KeyType SRC3 = KEY_TYPE_X>
|
||||
struct Construct {
|
||||
static const uint32_t value =
|
||||
(OPCODE) | (DEST << 8) | (SRC1 << 13) | (SRC2 << 18) | (SRC3 << 23);
|
||||
};
|
||||
};
|
||||
#pragma pack(pop)
|
||||
static_assert(sizeof(InstrKey) <= 4, "Key must be 4 bytes");
|
||||
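// Illustrative example: compile-time and run-time keys share one bit layout.
// For an ADD of two i32 values producing an i32 (names from the HIR opcode
// set), a selector key would be built as
//   constexpr uint32_t kAddI32 =
//       InstrKey::Construct<OPCODE_ADD, KEY_TYPE_V_I32, KEY_TYPE_V_I32,
//                           KEY_TYPE_V_I32>::value;
// and InstrKey(instr).value == kAddI32 exactly when the instruction matches.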
|
||||
template <typename... Ts>
|
||||
struct CombinedStruct;
|
||||
template <>
|
||||
struct CombinedStruct<> {};
|
||||
template <typename T, typename... Ts>
|
||||
struct CombinedStruct<T, Ts...> : T, CombinedStruct<Ts...> {};
|
||||
|
||||
struct OpBase {};
|
||||
|
||||
template <typename T, KeyType KEY_TYPE>
|
||||
struct Op : OpBase {
|
||||
static const KeyType key_type = KEY_TYPE;
|
||||
};
|
||||
|
||||
struct VoidOp : Op<VoidOp, KEY_TYPE_X> {
|
||||
protected:
|
||||
friend struct Op<VoidOp, KEY_TYPE_X>;
|
||||
template <hir::Opcode OPCODE, typename... Ts>
|
||||
friend struct I;
|
||||
void Load(const Instr::Op& op) {}
|
||||
};
|
||||
|
||||
struct OffsetOp : Op<OffsetOp, KEY_TYPE_O> {
|
||||
uint64_t value;
|
||||
|
||||
protected:
|
||||
friend struct Op<OffsetOp, KEY_TYPE_O>;
|
||||
template <hir::Opcode OPCODE, typename... Ts>
|
||||
friend struct I;
|
||||
void Load(const Instr::Op& op) { this->value = op.offset; }
|
||||
};
|
||||
|
||||
struct SymbolOp : Op<SymbolOp, KEY_TYPE_S> {
|
||||
Function* value;
|
||||
|
||||
protected:
|
||||
friend struct Op<SymbolOp, KEY_TYPE_S>;
|
||||
template <hir::Opcode OPCODE, typename... Ts>
|
||||
friend struct I;
|
||||
bool Load(const Instr::Op& op) {
|
||||
this->value = op.symbol;
|
||||
return true;
|
||||
}
|
||||
};
|
||||
|
||||
struct LabelOp : Op<LabelOp, KEY_TYPE_L> {
|
||||
hir::Label* value;
|
||||
|
||||
protected:
|
||||
friend struct Op<LabelOp, KEY_TYPE_L>;
|
||||
template <hir::Opcode OPCODE, typename... Ts>
|
||||
friend struct I;
|
||||
void Load(const Instr::Op& op) { this->value = op.label; }
|
||||
};
|
||||
|
||||
template <typename T, KeyType KEY_TYPE, typename REG_TYPE, typename CONST_TYPE>
|
||||
struct ValueOp : Op<ValueOp<T, KEY_TYPE, REG_TYPE, CONST_TYPE>, KEY_TYPE> {
|
||||
typedef REG_TYPE reg_type;
|
||||
const Value* value;
|
||||
bool is_constant;
|
||||
virtual bool ConstantFitsIn32Reg() const { return true; }
|
||||
const REG_TYPE& reg() const {
|
||||
assert_true(!is_constant);
|
||||
return reg_;
|
||||
}
|
||||
operator const REG_TYPE&() const { return reg(); }
|
||||
bool IsEqual(const T& b) const {
|
||||
if (is_constant && b.is_constant) {
|
||||
return reinterpret_cast<const T*>(this)->constant() == b.constant();
|
||||
} else if (!is_constant && !b.is_constant) {
|
||||
return reg_.index() == b.reg_.index();
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
bool IsEqual(const oaknut::Reg& b) const {
|
||||
if (is_constant) {
|
||||
return false;
|
||||
} else if (!is_constant) {
|
||||
return reg_.index() == b.index();
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
bool operator==(const T& b) const { return IsEqual(b); }
|
||||
bool operator!=(const T& b) const { return !IsEqual(b); }
|
||||
bool operator==(const oaknut::Reg& b) const { return IsEqual(b); }
|
||||
bool operator!=(const oaknut::Reg& b) const { return !IsEqual(b); }
|
||||
void Load(const Instr::Op& op) {
|
||||
value = op.value;
|
||||
is_constant = value->IsConstant();
|
||||
if (!is_constant) {
|
||||
A64Emitter::SetupReg(value, reg_);
|
||||
}
|
||||
}
|
||||
|
||||
protected:
|
||||
REG_TYPE reg_ = REG_TYPE(0);
|
||||
};
|
||||
|
||||
struct I8Op : ValueOp<I8Op, KEY_TYPE_V_I8, WReg, int8_t> {
|
||||
typedef ValueOp<I8Op, KEY_TYPE_V_I8, WReg, int8_t> BASE;
|
||||
const int8_t constant() const {
|
||||
assert_true(BASE::is_constant);
|
||||
return BASE::value->constant.i8;
|
||||
}
|
||||
};
|
||||
struct I16Op : ValueOp<I16Op, KEY_TYPE_V_I16, WReg, int16_t> {
|
||||
typedef ValueOp<I16Op, KEY_TYPE_V_I16, WReg, int16_t> BASE;
|
||||
const int16_t constant() const {
|
||||
assert_true(BASE::is_constant);
|
||||
return BASE::value->constant.i16;
|
||||
}
|
||||
};
|
||||
struct I32Op : ValueOp<I32Op, KEY_TYPE_V_I32, WReg, int32_t> {
|
||||
typedef ValueOp<I32Op, KEY_TYPE_V_I32, WReg, int32_t> BASE;
|
||||
const int32_t constant() const {
|
||||
assert_true(BASE::is_constant);
|
||||
return BASE::value->constant.i32;
|
||||
}
|
||||
};
|
||||
struct I64Op : ValueOp<I64Op, KEY_TYPE_V_I64, XReg, int64_t> {
|
||||
typedef ValueOp<I64Op, KEY_TYPE_V_I64, XReg, int64_t> BASE;
|
||||
const int64_t constant() const {
|
||||
assert_true(BASE::is_constant);
|
||||
return BASE::value->constant.i64;
|
||||
}
|
||||
bool ConstantFitsIn32Reg() const override {
|
||||
int64_t v = BASE::value->constant.i64;
|
||||
if ((v & ~0x7FFFFFFF) == 0) {
|
||||
// Fits under 31 bits, so just load using normal mov.
|
||||
return true;
|
||||
} else if ((v & ~0x7FFFFFFF) == ~0x7FFFFFFF) {
|
||||
// Negative number that fits in 32 bits.
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
};
|
||||
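// Equivalent formulation (illustrative): ConstantFitsIn32Reg above accepts
// exactly the values that survive a round trip through int32_t, i.e.
//   v == static_cast<int64_t>(static_cast<int32_t>(v))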
struct F32Op : ValueOp<F32Op, KEY_TYPE_V_F32, SReg, float> {
|
||||
typedef ValueOp<F32Op, KEY_TYPE_V_F32, SReg, float> BASE;
|
||||
const float constant() const {
|
||||
assert_true(BASE::is_constant);
|
||||
return BASE::value->constant.f32;
|
||||
}
|
||||
};
|
||||
struct F64Op : ValueOp<F64Op, KEY_TYPE_V_F64, DReg, double> {
|
||||
typedef ValueOp<F64Op, KEY_TYPE_V_F64, DReg, double> BASE;
|
||||
const double constant() const {
|
||||
assert_true(BASE::is_constant);
|
||||
return BASE::value->constant.f64;
|
||||
}
|
||||
};
|
||||
struct V128Op : ValueOp<V128Op, KEY_TYPE_V_V128, QReg, vec128_t> {
|
||||
typedef ValueOp<V128Op, KEY_TYPE_V_V128, QReg, vec128_t> BASE;
|
||||
const vec128_t& constant() const {
|
||||
assert_true(BASE::is_constant);
|
||||
return BASE::value->constant.v128;
|
||||
}
|
||||
};
|
||||
|
||||
template <typename DEST, typename... Tf>
|
||||
struct DestField;
|
||||
template <typename DEST>
|
||||
struct DestField<DEST> {
|
||||
DEST dest;
|
||||
|
||||
protected:
|
||||
bool LoadDest(const Instr* i) {
|
||||
Instr::Op op;
|
||||
op.value = i->dest;
|
||||
dest.Load(op);
|
||||
return true;
|
||||
}
|
||||
};
|
||||
template <>
|
||||
struct DestField<VoidOp> {
|
||||
protected:
|
||||
bool LoadDest(const Instr* i) { return true; }
|
||||
};
|
||||
|
||||
template <hir::Opcode OPCODE, typename... Ts>
|
||||
struct I;
|
||||
template <hir::Opcode OPCODE, typename DEST>
|
||||
struct I<OPCODE, DEST> : DestField<DEST> {
|
||||
typedef DestField<DEST> BASE;
|
||||
static const hir::Opcode opcode = OPCODE;
|
||||
static const uint32_t key =
|
||||
InstrKey::Construct<OPCODE, DEST::key_type>::value;
|
||||
static const KeyType dest_type = DEST::key_type;
|
||||
const Instr* instr;
|
||||
|
||||
protected:
|
||||
template <typename SEQ, typename T>
|
||||
friend struct Sequence;
|
||||
bool Load(const Instr* i) {
|
||||
if (InstrKey(i).value == key && BASE::LoadDest(i)) {
|
||||
instr = i;
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
};
|
||||
template <hir::Opcode OPCODE, typename DEST, typename SRC1>
|
||||
struct I<OPCODE, DEST, SRC1> : DestField<DEST> {
|
||||
typedef DestField<DEST> BASE;
|
||||
static const hir::Opcode opcode = OPCODE;
|
||||
static const uint32_t key =
|
||||
InstrKey::Construct<OPCODE, DEST::key_type, SRC1::key_type>::value;
|
||||
static const KeyType dest_type = DEST::key_type;
|
||||
static const KeyType src1_type = SRC1::key_type;
|
||||
const Instr* instr;
|
||||
SRC1 src1 = {};
|
||||
|
||||
protected:
|
||||
template <typename SEQ, typename T>
|
||||
friend struct Sequence;
|
||||
bool Load(const Instr* i) {
|
||||
if (InstrKey(i).value == key && BASE::LoadDest(i)) {
|
||||
instr = i;
|
||||
src1.Load(i->src1);
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
};
|
||||
template <hir::Opcode OPCODE, typename DEST, typename SRC1, typename SRC2>
|
||||
struct I<OPCODE, DEST, SRC1, SRC2> : DestField<DEST> {
|
||||
typedef DestField<DEST> BASE;
|
||||
static const hir::Opcode opcode = OPCODE;
|
||||
static const uint32_t key =
|
||||
InstrKey::Construct<OPCODE, DEST::key_type, SRC1::key_type,
|
||||
SRC2::key_type>::value;
|
||||
static const KeyType dest_type = DEST::key_type;
|
||||
static const KeyType src1_type = SRC1::key_type;
|
||||
static const KeyType src2_type = SRC2::key_type;
|
||||
const Instr* instr;
|
||||
SRC1 src1;
|
||||
SRC2 src2;
|
||||
|
||||
protected:
|
||||
template <typename SEQ, typename T>
|
||||
friend struct Sequence;
|
||||
bool Load(const Instr* i) {
|
||||
if (InstrKey(i).value == key && BASE::LoadDest(i)) {
|
||||
instr = i;
|
||||
src1.Load(i->src1);
|
||||
src2.Load(i->src2);
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
};
|
||||
template <hir::Opcode OPCODE, typename DEST, typename SRC1, typename SRC2,
|
||||
typename SRC3>
|
||||
struct I<OPCODE, DEST, SRC1, SRC2, SRC3> : DestField<DEST> {
|
||||
typedef DestField<DEST> BASE;
|
||||
static const hir::Opcode opcode = OPCODE;
|
||||
static const uint32_t key =
|
||||
InstrKey::Construct<OPCODE, DEST::key_type, SRC1::key_type,
|
||||
SRC2::key_type, SRC3::key_type>::value;
|
||||
static const KeyType dest_type = DEST::key_type;
|
||||
static const KeyType src1_type = SRC1::key_type;
|
||||
static const KeyType src2_type = SRC2::key_type;
|
||||
static const KeyType src3_type = SRC3::key_type;
|
||||
const Instr* instr;
|
||||
SRC1 src1;
|
||||
SRC2 src2;
|
||||
SRC3 src3;
|
||||
|
||||
protected:
|
||||
template <typename SEQ, typename T>
|
||||
friend struct Sequence;
|
||||
bool Load(const Instr* i) {
|
||||
if (InstrKey(i).value == key && BASE::LoadDest(i)) {
|
||||
instr = i;
|
||||
src1.Load(i->src1);
|
||||
src2.Load(i->src2);
|
||||
src3.Load(i->src3);
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
static const T GetTempReg(A64Emitter& e);
|
||||
template <>
|
||||
const WReg GetTempReg<WReg>(A64Emitter& e) {
|
||||
return W0;
|
||||
}
|
||||
template <>
|
||||
const XReg GetTempReg<XReg>(A64Emitter& e) {
|
||||
return X0;
|
||||
}
|
||||
|
||||
template <typename SEQ, typename T>
|
||||
struct Sequence {
|
||||
typedef T EmitArgType;
|
||||
|
||||
static constexpr uint32_t head_key() { return T::key; }
|
||||
|
||||
static bool Select(A64Emitter& e, const Instr* i) {
|
||||
T args;
|
||||
if (!args.Load(i)) {
|
||||
return false;
|
||||
}
|
||||
SEQ::Emit(e, args);
|
||||
return true;
|
||||
}
|
||||
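// Illustrative dispatch (assumed shape; the real table machinery is the
// EMITTER_OPCODE_TABLE macro used in a64_sequences.cc):
//   if (!SomeSequence::Select(e, instr)) {
//     // key mismatch: fall through and try the next sequence for instr
//   }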
|
||||
template <typename REG_FN>
|
||||
static void EmitUnaryOp(A64Emitter& e, const EmitArgType& i,
|
||||
const REG_FN& reg_fn) {
|
||||
if (i.src1.is_constant) {
|
||||
e.MOV(i.dest, i.src1.constant());
|
||||
reg_fn(e, i.dest);
|
||||
} else {
|
||||
if (i.dest != i.src1) {
|
||||
e.MOV(i.dest, i.src1);
|
||||
}
|
||||
reg_fn(e, i.dest);
|
||||
}
|
||||
}
|
||||
|
||||
template <typename REG_REG_FN, typename REG_CONST_FN>
|
||||
static void EmitCommutativeBinaryOp(A64Emitter& e, const EmitArgType& i,
|
||||
const REG_REG_FN& reg_reg_fn,
|
||||
const REG_CONST_FN& reg_const_fn) {
|
||||
if (i.src1.is_constant) {
|
||||
if (i.src2.is_constant) {
|
||||
// Both constants.
|
||||
if (i.src1.ConstantFitsIn32Reg()) {
|
||||
e.MOV(i.dest, i.src2.constant());
|
||||
reg_const_fn(e, i.dest, static_cast<int32_t>(i.src1.constant()));
|
||||
} else if (i.src2.ConstantFitsIn32Reg()) {
|
||||
e.MOV(i.dest, i.src1.constant());
|
||||
reg_const_fn(e, i.dest, static_cast<int32_t>(i.src2.constant()));
|
||||
} else {
|
||||
e.MOV(i.dest, i.src1.constant());
|
||||
auto temp = GetTempReg<typename decltype(i.src2)::reg_type>(e);
|
||||
e.MOV(temp, i.src2.constant());
|
||||
reg_reg_fn(e, i.dest, temp);
|
||||
}
|
||||
} else {
|
||||
// src1 constant.
|
||||
if (i.dest == i.src2) {
|
||||
if (i.src1.ConstantFitsIn32Reg()) {
|
||||
reg_const_fn(e, i.dest, static_cast<int32_t>(i.src1.constant()));
|
||||
} else {
|
||||
auto temp = GetTempReg<typename decltype(i.src1)::reg_type>(e);
|
||||
e.MOV(temp, i.src1.constant());
|
||||
reg_reg_fn(e, i.dest, temp);
|
||||
}
|
||||
} else {
|
||||
e.MOV(i.dest, i.src1.constant());
|
||||
reg_reg_fn(e, i.dest, i.src2);
|
||||
}
|
||||
}
|
||||
} else if (i.src2.is_constant) {
|
||||
if (i.dest == i.src1) {
|
||||
if (i.src2.ConstantFitsIn32Reg()) {
|
||||
reg_const_fn(e, i.dest, static_cast<int32_t>(i.src2.constant()));
|
||||
} else {
|
||||
auto temp = GetTempReg<typename decltype(i.src2)::reg_type>(e);
|
||||
e.MOV(temp, i.src2.constant());
|
||||
reg_reg_fn(e, i.dest, temp);
|
||||
}
|
||||
} else {
|
||||
e.MOV(i.dest, i.src2.constant());
|
||||
reg_reg_fn(e, i.dest, i.src1);
|
||||
}
|
||||
} else {
|
||||
if (i.dest == i.src1) {
|
||||
reg_reg_fn(e, i.dest, i.src2);
|
||||
} else if (i.dest == i.src2) {
|
||||
reg_reg_fn(e, i.dest, i.src1);
|
||||
} else {
|
||||
e.MOV(i.dest, i.src1);
|
||||
reg_reg_fn(e, i.dest, i.src2);
|
||||
}
|
||||
}
|
||||
}
|
||||
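// Illustrative use (assumed lambda shapes; oaknut's ADD forms are used only
// as an example): an integer ADD sequence would drive the helper above with
//   EmitCommutativeBinaryOp(
//       e, i,
//       [](A64Emitter& e, auto dest, auto src) { e.ADD(dest, dest, src); },
//       [](A64Emitter& e, auto dest, int32_t c) { e.ADD(dest, dest, c); });
// leaving all constant/register operand placements to the helper.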
template <typename REG_REG_FN, typename REG_CONST_FN>
|
||||
static void EmitAssociativeBinaryOp(A64Emitter& e, const EmitArgType& i,
|
||||
const REG_REG_FN& reg_reg_fn,
|
||||
const REG_CONST_FN& reg_const_fn) {
|
||||
if (i.src1.is_constant) {
|
||||
assert_true(!i.src2.is_constant);
|
||||
if (i.dest == i.src2) {
|
||||
auto temp = GetTempReg<typename decltype(i.src2)::reg_type>(e);
|
||||
e.MOV(temp, i.src2);
|
||||
e.MOV(i.dest, i.src1.constant());
|
||||
reg_reg_fn(e, i.dest, temp);
|
||||
} else {
|
||||
e.MOV(i.dest, i.src1.constant());
|
||||
reg_reg_fn(e, i.dest, i.src2);
|
||||
}
|
||||
} else if (i.src2.is_constant) {
|
||||
if (i.dest == i.src1) {
|
||||
if (i.src2.ConstantFitsIn32Reg()) {
|
||||
reg_const_fn(e, i.dest, static_cast<int32_t>(i.src2.constant()));
|
||||
} else {
|
||||
auto temp = GetTempReg<typename decltype(i.src2)::reg_type>(e);
|
||||
e.MOV(temp, i.src2.constant());
|
||||
reg_reg_fn(e, i.dest, temp);
|
||||
}
|
||||
} else {
|
||||
e.MOV(i.dest, i.src1);
|
||||
if (i.src2.ConstantFitsIn32Reg()) {
|
||||
reg_const_fn(e, i.dest, static_cast<int32_t>(i.src2.constant()));
|
||||
} else {
|
||||
auto temp = GetTempReg<typename decltype(i.src2)::reg_type>(e);
|
||||
e.MOV(temp, i.src2.constant());
|
||||
reg_reg_fn(e, i.dest, temp);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
if (i.dest == i.src1) {
|
||||
reg_reg_fn(e, i.dest, i.src2);
|
||||
} else if (i.dest == i.src2) {
|
||||
auto temp = GetTempReg<typename decltype(i.src2)::reg_type>(e);
|
||||
e.MOV(temp, i.src2);
|
||||
e.MOV(i.dest, i.src1);
|
||||
reg_reg_fn(e, i.dest, temp);
|
||||
} else {
|
||||
e.MOV(i.dest, i.src1);
|
||||
reg_reg_fn(e, i.dest, i.src2);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template <typename REG = QReg, typename FN>
|
||||
static void EmitCommutativeBinaryVOp(A64Emitter& e, const EmitArgType& i,
|
||||
const FN& fn) {
|
||||
if (i.src1.is_constant) {
|
||||
assert_true(!i.src2.is_constant);
|
||||
e.LoadConstantV(Q0, i.src1.constant());
|
||||
fn(e, i.dest, REG(0), i.src2);
|
||||
} else if (i.src2.is_constant) {
|
||||
assert_true(!i.src1.is_constant);
|
||||
e.LoadConstantV(Q0, i.src2.constant());
|
||||
fn(e, i.dest, i.src1, REG(0));
|
||||
} else {
|
||||
fn(e, i.dest, i.src1, i.src2);
|
||||
}
|
||||
}
|
||||
|
||||
template <typename REG = QReg, typename FN>
|
||||
static void EmitAssociativeBinaryVOp(A64Emitter& e, const EmitArgType& i,
|
||||
const FN& fn) {
|
||||
if (i.src1.is_constant) {
|
||||
assert_true(!i.src2.is_constant);
|
||||
e.LoadConstantV(Q0, i.src1.constant());
|
||||
fn(e, i.dest, REG(0), i.src2);
|
||||
} else if (i.src2.is_constant) {
|
||||
assert_true(!i.src1.is_constant);
|
||||
e.LoadConstantV(Q0, i.src2.constant());
|
||||
fn(e, i.dest, i.src1, REG(0));
|
||||
} else {
|
||||
fn(e, i.dest, i.src1, i.src2);
|
||||
}
|
||||
}
|
||||
|
||||
template <typename REG_REG_FN, typename REG_CONST_FN>
|
||||
static void EmitCommutativeCompareOp(A64Emitter& e, const EmitArgType& i,
|
||||
const REG_REG_FN& reg_reg_fn,
|
||||
const REG_CONST_FN& reg_const_fn) {
|
||||
if (i.src1.is_constant) {
|
||||
assert_true(!i.src2.is_constant);
|
||||
if (i.src1.ConstantFitsIn32Reg()) {
|
||||
reg_const_fn(e, i.src2, static_cast<int32_t>(i.src1.constant()));
|
||||
} else {
|
||||
auto temp = GetTempReg<typename decltype(i.src1)::reg_type>(e);
|
||||
e.MOV(temp, i.src1.constant());
|
||||
reg_reg_fn(e, i.src2, temp);
|
||||
}
|
||||
} else if (i.src2.is_constant) {
|
||||
assert_true(!i.src1.is_constant);
|
||||
if (i.src2.ConstantFitsIn32Reg()) {
|
||||
reg_const_fn(e, i.src1, static_cast<int32_t>(i.src2.constant()));
|
||||
} else {
|
||||
auto temp = GetTempReg<typename decltype(i.src2)::reg_type>(e);
|
||||
e.MOV(temp, i.src2.constant());
|
||||
reg_reg_fn(e, i.src1, temp);
|
||||
}
|
||||
} else {
|
||||
reg_reg_fn(e, i.src1, i.src2);
|
||||
}
|
||||
}
|
||||
template <typename REG_REG_FN, typename REG_CONST_FN>
|
||||
static void EmitAssociativeCompareOp(A64Emitter& e, const EmitArgType& i,
|
||||
const REG_REG_FN& reg_reg_fn,
|
||||
const REG_CONST_FN& reg_const_fn) {
|
||||
if (i.src1.is_constant) {
|
||||
assert_true(!i.src2.is_constant);
|
||||
if (i.src1.ConstantFitsIn32Reg()) {
|
||||
reg_const_fn(e, i.dest, i.src2, static_cast<int32_t>(i.src1.constant()),
|
||||
true);
|
||||
} else {
|
||||
auto temp = GetTempReg<typename decltype(i.src1)::reg_type>(e);
|
||||
e.MOV(temp, i.src1.constant());
|
||||
reg_reg_fn(e, i.dest, i.src2, temp, true);
|
||||
}
|
||||
} else if (i.src2.is_constant) {
|
||||
assert_true(!i.src1.is_constant);
|
||||
if (i.src2.ConstantFitsIn32Reg()) {
|
||||
reg_const_fn(e, i.dest, i.src1, static_cast<int32_t>(i.src2.constant()),
|
||||
false);
|
||||
} else {
|
||||
auto temp = GetTempReg<typename decltype(i.src2)::reg_type>(e);
|
||||
e.MOV(temp, i.src2.constant());
|
||||
reg_reg_fn(e, i.dest, i.src1, temp, false);
|
||||
}
|
||||
} else {
|
||||
reg_reg_fn(e, i.dest, i.src1, i.src2, false);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace a64
|
||||
} // namespace backend
|
||||
} // namespace cpu
|
||||
} // namespace xe
|
||||
|
||||
#endif // XENIA_CPU_BACKEND_A64_A64_OP_H_
|
|
@@ -0,0 +1,551 @@
|
|||
/**
|
||||
******************************************************************************
|
||||
* Xenia : Xbox 360 Emulator Research Project *
|
||||
******************************************************************************
|
||||
* Copyright 2024 Xenia Developers. All rights reserved. *
|
||||
* Released under the BSD license - see LICENSE in the root for more details. *
|
||||
******************************************************************************
|
||||
*/
|
||||
|
||||
#include "xenia/cpu/backend/a64/a64_sequences.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <cstring>
|
||||
|
||||
#include "xenia/cpu/backend/a64/a64_op.h"
|
||||
|
||||
namespace xe {
|
||||
namespace cpu {
|
||||
namespace backend {
|
||||
namespace a64 {
|
||||
|
||||
volatile int anchor_control = 0;
|
||||
|
||||
// ============================================================================
|
||||
// OPCODE_DEBUG_BREAK
|
||||
// ============================================================================
|
||||
struct DEBUG_BREAK : Sequence<DEBUG_BREAK, I<OPCODE_DEBUG_BREAK, VoidOp>> {
|
||||
static void Emit(A64Emitter& e, const EmitArgType& i) { e.DebugBreak(); }
|
||||
};
|
||||
EMITTER_OPCODE_TABLE(OPCODE_DEBUG_BREAK, DEBUG_BREAK);
|
||||
|
||||
// ============================================================================
|
||||
// OPCODE_DEBUG_BREAK_TRUE
|
||||
// ============================================================================
|
||||
struct DEBUG_BREAK_TRUE_I8
|
||||
: Sequence<DEBUG_BREAK_TRUE_I8, I<OPCODE_DEBUG_BREAK_TRUE, VoidOp, I8Op>> {
|
||||
static void Emit(A64Emitter& e, const EmitArgType& i) {
|
||||
oaknut::Label skip;
|
||||
e.CBZ(i.src1, skip);
|
||||
e.DebugBreak();
|
||||
e.l(skip);
|
||||
}
|
||||
};
|
||||
struct DEBUG_BREAK_TRUE_I16
|
||||
: Sequence<DEBUG_BREAK_TRUE_I16,
|
||||
I<OPCODE_DEBUG_BREAK_TRUE, VoidOp, I16Op>> {
|
||||
static void Emit(A64Emitter& e, const EmitArgType& i) {
|
||||
oaknut::Label skip;
|
||||
e.CBZ(i.src1, skip);
|
||||
e.DebugBreak();
|
||||
e.l(skip);
|
||||
}
|
||||
};
|
||||
struct DEBUG_BREAK_TRUE_I32
|
||||
: Sequence<DEBUG_BREAK_TRUE_I32,
|
||||
I<OPCODE_DEBUG_BREAK_TRUE, VoidOp, I32Op>> {
|
||||
static void Emit(A64Emitter& e, const EmitArgType& i) {
|
||||
oaknut::Label skip;
|
||||
e.CBZ(i.src1, skip);
|
||||
e.DebugBreak();
|
||||
e.l(skip);
|
||||
}
|
||||
};
|
||||
struct DEBUG_BREAK_TRUE_I64
|
||||
: Sequence<DEBUG_BREAK_TRUE_I64,
|
||||
I<OPCODE_DEBUG_BREAK_TRUE, VoidOp, I64Op>> {
|
||||
static void Emit(A64Emitter& e, const EmitArgType& i) {
|
||||
oaknut::Label skip;
|
||||
e.CBZ(i.src1, skip);
|
||||
e.DebugBreak();
|
||||
e.l(skip);
|
||||
}
|
||||
};
|
||||
struct DEBUG_BREAK_TRUE_F32
|
||||
: Sequence<DEBUG_BREAK_TRUE_F32,
|
||||
I<OPCODE_DEBUG_BREAK_TRUE, VoidOp, F32Op>> {
|
||||
static void Emit(A64Emitter& e, const EmitArgType& i) {
|
||||
oaknut::Label skip;
|
||||
e.FCMP(i.src1, 0);
|
||||
e.B(Cond::EQ, skip);
|
||||
e.DebugBreak();
|
||||
e.l(skip);
|
||||
}
|
||||
};
|
||||
struct DEBUG_BREAK_TRUE_F64
|
||||
: Sequence<DEBUG_BREAK_TRUE_F64,
|
||||
I<OPCODE_DEBUG_BREAK_TRUE, VoidOp, F64Op>> {
|
||||
static void Emit(A64Emitter& e, const EmitArgType& i) {
|
||||
oaknut::Label skip;
|
||||
e.FCMP(i.src1, 0);
|
||||
e.B(Cond::EQ, skip);
|
||||
e.DebugBreak();
|
||||
e.l(skip);
|
||||
}
|
||||
};
|
||||
EMITTER_OPCODE_TABLE(OPCODE_DEBUG_BREAK_TRUE, DEBUG_BREAK_TRUE_I8,
|
||||
DEBUG_BREAK_TRUE_I16, DEBUG_BREAK_TRUE_I32,
|
||||
DEBUG_BREAK_TRUE_I64, DEBUG_BREAK_TRUE_F32,
|
||||
DEBUG_BREAK_TRUE_F64);
|
||||
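// All *_TRUE sequences above share one shape (illustrative pseudo-assembly):
//   integer source:  cbz  <src>, skip   ; branch around the side effect
//   float source:    fcmp <src>, #0.0
//                    b.eq skip
//   skip:
// CBZ only accepts general-purpose registers, hence the FCMP variant.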
|
||||
// ============================================================================
|
||||
// OPCODE_TRAP
|
||||
// ============================================================================
|
||||
struct TRAP : Sequence<TRAP, I<OPCODE_TRAP, VoidOp>> {
|
||||
static void Emit(A64Emitter& e, const EmitArgType& i) {
|
||||
e.Trap(i.instr->flags);
|
||||
}
|
||||
};
|
||||
EMITTER_OPCODE_TABLE(OPCODE_TRAP, TRAP);
|
||||
|
||||
// ============================================================================
|
||||
// OPCODE_TRAP_TRUE
|
||||
// ============================================================================
|
||||
struct TRAP_TRUE_I8
|
||||
: Sequence<TRAP_TRUE_I8, I<OPCODE_TRAP_TRUE, VoidOp, I8Op>> {
|
||||
static void Emit(A64Emitter& e, const EmitArgType& i) {
|
||||
oaknut::Label skip;
|
||||
e.CBZ(i.src1, skip);
|
||||
e.Trap(i.instr->flags);
|
||||
e.l(skip);
|
||||
}
|
||||
};
|
||||
struct TRAP_TRUE_I16
|
||||
: Sequence<TRAP_TRUE_I16, I<OPCODE_TRAP_TRUE, VoidOp, I16Op>> {
|
||||
static void Emit(A64Emitter& e, const EmitArgType& i) {
|
||||
oaknut::Label skip;
|
||||
e.CBZ(i.src1, skip);
|
||||
e.Trap(i.instr->flags);
|
||||
e.l(skip);
|
||||
}
|
||||
};
|
||||
struct TRAP_TRUE_I32
|
||||
: Sequence<TRAP_TRUE_I32, I<OPCODE_TRAP_TRUE, VoidOp, I32Op>> {
|
||||
static void Emit(A64Emitter& e, const EmitArgType& i) {
|
||||
oaknut::Label skip;
|
||||
e.CBZ(i.src1, skip);
|
||||
e.Trap(i.instr->flags);
|
||||
e.l(skip);
|
||||
}
|
||||
};
|
||||
struct TRAP_TRUE_I64
|
||||
: Sequence<TRAP_TRUE_I64, I<OPCODE_TRAP_TRUE, VoidOp, I64Op>> {
|
||||
static void Emit(A64Emitter& e, const EmitArgType& i) {
|
||||
oaknut::Label skip;
|
||||
e.CBZ(i.src1, skip);
|
||||
e.Trap(i.instr->flags);
|
||||
e.l(skip);
|
||||
}
|
||||
};
|
||||
struct TRAP_TRUE_F32
|
||||
: Sequence<TRAP_TRUE_F32, I<OPCODE_TRAP_TRUE, VoidOp, F32Op>> {
|
||||
static void Emit(A64Emitter& e, const EmitArgType& i) {
|
||||
oaknut::Label skip;
|
||||
e.FCMP(i.src1, 0);
|
||||
e.B(Cond::EQ, skip);
|
||||
e.Trap(i.instr->flags);
|
||||
e.l(skip);
|
||||
}
|
||||
};
|
||||
struct TRAP_TRUE_F64
|
||||
: Sequence<TRAP_TRUE_F64, I<OPCODE_TRAP_TRUE, VoidOp, F64Op>> {
|
||||
static void Emit(A64Emitter& e, const EmitArgType& i) {
|
||||
oaknut::Label skip;
|
||||
e.FCMP(i.src1, 0);
|
||||
e.B(Cond::EQ, skip);
|
||||
e.Trap(i.instr->flags);
|
||||
e.l(skip);
|
||||
}
|
||||
};
|
||||
EMITTER_OPCODE_TABLE(OPCODE_TRAP_TRUE, TRAP_TRUE_I8, TRAP_TRUE_I16,
|
||||
TRAP_TRUE_I32, TRAP_TRUE_I64, TRAP_TRUE_F32,
|
||||
TRAP_TRUE_F64);
|
||||
|
||||
// ============================================================================
|
||||
// OPCODE_CALL
|
||||
// ============================================================================
|
||||
struct CALL : Sequence<CALL, I<OPCODE_CALL, VoidOp, SymbolOp>> {
|
||||
static void Emit(A64Emitter& e, const EmitArgType& i) {
|
||||
assert_true(i.src1.value->is_guest());
|
||||
e.Call(i.instr, static_cast<GuestFunction*>(i.src1.value));
|
||||
}
|
||||
};
|
||||
EMITTER_OPCODE_TABLE(OPCODE_CALL, CALL);
|
||||
|
||||
// ============================================================================
|
||||
// OPCODE_CALL_TRUE
|
||||
// ============================================================================
|
||||
struct CALL_TRUE_I8
|
||||
: Sequence<CALL_TRUE_I8, I<OPCODE_CALL_TRUE, VoidOp, I8Op, SymbolOp>> {
|
||||
static void Emit(A64Emitter& e, const EmitArgType& i) {
|
||||
assert_true(i.src2.value->is_guest());
|
||||
oaknut::Label skip;
|
||||
e.CBZ(i.src1, skip);
|
||||
e.Call(i.instr, static_cast<GuestFunction*>(i.src2.value));
|
||||
e.l(skip);
|
||||
}
|
||||
};
|
||||
struct CALL_TRUE_I16
|
||||
: Sequence<CALL_TRUE_I16, I<OPCODE_CALL_TRUE, VoidOp, I16Op, SymbolOp>> {
|
||||
static void Emit(A64Emitter& e, const EmitArgType& i) {
|
||||
assert_true(i.src2.value->is_guest());
|
||||
oaknut::Label skip;
|
||||
e.CBZ(i.src1, skip);
|
||||
e.Call(i.instr, static_cast<GuestFunction*>(i.src2.value));
|
||||
e.l(skip);
|
||||
}
|
||||
};
|
||||
struct CALL_TRUE_I32
|
||||
: Sequence<CALL_TRUE_I32, I<OPCODE_CALL_TRUE, VoidOp, I32Op, SymbolOp>> {
|
||||
static void Emit(A64Emitter& e, const EmitArgType& i) {
|
||||
assert_true(i.src2.value->is_guest());
|
||||
oaknut::Label skip;
|
||||
e.CBZ(i.src1, skip);
|
||||
e.Call(i.instr, static_cast<GuestFunction*>(i.src2.value));
|
||||
e.l(skip);
|
||||
}
|
||||
};
|
||||
struct CALL_TRUE_I64
|
||||
: Sequence<CALL_TRUE_I64, I<OPCODE_CALL_TRUE, VoidOp, I64Op, SymbolOp>> {
|
||||
static void Emit(A64Emitter& e, const EmitArgType& i) {
|
||||
assert_true(i.src2.value->is_guest());
|
||||
oaknut::Label skip;
|
||||
e.CBZ(i.src1, skip);
|
||||
e.Call(i.instr, static_cast<GuestFunction*>(i.src2.value));
|
||||
e.l(skip);
|
||||
}
|
||||
};
|
||||
struct CALL_TRUE_F32
|
||||
: Sequence<CALL_TRUE_F32, I<OPCODE_CALL_TRUE, VoidOp, F32Op, SymbolOp>> {
|
||||
static void Emit(A64Emitter& e, const EmitArgType& i) {
|
||||
assert_true(i.src2.value->is_guest());
|
||||
oaknut::Label skip;
|
||||
e.FCMP(i.src1, 0);
|
||||
e.B(Cond::EQ, skip);
|
||||
e.Call(i.instr, static_cast<GuestFunction*>(i.src2.value));
|
||||
e.l(skip);
|
||||
}
|
||||
};
|
||||
struct CALL_TRUE_F64
|
||||
: Sequence<CALL_TRUE_F64, I<OPCODE_CALL_TRUE, VoidOp, F64Op, SymbolOp>> {
|
||||
static void Emit(A64Emitter& e, const EmitArgType& i) {
|
||||
assert_true(i.src2.value->is_guest());
|
||||
oaknut::Label skip;
|
||||
e.FCMP(i.src1, 0);
|
||||
e.B(Cond::EQ, skip);
|
||||
e.Call(i.instr, static_cast<GuestFunction*>(i.src2.value));
|
||||
e.l(skip);
|
||||
}
|
||||
};
|
||||
EMITTER_OPCODE_TABLE(OPCODE_CALL_TRUE, CALL_TRUE_I8, CALL_TRUE_I16,
|
||||
CALL_TRUE_I32, CALL_TRUE_I64, CALL_TRUE_F32,
|
||||
CALL_TRUE_F64);
|
||||
|
||||
// ============================================================================
|
||||
// OPCODE_CALL_INDIRECT
|
||||
// ============================================================================
|
||||
struct CALL_INDIRECT
|
||||
: Sequence<CALL_INDIRECT, I<OPCODE_CALL_INDIRECT, VoidOp, I64Op>> {
|
||||
static void Emit(A64Emitter& e, const EmitArgType& i) {
|
||||
e.CallIndirect(i.instr, i.src1);
|
||||
}
|
||||
};
|
||||
EMITTER_OPCODE_TABLE(OPCODE_CALL_INDIRECT, CALL_INDIRECT);
|
||||
|
||||
// ============================================================================
|
||||
// OPCODE_CALL_INDIRECT_TRUE
|
||||
// ============================================================================
|
||||
struct CALL_INDIRECT_TRUE_I8
|
||||
: Sequence<CALL_INDIRECT_TRUE_I8,
|
||||
I<OPCODE_CALL_INDIRECT_TRUE, VoidOp, I8Op, I64Op>> {
|
||||
static void Emit(A64Emitter& e, const EmitArgType& i) {
|
||||
oaknut::Label skip;
|
||||
e.CBZ(i.src1, skip);
|
||||
e.CallIndirect(i.instr, i.src2);
|
||||
e.l(skip);
|
||||
}
|
||||
};
|
||||
struct CALL_INDIRECT_TRUE_I16
|
||||
: Sequence<CALL_INDIRECT_TRUE_I16,
|
||||
I<OPCODE_CALL_INDIRECT_TRUE, VoidOp, I16Op, I64Op>> {
|
||||
static void Emit(A64Emitter& e, const EmitArgType& i) {
|
||||
oaknut::Label skip;
|
||||
e.CBZ(i.src1, skip);
|
||||
e.CallIndirect(i.instr, i.src2);
|
||||
e.l(skip);
|
||||
}
|
||||
};
|
||||
struct CALL_INDIRECT_TRUE_I32
|
||||
: Sequence<CALL_INDIRECT_TRUE_I32,
|
||||
I<OPCODE_CALL_INDIRECT_TRUE, VoidOp, I32Op, I64Op>> {
|
||||
static void Emit(A64Emitter& e, const EmitArgType& i) {
|
||||
oaknut::Label skip;
|
||||
e.CBZ(i.src1, skip);
|
||||
e.CallIndirect(i.instr, i.src2);
|
||||
e.l(skip);
|
||||
}
|
||||
};
|
||||
struct CALL_INDIRECT_TRUE_I64
|
||||
: Sequence<CALL_INDIRECT_TRUE_I64,
|
||||
I<OPCODE_CALL_INDIRECT_TRUE, VoidOp, I64Op, I64Op>> {
|
||||
static void Emit(A64Emitter& e, const EmitArgType& i) {
|
||||
oaknut::Label skip;
|
||||
e.CBZ(i.src1, skip);
|
||||
e.CallIndirect(i.instr, i.src2);
|
||||
e.l(skip);
|
||||
}
|
||||
};
|
||||
struct CALL_INDIRECT_TRUE_F32
|
||||
: Sequence<CALL_INDIRECT_TRUE_F32,
|
||||
I<OPCODE_CALL_INDIRECT_TRUE, VoidOp, F32Op, I64Op>> {
|
||||
static void Emit(A64Emitter& e, const EmitArgType& i) {
|
||||
oaknut::Label skip;
|
||||
e.FCMP(i.src1, 0);
|
||||
e.B(Cond::EQ, skip);
|
||||
e.CallIndirect(i.instr, i.src2);
|
||||
e.l(skip);
|
||||
}
|
||||
};
|
||||
struct CALL_INDIRECT_TRUE_F64
|
||||
: Sequence<CALL_INDIRECT_TRUE_F64,
|
||||
I<OPCODE_CALL_INDIRECT_TRUE, VoidOp, F64Op, I64Op>> {
|
||||
static void Emit(A64Emitter& e, const EmitArgType& i) {
|
||||
oaknut::Label skip;
|
||||
e.FCMP(i.src1, 0);
|
||||
e.B(Cond::EQ, skip);
|
||||
e.CallIndirect(i.instr, i.src2);
|
||||
e.l(skip);
|
||||
}
|
||||
};
|
||||
EMITTER_OPCODE_TABLE(OPCODE_CALL_INDIRECT_TRUE, CALL_INDIRECT_TRUE_I8,
|
||||
CALL_INDIRECT_TRUE_I16, CALL_INDIRECT_TRUE_I32,
|
||||
CALL_INDIRECT_TRUE_I64, CALL_INDIRECT_TRUE_F32,
|
||||
CALL_INDIRECT_TRUE_F64);
|
||||
|
||||
// ============================================================================
|
||||
// OPCODE_CALL_EXTERN
|
||||
// ============================================================================
|
||||
struct CALL_EXTERN
|
||||
: Sequence<CALL_EXTERN, I<OPCODE_CALL_EXTERN, VoidOp, SymbolOp>> {
|
||||
static void Emit(A64Emitter& e, const EmitArgType& i) {
|
||||
e.CallExtern(i.instr, i.src1.value);
|
||||
}
|
||||
};
|
||||
EMITTER_OPCODE_TABLE(OPCODE_CALL_EXTERN, CALL_EXTERN);
|
||||
|
||||
// ============================================================================
|
||||
// OPCODE_RETURN
|
||||
// ============================================================================
|
||||
struct RETURN : Sequence<RETURN, I<OPCODE_RETURN, VoidOp>> {
|
||||
static void Emit(A64Emitter& e, const EmitArgType& i) {
|
||||
// If this is the last instruction in the last block, just let it
|
||||
// fall through.
|
||||
if (i.instr->next || i.instr->block->next) {
|
||||
e.B(e.epilog_label());
|
||||
}
|
||||
}
|
||||
};
|
||||
EMITTER_OPCODE_TABLE(OPCODE_RETURN, RETURN);
|
||||
|
||||
// ============================================================================
|
||||
// OPCODE_RETURN_TRUE
|
||||
// ============================================================================
|
||||
struct RETURN_TRUE_I8
|
||||
: Sequence<RETURN_TRUE_I8, I<OPCODE_RETURN_TRUE, VoidOp, I8Op>> {
|
||||
static void Emit(A64Emitter& e, const EmitArgType& i) {
|
||||
e.CBNZ(i.src1, e.epilog_label());
|
||||
}
|
||||
};
|
||||
struct RETURN_TRUE_I16
|
||||
: Sequence<RETURN_TRUE_I16, I<OPCODE_RETURN_TRUE, VoidOp, I16Op>> {
|
||||
static void Emit(A64Emitter& e, const EmitArgType& i) {
|
||||
e.CBNZ(i.src1, e.epilog_label());
|
||||
}
|
||||
};
|
||||
struct RETURN_TRUE_I32
|
||||
: Sequence<RETURN_TRUE_I32, I<OPCODE_RETURN_TRUE, VoidOp, I32Op>> {
|
||||
static void Emit(A64Emitter& e, const EmitArgType& i) {
|
||||
e.CBNZ(i.src1, e.epilog_label());
|
||||
}
|
||||
};
|
||||
struct RETURN_TRUE_I64
|
||||
: Sequence<RETURN_TRUE_I64, I<OPCODE_RETURN_TRUE, VoidOp, I64Op>> {
|
  static void Emit(A64Emitter& e, const EmitArgType& i) {
    e.CBNZ(i.src1, e.epilog_label());
  }
};
struct RETURN_TRUE_F32
    : Sequence<RETURN_TRUE_F32, I<OPCODE_RETURN_TRUE, VoidOp, F32Op>> {
  static void Emit(A64Emitter& e, const EmitArgType& i) {
    e.FCMP(i.src1, 0);
    e.B(Cond::NE, e.epilog_label());
  }
};
struct RETURN_TRUE_F64
    : Sequence<RETURN_TRUE_F64, I<OPCODE_RETURN_TRUE, VoidOp, F64Op>> {
  static void Emit(A64Emitter& e, const EmitArgType& i) {
    e.FCMP(i.src1, 0);
    e.B(Cond::NE, e.epilog_label());
  }
};
EMITTER_OPCODE_TABLE(OPCODE_RETURN_TRUE, RETURN_TRUE_I8, RETURN_TRUE_I16,
                     RETURN_TRUE_I32, RETURN_TRUE_I64, RETURN_TRUE_F32,
                     RETURN_TRUE_F64);

// ============================================================================
// OPCODE_SET_RETURN_ADDRESS
// ============================================================================
struct SET_RETURN_ADDRESS
    : Sequence<SET_RETURN_ADDRESS,
               I<OPCODE_SET_RETURN_ADDRESS, VoidOp, I64Op>> {
  static void Emit(A64Emitter& e, const EmitArgType& i) {
    e.SetReturnAddress(i.src1.constant());
  }
};
EMITTER_OPCODE_TABLE(OPCODE_SET_RETURN_ADDRESS, SET_RETURN_ADDRESS);

// ============================================================================
// OPCODE_BRANCH
// ============================================================================
struct BRANCH : Sequence<BRANCH, I<OPCODE_BRANCH, VoidOp, LabelOp>> {
  static void Emit(A64Emitter& e, const EmitArgType& i) {
    oaknut::Label* label = e.lookup_label(i.src1.value->name);
    assert_not_null(label);
    e.B(*label);
  }
};
EMITTER_OPCODE_TABLE(OPCODE_BRANCH, BRANCH);

// ============================================================================
// OPCODE_BRANCH_TRUE
// ============================================================================
struct BRANCH_TRUE_I8
    : Sequence<BRANCH_TRUE_I8, I<OPCODE_BRANCH_TRUE, VoidOp, I8Op, LabelOp>> {
  static void Emit(A64Emitter& e, const EmitArgType& i) {
    oaknut::Label* label = e.lookup_label(i.src2.value->name);
    assert_not_null(label);
    e.CBNZ(i.src1, *label);
  }
};
struct BRANCH_TRUE_I16
    : Sequence<BRANCH_TRUE_I16, I<OPCODE_BRANCH_TRUE, VoidOp, I16Op, LabelOp>> {
  static void Emit(A64Emitter& e, const EmitArgType& i) {
    oaknut::Label* label = e.lookup_label(i.src2.value->name);
    assert_not_null(label);
    e.CBNZ(i.src1, *label);
  }
};
struct BRANCH_TRUE_I32
    : Sequence<BRANCH_TRUE_I32, I<OPCODE_BRANCH_TRUE, VoidOp, I32Op, LabelOp>> {
  static void Emit(A64Emitter& e, const EmitArgType& i) {
    oaknut::Label* label = e.lookup_label(i.src2.value->name);
    assert_not_null(label);
    e.CBNZ(i.src1, *label);
  }
};
struct BRANCH_TRUE_I64
    : Sequence<BRANCH_TRUE_I64, I<OPCODE_BRANCH_TRUE, VoidOp, I64Op, LabelOp>> {
  static void Emit(A64Emitter& e, const EmitArgType& i) {
    oaknut::Label* label = e.lookup_label(i.src2.value->name);
    assert_not_null(label);
    e.CBNZ(i.src1, *label);
  }
};
struct BRANCH_TRUE_F32
    : Sequence<BRANCH_TRUE_F32, I<OPCODE_BRANCH_TRUE, VoidOp, F32Op, LabelOp>> {
  static void Emit(A64Emitter& e, const EmitArgType& i) {
    oaknut::Label* label = e.lookup_label(i.src2.value->name);
    assert_not_null(label);
    e.FCMP(i.src1, 0);
    e.B(Cond::NE, *label);
  }
};
struct BRANCH_TRUE_F64
    : Sequence<BRANCH_TRUE_F64, I<OPCODE_BRANCH_TRUE, VoidOp, F64Op, LabelOp>> {
  static void Emit(A64Emitter& e, const EmitArgType& i) {
    oaknut::Label* label = e.lookup_label(i.src2.value->name);
    assert_not_null(label);
    e.FCMP(i.src1, 0);
    e.B(Cond::NE, *label);
  }
};
EMITTER_OPCODE_TABLE(OPCODE_BRANCH_TRUE, BRANCH_TRUE_I8, BRANCH_TRUE_I16,
                     BRANCH_TRUE_I32, BRANCH_TRUE_I64, BRANCH_TRUE_F32,
                     BRANCH_TRUE_F64);

// ============================================================================
// OPCODE_BRANCH_FALSE
// ============================================================================
struct BRANCH_FALSE_I8
    : Sequence<BRANCH_FALSE_I8, I<OPCODE_BRANCH_FALSE, VoidOp, I8Op, LabelOp>> {
  static void Emit(A64Emitter& e, const EmitArgType& i) {
    oaknut::Label* label = e.lookup_label(i.src2.value->name);
    assert_not_null(label);
    e.CBZ(i.src1, *label);
  }
};
struct BRANCH_FALSE_I16
    : Sequence<BRANCH_FALSE_I16,
               I<OPCODE_BRANCH_FALSE, VoidOp, I16Op, LabelOp>> {
  static void Emit(A64Emitter& e, const EmitArgType& i) {
    oaknut::Label* label = e.lookup_label(i.src2.value->name);
    assert_not_null(label);
    e.CBZ(i.src1, *label);
  }
};
struct BRANCH_FALSE_I32
    : Sequence<BRANCH_FALSE_I32,
               I<OPCODE_BRANCH_FALSE, VoidOp, I32Op, LabelOp>> {
  static void Emit(A64Emitter& e, const EmitArgType& i) {
    oaknut::Label* label = e.lookup_label(i.src2.value->name);
    assert_not_null(label);
    e.CBZ(i.src1, *label);
  }
};
struct BRANCH_FALSE_I64
    : Sequence<BRANCH_FALSE_I64,
               I<OPCODE_BRANCH_FALSE, VoidOp, I64Op, LabelOp>> {
  static void Emit(A64Emitter& e, const EmitArgType& i) {
    oaknut::Label* label = e.lookup_label(i.src2.value->name);
    assert_not_null(label);
    e.CBZ(i.src1, *label);
  }
};
struct BRANCH_FALSE_F32
    : Sequence<BRANCH_FALSE_F32,
               I<OPCODE_BRANCH_FALSE, VoidOp, F32Op, LabelOp>> {
  static void Emit(A64Emitter& e, const EmitArgType& i) {
    oaknut::Label* label = e.lookup_label(i.src2.value->name);
    assert_not_null(label);
    e.FCMP(i.src1, 0);
    e.B(Cond::EQ, *label);
  }
};
struct BRANCH_FALSE_F64
    : Sequence<BRANCH_FALSE_F64,
               I<OPCODE_BRANCH_FALSE, VoidOp, F64Op, LabelOp>> {
  static void Emit(A64Emitter& e, const EmitArgType& i) {
    oaknut::Label* label = e.lookup_label(i.src2.value->name);
    assert_not_null(label);
    e.FCMP(i.src1, 0);
    e.B(Cond::EQ, *label);
  }
};
EMITTER_OPCODE_TABLE(OPCODE_BRANCH_FALSE, BRANCH_FALSE_I8, BRANCH_FALSE_I16,
                     BRANCH_FALSE_I32, BRANCH_FALSE_I64, BRANCH_FALSE_F32,
                     BRANCH_FALSE_F64);

}  // namespace a64
}  // namespace backend
}  // namespace cpu
}  // namespace xe
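A note on the float variants above (an illustration, not code from this commit): integer truthiness maps directly onto CBZ/CBNZ, while floats are first compared against +0.0 with FCMP and then branched on the condition flags. The sketch below states the rule the emitted code implements; since FCMP reports NaN as unordered, B.NE also fires for NaN, keeping float semantics consistent with the integer "any non-zero bit pattern is true" rule.

    // Host-side illustration of the branch-truthiness rule (assumed semantics).
    #include <cstdint>
    static bool BranchTakenI64(uint64_t v) { return v != 0; }     // CBZ/CBNZ
    static bool BranchTakenF64(double v) { return !(v == 0.0); }  // FCMP; B.NE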
(3 file diffs suppressed because they are too large to display.)
@ -0,0 +1,51 @@
/**
 ******************************************************************************
 * Xenia : Xbox 360 Emulator Research Project                                 *
 ******************************************************************************
 * Copyright 2024 Ben Vanik. All rights reserved.                             *
 * Released under the BSD license - see LICENSE in the root for more details. *
 ******************************************************************************
 */

#ifndef XENIA_CPU_BACKEND_A64_A64_SEQUENCES_H_
#define XENIA_CPU_BACKEND_A64_A64_SEQUENCES_H_

#include "xenia/cpu/hir/instr.h"

#include <unordered_map>

namespace xe {
namespace cpu {
namespace backend {
namespace a64 {

class A64Emitter;

typedef bool (*SequenceSelectFn)(A64Emitter&, const hir::Instr*);
extern std::unordered_map<uint32_t, SequenceSelectFn> sequence_table;

template <typename T>
bool Register() {
  sequence_table.insert({T::head_key(), T::Select});
  return true;
}

template <typename T, typename Tn, typename... Ts>
static bool Register() {
  bool b = true;
  b = b && Register<T>();          // Call the above function
  b = b && Register<Tn, Ts...>();  // Call ourself again (recursively)
  return b;
}
#define EMITTER_OPCODE_TABLE(name, ...) \
  const auto A64_INSTR_##name = Register<__VA_ARGS__>();

bool SelectSequence(A64Emitter* e, const hir::Instr* i,
                    const hir::Instr** new_tail);

}  // namespace a64
}  // namespace backend
}  // namespace cpu
}  // namespace xe

#endif  // XENIA_CPU_BACKEND_A64_A64_SEQUENCES_H_
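For context (a hedged sketch, not code from this commit): each sequence's head_key() packs the HIR opcode and operand-type signature into the uint32_t key, so dispatch is a hash lookup plus an indirect call. A dispatcher built on sequence_table would look roughly like this; the key derivation shown is an assumption modeled on the x64 backend.

    // Illustrative dispatcher over sequence_table (key shape assumed).
    bool TrySelect(A64Emitter& e, const hir::Instr* i) {
      const auto it = sequence_table.find(i->opcode->num);
      if (it == sequence_table.end()) {
        return false;  // No emitter registered for this opcode.
      }
      return it->second(e, i);  // Emit code for the instruction.
    }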
@ -0,0 +1,129 @@
/**
 ******************************************************************************
 * Xenia : Xbox 360 Emulator Research Project                                 *
 ******************************************************************************
 * Copyright 2024 Ben Vanik. All rights reserved.                             *
 * Released under the BSD license - see LICENSE in the root for more details. *
 ******************************************************************************
 */

#ifndef XENIA_CPU_BACKEND_A64_A64_STACK_LAYOUT_H_
#define XENIA_CPU_BACKEND_A64_A64_STACK_LAYOUT_H_

#include "xenia/base/vec128.h"
#include "xenia/cpu/backend/a64/a64_backend.h"
#include "xenia/cpu/backend/a64/a64_emitter.h"

namespace xe {
namespace cpu {
namespace backend {
namespace a64 {

class StackLayout {
 public:
  /**
   * Stack Layout
   * ----------------------------
   * NOTE: stack must always be 16b aligned.
   *
   * Thunk stack:
   *  Non-Volatile         Volatile
   * +------------------+------------------+
   * | arg temp, 3 * 8  | arg temp, 3 * 8  | sp + 0x000
   * |                  |                  |
   * |                  |                  |
   * +------------------+------------------+
   * | rbx              | (unused)         | sp + 0x018
   * +------------------+------------------+
   * | rbp              | X1               | sp + 0x020
   * +------------------+------------------+
   * | rcx (Win32)      | X2               | sp + 0x028
   * +------------------+------------------+
   * | rsi (Win32)      | X3               | sp + 0x030
   * +------------------+------------------+
   * | rdi (Win32)      | X4               | sp + 0x038
   * +------------------+------------------+
   * | r12              | X5               | sp + 0x040
   * +------------------+------------------+
   * | r13              | X6               | sp + 0x048
   * +------------------+------------------+
   * | r14              | X7               | sp + 0x050
   * +------------------+------------------+
   * | r15              | X8               | sp + 0x058
   * +------------------+------------------+
   * | xmm6 (Win32)     | X9               | sp + 0x060
   * |                  |                  |
   * +------------------+------------------+
   * | xmm7 (Win32)     | X10              | sp + 0x070
   * |                  |                  |
   * +------------------+------------------+
   * | xmm8 (Win32)     | X11              | sp + 0x080
   * |                  |                  |
   * +------------------+------------------+
   * | xmm9 (Win32)     | X12              | sp + 0x090
   * |                  |                  |
   * +------------------+------------------+
   * | xmm10 (Win32)    | X13              | sp + 0x0A0
   * |                  |                  |
   * +------------------+------------------+
   * | xmm11 (Win32)    | X14              | sp + 0x0B0
   * |                  |                  |
   * +------------------+------------------+
   * | xmm12 (Win32)    | X15              | sp + 0x0C0
   * |                  |                  |
   * +------------------+------------------+
   * | xmm13 (Win32)    | X16              | sp + 0x0D0
   * |                  |                  |
   * +------------------+------------------+
   * | xmm14 (Win32)    | X17              | sp + 0x0E0
   * |                  |                  |
   * +------------------+------------------+
   * | xmm15 (Win32)    | X18              | sp + 0x0F0
   * |                  |                  |
   * +------------------+------------------+
   */
  XEPACKEDSTRUCT(Thunk, {
    uint64_t arg_temp[3];
    uint64_t r[17];
    vec128_t xmm[22];
  });
  static_assert(sizeof(Thunk) % 16 == 0,
                "sizeof(Thunk) must be a multiple of 16!");
  static const size_t THUNK_STACK_SIZE = sizeof(Thunk);

  /**
   * Guest stack:
   * +------------------+
   * | arg temp, 3 * 8  | sp + 0
   * |                  |
   * |                  |
   * +------------------+
   * | scratch, 48b     | sp + 32 (kStashOffset)
   * |                  |
   * +------------------+
   * | X0 / context     | sp + 80
   * +------------------+
   * | guest ret addr   | sp + 88
   * +------------------+
   * | call ret addr    | sp + 96
   * +------------------+
   * ... locals ...
   * +------------------+
   * | (return address) |
   * +------------------+
   */
  static const size_t GUEST_STACK_SIZE = 96 + 16;
  static const size_t GUEST_CTX_HOME = 80;
  static const size_t GUEST_RET_ADDR = 88;
  static const size_t GUEST_CALL_RET_ADDR = 96;
};

}  // namespace a64
}  // namespace backend
}  // namespace cpu
}  // namespace xe

#endif  // XENIA_CPU_BACKEND_A64_A64_STACK_LAYOUT_H_
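A quick consistency sketch (assumed checks, not part of the commit): the guest-stack constants above interlock, and static_asserts like these would catch accidental edits to the layout.

    // The guest stack stays 16-byte aligned and the three home slots are
    // adjacent 8-byte cells (80, 88, 96).
    static_assert(StackLayout::GUEST_STACK_SIZE % 16 == 0,
                  "guest stack must remain 16-byte aligned");
    static_assert(StackLayout::GUEST_CTX_HOME + 8 == StackLayout::GUEST_RET_ADDR,
                  "guest return address sits just above the context home");
    static_assert(StackLayout::GUEST_RET_ADDR + 8 ==
                      StackLayout::GUEST_CALL_RET_ADDR,
                  "call return address sits just above the guest return address");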
@ -0,0 +1,225 @@
/**
 ******************************************************************************
 * Xenia : Xbox 360 Emulator Research Project                                 *
 ******************************************************************************
 * Copyright 2024 Ben Vanik. All rights reserved.                             *
 * Released under the BSD license - see LICENSE in the root for more details. *
 ******************************************************************************
 */

#include "xenia/cpu/backend/a64/a64_tracers.h"

#include <cinttypes>

#include "xenia/base/logging.h"
#include "xenia/base/vec128.h"
#include "xenia/cpu/backend/a64/a64_emitter.h"
#include "xenia/cpu/processor.h"
#include "xenia/cpu/thread_state.h"

namespace xe {
namespace cpu {
namespace backend {
namespace a64 {

#define ITRACE 0
#define DTRACE 0

#define TARGET_THREAD 0

bool trace_enabled = true;

#define THREAD_MATCH \
  (!TARGET_THREAD || thread_state->thread_id() == TARGET_THREAD)
#define IFLUSH()
#define IPRINT(s)                    \
  if (trace_enabled && THREAD_MATCH) \
  xe::logging::AppendLogLine(xe::LogLevel::Debug, 't', s)
#define DFLUSH()
#define DPRINT(...)                  \
  if (trace_enabled && THREAD_MATCH) \
  xe::logging::AppendLogLineFormat(xe::LogLevel::Debug, 't', __VA_ARGS__)

uint32_t GetTracingMode() {
  uint32_t mode = 0;
#if ITRACE
  mode |= TRACING_INSTR;
#endif  // ITRACE
#if DTRACE
  mode |= TRACING_DATA;
#endif  // DTRACE
  return mode;
}

void TraceString(void* raw_context, const char* str) {
  auto thread_state = *reinterpret_cast<ThreadState**>(raw_context);
  IPRINT(str);
  IFLUSH();
}

void TraceContextLoadI8(void* raw_context, uint64_t offset, uint8_t value) {
  auto thread_state = *reinterpret_cast<ThreadState**>(raw_context);
  DPRINT("{} ({:X}) = ctx i8 +{}\n", (int8_t)value, value, offset);
}
void TraceContextLoadI16(void* raw_context, uint64_t offset, uint16_t value) {
  auto thread_state = *reinterpret_cast<ThreadState**>(raw_context);
  DPRINT("{} ({:X}) = ctx i16 +{}\n", (int16_t)value, value, offset);
}
void TraceContextLoadI32(void* raw_context, uint64_t offset, uint32_t value) {
  auto thread_state = *reinterpret_cast<ThreadState**>(raw_context);
  DPRINT("{} ({:X}) = ctx i32 +{}\n", (int32_t)value, value, offset);
}
void TraceContextLoadI64(void* raw_context, uint64_t offset, uint64_t value) {
  auto thread_state = *reinterpret_cast<ThreadState**>(raw_context);
  DPRINT("{} ({:X}) = ctx i64 +{}\n", (int64_t)value, value, offset);
}
void TraceContextLoadF32(void* raw_context, uint64_t offset,
                         float32x4_t value) {
  auto thread_state = *reinterpret_cast<ThreadState**>(raw_context);
  DPRINT("{} ({:X}) = ctx f32 +{}\n", xe::m128_f32<0>(value),
         xe::m128_i32<0>(value), offset);
}
void TraceContextLoadF64(void* raw_context, uint64_t offset,
                         const double* value) {
  auto thread_state = *reinterpret_cast<ThreadState**>(raw_context);
  // auto v = _mm_loadu_pd(value);
  auto v = vld1q_f64(value);
  DPRINT("{} ({:X}) = ctx f64 +{}\n", xe::m128_f64<0>(v), xe::m128_i64<0>(v),
         offset);
}
void TraceContextLoadV128(void* raw_context, uint64_t offset,
                          float32x4_t value) {
  auto thread_state = *reinterpret_cast<ThreadState**>(raw_context);
  DPRINT("[{}, {}, {}, {}] [{:08X}, {:08X}, {:08X}, {:08X}] = ctx v128 +{}\n",
         xe::m128_f32<0>(value), xe::m128_f32<1>(value), xe::m128_f32<2>(value),
         xe::m128_f32<3>(value), xe::m128_i32<0>(value), xe::m128_i32<1>(value),
         xe::m128_i32<2>(value), xe::m128_i32<3>(value), offset);
}

void TraceContextStoreI8(void* raw_context, uint64_t offset, uint8_t value) {
  auto thread_state = *reinterpret_cast<ThreadState**>(raw_context);
  DPRINT("ctx i8 +{} = {} ({:X})\n", offset, (int8_t)value, value);
}
void TraceContextStoreI16(void* raw_context, uint64_t offset, uint16_t value) {
  auto thread_state = *reinterpret_cast<ThreadState**>(raw_context);
  DPRINT("ctx i16 +{} = {} ({:X})\n", offset, (int16_t)value, value);
}
void TraceContextStoreI32(void* raw_context, uint64_t offset, uint32_t value) {
  auto thread_state = *reinterpret_cast<ThreadState**>(raw_context);
  DPRINT("ctx i32 +{} = {} ({:X})\n", offset, (int32_t)value, value);
}
void TraceContextStoreI64(void* raw_context, uint64_t offset, uint64_t value) {
  auto thread_state = *reinterpret_cast<ThreadState**>(raw_context);
  DPRINT("ctx i64 +{} = {} ({:X})\n", offset, (int64_t)value, value);
}
void TraceContextStoreF32(void* raw_context, uint64_t offset,
                          float32x4_t value) {
  auto thread_state = *reinterpret_cast<ThreadState**>(raw_context);
  DPRINT("ctx f32 +{} = {} ({:X})\n", offset, xe::m128_f32<0>(value),
         xe::m128_i32<0>(value));
}
void TraceContextStoreF64(void* raw_context, uint64_t offset,
                          const double* value) {
  auto thread_state = *reinterpret_cast<ThreadState**>(raw_context);
  // auto v = _mm_loadu_pd(value);
  auto v = vld1q_f64(value);
  DPRINT("ctx f64 +{} = {} ({:X})\n", offset, xe::m128_f64<0>(v),
         xe::m128_i64<0>(v));
}
void TraceContextStoreV128(void* raw_context, uint64_t offset,
                           float32x4_t value) {
  auto thread_state = *reinterpret_cast<ThreadState**>(raw_context);
  DPRINT("ctx v128 +{} = [{}, {}, {}, {}] [{:08X}, {:08X}, {:08X}, {:08X}]\n",
         offset, xe::m128_f32<0>(value), xe::m128_f32<1>(value),
         xe::m128_f32<2>(value), xe::m128_f32<3>(value), xe::m128_i32<0>(value),
         xe::m128_i32<1>(value), xe::m128_i32<2>(value),
         xe::m128_i32<3>(value));
}

void TraceMemoryLoadI8(void* raw_context, uint32_t address, uint8_t value) {
  auto thread_state = *reinterpret_cast<ThreadState**>(raw_context);
  DPRINT("{} ({:X}) = load.i8 {:08X}\n", (int8_t)value, value, address);
}
void TraceMemoryLoadI16(void* raw_context, uint32_t address, uint16_t value) {
  auto thread_state = *reinterpret_cast<ThreadState**>(raw_context);
  DPRINT("{} ({:X}) = load.i16 {:08X}\n", (int16_t)value, value, address);
}
void TraceMemoryLoadI32(void* raw_context, uint32_t address, uint32_t value) {
  auto thread_state = *reinterpret_cast<ThreadState**>(raw_context);
  DPRINT("{} ({:X}) = load.i32 {:08X}\n", (int32_t)value, value, address);
}
void TraceMemoryLoadI64(void* raw_context, uint32_t address, uint64_t value) {
  auto thread_state = *reinterpret_cast<ThreadState**>(raw_context);
  DPRINT("{} ({:X}) = load.i64 {:08X}\n", (int64_t)value, value, address);
}
void TraceMemoryLoadF32(void* raw_context, uint32_t address,
                        float32x4_t value) {
  auto thread_state = *reinterpret_cast<ThreadState**>(raw_context);
  DPRINT("{} ({:X}) = load.f32 {:08X}\n", xe::m128_f32<0>(value),
         xe::m128_i32<0>(value), address);
}
void TraceMemoryLoadF64(void* raw_context, uint32_t address,
                        float64x2_t value) {
  auto thread_state = *reinterpret_cast<ThreadState**>(raw_context);
  DPRINT("{} ({:X}) = load.f64 {:08X}\n", xe::m128_f64<0>(value),
         xe::m128_i64<0>(value), address);
}
void TraceMemoryLoadV128(void* raw_context, uint32_t address,
                         float32x4_t value) {
  auto thread_state = *reinterpret_cast<ThreadState**>(raw_context);
  DPRINT(
      "[{}, {}, {}, {}] [{:08X}, {:08X}, {:08X}, {:08X}] = load.v128 {:08X}\n",
      xe::m128_f32<0>(value), xe::m128_f32<1>(value), xe::m128_f32<2>(value),
      xe::m128_f32<3>(value), xe::m128_i32<0>(value), xe::m128_i32<1>(value),
      xe::m128_i32<2>(value), xe::m128_i32<3>(value), address);
}

void TraceMemoryStoreI8(void* raw_context, uint32_t address, uint8_t value) {
  auto thread_state = *reinterpret_cast<ThreadState**>(raw_context);
  DPRINT("store.i8 {:08X} = {} ({:X})\n", address, (int8_t)value, value);
}
void TraceMemoryStoreI16(void* raw_context, uint32_t address, uint16_t value) {
  auto thread_state = *reinterpret_cast<ThreadState**>(raw_context);
  DPRINT("store.i16 {:08X} = {} ({:X})\n", address, (int16_t)value, value);
}
void TraceMemoryStoreI32(void* raw_context, uint32_t address, uint32_t value) {
  auto thread_state = *reinterpret_cast<ThreadState**>(raw_context);
  DPRINT("store.i32 {:08X} = {} ({:X})\n", address, (int32_t)value, value);
}
void TraceMemoryStoreI64(void* raw_context, uint32_t address, uint64_t value) {
  auto thread_state = *reinterpret_cast<ThreadState**>(raw_context);
  DPRINT("store.i64 {:08X} = {} ({:X})\n", address, (int64_t)value, value);
}
void TraceMemoryStoreF32(void* raw_context, uint32_t address,
                         float32x4_t value) {
  auto thread_state = *reinterpret_cast<ThreadState**>(raw_context);
  DPRINT("store.f32 {:08X} = {} ({:X})\n", address, xe::m128_f32<0>(value),
         xe::m128_i32<0>(value));
}
void TraceMemoryStoreF64(void* raw_context, uint32_t address,
                         float64x2_t value) {
  auto thread_state = *reinterpret_cast<ThreadState**>(raw_context);
  DPRINT("store.f64 {:08X} = {} ({:X})\n", address, xe::m128_f64<0>(value),
         xe::m128_i64<0>(value));
}
void TraceMemoryStoreV128(void* raw_context, uint32_t address,
                          float32x4_t value) {
  auto thread_state = *reinterpret_cast<ThreadState**>(raw_context);
  DPRINT(
      "store.v128 {:08X} = [{}, {}, {}, {}] [{:08X}, {:08X}, {:08X}, {:08X}]\n",
      address, xe::m128_f32<0>(value), xe::m128_f32<1>(value),
      xe::m128_f32<2>(value), xe::m128_f32<3>(value), xe::m128_i32<0>(value),
      xe::m128_i32<1>(value), xe::m128_i32<2>(value), xe::m128_i32<3>(value));
}

void TraceMemset(void* raw_context, uint32_t address, uint8_t value,
                 uint32_t length) {
  auto thread_state = *reinterpret_cast<ThreadState**>(raw_context);
  DPRINT("memset {:08X}-{:08X} ({}) = {:02X}", address, address + length,
         length, value);
}

}  // namespace a64
}  // namespace backend
}  // namespace cpu
}  // namespace xe
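Worth making explicit (an assumption the code relies on): every tracer recovers its ThreadState by reading the first pointer-sized field of the JIT context block, as the `*reinterpret_cast<ThreadState**>(raw_context)` pattern above implies. A minimal host-side usage sketch:

    // Hypothetical call; raw_context points at a block whose first field is
    // the owning ThreadState*.
    void ExampleTrace(xe::cpu::ThreadState* thread_state) {
      void* raw_context = &thread_state;  // stand-in for the context header
      xe::cpu::backend::a64::TraceMemoryLoadI32(raw_context, 0x82000000u,
                                                0xDEADBEEFu);
    }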
@ -0,0 +1,82 @@
/**
 ******************************************************************************
 * Xenia : Xbox 360 Emulator Research Project                                 *
 ******************************************************************************
 * Copyright 2024 Ben Vanik. All rights reserved.                             *
 * Released under the BSD license - see LICENSE in the root for more details. *
 ******************************************************************************
 */

#ifndef XENIA_CPU_BACKEND_A64_A64_TRACERS_H_
#define XENIA_CPU_BACKEND_A64_A64_TRACERS_H_

#include <arm64_neon.h>
#include <cstdint>

namespace xe {
namespace cpu {
namespace backend {
namespace a64 {
class A64Emitter;

enum TracingMode {
  TRACING_INSTR = (1 << 1),
  TRACING_DATA = (1 << 2),
};

uint32_t GetTracingMode();
inline bool IsTracingInstr() { return (GetTracingMode() & TRACING_INSTR) != 0; }
inline bool IsTracingData() { return (GetTracingMode() & TRACING_DATA) != 0; }

void TraceString(void* raw_context, const char* str);

void TraceContextLoadI8(void* raw_context, uint64_t offset, uint8_t value);
void TraceContextLoadI16(void* raw_context, uint64_t offset, uint16_t value);
void TraceContextLoadI32(void* raw_context, uint64_t offset, uint32_t value);
void TraceContextLoadI64(void* raw_context, uint64_t offset, uint64_t value);
void TraceContextLoadF32(void* raw_context, uint64_t offset, float32x4_t value);
void TraceContextLoadF64(void* raw_context, uint64_t offset,
                         const double* value);
void TraceContextLoadV128(void* raw_context, uint64_t offset,
                          float32x4_t value);

void TraceContextStoreI8(void* raw_context, uint64_t offset, uint8_t value);
void TraceContextStoreI16(void* raw_context, uint64_t offset, uint16_t value);
void TraceContextStoreI32(void* raw_context, uint64_t offset, uint32_t value);
void TraceContextStoreI64(void* raw_context, uint64_t offset, uint64_t value);
void TraceContextStoreF32(void* raw_context, uint64_t offset,
                          float32x4_t value);
void TraceContextStoreF64(void* raw_context, uint64_t offset,
                          const double* value);
void TraceContextStoreV128(void* raw_context, uint64_t offset,
                           float32x4_t value);

void TraceMemoryLoadI8(void* raw_context, uint32_t address, uint8_t value);
void TraceMemoryLoadI16(void* raw_context, uint32_t address, uint16_t value);
void TraceMemoryLoadI32(void* raw_context, uint32_t address, uint32_t value);
void TraceMemoryLoadI64(void* raw_context, uint32_t address, uint64_t value);
void TraceMemoryLoadF32(void* raw_context, uint32_t address, float32x4_t value);
void TraceMemoryLoadF64(void* raw_context, uint32_t address, float64x2_t value);
void TraceMemoryLoadV128(void* raw_context, uint32_t address,
                         float32x4_t value);

void TraceMemoryStoreI8(void* raw_context, uint32_t address, uint8_t value);
void TraceMemoryStoreI16(void* raw_context, uint32_t address, uint16_t value);
void TraceMemoryStoreI32(void* raw_context, uint32_t address, uint32_t value);
void TraceMemoryStoreI64(void* raw_context, uint32_t address, uint64_t value);
void TraceMemoryStoreF32(void* raw_context, uint32_t address,
                         float32x4_t value);
void TraceMemoryStoreF64(void* raw_context, uint32_t address,
                         float64x2_t value);
void TraceMemoryStoreV128(void* raw_context, uint32_t address,
                          float32x4_t value);

void TraceMemset(void* raw_context, uint32_t address, uint8_t value,
                 uint32_t length);

}  // namespace a64
}  // namespace backend
}  // namespace cpu
}  // namespace xe

#endif  // XENIA_CPU_BACKEND_A64_A64_TRACERS_H_
@ -0,0 +1,95 @@
/**
 ******************************************************************************
 * Xenia : Xbox 360 Emulator Research Project                                 *
 ******************************************************************************
 * Copyright 2024 Ben Vanik. All rights reserved.                             *
 * Released under the BSD license - see LICENSE in the root for more details. *
 ******************************************************************************
 */

#ifndef XENIA_CPU_BACKEND_A64_A64_UTIL_H_
#define XENIA_CPU_BACKEND_A64_A64_UTIL_H_

#include "xenia/base/vec128.h"
#include "xenia/cpu/backend/a64/a64_backend.h"
#include "xenia/cpu/backend/a64/a64_emitter.h"

namespace xe {
namespace cpu {
namespace backend {
namespace a64 {

// Attempts to convert an fp32 bit-value into an fp8-immediate value for FMOV.
// Returns false if the value cannot be represented.
// C2.2.3 Modified immediate constants in A64 floating-point instructions:
//   abcdefgh
//      V
//   aBbbbbbc defgh000 00000000 00000000
//   B = NOT(b)
constexpr bool f32_to_fimm8(uint32_t u32, oaknut::FImm8& fp8) {
  const uint32_t sign = (u32 >> 31) & 1;
  int32_t exp = ((u32 >> 23) & 0xff) - 127;
  int64_t mantissa = u32 & 0x7fffff;

  // Too many mantissa bits.
  if (mantissa & 0x7ffff) {
    return false;
  }
  // Too many exp bits.
  if (exp < -3 || exp > 4) {
    return false;
  }

  // mantissa = (16 + e:f:g:h) / 16.
  mantissa >>= 19;
  if ((mantissa & 0b1111) != mantissa) {
    return false;
  }

  // exp = (NOT(b):c:d) - 3
  exp = ((exp + 3) & 0b111) ^ 0b100;

  fp8 = oaknut::FImm8(sign, exp, uint8_t(mantissa));
  return true;
}

// Attempts to convert an fp64 bit-value into an fp8-immediate value for FMOV.
// Returns false if the value cannot be represented.
// C2.2.3 Modified immediate constants in A64 floating-point instructions:
//   abcdefgh
//      V
//   aBbbbbbb bbcdefgh 00000000 00000000 00000000 00000000 00000000 00000000
//   B = NOT(b)
constexpr bool f64_to_fimm8(uint64_t u64, oaknut::FImm8& fp8) {
  const uint32_t sign = (u64 >> 63) & 1;
  int32_t exp = ((u64 >> 52) & 0x7ff) - 1023;
  int64_t mantissa = u64 & 0xfffffffffffffULL;

  // Too many mantissa bits.
  if (mantissa & 0xffffffffffffULL) {
    return false;
  }
  // Too many exp bits.
  if (exp < -3 || exp > 4) {
    return false;
  }

  // mantissa = (16 + e:f:g:h) / 16.
  mantissa >>= 48;
  if ((mantissa & 0b1111) != mantissa) {
    return false;
  }

  // exp = (NOT(b):c:d) - 3
  exp = ((exp + 3) & 0b111) ^ 0b100;

  fp8 = oaknut::FImm8(sign, exp, uint8_t(mantissa));
  return true;
}

}  // namespace a64
}  // namespace backend
}  // namespace cpu
}  // namespace xe

#endif  // XENIA_CPU_BACKEND_A64_A64_UTIL_H_
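Usage sketch (example values chosen for illustration): 2.0f has bit pattern 0x40000000 and is one of the 256 FMOV-encodable immediates, while 0.1f (0x3DCCCCCD) carries too many mantissa bits and is rejected.

    #include <cassert>
    void FImm8Examples() {
      oaknut::FImm8 imm{0, 0, 0};
      assert(f32_to_fimm8(0x40000000u, imm));   // 2.0f: encodable
      assert(!f32_to_fimm8(0x3DCCCCCDu, imm));  // 0.1f: rejected
    }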
@ -0,0 +1,31 @@
project_root = "../../../../.."
include(project_root.."/tools/build")

group("src")
project("xenia-cpu-backend-a64")
  uuid("495f3f3e-f5e8-489a-bd0f-289d0495bc08")
  filter("architecture:ARM64")
    kind("StaticLib")
  filter("architecture:not ARM64")
    kind("None")
  filter({})
  language("C++")
  cppdialect("C++20")
  links({
    "fmt",
    "xenia-base",
    "xenia-cpu",
  })
  defines({
  })

  disablewarnings({
    -- Silence errors in oaknut
    "4146", -- unary minus operator applied to unsigned type, result still unsigned
    "4267" -- 'initializing': conversion from 'size_t' to 'uint32_t', possible loss of data
  })

  includedirs({
    project_root.."/third_party/oaknut/include",
  })
  local_platform_files()
@ -4,7 +4,11 @@ include(project_root.."/tools/build")
 group("src")
 project("xenia-cpu-backend-x64")
   uuid("7d8d5dce-4696-4197-952a-09506f725afe")
-  kind("StaticLib")
+  filter("architecture:x86_64")
+    kind("StaticLib")
+  filter("architecture:not x86_64")
+    kind("None")
+  filter({})
   language("C++")
   links({
     "capstone",
@ -48,7 +48,8 @@ std::string Breakpoint::to_string() const {
     str += " " + functions[0]->name();
     return str;
   } else {
-    return std::string("x64 ") + xe::string_util::to_hex_string(host_address());
+    return std::string(XE_HOST_ARCH_NAME " ") +
+           xe::string_util::to_hex_string(host_address());
   }
 }
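A small aside (an assumption made checkable): XE_HOST_ARCH_NAME must expand to a string literal such as "x64" or "a64" for the adjacent-literal concatenation above to compile; a one-line probe would verify it.

    static_assert(sizeof(XE_HOST_ARCH_NAME " ") > sizeof(" "),
                  "XE_HOST_ARCH_NAME expands to a non-empty string literal");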
@ -9,7 +9,7 @@

 #include "xenia/cpu/cpu_flags.h"

-DEFINE_string(cpu, "any", "CPU backend [any, x64].", "CPU");
+DEFINE_string(cpu, "any", "CPU backend [any, x64, a64].", "CPU");

 DEFINE_string(
     load_module_map, "",
@ -23,6 +23,8 @@

 #if XE_ARCH_AMD64
 #include "xenia/cpu/backend/x64/x64_backend.h"
+#elif XE_ARCH_ARM64
+#include "xenia/cpu/backend/a64/a64_backend.h"
 #endif  // XE_ARCH

 #if XE_COMPILER_MSVC
@ -203,11 +205,17 @@ class TestRunner {
   if (cvars::cpu == "x64") {
     backend.reset(new xe::cpu::backend::x64::X64Backend());
   }
+#elif XE_ARCH_ARM64
+  if (cvars::cpu == "a64") {
+    backend.reset(new xe::cpu::backend::a64::A64Backend());
+  }
 #endif  // XE_ARCH
   if (cvars::cpu == "any") {
     if (!backend) {
 #if XE_ARCH_AMD64
       backend.reset(new xe::cpu::backend::x64::X64Backend());
+#elif XE_ARCH_ARM64
+      backend.reset(new xe::cpu::backend::a64::A64Backend());
 #endif  // XE_ARCH
     }
   }
@ -27,7 +27,11 @@ project("xenia-cpu-ppc-tests")
     links({
       "xenia-cpu-backend-x64",
     })
-  filter("platforms:Windows")
+  filter("architecture:ARM64")
+    links({
+      "xenia-cpu-backend-a64",
+    })
+  filter("platforms:Windows-*")
     debugdir(project_root)
     debugargs({
       "2>&1",
@ -34,7 +34,11 @@
 #include "xenia/cpu/xex_module.h"

 // TODO(benvanik): based on compiler support
+#ifdef XE_ARCH_AMD64
 #include "xenia/cpu/backend/x64/x64_backend.h"
+#elif XE_ARCH_ARM64
+#include "xenia/cpu/backend/a64/a64_backend.h"
+#endif  // XE_ARCH

 #if 0 && DEBUG
 #define DEFAULT_DEBUG_FLAG true
@ -162,7 +162,7 @@ class Processor {
   // This will cancel any active step operations and resume all threads.
   void Continue();

-  // Steps the given thread a single x64 host instruction.
+  // Steps the given thread a single host instruction.
   // If the step is over a branch the branch will be followed.
   void StepHostInstruction(uint32_t thread_id);

@ -58,6 +58,12 @@ LPSYMFUNCTIONTABLEACCESS64 sym_function_table_access_64_ = nullptr;
 LPSYMGETMODULEBASE64 sym_get_module_base_64_ = nullptr;
 LPSYMGETSYMFROMADDR64 sym_get_sym_from_addr_64_ = nullptr;

+#if XE_ARCH_AMD64
+static const DWORD kMachineType = IMAGE_FILE_MACHINE_AMD64;
+#elif XE_ARCH_ARM64
+static const DWORD kMachineType = IMAGE_FILE_MACHINE_ARM64;
+#endif
+
 namespace xe {
 namespace cpu {

@ -173,40 +179,70 @@ class Win32StackWalker : public StackWalker {
     } else {
       // Copy thread context local. We will be modifying it during stack
       // walking, so we don't want to mess with the incoming copy.
+#if XE_ARCH_AMD64
       thread_context.Rip = in_host_context->rip;
       thread_context.EFlags = in_host_context->eflags;
       std::memcpy(&thread_context.Rax, in_host_context->int_registers,
                   sizeof(in_host_context->int_registers));
       std::memcpy(&thread_context.Xmm0, in_host_context->xmm_registers,
                   sizeof(in_host_context->xmm_registers));
+#elif XE_ARCH_ARM64
+      thread_context.Pc = in_host_context->pc;
+      thread_context.Cpsr = in_host_context->cpsr;
+      std::memcpy(thread_context.X, in_host_context->x,
+                  sizeof(in_host_context->x));
+      std::memcpy(&thread_context.V, in_host_context->v,
+                  sizeof(in_host_context->v));
+#endif
     }

     if (out_host_context) {
       // Write out the captured thread context if the caller asked for it.
+#if XE_ARCH_AMD64
       out_host_context->rip = thread_context.Rip;
       out_host_context->eflags = thread_context.EFlags;
       std::memcpy(out_host_context->int_registers, &thread_context.Rax,
                   sizeof(out_host_context->int_registers));
       std::memcpy(out_host_context->xmm_registers, &thread_context.Xmm0,
                   sizeof(out_host_context->xmm_registers));
+#elif XE_ARCH_ARM64
+      out_host_context->pc = thread_context.Pc;
+      out_host_context->cpsr = thread_context.Cpsr;
+      std::memcpy(out_host_context->x, &thread_context.X,
+                  sizeof(out_host_context->x));
+      std::memcpy(out_host_context->v, &thread_context.V,
+                  sizeof(out_host_context->v));
+#endif
     }

     // Setup the frame for walking.
     STACKFRAME64 stack_frame = {0};
     stack_frame.AddrPC.Mode = AddrModeFlat;
+#if XE_ARCH_AMD64
     stack_frame.AddrPC.Offset = thread_context.Rip;
+#elif XE_ARCH_ARM64
+    stack_frame.AddrPC.Offset = thread_context.Pc;
+#endif
     stack_frame.AddrFrame.Mode = AddrModeFlat;
+#if XE_ARCH_AMD64
     stack_frame.AddrFrame.Offset = thread_context.Rbp;
+#elif XE_ARCH_ARM64
+    stack_frame.AddrFrame.Offset = thread_context.Fp;
+#endif
     stack_frame.AddrStack.Mode = AddrModeFlat;
+#if XE_ARCH_AMD64
     stack_frame.AddrStack.Offset = thread_context.Rsp;
+#elif XE_ARCH_ARM64
+    stack_frame.AddrStack.Offset = thread_context.Sp;
+#endif

     // Walk the stack.
     // Note that StackWalk64 is thread safe, though other dbghelp functions are
     // not.
     size_t frame_index = 0;
     while (frame_index < frame_count &&
-           stack_walk_64_(IMAGE_FILE_MACHINE_AMD64, GetCurrentProcess(),
-                          thread_handle, &stack_frame, &thread_context, nullptr,
+           stack_walk_64_(kMachineType, GetCurrentProcess(), thread_handle,
+                          &stack_frame, &thread_context, nullptr,
                           XSymFunctionTableAccess64, XSymGetModuleBase64,
                           nullptr) == TRUE) {
       if (frame_index >= frame_offset) {
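A short aside (a sketch under assumed semantics, not code from the commit): StackWalk64 only needs three seed addresses (PC, frame, stack) plus the machine type, and priming those per-architecture is all the #if blocks above do. Factored out, the seeding looks like:

    // Hypothetical helper; field names follow the Windows CONTEXT struct.
    void SeedFrame(STACKFRAME64& frame, const CONTEXT& ctx) {
      frame.AddrPC.Mode = frame.AddrFrame.Mode = frame.AddrStack.Mode =
          AddrModeFlat;
    #if XE_ARCH_AMD64
      frame.AddrPC.Offset = ctx.Rip;     // instruction pointer
      frame.AddrFrame.Offset = ctx.Rbp;  // frame pointer
      frame.AddrStack.Offset = ctx.Rsp;  // stack pointer
    #elif XE_ARCH_ARM64
      frame.AddrPC.Offset = ctx.Pc;
      frame.AddrFrame.Offset = ctx.Fp;
      frame.AddrStack.Offset = ctx.Sp;
    #endif
    }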
@ -237,7 +273,7 @@ class Win32StackWalker : public StackWalker {
         if (function) {
           frame.guest_symbol.function = function;
           // Figure out where in guest code we are by looking up the
-          // displacement in x64 from the JIT'ed code start to the PC.
+          // displacement in bytes from the JIT'ed code start to the PC.
           if (function->is_guest()) {
             auto guest_function = static_cast<GuestFunction*>(function);
             // Adjust the host PC by -1 so that we will go back into whatever
@ -19,6 +19,12 @@ test_suite("xenia-cpu-tests", project_root, ".", {
|
|||
links = {
|
||||
"xenia-cpu-backend-x64",
|
||||
},
|
||||
}
|
||||
},
|
||||
{
|
||||
filter = 'architecture:ARM64',
|
||||
links = {
|
||||
"xenia-cpu-backend-a64",
|
||||
},
|
||||
},
|
||||
},
|
||||
})
|
||||
|
|
|
@ -13,7 +13,12 @@
 #include <vector>

 #include "xenia/base/platform.h"
+#if XE_ARCH_AMD64
 #include "xenia/cpu/backend/x64/x64_backend.h"
+#elif XE_ARCH_ARM64
+#include "xenia/cpu/backend/a64/a64_backend.h"
+#endif  // XE_ARCH
+
 #include "xenia/cpu/hir/hir_builder.h"
 #include "xenia/cpu/ppc/ppc_context.h"
 #include "xenia/cpu/ppc/ppc_frontend.h"

@ -39,6 +44,8 @@ class TestFunction {
   std::unique_ptr<xe::cpu::backend::Backend> backend;
 #if XE_ARCH_AMD64
   backend.reset(new xe::cpu::backend::x64::X64Backend());
+#elif XE_ARCH_ARM64
+  backend.reset(new xe::cpu::backend::a64::A64Backend());
 #endif  // XE_ARCH
   if (backend) {
     auto processor = std::make_unique<Processor>(memory.get(), nullptr);

@ -74,7 +81,7 @@ class TestFunction {
   uint32_t stack_address = memory_size - stack_size;
   uint32_t thread_state_address = stack_address - 0x1000;
   auto thread_state = std::make_unique<ThreadState>(processor.get(), 0x100);
-  assert_always();  // TODO: Allocate a thread stack!!!
+  // assert_always();  // TODO: Allocate a thread stack!!!
   auto ctx = thread_state->context();
   ctx->lr = 0xBCBCBCBC;

@ -63,7 +63,13 @@ DebugWindow::DebugWindow(Emulator* emulator,
       processor_(emulator->processor()),
       app_context_(app_context),
       window_(xe::ui::Window::Create(app_context_, kBaseTitle, 1500, 1000)) {
-  if (cs_open(CS_ARCH_X86, CS_MODE_64, &capstone_handle_) != CS_ERR_OK) {
+  if (
+#ifdef XE_ARCH_AMD64
+      cs_open(CS_ARCH_X86, CS_MODE_64, &capstone_handle_)
+#elif XE_ARCH_ARM64
+      cs_open(CS_ARCH_ARM64, CS_MODE_LITTLE_ENDIAN, &capstone_handle_)
+#endif
+      != CS_ERR_OK) {
     assert_always("Failed to initialize capstone");
   }
   cs_option(capstone_handle_, CS_OPT_SYNTAX, CS_OPT_SYNTAX_INTEL);
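For readers unfamiliar with capstone, a hedged sketch of what the handle opened above is then used for — cs_disasm/cs_free are standard capstone entry points; the buffer and address here are purely illustrative:

    #include <capstone/capstone.h>
    #include <cinttypes>
    #include <cstdio>

    void DisasmExample(csh handle, const uint8_t* code, size_t size,
                       uint64_t address) {
      cs_insn* insns = nullptr;
      // Decode every instruction in [code, code + size).
      size_t count = cs_disasm(handle, code, size, address, 0, &insns);
      for (size_t j = 0; j < count; ++j) {
        // e.g. "mov rax, rbx" on x64 or "mov x0, x1" on ARM64.
        std::printf("%" PRIx64 ": %s %s\n", insns[j].address,
                    insns[j].mnemonic, insns[j].op_str);
      }
      cs_free(insns, count);
    }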
@ -338,7 +344,7 @@ void DebugWindow::DrawSourcePane() {
   // copy button
   // address start - end
   // name text box (editable)
-  // combo for interleaved + [ppc, hir, opt hir, x64 + byte with sizes]
+  // combo for interleaved + [ppc, hir, opt hir, asm + byte with sizes]
   ImGui::AlignTextToFramePadding();
   ImGui::Text("%s", function->module()->name().c_str());
   ImGui::SameLine();

@ -383,11 +389,11 @@ void DebugWindow::DrawSourcePane() {
   }
   ImGui::SameLine();
   if (state_.source_display_mode > 0) {
-    // Only show x64 step button if we have x64 visible.
+    // Only show asm step button if we have asm visible.
     ImGui::Dummy(ImVec2(4, 0));
     ImGui::SameLine();
     ImGui::PushButtonRepeat(true);
-    if (ImGui::ButtonEx("Step x64", ImVec2(0, 0),
+    if (ImGui::ButtonEx("Step " XE_HOST_ARCH_NAME, ImVec2(0, 0),
                         can_step ? 0 : ImGuiItemFlags_Disabled)) {
       // By enabling the button when stepping we allow repeat behavior.
       if (processor_->execution_state() != cpu::ExecutionState::kStepping) {

@ -396,8 +402,8 @@ void DebugWindow::DrawSourcePane() {
     }
     ImGui::PopButtonRepeat();
     if (ImGui::IsItemHovered()) {
-      ImGui::SetTooltip(
-          "Step one x64 instruction on the current thread (hold for many).");
+      ImGui::SetTooltip("Step one " XE_HOST_ARCH_NAME
+                        " instruction on the current thread (hold for many).");
     }
     ImGui::SameLine();
   }

@ -412,9 +418,9 @@ void DebugWindow::DrawSourcePane() {
   if (function->is_guest()) {
     const char* kSourceDisplayModes[] = {
         "PPC",
-        "PPC+HIR+x64",
-        "PPC+HIR (opt)+x64",
-        "PPC+x64",
+        "PPC+HIR+" XE_HOST_ARCH_NAME,
+        "PPC+HIR (opt)+" XE_HOST_ARCH_NAME,
+        "PPC+" XE_HOST_ARCH_NAME,
     };
     ImGui::PushItemWidth(90);
     ImGui::Combo("##display_mode", &state_.source_display_mode,

@ -459,7 +465,7 @@ void DebugWindow::DrawGuestFunctionSource() {
   // labels get their own line with duped addresses
   // show xrefs to labels?
   // hir greyed and offset (background color change?)
-  // x64 greyed and offset with native address
+  // asm greyed and offset with native address
   // hover on registers/etc for tooltip/highlight others
   // click register to go to location of last write
   // click code address to jump to code

@ -472,18 +478,18 @@ void DebugWindow::DrawGuestFunctionSource() {

   bool draw_hir = false;
   bool draw_hir_opt = false;
-  bool draw_x64 = false;
+  bool draw_asm = false;
   switch (state_.source_display_mode) {
     case 1:
       draw_hir = true;
-      draw_x64 = true;
+      draw_asm = true;
       break;
     case 2:
       draw_hir_opt = true;
-      draw_x64 = true;
+      draw_asm = true;
       break;
     case 3:
-      draw_x64 = true;
+      draw_asm = true;
       break;
   }

@ -498,8 +504,8 @@ void DebugWindow::DrawGuestFunctionSource() {
   if (draw_hir_opt) {
     // TODO(benvanik): get HIR and draw preamble.
   }
-  if (draw_x64) {
-    // x64 preamble.
+  if (draw_asm) {
+    // asm preamble.
     DrawMachineCodeSource(function->machine_code(), source_map[0].code_offset);
   }

@ -512,7 +518,7 @@ void DebugWindow::DrawGuestFunctionSource() {
   bool is_current_instr = address == guest_pc;
   if (is_current_instr) {
     ImGui::PushStyleColor(ImGuiCol_Text, ImVec4(0.0f, 1.0f, 0.0f, 1.0f));
-    if (!draw_x64) {
+    if (!draw_asm) {
       ScrollToSourceIfPcChanged();
     }
   }

@ -548,7 +554,7 @@ void DebugWindow::DrawGuestFunctionSource() {
   if (draw_hir_opt) {
     // TODO(benvanik): get HIR and draw for this PPC function.
   }
-  if (draw_x64) {
+  if (draw_asm) {
     const uint8_t* machine_code_start =
         function->machine_code() + source_map[source_map_index].code_offset;
     const size_t machine_code_length =

@ -851,10 +857,10 @@ void DebugWindow::DrawRegistersPane() {
   if (state_.register_group == RegisterGroup::kHostGeneral) {
     ImGui::PushStyleColor(ImGuiCol_Button,
                           ImGui::GetStyle().Colors[ImGuiCol_ButtonActive]);
-    ImGui::Button("x64");
+    ImGui::Button(XE_HOST_ARCH_NAME);
     ImGui::PopStyleColor();
   } else {
-    if (ImGui::Button("x64")) {
+    if (ImGui::Button(XE_HOST_ARCH_NAME)) {
       state_.register_group = RegisterGroup::kHostGeneral;
     }
   }

@ -862,10 +868,10 @@ void DebugWindow::DrawRegistersPane() {
   if (state_.register_group == RegisterGroup::kHostVector) {
     ImGui::PushStyleColor(ImGuiCol_Button,
                           ImGui::GetStyle().Colors[ImGuiCol_ButtonActive]);
-    ImGui::Button("XMM");
+    ImGui::Button(XE_HOST_ARCH_NAME "-vec");
     ImGui::PopStyleColor();
   } else {
-    if (ImGui::Button("XMM")) {
+    if (ImGui::Button(XE_HOST_ARCH_NAME "-vec")) {
       state_.register_group = RegisterGroup::kHostVector;
     }
   }

@ -958,6 +964,7 @@ void DebugWindow::DrawRegistersPane() {
     } break;
     case RegisterGroup::kHostGeneral: {
      ImGui::BeginChild("##host_general");
+#if XE_ARCH_AMD64
      for (int i = 0; i < 18; ++i) {
        auto reg = static_cast<X64Register>(i);
        ImGui::BeginGroup();

@ -995,6 +1002,46 @@ void DebugWindow::DrawRegistersPane() {
             i, thread_info->host_context.xmm_registers[i].f32);
        ImGui::EndGroup();
      }
+#elif XE_ARCH_ARM64
+      // TODO(wunkolo): print ARM64 registers
+      for (int i = 0; i < 34; ++i) {
+        auto reg = static_cast<Arm64Register>(i);
+        ImGui::BeginGroup();
+        ImGui::AlignTextToFramePadding();
+        ImGui::Text("%3s", HostThreadContext::GetRegisterName(reg));
+        ImGui::SameLine();
+        ImGui::Dummy(ImVec2(4, 0));
+        ImGui::SameLine();
+        if (reg == Arm64Register::kPc) {
+          dirty_guest_context |=
+              DrawRegisterTextBox(i, &thread_info->host_context.pc);
+        } else if (reg == Arm64Register::kPstate) {
+          dirty_guest_context |=
+              DrawRegisterTextBox(i, &thread_info->host_context.cpsr);
+        } else {
+          dirty_guest_context |=
+              DrawRegisterTextBox(i, &thread_info->host_context.x[i]);
+        }
+        ImGui::EndGroup();
+      }
+      ImGui::EndChild();
+    } break;
+    case RegisterGroup::kHostVector: {
+      ImGui::BeginChild("##host_vector");
+      for (int i = 0; i < 32; ++i) {
+        auto reg = static_cast<Arm64Register>(
+            static_cast<int>(Arm64Register::kV0) + i);
+        ImGui::BeginGroup();
+        ImGui::AlignTextToFramePadding();
+        ImGui::Text("%5s", HostThreadContext::GetRegisterName(reg));
+        ImGui::SameLine();
+        ImGui::Dummy(ImVec2(4, 0));
+        ImGui::SameLine();
+        dirty_host_context |=
+            DrawRegisterTextBoxes(i, thread_info->host_context.v[i].f32);
+        ImGui::EndGroup();
+      }
+#endif
      ImGui::EndChild();
    }
  }

@ -1144,7 +1191,8 @@ void DebugWindow::DrawBreakpointsPane() {
     ImGui::OpenPopup("##add_code_breakpoint");
   }
   if (ImGui::IsItemHovered()) {
-    ImGui::SetTooltip("Add a code breakpoint for either PPC or x64.");
+    ImGui::SetTooltip(
+        "Add a code breakpoint for either PPC or " XE_HOST_ARCH_NAME ".");
   }
   // TODO(benvanik): remove this set focus workaround when imgui is fixed:
   // https://github.com/ocornut/imgui/issues/343

@ -1178,15 +1226,15 @@ void DebugWindow::DrawBreakpointsPane() {
     ImGui::Dummy(ImVec2(0, 2));

     ImGui::AlignTextToFramePadding();
-    ImGui::Text("x64");
+    ImGui::Text(XE_HOST_ARCH_NAME);
     ImGui::SameLine();
     ImGui::Dummy(ImVec2(2, 0));
     ImGui::SameLine();
-    static char x64_buffer[64] = {0};
+    static char asm_buffer[64] = {0};
     ImGui::PushItemWidth(100);
-    if (ImGui::InputText("##host_address", x64_buffer, 17, input_flags)) {
-      uint64_t address = string_util::from_string<uint64_t>(x64_buffer, true);
-      x64_buffer[0] = 0;
+    if (ImGui::InputText("##host_address", asm_buffer, 17, input_flags)) {
+      uint64_t address = string_util::from_string<uint64_t>(asm_buffer, true);
+      asm_buffer[0] = 0;
       CreateCodeBreakpoint(Breakpoint::AddressType::kHost, address);
       ImGui::CloseCurrentPopup();
     }
@ -53,6 +53,8 @@

 #if XE_ARCH_AMD64
 #include "xenia/cpu/backend/x64/x64_backend.h"
+#elif XE_ARCH_ARM64
+#include "xenia/cpu/backend/a64/a64_backend.h"
 #endif  // XE_ARCH

 DECLARE_int32(user_language);

@ -172,11 +174,18 @@ X_STATUS Emulator::Setup(
   if (cvars::cpu == "x64") {
     backend.reset(new xe::cpu::backend::x64::X64Backend());
   }
+#elif XE_ARCH_ARM64
+  if (cvars::cpu == "a64") {
+    backend.reset(new xe::cpu::backend::a64::A64Backend());
+  }
 #endif  // XE_ARCH
   if (cvars::cpu == "any") {
     if (!backend) {
 #if XE_ARCH_AMD64
       backend.reset(new xe::cpu::backend::x64::X64Backend());
+#elif XE_ARCH_ARM64
+      // TODO(wunkolo): Arm64 backend
+      backend.reset(new xe::cpu::backend::a64::A64Backend());
 #endif  // XE_ARCH
     }
   }
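The selection logic above, factored as a sketch (cvars::cpu and the backend headers are the ones included earlier; error handling elided, so treat this as illustrative rather than the emulator's actual helper):

    std::unique_ptr<xe::cpu::backend::Backend> CreateCpuBackend() {
      std::unique_ptr<xe::cpu::backend::Backend> backend;
    #if XE_ARCH_AMD64
      if (cvars::cpu == "x64" || cvars::cpu == "any") {
        backend = std::make_unique<xe::cpu::backend::x64::X64Backend>();
      }
    #elif XE_ARCH_ARM64
      if (cvars::cpu == "a64" || cvars::cpu == "any") {
        backend = std::make_unique<xe::cpu::backend::a64::A64Backend>();
      }
    #endif  // XE_ARCH
      return backend;  // Null if the flag names a backend this build lacks.
    }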
@ -70,6 +70,11 @@ project("xenia-gpu-d3d12-trace-viewer")
       "xenia-cpu-backend-x64",
     })

+  filter("architecture:ARM64")
+    links({
+      "xenia-cpu-backend-a64",
+    })
+
 group("src")
 project("xenia-gpu-d3d12-trace-dump")
   uuid("686b859c-0046-44c4-a02c-41fc3fb75698")

@ -120,3 +125,8 @@ project("xenia-gpu-d3d12-trace-dump")
     links({
       "xenia-cpu-backend-x64",
     })
+
+  filter("architecture:ARM64")
+    links({
+      "xenia-cpu-backend-a64",
+    })
@ -43,7 +43,7 @@ project("xenia-gpu-shader-compiler")
     "../base/console_app_main_"..platform_suffix..".cc",
   })

-  filter("platforms:Windows")
+  filter("platforms:Windows-*")
     -- Only create the .user file if it doesn't already exist.
     local user_file = project_root.."/build/xenia-gpu-shader-compiler.vcxproj.user"
     if not os.isfile(user_file) then
@ -68,6 +68,11 @@ project("xenia-gpu-vulkan-trace-viewer")
       "xenia-cpu-backend-x64",
     })

+  filter("architecture:ARM64")
+    links({
+      "xenia-cpu-backend-a64",
+    })
+
   filter("platforms:Linux")
     links({
       "X11",

@ -75,7 +80,7 @@ project("xenia-gpu-vulkan-trace-viewer")
       "X11-xcb",
     })

-  filter("platforms:Windows")
+  filter("platforms:Windows-*")
     -- Only create the .user file if it doesn't already exist.
     local user_file = project_root.."/build/xenia-gpu-vulkan-trace-viewer.vcxproj.user"
     if not os.isfile(user_file) then

@ -131,6 +136,11 @@ project("xenia-gpu-vulkan-trace-dump")
       "xenia-cpu-backend-x64",
     })

+  filter("architecture:ARM64")
+    links({
+      "xenia-cpu-backend-a64",
+    })
+
   filter("platforms:Linux")
     links({
       "X11",

@ -138,7 +148,7 @@ project("xenia-gpu-vulkan-trace-dump")
       "X11-xcb",
     })

-  filter("platforms:Windows")
+  filter("platforms:Windows-*")
     -- Only create the .user file if it doesn't already exist.
     local user_file = project_root.."/build/xenia-gpu-vulkan-trace-dump.vcxproj.user"
     if not os.isfile(user_file) then
@ -53,7 +53,7 @@ project("xenia-hid-demo")
       "X11-xcb",
     })

-  filter("platforms:Windows")
+  filter("platforms:Windows-*")
     links({
       "xenia-hid-winkey",
       "xenia-hid-xinput",

@ -19,7 +19,7 @@ project("xenia-ui")
   -- Exports JNI functions.
   wholelib("On")

-  filter("platforms:Windows")
+  filter("platforms:Windows-*")
     links({
       "dwmapi",
       "dxgi",

@ -26,7 +26,7 @@ end
 -- Call this function in project scope to include the SDL2 headers.
 --
 function sdl2_include()
-  filter("platforms:Windows")
+  filter("platforms:Windows-*")
     includedirs({
       path.getrelative(".", third_party_path) .. "/SDL2/include",
     })
@ -4,13 +4,37 @@ project("capstone")
   kind("StaticLib")
   language("C")
   defines({
-    "CAPSTONE_X86_ATT_DISABLE",
     "CAPSTONE_DIET_NO",
-    "CAPSTONE_X86_REDUCE_NO",
-    "CAPSTONE_HAS_X86",
     "CAPSTONE_USE_SYS_DYN_MEM",
     "_LIB",
   })
+  filter("architecture:x86_64")
+    defines({
+      "CAPSTONE_HAS_X86",
+      "CAPSTONE_X86_ATT_DISABLE",
+      "CAPSTONE_X86_REDUCE_NO",
+    })
+    files({
+      "capstone/arch/X86/*.c",
+      "capstone/arch/X86/*.h",
+      "capstone/arch/X86/*.inc",
+    })
+    force_compile_as_c({
+      "capstone/arch/X86/**.c",
+    })
+  filter("architecture:ARM64")
+    defines({
+      "CAPSTONE_HAS_ARM64",
+    })
+    files({
+      "capstone/arch/AArch64/*.c",
+      "capstone/arch/AArch64/*.h",
+      "capstone/arch/AArch64/*.inc",
+    })
+    force_compile_as_c({
+      "capstone/arch/AArch64/**.c",
+    })
+  filter({})
   includedirs({
     "capstone",
     "capstone/include",

@ -32,12 +56,7 @@ project("capstone")
     "capstone/SStream.h",
     "capstone/utils.c",
     "capstone/utils.h",
-
-    "capstone/arch/X86/*.c",
-    "capstone/arch/X86/*.h",
-    "capstone/arch/X86/*.inc",
   })
   force_compile_as_c({
     "capstone/**.c",
-    "capstone/arch/X86/**.c",
   })
@ -30,7 +30,7 @@ project("discord-rpc")
     files({
       "discord-rpc/src/discord_register_osx.m"
     })
-  filter("platforms:Windows")
+  filter("platforms:Windows-*")
     files({
       "discord-rpc/src/connection_win.cpp",
       "discord-rpc/src/discord_register_win.cpp"
@ -3252,7 +3252,7 @@ void MicroProfileDraw(uint32_t nWidth, uint32_t nHeight)

 #if MICROPROFILE_CONTEXT_SWITCH_TRACE
 	MicroProfileStringArrayAddLiteral(&Debug, "Context Switch");
-	MicroProfileStringArrayFormat(&Debug, "%9d [%7d]", S.nContextSwitchUsage, MICROPROFILE_CONTEXT_SWITCH_BUFFER_SIZE / S.nContextSwitchUsage );
+	MicroProfileStringArrayFormat(&Debug, "%9d [%7d]", S.nContextSwitchUsage, S.nContextSwitchUsage ? MICROPROFILE_CONTEXT_SWITCH_BUFFER_SIZE / S.nContextSwitchUsage : 0 );
 #endif

 	for(int i = 0; i < MICROPROFILE_MAX_THREADS; ++i)
@ -28,7 +28,7 @@ project("mspack")
     "mspack/system.h",
   })

-  filter("platforms:Windows")
+  filter("platforms:Windows-*")
     defines({
     })
   filter("platforms:Linux")
@ -0,0 +1 @@
+Subproject commit 94c726ce0338b054eb8cb5ea91de8fe6c19f4392
@ -18,5 +18,5 @@ project("snappy")
     "snappy/snappy.h",
   })

-  filter("platforms:Windows")
+  filter("platforms:Windows-*")
     warnings("Off") -- Too many warnings.
@ -20,7 +20,7 @@ local function match_platform_files(base_path, base_match)
   removefiles({base_path.."/".."**_android.h", base_path.."/".."**_android.cc"})
   removefiles({base_path.."/".."**_mac.h", base_path.."/".."**_mac.cc"})
   removefiles({base_path.."/".."**_win.h", base_path.."/".."**_win.cc"})
-  filter("platforms:Windows")
+  filter("platforms:Windows-*")
     files({
       base_path.."/"..base_match.."_win.h",
       base_path.."/"..base_match.."_win.cc",
@ -781,6 +781,8 @@ class BaseBuildCommand(Command):
     self.parser.add_argument(
         '--target', action='append', default=[],
         help='Builds only the given target(s).')
+    self.parser.add_argument(
+        '--arch', default='x86_64', help='Builds only the given architecture')
     self.parser.add_argument(
         '--force', action='store_true',
         help='Forces a full rebuild.')

@ -823,6 +825,7 @@ class BaseBuildCommand(Command):
         '/m',
         '/v:m',
         '/p:Configuration=' + args['config'],
+        '/p:Platform=' + "Windows-" + args['arch'],
         ] + ([targets] if targets is not None else []) + pass_args,
         shell=False)
     elif sys.platform == 'darwin':