Merge remote-tracking branch 'bwr/linux_cpu' into canary

This commit is contained in:
Prism Tutaj 2019-09-07 20:55:27 -05:00
commit 28762f40fb
21 changed files with 357 additions and 70 deletions

View File

@ -50,6 +50,7 @@ before_script:
- if [[ $BUILD == true ]]; then sudo dpkg -i libvulkan1_$LIBVULKAN_VERSION+dfsg1-1_amd64.deb libvulkan-dev_$LIBVULKAN_VERSION+dfsg1-1_amd64.deb; fi
# Prepare environment (pull dependencies, build tools).
- travis_retry ./xenia-build setup
- if [[ $BUILD == true ]]; then ./third_party/binutils/build.sh; fi
script:
# Run linter.
@ -58,9 +59,13 @@ script:
# Build and run base tests.
- if [[ $BUILD == true ]]; then ./xenia-build build --config=$CONFIG --target=xenia-base-tests; fi
- if [[ $BUILD == true ]]; then ./build/bin/Linux/$CONFIG/xenia-base-tests; fi
# Build and run cpu tests.
- if [[ $BUILD == true ]]; then ./xenia-build build --config=$CONFIG --target=xenia-cpu-tests; fi
- if [[ $BUILD == true ]]; then ./build/bin/Linux/$CONFIG/xenia-cpu-tests; fi
# Build and run ppc tests.
- if [[ $BUILD == true ]]; then ./xenia-build gentests; fi
- if [[ $BUILD == true ]]; then ./xenia-build build --config=$CONFIG --target=xenia-cpu-ppc-tests; fi
# - if [[ $BUILD == true ]]; then ./build/bin/Linux/$CONFIG/xenia-cpu-ppc-tests --log_file=stdout; fi
- if [[ $BUILD == true ]]; then ./build/bin/Linux/$CONFIG/xenia-cpu-ppc-tests --log_file=stdout; fi
# Build all of xenia.
- if [[ $BUILD == true ]]; then ./xenia-build build --config=$CONFIG; fi

View File

@ -9,10 +9,135 @@
#include "xenia/base/exception_handler.h"
#include <signal.h>
#include <ucontext.h>
namespace xe {
// TODO(DrChat): Exception handling on linux.
void ExceptionHandler::Install(Handler fn, void* data) {}
void ExceptionHandler::Uninstall(Handler fn, void* data) {}
bool signal_handlers_installed_ = false;
struct sigaction original_sigill_handler_;
struct sigaction original_sigsegv_handler_;
} // namespace xe
// This can be as large as needed, but isn't often needed.
// As we will be sometimes firing many exceptions we want to avoid having to
// scan the table too much or invoke many custom handlers.
constexpr size_t kMaxHandlerCount = 8;
// All custom handlers, left-aligned and null terminated.
// Executed in order.
std::pair<ExceptionHandler::Handler, void*> handlers_[kMaxHandlerCount];
// Trampoline installed via sigaction() for SIGILL/SIGSEGV.
// Captures the faulting thread's x64 register state from the signal
// ucontext, wraps it in an xe::Exception, and walks the registered
// handlers in registration order until one reports the exception handled.
// NOTE(review): the captured X64Context is not written back to the
// ucontext here, so handler-side register mutations may rely on writeback
// elsewhere — confirm against the Windows implementation.
void ExceptionHandlerCallback(int signum, siginfo_t* siginfo, void* sigctx) {
  ucontext_t* ctx = static_cast<ucontext_t*>(sigctx);

  // Snapshot the general-purpose and XMM registers at the fault site.
  X64Context thread_context;
  thread_context.rip = ctx->uc_mcontext.gregs[REG_RIP];
  thread_context.eflags = ctx->uc_mcontext.gregs[REG_EFL];
  thread_context.rax = ctx->uc_mcontext.gregs[REG_RAX];
  thread_context.rcx = ctx->uc_mcontext.gregs[REG_RCX];
  thread_context.rdx = ctx->uc_mcontext.gregs[REG_RDX];
  thread_context.rbx = ctx->uc_mcontext.gregs[REG_RBX];
  thread_context.rsp = ctx->uc_mcontext.gregs[REG_RSP];
  thread_context.rbp = ctx->uc_mcontext.gregs[REG_RBP];
  thread_context.rsi = ctx->uc_mcontext.gregs[REG_RSI];
  thread_context.rdi = ctx->uc_mcontext.gregs[REG_RDI];
  thread_context.r8 = ctx->uc_mcontext.gregs[REG_R8];
  thread_context.r9 = ctx->uc_mcontext.gregs[REG_R9];
  thread_context.r10 = ctx->uc_mcontext.gregs[REG_R10];
  thread_context.r11 = ctx->uc_mcontext.gregs[REG_R11];
  thread_context.r12 = ctx->uc_mcontext.gregs[REG_R12];
  thread_context.r13 = ctx->uc_mcontext.gregs[REG_R13];
  thread_context.r14 = ctx->uc_mcontext.gregs[REG_R14];
  thread_context.r15 = ctx->uc_mcontext.gregs[REG_R15];
  std::memcpy(thread_context.xmm_registers, ctx->uc_mcontext.fpregs->_xmm,
              sizeof(thread_context.xmm_registers));

  Exception ex;
  switch (signum) {
    case SIGILL:
      ex.InitializeIllegalInstruction(&thread_context);
      break;
    case SIGSEGV: {
      // x86 page-fault error code: bit 1 (0x2) is SET when the faulting
      // access was a write and clear for a read (Intel SDM Vol. 3, §4.7).
      // The previous mapping was inverted (0x2 -> kRead).
      Exception::AccessViolationOperation access_violation_operation =
          (ctx->uc_mcontext.gregs[REG_ERR] & 0x2)
              ? Exception::AccessViolationOperation::kWrite
              : Exception::AccessViolationOperation::kRead;
      ex.InitializeAccessViolation(&thread_context,
                                   reinterpret_cast<uint64_t>(siginfo->si_addr),
                                   access_violation_operation);
    } break;
    default:
      assert_always("Unhandled signum");
      break;
  }

  // Give each registered handler a chance to resolve the exception; the
  // table is left-aligned, so the first null entry ends the scan.
  for (size_t i = 0; i < xe::countof(handlers_) && handlers_[i].first; ++i) {
    if (handlers_[i].first(&ex, handlers_[i].second)) {
      // Exception handled.
      return;
    }
  }
  assert_always("Unhandled exception");
}
// Registers a custom exception handler callback. On the first registration
// this also installs process-wide SIGILL/SIGSEGV handlers, saving the
// previous dispositions so Uninstall() can restore them.
// fn: callback invoked for each trapped signal; data: opaque user pointer
// passed back to fn.
void ExceptionHandler::Install(Handler fn, void* data) {
// Hook the signals lazily, only once, no matter how many handlers register.
if (!signal_handlers_installed_) {
struct sigaction signal_handler;
std::memset(&signal_handler, 0, sizeof(signal_handler));
signal_handler.sa_sigaction = ExceptionHandlerCallback;
// SA_SIGINFO selects the three-argument handler form so the callback
// receives siginfo_t (fault address) and the ucontext (register state).
signal_handler.sa_flags = SA_SIGINFO;
if (sigaction(SIGILL, &signal_handler, &original_sigill_handler_) != 0) {
assert_always("Failed to install new SIGILL handler");
}
if (sigaction(SIGSEGV, &signal_handler, &original_sigsegv_handler_) != 0) {
assert_always("Failed to install new SIGSEGV handler");
}
signal_handlers_installed_ = true;
}
// Store the handler in the first free slot; the table is left-aligned and
// handlers run in registration order from ExceptionHandlerCallback.
for (size_t i = 0; i < xe::countof(handlers_); ++i) {
if (!handlers_[i].first) {
handlers_[i].first = fn;
handlers_[i].second = data;
return;
}
}
assert_always("Too many exception handlers installed");
}
// Unregisters a handler previously added with Install(). When the last
// handler is removed, the SIGILL/SIGSEGV dispositions saved by Install()
// are restored.
void ExceptionHandler::Uninstall(Handler fn, void* data) {
// Find the matching (fn, data) pair and shift the remaining entries left
// so the table stays left-aligned and null-terminated.
for (size_t i = 0; i < xe::countof(handlers_); ++i) {
if (handlers_[i].first == fn && handlers_[i].second == data) {
for (; i < xe::countof(handlers_) - 1; ++i) {
handlers_[i] = handlers_[i + 1];
}
handlers_[i].first = nullptr;
handlers_[i].second = nullptr;
break;
}
}
// Check whether any handlers remain registered.
bool has_any = false;
for (size_t i = 0; i < xe::countof(handlers_); ++i) {
if (handlers_[i].first) {
has_any = true;
break;
}
}
// None left: put the process signal handling back the way we found it.
if (!has_any) {
if (signal_handlers_installed_) {
if (sigaction(SIGILL, &original_sigill_handler_, NULL) != 0) {
assert_always("Failed to restore original SIGILL handler");
}
if (sigaction(SIGSEGV, &original_sigsegv_handler_, NULL) != 0) {
assert_always("Failed to restore original SIGSEGV handler");
}
signal_handlers_installed_ = false;
}
}
}
} // namespace xe

View File

@ -204,13 +204,21 @@ inline bool bit_scan_forward(uint64_t v, uint32_t* out_first_set_index) {
#else
// Non-Win32 fallback: finds the zero-based index of the least-significant
// set bit of v. Returns true and writes the index when v is nonzero;
// returns false when v == 0.
// NOTE(review): this diff hunk displays BOTH the pre-fix lines (which wrote
// ffs()'s 1-based result directly and were off by one) and the fixed lines
// (which subtract 1). Only the "i - 1" form matches the Win32
// _BitScanForward semantics; the first two statements are stale diff
// residue — confirm against the merged file.
inline bool bit_scan_forward(uint32_t v, uint32_t* out_first_set_index) {
int i = ffs(v);
*out_first_set_index = i;
return i != 0;
if (i == 0) {
return false;
} else {
// ffs() returns a 1-based position (0 for no bits set); convert to the
// 0-based index callers expect.
*out_first_set_index = i - 1;
return true;
}
}
// 64-bit variant of the above, using ffsll().
inline bool bit_scan_forward(uint64_t v, uint32_t* out_first_set_index) {
int i = ffsll(v);
*out_first_set_index = i;
return i != 0;
if (i == 0) {
return false;
} else {
*out_first_set_index = i - 1;
return true;
}
}
#endif // XE_PLATFORM_WIN32
inline bool bit_scan_forward(int32_t v, uint32_t* out_first_set_index) {

View File

@ -40,7 +40,13 @@ void* AllocFixed(void* base_address, size_t length,
AllocationType allocation_type, PageAccess access) {
// mmap does not support reserve / commit, so ignore allocation_type.
uint32_t prot = ToPosixProtectFlags(access);
return mmap(base_address, length, prot, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
void* result = mmap(base_address, length, prot,
MAP_PRIVATE | MAP_FIXED | MAP_ANONYMOUS, -1, 0);
if (result == MAP_FAILED) {
return nullptr;
} else {
return result;
}
}
bool DeallocFixed(void* base_address, size_t length,
@ -90,7 +96,7 @@ FileMappingHandle CreateFileMappingHandle(std::wstring path, size_t length,
}
// Closes a file mapping created by CreateFileMappingHandle(). The handle
// smuggles a file descriptor through a pointer value, so it is narrowed
// back to an int before being handed to close(2).
// NOTE(review): as displayed, the old and new lines were both present,
// closing the descriptor twice — a double-close can release an unrelated
// fd opened concurrently. Keep exactly one close() call.
void CloseFileMappingHandle(FileMappingHandle handle) {
  close(static_cast<int>(reinterpret_cast<int64_t>(handle)));
}
void* MapFileView(FileMappingHandle handle, void* base_address, size_t length,

View File

@ -64,7 +64,10 @@ void StringBuffer::AppendFormat(const char* format, ...) {
}
void StringBuffer::AppendVarargs(const char* format, va_list args) {
int length = vsnprintf(nullptr, 0, format, args);
va_list size_args;
va_copy(size_args, args); // arg is indeterminate after the return so copy it
int length = vsnprintf(nullptr, 0, format, size_args);
va_end(size_args);
Grow(length + 1);
vsnprintf(buffer_ + buffer_offset_, buffer_capacity_, format, args);
buffer_offset_ += length;

View File

@ -0,0 +1,34 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2019 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#include "xenia/base/string.h"
#include "xenia/base/string_buffer.h"
#include "third_party/catch/include/catch.hpp"
namespace xe {
namespace base {
namespace test {
TEST_CASE("StringBuffer") {
// Accumulate two formatted appends and verify the buffer contents after
// each step.
StringBuffer string_buffer;
std::string module_path(R"(\Device\Cdrom0\default.xex)");
const uint32_t kModuleFlags = 0x1000000;
string_buffer.AppendFormat("Module %s:\n", module_path.c_str());
REQUIRE(string_buffer.to_string() == "Module \\Device\\Cdrom0\\default.xex:\n");
string_buffer.AppendFormat(" Module Flags: %.8X\n", kModuleFlags);
REQUIRE(
string_buffer.to_string() ==
"Module \\Device\\Cdrom0\\default.xex:\n Module Flags: 01000000\n");
}
} // namespace test
} // namespace base
} // namespace xe

View File

@ -403,9 +403,9 @@ X64ThunkEmitter::X64ThunkEmitter(X64Backend* backend, XbyakAllocator* allocator)
X64ThunkEmitter::~X64ThunkEmitter() {}
HostToGuestThunk X64ThunkEmitter::EmitHostToGuestThunk() {
// rcx = target
// rdx = arg0 (context)
// r8 = arg1 (guest return address)
// rcx (win), rdi (linux) = target
// rdx (win), rsi (linux) = arg0 (context)
// r8 (win), rdx (linux) = arg1 (guest return address)
struct _code_offsets {
size_t prolog;
@ -420,9 +420,15 @@ HostToGuestThunk X64ThunkEmitter::EmitHostToGuestThunk() {
code_offsets.prolog = getSize();
// rsp + 0 = return address
#if XE_PLATFORM_LINUX
mov(qword[rsp + 8 * 3], rdx);
mov(qword[rsp + 8 * 2], rsi);
mov(qword[rsp + 8 * 1], rdi);
#else
mov(qword[rsp + 8 * 3], r8);
mov(qword[rsp + 8 * 2], rdx);
mov(qword[rsp + 8 * 1], rcx);
#endif
sub(rsp, stack_size);
code_offsets.prolog_stack_alloc = getSize();
@ -431,9 +437,15 @@ HostToGuestThunk X64ThunkEmitter::EmitHostToGuestThunk() {
// Save nonvolatile registers.
EmitSaveNonvolatileRegs();
#ifdef XE_PLATFORM_LINUX
mov(rax, rdi);
// context already in rsi
mov(rcx, rdx); // return address
#else
mov(rax, rcx);
mov(rsi, rdx); // context
mov(rcx, r8); // return address
#endif
call(rax);
EmitLoadNonvolatileRegs();
@ -441,9 +453,15 @@ HostToGuestThunk X64ThunkEmitter::EmitHostToGuestThunk() {
code_offsets.epilog = getSize();
add(rsp, stack_size);
#if XE_PLATFORM_LINUX
mov(rdi, qword[rsp + 8 * 1]);
mov(rsi, qword[rsp + 8 * 2]);
mov(rdx, qword[rsp + 8 * 3]);
#else
mov(rcx, qword[rsp + 8 * 1]);
mov(rdx, qword[rsp + 8 * 2]);
mov(r8, qword[rsp + 8 * 3]);
#endif
ret();
code_offsets.tail = getSize();
@ -464,10 +482,12 @@ HostToGuestThunk X64ThunkEmitter::EmitHostToGuestThunk() {
}
GuestToHostThunk X64ThunkEmitter::EmitGuestToHostThunk() {
// rcx = target function
// rdx = arg0
// r8 = arg1
// r9 = arg2
// rcx (windows), rdi (linux) = target function
// rdx (windows), rsi (linux) = arg0
// r8 (windows), rdx (linux) = arg1
// r9 (windows), rcx (linux) = arg2
// --- (windows), r8 (linux) = arg3
// --- (windows), r9 (linux) = arg4
struct _code_offsets {
size_t prolog;
@ -490,8 +510,13 @@ GuestToHostThunk X64ThunkEmitter::EmitGuestToHostThunk() {
// Save off volatile registers.
EmitSaveVolatileRegs();
mov(rax, rcx); // function
mov(rax, rcx); // function
#if XE_PLATFORM_LINUX
mov(rdi, GetContextReg()); // context
mov(rsi, rbx);
#else
mov(rcx, GetContextReg()); // context
#endif
call(rax);
EmitLoadVolatileRegs();
@ -546,8 +571,13 @@ ResolveFunctionThunk X64ThunkEmitter::EmitResolveFunctionThunk() {
// Save volatile registers
EmitSaveVolatileRegs();
mov(rcx, rsi); // context
#if XE_PLATFORM_LINUX
mov(rdi, rsi); // context
mov(rsi, rbx);
#else
mov(rcx, rsi); // context
mov(rdx, rbx);
#endif
mov(rax, uint64_t(&ResolveFunction));
call(rax);
@ -578,6 +608,12 @@ ResolveFunctionThunk X64ThunkEmitter::EmitResolveFunctionThunk() {
void X64ThunkEmitter::EmitSaveVolatileRegs() {
// Save off volatile registers.
// mov(qword[rsp + offsetof(StackLayout::Thunk, r[0])], rax);
#if XE_PLATFORM_LINUX
mov(qword[rsp + offsetof(StackLayout::Thunk, r[1])], rdi);
mov(qword[rsp + offsetof(StackLayout::Thunk, r[2])], rsi);
mov(qword[rsp + offsetof(StackLayout::Thunk, r[3])], rcx);
mov(qword[rsp + offsetof(StackLayout::Thunk, r[4])], rdx);
#else
mov(qword[rsp + offsetof(StackLayout::Thunk, r[1])], rcx);
mov(qword[rsp + offsetof(StackLayout::Thunk, r[2])], rdx);
mov(qword[rsp + offsetof(StackLayout::Thunk, r[3])], r8);
@ -591,10 +627,17 @@ void X64ThunkEmitter::EmitSaveVolatileRegs() {
vmovaps(qword[rsp + offsetof(StackLayout::Thunk, xmm[3])], xmm3);
vmovaps(qword[rsp + offsetof(StackLayout::Thunk, xmm[4])], xmm4);
vmovaps(qword[rsp + offsetof(StackLayout::Thunk, xmm[5])], xmm5);
#endif
}
void X64ThunkEmitter::EmitLoadVolatileRegs() {
// Load volatile registers from our stack frame.
#if XE_PLATFORM_LINUX
// mov(rax, qword[rsp + offsetof(StackLayout::Thunk, r[0])]);
mov(rdi, qword[rsp + offsetof(StackLayout::Thunk, r[1])]);
mov(rsi, qword[rsp + offsetof(StackLayout::Thunk, r[2])]);
mov(rcx, qword[rsp + offsetof(StackLayout::Thunk, r[3])]);
mov(rdx, qword[rsp + offsetof(StackLayout::Thunk, r[4])]);
#else
// vmovaps(xmm0, qword[rsp + offsetof(StackLayout::Thunk, xmm[0])]);
vmovaps(xmm1, qword[rsp + offsetof(StackLayout::Thunk, xmm[1])]);
vmovaps(xmm2, qword[rsp + offsetof(StackLayout::Thunk, xmm[2])]);
@ -609,10 +652,19 @@ void X64ThunkEmitter::EmitLoadVolatileRegs() {
mov(r9, qword[rsp + offsetof(StackLayout::Thunk, r[4])]);
mov(r10, qword[rsp + offsetof(StackLayout::Thunk, r[5])]);
mov(r11, qword[rsp + offsetof(StackLayout::Thunk, r[6])]);
#endif
}
void X64ThunkEmitter::EmitSaveNonvolatileRegs() {
// Preserve nonvolatile registers.
#if XE_PLATFORM_LINUX
// SysV does not have nonvolatile XMM registers.
mov(qword[rsp + offsetof(StackLayout::Thunk, r[0])], rbx);
mov(qword[rsp + offsetof(StackLayout::Thunk, r[1])], rbp);
mov(qword[rsp + offsetof(StackLayout::Thunk, r[2])], r12);
mov(qword[rsp + offsetof(StackLayout::Thunk, r[3])], r13);
mov(qword[rsp + offsetof(StackLayout::Thunk, r[4])], r14);
mov(qword[rsp + offsetof(StackLayout::Thunk, r[5])], r15);
#else
mov(qword[rsp + offsetof(StackLayout::Thunk, r[0])], rbx);
mov(qword[rsp + offsetof(StackLayout::Thunk, r[1])], rcx);
mov(qword[rsp + offsetof(StackLayout::Thunk, r[2])], rbp);
@ -633,9 +685,18 @@ void X64ThunkEmitter::EmitSaveNonvolatileRegs() {
vmovaps(qword[rsp + offsetof(StackLayout::Thunk, xmm[7])], xmm13);
vmovaps(qword[rsp + offsetof(StackLayout::Thunk, xmm[8])], xmm14);
vmovaps(qword[rsp + offsetof(StackLayout::Thunk, xmm[9])], xmm15);
#endif
}
void X64ThunkEmitter::EmitLoadNonvolatileRegs() {
#ifdef XE_PLATFORM_LINUX
mov(rbx, qword[rsp + offsetof(StackLayout::Thunk, r[0])]);
mov(rbp, qword[rsp + offsetof(StackLayout::Thunk, r[1])]);
mov(r12, qword[rsp + offsetof(StackLayout::Thunk, r[2])]);
mov(r13, qword[rsp + offsetof(StackLayout::Thunk, r[3])]);
mov(r14, qword[rsp + offsetof(StackLayout::Thunk, r[4])]);
mov(r15, qword[rsp + offsetof(StackLayout::Thunk, r[5])]);
#else
vmovaps(xmm6, qword[rsp + offsetof(StackLayout::Thunk, xmm[0])]);
vmovaps(xmm7, qword[rsp + offsetof(StackLayout::Thunk, xmm[1])]);
vmovaps(xmm8, qword[rsp + offsetof(StackLayout::Thunk, xmm[2])]);
@ -656,6 +717,7 @@ void X64ThunkEmitter::EmitLoadNonvolatileRegs() {
mov(r13, qword[rsp + offsetof(StackLayout::Thunk, r[6])]);
mov(r14, qword[rsp + offsetof(StackLayout::Thunk, r[7])]);
mov(r15, qword[rsp + offsetof(StackLayout::Thunk, r[8])]);
#endif
}
} // namespace x64

View File

@ -491,14 +491,19 @@ void X64Emitter::CallExtern(const hir::Instr* instr, const Function* function) {
if (builtin_function->handler()) {
undefined = false;
// rcx = target function
// rdx = arg0
// r8 = arg1
// r9 = arg2
// rdx (windows), r8 (linux) = arg0
// r8 (windows), rdx (linux) = arg1
// r9 (windows), rcx (linux) = arg2
auto thunk = backend()->guest_to_host_thunk();
mov(rax, reinterpret_cast<uint64_t>(thunk));
mov(rcx, reinterpret_cast<uint64_t>(builtin_function->handler()));
#if XE_PLATFORM_LINUX
mov(rbx, reinterpret_cast<uint64_t>(builtin_function->arg0()));
mov(rdx, reinterpret_cast<uint64_t>(builtin_function->arg1()));
#else
mov(rdx, reinterpret_cast<uint64_t>(builtin_function->arg0()));
mov(r8, reinterpret_cast<uint64_t>(builtin_function->arg1()));
#endif
call(rax);
// rax = host return
}
@ -507,9 +512,9 @@ void X64Emitter::CallExtern(const hir::Instr* instr, const Function* function) {
if (extern_function->extern_handler()) {
undefined = false;
// rcx = target function
// rdx = arg0
// r8 = arg1
// r9 = arg2
// rdx (windows), r8 (linux) = arg0
// r8 (windows), rdx (linux) = arg1
// r9 (windows), rcx (linux) = arg2
auto thunk = backend()->guest_to_host_thunk();
mov(rax, reinterpret_cast<uint64_t>(thunk));
mov(rcx, reinterpret_cast<uint64_t>(extern_function->extern_handler()));
@ -542,9 +547,9 @@ void X64Emitter::CallNative(uint64_t (*fn)(void* raw_context, uint64_t arg0),
void X64Emitter::CallNativeSafe(void* fn) {
// rcx = target function
// rdx = arg0
// r8 = arg1
// r9 = arg2
// rdx (windows), r8 (linux) = arg0
// r8 (windows), rdx (linux) = arg1
// r9 (windows), rcx (linux) = arg2
auto thunk = backend()->guest_to_host_thunk();
mov(rax, reinterpret_cast<uint64_t>(thunk));
mov(rcx, reinterpret_cast<uint64_t>(fn));
@ -558,6 +563,17 @@ void X64Emitter::SetReturnAddress(uint64_t value) {
}
Xbyak::Reg64 X64Emitter::GetNativeParam(uint32_t param) {
#if XE_PLATFORM_LINUX
if (param == 0)
return rbx;
else if (param == 1)
return rdx;
else if (param == 2)
return rcx;
assert_always();
return rcx;
#else
if (param == 0)
return rdx;
else if (param == 1)
@ -567,6 +583,7 @@ Xbyak::Reg64 X64Emitter::GetNativeParam(uint32_t param) {
assert_always();
return r9;
#endif
}
// Important: If you change these, you must update the thunks in x64_backend.cc!

View File

@ -671,7 +671,7 @@ EMITTER_OPCODE_TABLE(OPCODE_VECTOR_SUB, VECTOR_SUB);
// OPCODE_VECTOR_SHL
// ============================================================================
template <typename T, std::enable_if_t<std::is_integral<T>::value, int> = 0>
static __m128i EmulateVectorShl(void*, __m128i src1, __m128i src2) {
static __m128i EmulateVectorShl(void*, __m128i& src1, __m128i& src2) {
alignas(16) T value[16 / sizeof(T)];
alignas(16) T shamt[16 / sizeof(T)];
@ -863,7 +863,7 @@ EMITTER_OPCODE_TABLE(OPCODE_VECTOR_SHL, VECTOR_SHL_V128);
// OPCODE_VECTOR_SHR
// ============================================================================
template <typename T, std::enable_if_t<std::is_integral<T>::value, int> = 0>
static __m128i EmulateVectorShr(void*, __m128i src1, __m128i src2) {
static __m128i EmulateVectorShr(void*, __m128i& src1, __m128i& src2) {
alignas(16) T value[16 / sizeof(T)];
alignas(16) T shamt[16 / sizeof(T)];
@ -1199,7 +1199,7 @@ EMITTER_OPCODE_TABLE(OPCODE_VECTOR_SHA, VECTOR_SHA_V128);
// OPCODE_VECTOR_ROTATE_LEFT
// ============================================================================
template <typename T, std::enable_if_t<std::is_integral<T>::value, int> = 0>
static __m128i EmulateVectorRotateLeft(void*, __m128i src1, __m128i src2) {
static __m128i EmulateVectorRotateLeft(void*, __m128i& src1, __m128i& src2) {
alignas(16) T value[16 / sizeof(T)];
alignas(16) T shamt[16 / sizeof(T)];
@ -1289,7 +1289,7 @@ EMITTER_OPCODE_TABLE(OPCODE_VECTOR_ROTATE_LEFT, VECTOR_ROTATE_LEFT_V128);
// OPCODE_VECTOR_AVERAGE
// ============================================================================
template <typename T, std::enable_if_t<std::is_integral<T>::value, int> = 0>
static __m128i EmulateVectorAverage(void*, __m128i src1, __m128i src2) {
static __m128i EmulateVectorAverage(void*, __m128i& src1, __m128i& src2) {
alignas(16) T src1v[16 / sizeof(T)];
alignas(16) T src2v[16 / sizeof(T)];
alignas(16) T value[16 / sizeof(T)];
@ -1857,7 +1857,7 @@ struct PACK : Sequence<PACK, I<OPCODE_PACK, V128Op, V128Op, V128Op>> {
// ((src1.uy & 0xFF) << 8) | (src1.uz & 0xFF)
e.vpshufb(i.dest, i.dest, e.GetXmmConstPtr(XMMPackD3DCOLOR));
}
static __m128i EmulateFLOAT16_2(void*, __m128 src1) {
static __m128i EmulateFLOAT16_2(void*, __m128& src1) {
alignas(16) float a[4];
alignas(16) uint16_t b[8];
_mm_store_ps(a, src1);
@ -1898,7 +1898,7 @@ struct PACK : Sequence<PACK, I<OPCODE_PACK, V128Op, V128Op, V128Op>> {
e.vmovaps(i.dest, e.xmm0);
}
}
static __m128i EmulateFLOAT16_4(void*, __m128 src1) {
static __m128i EmulateFLOAT16_4(void*, __m128& src1) {
alignas(16) float a[4];
alignas(16) uint16_t b[8];
_mm_store_ps(a, src1);
@ -2031,8 +2031,8 @@ struct PACK : Sequence<PACK, I<OPCODE_PACK, V128Op, V128Op, V128Op>> {
// Merge XZ and YW.
e.vorps(i.dest, e.xmm0);
}
static __m128i EmulatePack8_IN_16_UN_UN_SAT(void*, __m128i src1,
__m128i src2) {
static __m128i EmulatePack8_IN_16_UN_UN_SAT(void*, __m128i& src1,
__m128i& src2) {
alignas(16) uint16_t a[8];
alignas(16) uint16_t b[8];
alignas(16) uint8_t c[16];
@ -2044,7 +2044,7 @@ struct PACK : Sequence<PACK, I<OPCODE_PACK, V128Op, V128Op, V128Op>> {
}
return _mm_load_si128(reinterpret_cast<__m128i*>(c));
}
static __m128i EmulatePack8_IN_16_UN_UN(void*, __m128i src1, __m128i src2) {
static __m128i EmulatePack8_IN_16_UN_UN(void*, __m128i& src1, __m128i& src2) {
alignas(16) uint8_t a[16];
alignas(16) uint8_t b[16];
alignas(16) uint8_t c[16];
@ -2277,7 +2277,7 @@ struct UNPACK : Sequence<UNPACK, I<OPCODE_UNPACK, V128Op, V128Op>> {
e.vpor(i.dest, e.GetXmmConstPtr(XMMOne));
// To convert to 0 to 1, games multiply by 0x47008081 and add 0xC7008081.
}
static __m128 EmulateFLOAT16_2(void*, __m128i src1) {
static __m128 EmulateFLOAT16_2(void*, __m128i& src1) {
alignas(16) uint16_t a[8];
alignas(16) float b[4];
_mm_store_si128(reinterpret_cast<__m128i*>(a), src1);
@ -2336,7 +2336,7 @@ struct UNPACK : Sequence<UNPACK, I<OPCODE_UNPACK, V128Op, V128Op>> {
e.vmovaps(i.dest, e.xmm0);
}
}
static __m128 EmulateFLOAT16_4(void*, __m128i src1) {
static __m128 EmulateFLOAT16_4(void*, __m128i& src1) {
alignas(16) uint16_t a[8];
alignas(16) float b[4];
_mm_store_si128(reinterpret_cast<__m128i*>(a), src1);
@ -2616,4 +2616,4 @@ EMITTER_OPCODE_TABLE(OPCODE_UNPACK, UNPACK);
} // namespace x64
} // namespace backend
} // namespace cpu
} // namespace xe
} // namespace xe

View File

@ -2352,7 +2352,7 @@ EMITTER_OPCODE_TABLE(OPCODE_RECIP, RECIP_F32, RECIP_F64, RECIP_V128);
// TODO(benvanik): use approx here:
// https://jrfonseca.blogspot.com/2008/09/fast-sse2-pow-tables-or-polynomials.html
struct POW2_F32 : Sequence<POW2_F32, I<OPCODE_POW2, F32Op, F32Op>> {
static __m128 EmulatePow2(void*, __m128 src) {
static __m128 EmulatePow2(void*, __m128& src) {
float src_value;
_mm_store_ss(&src_value, src);
float result = std::exp2(src_value);
@ -2366,7 +2366,7 @@ struct POW2_F32 : Sequence<POW2_F32, I<OPCODE_POW2, F32Op, F32Op>> {
}
};
struct POW2_F64 : Sequence<POW2_F64, I<OPCODE_POW2, F64Op, F64Op>> {
static __m128d EmulatePow2(void*, __m128d src) {
static __m128d EmulatePow2(void*, __m128d& src) {
double src_value;
_mm_store_sd(&src_value, src);
double result = std::exp2(src_value);
@ -2380,7 +2380,7 @@ struct POW2_F64 : Sequence<POW2_F64, I<OPCODE_POW2, F64Op, F64Op>> {
}
};
struct POW2_V128 : Sequence<POW2_V128, I<OPCODE_POW2, V128Op, V128Op>> {
static __m128 EmulatePow2(void*, __m128 src) {
static __m128 EmulatePow2(void*, __m128& src) {
alignas(16) float values[4];
_mm_store_ps(values, src);
for (size_t i = 0; i < 4; ++i) {
@ -2403,7 +2403,7 @@ EMITTER_OPCODE_TABLE(OPCODE_POW2, POW2_F32, POW2_F64, POW2_V128);
// https://jrfonseca.blogspot.com/2008/09/fast-sse2-pow-tables-or-polynomials.html
// TODO(benvanik): this emulated fn destroys all xmm registers! don't do it!
struct LOG2_F32 : Sequence<LOG2_F32, I<OPCODE_LOG2, F32Op, F32Op>> {
static __m128 EmulateLog2(void*, __m128 src) {
static __m128 EmulateLog2(void*, __m128& src) {
float src_value;
_mm_store_ss(&src_value, src);
float result = std::log2(src_value);
@ -2417,7 +2417,7 @@ struct LOG2_F32 : Sequence<LOG2_F32, I<OPCODE_LOG2, F32Op, F32Op>> {
}
};
struct LOG2_F64 : Sequence<LOG2_F64, I<OPCODE_LOG2, F64Op, F64Op>> {
static __m128d EmulateLog2(void*, __m128d src) {
static __m128d EmulateLog2(void*, __m128d& src) {
double src_value;
_mm_store_sd(&src_value, src);
double result = std::log2(src_value);
@ -2431,7 +2431,7 @@ struct LOG2_F64 : Sequence<LOG2_F64, I<OPCODE_LOG2, F64Op, F64Op>> {
}
};
struct LOG2_V128 : Sequence<LOG2_V128, I<OPCODE_LOG2, V128Op, V128Op>> {
static __m128 EmulateLog2(void*, __m128 src) {
static __m128 EmulateLog2(void*, __m128& src) {
alignas(16) float values[4];
_mm_store_ps(values, src);
for (size_t i = 0; i < 4; ++i) {
@ -2713,7 +2713,7 @@ struct SHL_V128 : Sequence<SHL_V128, I<OPCODE_SHL, V128Op, V128Op, I8Op>> {
e.CallNativeSafe(reinterpret_cast<void*>(EmulateShlV128));
e.vmovaps(i.dest, e.xmm0);
}
static __m128i EmulateShlV128(void*, __m128i src1, uint8_t src2) {
static __m128i EmulateShlV128(void*, __m128i& src1, uint8_t src2) {
// Almost all instances are shamt = 1, but non-constant.
// shamt is [0,7]
uint8_t shamt = src2 & 0x7;
@ -2790,7 +2790,7 @@ struct SHR_V128 : Sequence<SHR_V128, I<OPCODE_SHR, V128Op, V128Op, I8Op>> {
e.CallNativeSafe(reinterpret_cast<void*>(EmulateShrV128));
e.vmovaps(i.dest, e.xmm0);
}
static __m128i EmulateShrV128(void*, __m128i src1, uint8_t src2) {
static __m128i EmulateShrV128(void*, __m128i& src1, uint8_t src2) {
// Almost all instances are shamt = 1, but non-constant.
// shamt is [0,7]
uint8_t shamt = src2 & 0x7;

View File

@ -16,6 +16,12 @@
#include "xenia/base/byte_order.h"
#include "xenia/base/math.h"
#if XE_PLATFORM_LINUX
#define OPTNONE __attribute__((optnone))
#else
#define OPTNONE
#endif // XE_PLATFORM_LINUX
namespace xe {
namespace cpu {
namespace hir {
@ -440,12 +446,14 @@ void Value::MulHi(Value* other, bool is_unsigned) {
#else
if (is_unsigned) {
constant.i64 = static_cast<uint64_t>(
static_cast<unsigned __int128>(constant.i64) *
static_cast<unsigned __int128>(other->constant.i64));
(static_cast<unsigned __int128>(constant.u64) *
static_cast<unsigned __int128>(other->constant.u64)) >>
64);
} else {
constant.i64 =
static_cast<uint64_t>(static_cast<__int128>(constant.i64) *
static_cast<__int128>(other->constant.i64));
constant.i64 = static_cast<uint64_t>(
(static_cast<__int128>(constant.i64) *
static_cast<__int128>(other->constant.i64)) >>
64);
}
#endif // XE_COMPILER_MSVC
break;
@ -755,8 +763,8 @@ void Value::Xor(Value* other) {
break;
}
}
void Value::Not() {
// Set optnone to prevent clang 6 from optimizing and causing issues
void Value::Not() OPTNONE {
switch (type) {
case INT8_TYPE:
constant.i8 = ~constant.i8;

View File

@ -16,6 +16,12 @@
#include "xenia/cpu/backend/machine_info.h"
#include "xenia/cpu/hir/opcodes.h"
#if XE_PLATFORM_LINUX
#define OPTNONE __attribute__((optnone))
#else
#define OPTNONE
#endif // XE_PLATFORM_LINUX
namespace xe {
namespace cpu {
namespace hir {
@ -109,7 +115,9 @@ class Value {
Use* AddUse(Arena* arena, Instr* instr);
void RemoveUse(Use* use);
void set_zero(TypeName new_type) {
// Set optnone to prevent clang from vectorizing the assignment to 0 which
// would happen on different registers.
void set_zero(TypeName new_type) OPTNONE {
type = new_type;
flags |= VALUE_IS_CONSTANT;
constant.v128.low = constant.v128.high = 0;

View File

@ -106,7 +106,7 @@ Symbol::Status Module::DeclareFunction(uint32_t address,
Symbol* symbol;
Symbol::Status status =
DeclareSymbol(Symbol::Type::kFunction, address, &symbol);
*out_function = static_cast<Function*>(symbol);
*out_function = dynamic_cast<Function*>(symbol);
return status;
}

View File

@ -197,7 +197,7 @@ Function* Processor::DefineBuiltin(const std::string& name,
function->set_end_address(address + 4);
function->set_name(name);
auto builtin_function = static_cast<BuiltinFunction*>(function);
auto builtin_function = dynamic_cast<BuiltinFunction*>(function);
builtin_function->SetupBuiltin(handler, arg0, arg1);
function->set_status(Symbol::Status::kDeclared);

View File

@ -29,6 +29,12 @@
#include "xenia/cpu/thread_state.h"
#include "xenia/memory.h"
#if XE_PLATFORM_LINUX
#define NOINLINE __attribute__((noinline))
#else
#define NOINLINE
#endif // XE_PLATFORM_LINUX
DECLARE_bool(debug);
namespace xe {
@ -67,7 +73,9 @@ class Processor {
Memory* memory() const { return memory_; }
StackWalker* stack_walker() const { return stack_walker_.get(); }
ppc::PPCFrontend* frontend() const { return frontend_.get(); }
backend::Backend* backend() const { return backend_.get(); }
// Clang 6 on release has issues with ppc instructions when this function is
// inlined
backend::Backend* backend() const NOINLINE { return backend_.get(); }
ExportResolver* export_resolver() const { return export_resolver_; }
bool Setup(std::unique_ptr<backend::Backend> backend);

View File

@ -265,7 +265,7 @@ TEST_CASE("ADD_I64", "[instr]") {
},
[](PPCContext* ctx) {
auto result = ctx->r[3];
REQUIRE(result == -15);
REQUIRE(result == static_cast<uint64_t>(-15));
});
test.Run(
[](PPCContext* ctx) {

View File

@ -51,7 +51,7 @@ TEST_CASE("PACK_FLOAT16_2", "[instr]") {
},
[](PPCContext* ctx) {
auto result = ctx->v[3];
REQUIRE(result == vec128i(0, 0, 0, 0x7FFFFFFF));
REQUIRE(result == vec128i(0, 0, 0, 0x7BFFFBFF));
});
test.Run(
[](PPCContext* ctx) {
@ -80,7 +80,7 @@ TEST_CASE("PACK_FLOAT16_4", "[instr]") {
[](PPCContext* ctx) {
auto result = ctx->v[3];
REQUIRE(result ==
vec128i(0x00000000, 0x00000000, 0x64D26D8C, 0x48824491));
vec128i(0x00000000, 0x00000000, 0x64D26D8B, 0x48814491));
});
}
@ -92,7 +92,8 @@ TEST_CASE("PACK_SHORT_2", "[instr]") {
test.Run([](PPCContext* ctx) { ctx->v[4] = vec128i(0); },
[](PPCContext* ctx) {
auto result = ctx->v[3];
REQUIRE(result == vec128i(0));
REQUIRE(result ==
vec128i(0x00000000, 0x00000000, 0x00000000, 0x80018001));
});
test.Run(
[](PPCContext* ctx) {

View File

@ -55,7 +55,7 @@ TEST_CASE("UNPACK_FLOAT16_2", "[instr]") {
[](PPCContext* ctx) {
auto result = ctx->v[3];
REQUIRE(result ==
vec128i(0x47FFE000, 0xC7FFE000, 0x00000000, 0x3F800000));
vec128i(0x7FFFE000, 0xFFFFE000, 0x00000000, 0x3F800000));
});
test.Run([](PPCContext* ctx) { ctx->v[4] = vec128i(0, 0, 0, 0x55556666); },
[](PPCContext* ctx) {

View File

@ -73,7 +73,6 @@ class TestFunction {
uint32_t stack_address = memory_size - stack_size;
uint32_t thread_state_address = stack_address - 0x1000;
auto thread_state = std::make_unique<ThreadState>(processor.get(), 0x100);
assert_always(); // TODO: Allocate a thread stack!!!
auto ctx = thread_state->context();
ctx->lr = 0xBCBCBCBC;

0
third_party/binutils/build.sh vendored Normal file → Executable file
View File

View File

@ -938,7 +938,10 @@ class GenTestsCommand(Command):
print('Generating test binaries...')
print('')
binutils_path = os.path.join('third_party', 'binutils-ppc-cygwin')
if sys.platform == 'win32':
binutils_path = os.path.join('third_party', 'binutils-ppc-cygwin')
else:
binutils_path = os.path.join('third_party', 'binutils', 'bin')
ppc_as = os.path.join(binutils_path, 'powerpc-none-elf-as')
ppc_ld = os.path.join(binutils_path, 'powerpc-none-elf-ld')
ppc_objdump = os.path.join(binutils_path, 'powerpc-none-elf-objdump')