[CPU] MMIO: Arm64, load register writes + exception cleanup

This commit is contained in:
Triang3l 2022-07-06 21:02:59 +03:00
parent fd03d886e9
commit 326e718035
9 changed files with 496 additions and 105 deletions

View File

@ -40,11 +40,14 @@ namespace xe {
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
bool IsArm64LoadPrefetchStore(uint32_t instruction, bool& is_store_out) { bool IsArm64LoadPrefetchStore(uint32_t instruction, bool& is_store_out) {
if ((instruction & kArm64LoadStoreAnyMask) != kArm64LoadStoreAnyValue) { if ((instruction & kArm64LoadLiteralFMask) == kArm64LoadLiteralFixed) {
return true;
}
if ((instruction & kArm64LoadStoreAnyFMask) != kArm64LoadStoreAnyFixed) {
return false; return false;
} }
if ((instruction & kArm64LoadStorePairAnyMask) == if ((instruction & kArm64LoadStorePairAnyFMask) ==
kArm64LoadStorePairAnyValue) { kArm64LoadStorePairAnyFixed) {
is_store_out = !(instruction & kArm64LoadStorePairLoadBit); is_store_out = !(instruction & kArm64LoadStorePairLoadBit);
return true; return true;
} }

View File

@ -48,13 +48,19 @@ namespace xe {
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
constexpr uint32_t kArm64LoadStoreAnyMask = UINT32_C(0x0A000000); // `Instruction address + literal offset` loads.
constexpr uint32_t kArm64LoadStoreAnyValue = UINT32_C(0x08000000); // This includes PRFM_lit.
constexpr uint32_t kArm64LoadStorePairAnyMask = UINT32_C(0x3A000000); constexpr uint32_t kArm64LoadLiteralFMask = UINT32_C(0x3B000000);
constexpr uint32_t kArm64LoadStorePairAnyValue = UINT32_C(0x28000000); constexpr uint32_t kArm64LoadLiteralFixed = UINT32_C(0x18000000);
constexpr uint32_t kArm64LoadStorePairLoadBit = UINT32_C(1) << 22;
constexpr uint32_t kArm64LoadStoreMask = UINT32_C(0xC4C00000);
constexpr uint32_t kArm64LoadStoreAnyFMask = UINT32_C(0x0A000000);
constexpr uint32_t kArm64LoadStoreAnyFixed = UINT32_C(0x08000000);
constexpr uint32_t kArm64LoadStorePairAnyFMask = UINT32_C(0x3A000000);
constexpr uint32_t kArm64LoadStorePairAnyFixed = UINT32_C(0x28000000);
constexpr uint32_t kArm64LoadStorePairLoadBit = UINT32_C(1) << 22;
constexpr uint32_t kArm64LoadStoreMask = UINT32_C(0xC4C00000);
enum class Arm64LoadStoreOp : uint32_t { enum class Arm64LoadStoreOp : uint32_t {
kSTRB_w = UINT32_C(0x00000000), kSTRB_w = UINT32_C(0x00000000),
kSTRH_w = UINT32_C(0x40000000), kSTRH_w = UINT32_C(0x40000000),
@ -82,6 +88,17 @@ enum class Arm64LoadStoreOp : uint32_t {
kPRFM = UINT32_C(0xC0800000), kPRFM = UINT32_C(0xC0800000),
}; };
constexpr uint32_t kArm64LoadStoreOffsetFMask = UINT32_C(0x3B200C00);
enum class Arm64LoadStoreOffsetFixed : uint32_t {
kUnscaledOffset = UINT32_C(0x38000000),
kPostIndex = UINT32_C(0x38000400),
kPreIndex = UINT32_C(0x38000C00),
kRegisterOffset = UINT32_C(0x38200800),
};
constexpr uint32_t kArm64LoadStoreUnsignedOffsetFMask = UINT32_C(0x3B000000);
constexpr uint32_t kArm64LoadStoreUnsignedOffsetFixed = UINT32_C(0x39000000);
bool IsArm64LoadPrefetchStore(uint32_t instruction, bool& is_store_out); bool IsArm64LoadPrefetchStore(uint32_t instruction, bool& is_store_out);
class Exception { class Exception {
@ -114,6 +131,14 @@ class Exception {
Code code() const { return code_; } Code code() const { return code_; }
// Returns the platform-specific thread context info. // Returns the platform-specific thread context info.
// Note that certain registers must be modified through Modify* proxy
// functions rather than directly:
// x86-64:
// - General-purpose registers (r##, r8-r15).
// - XMM registers.
// AArch64:
// - General-purpose registers (Xn), including FP and LR.
// - SIMD and floating-point registers (Vn).
HostThreadContext* thread_context() const { return thread_context_; } HostThreadContext* thread_context() const { return thread_context_; }
// Returns the program counter where the exception occurred. // Returns the program counter where the exception occurred.
@ -139,6 +164,35 @@ class Exception {
#endif // XE_ARCH #endif // XE_ARCH
} }
#if XE_ARCH_AMD64
// Returns a writable reference to a general-purpose register of the thread
// context and records the register in the modified-register bit mask.
// The index is relative to X64Register::kIntRegisterFirst.
uint64_t& ModifyIntRegister(uint32_t index) {
  assert_true(index <= 15);
  const uint16_t register_bit = uint16_t(UINT16_C(1) << index);
  modified_int_registers_ |= register_bit;
  return thread_context_->int_registers[index];
}
// Bit mask of the general-purpose registers requested through
// ModifyIntRegister (bit N corresponds to index N).
uint16_t modified_int_registers() const { return modified_int_registers_; }
// Returns a writable reference to an XMM register of the thread context and
// records the register in the modified-register bit mask.
vec128_t& ModifyXmmRegister(uint32_t index) {
  assert_true(index <= 15);
  const uint16_t register_bit = uint16_t(UINT16_C(1) << index);
  modified_xmm_registers_ |= register_bit;
  return thread_context_->xmm_registers[index];
}
// Bit mask of the XMM registers requested through ModifyXmmRegister (bit N
// corresponds to index N).
uint16_t modified_xmm_registers() const { return modified_xmm_registers_; }
#elif XE_ARCH_ARM64
// Returns a writable reference to Xn (n = 0...30) of the thread context and
// records the register in the modified-register bit mask.
uint64_t& ModifyXRegister(uint32_t index) {
  assert_true(index <= 30);
  const uint32_t register_bit = UINT32_C(1) << index;
  modified_x_registers_ |= register_bit;
  return thread_context_->x[index];
}
// Bit mask of the Xn registers requested through ModifyXRegister (bit N
// corresponds to index N).
uint32_t modified_x_registers() const { return modified_x_registers_; }
// Returns a writable reference to Vn (n = 0...31) of the thread context and
// records the register in the modified-register bit mask.
vec128_t& ModifyVRegister(uint32_t index) {
  assert_true(index <= 31);
  const uint32_t register_bit = UINT32_C(1) << index;
  modified_v_registers_ |= register_bit;
  return thread_context_->v[index];
}
// Bit mask of the Vn registers requested through ModifyVRegister (bit N
// corresponds to index N).
uint32_t modified_v_registers() const { return modified_v_registers_; }
#endif  // XE_ARCH
// In case of AV, address that was read from/written to. // In case of AV, address that was read from/written to.
uint64_t fault_address() const { return fault_address_; } uint64_t fault_address() const { return fault_address_; }
@ -150,6 +204,13 @@ class Exception {
private: private:
Code code_ = Code::kInvalidException; Code code_ = Code::kInvalidException;
HostThreadContext* thread_context_ = nullptr; HostThreadContext* thread_context_ = nullptr;
#if XE_ARCH_AMD64
uint16_t modified_int_registers_ = 0;
uint16_t modified_xmm_registers_ = 0;
#elif XE_ARCH_ARM64
uint32_t modified_x_registers_ = 0;
uint32_t modified_v_registers_ = 0;
#endif // XE_ARCH
uint64_t fault_address_ = 0; uint64_t fault_address_ = 0;
AccessViolationOperation access_violation_operation_ = AccessViolationOperation access_violation_operation_ =
AccessViolationOperation::kUnknown; AccessViolationOperation::kUnknown;

View File

@ -16,6 +16,7 @@
#include "xenia/base/assert.h" #include "xenia/base/assert.h"
#include "xenia/base/host_thread_context.h" #include "xenia/base/host_thread_context.h"
#include "xenia/base/logging.h" #include "xenia/base/logging.h"
#include "xenia/base/math.h"
#include "xenia/base/platform.h" #include "xenia/base/platform.h"
namespace xe { namespace xe {
@ -43,6 +44,8 @@ static void ExceptionHandlerCallback(int signal_number, siginfo_t* signal_info,
#if XE_ARCH_AMD64 #if XE_ARCH_AMD64
thread_context.rip = uint64_t(mcontext.gregs[REG_RIP]); thread_context.rip = uint64_t(mcontext.gregs[REG_RIP]);
thread_context.eflags = uint32_t(mcontext.gregs[REG_EFL]); thread_context.eflags = uint32_t(mcontext.gregs[REG_EFL]);
// The REG_ order may be different than the register indices in the
// instruction encoding.
thread_context.rax = uint64_t(mcontext.gregs[REG_RAX]); thread_context.rax = uint64_t(mcontext.gregs[REG_RAX]);
thread_context.rcx = uint64_t(mcontext.gregs[REG_RCX]); thread_context.rcx = uint64_t(mcontext.gregs[REG_RCX]);
thread_context.rdx = uint64_t(mcontext.gregs[REG_RDX]); thread_context.rdx = uint64_t(mcontext.gregs[REG_RDX]);
@ -160,11 +163,61 @@ static void ExceptionHandlerCallback(int signal_number, siginfo_t* signal_info,
for (size_t i = 0; i < xe::countof(handlers_) && handlers_[i].first; ++i) { for (size_t i = 0; i < xe::countof(handlers_) && handlers_[i].first; ++i) {
if (handlers_[i].first(&ex, handlers_[i].second)) { if (handlers_[i].first(&ex, handlers_[i].second)) {
// Exception handled. // Exception handled.
// TODO(benvanik): Update all thread state? Dirty flags?
#if XE_ARCH_AMD64 #if XE_ARCH_AMD64
mcontext.gregs[REG_RIP] = thread_context.rip; mcontext.gregs[REG_RIP] = greg_t(thread_context.rip);
mcontext.gregs[REG_EFL] = greg_t(thread_context.eflags);
uint32_t modified_register_index;
// The order must match the order in X64Register.
static const size_t kIntRegisterMap[] = {
REG_RAX, REG_RCX, REG_RDX, REG_RBX, REG_RSP, REG_RBP,
REG_RSI, REG_RDI, REG_R8, REG_R9, REG_R10, REG_R11,
REG_R12, REG_R13, REG_R14, REG_R15,
};
uint16_t modified_int_registers_remaining = ex.modified_int_registers();
while (xe::bit_scan_forward(modified_int_registers_remaining,
&modified_register_index)) {
modified_int_registers_remaining &=
~(UINT16_C(1) << modified_register_index);
mcontext.gregs[kIntRegisterMap[modified_register_index]] =
thread_context.int_registers[modified_register_index];
}
uint16_t modified_xmm_registers_remaining = ex.modified_xmm_registers();
while (xe::bit_scan_forward(modified_xmm_registers_remaining,
&modified_register_index)) {
modified_xmm_registers_remaining &=
~(UINT16_C(1) << modified_register_index);
std::memcpy(&mcontext.fpregs->_xmm[modified_register_index],
&thread_context.xmm_registers[modified_register_index],
sizeof(vec128_t));
}
#elif XE_ARCH_ARM64 #elif XE_ARCH_ARM64
uint32_t modified_register_index;
uint32_t modified_x_registers_remaining = ex.modified_x_registers();
while (xe::bit_scan_forward(modified_x_registers_remaining,
&modified_register_index)) {
modified_x_registers_remaining &=
~(UINT32_C(1) << modified_register_index);
mcontext.regs[modified_register_index] =
thread_context.x[modified_register_index];
}
mcontext.sp = thread_context.sp;
mcontext.pc = thread_context.pc; mcontext.pc = thread_context.pc;
mcontext.pstate = thread_context.pstate;
// Write back the floating-point status and control registers, and only the Vn
// registers the handler marked as modified through ModifyVRegister.
if (mcontext_fpsimd) {
  mcontext_fpsimd->fpsr = thread_context.fpsr;
  mcontext_fpsimd->fpcr = thread_context.fpcr;
  uint32_t modified_v_registers_remaining = ex.modified_v_registers();
  while (xe::bit_scan_forward(modified_v_registers_remaining,
                              &modified_register_index)) {
    modified_v_registers_remaining &=
        ~(UINT32_C(1) << modified_register_index);
    // Only the V register is copied here. The X registers are written back by
    // the loop over the modified X registers above, and a V register index
    // may be 31, which is not a valid X register index (X registers are
    // 0...30).
    std::memcpy(&mcontext_fpsimd->vregs[modified_register_index],
                &thread_context.v[modified_register_index],
                sizeof(vec128_t));
  }
}
#endif // XE_ARCH #endif // XE_ARCH
return; return;
} }

View File

@ -78,8 +78,26 @@ LONG CALLBACK ExceptionHandlerCallback(PEXCEPTION_POINTERS ex_info) {
for (size_t i = 0; i < xe::countof(handlers_) && handlers_[i].first; ++i) { for (size_t i = 0; i < xe::countof(handlers_) && handlers_[i].first; ++i) {
if (handlers_[i].first(&ex, handlers_[i].second)) { if (handlers_[i].first(&ex, handlers_[i].second)) {
// Exception handled. // Exception handled.
// TODO(benvanik): update all thread state? Dirty flags?
ex_info->ContextRecord->Rip = thread_context.rip; ex_info->ContextRecord->Rip = thread_context.rip;
ex_info->ContextRecord->EFlags = thread_context.eflags;
uint32_t modified_register_index;
uint16_t modified_int_registers_remaining = ex.modified_int_registers();
while (xe::bit_scan_forward(modified_int_registers_remaining,
&modified_register_index)) {
modified_int_registers_remaining &=
~(UINT16_C(1) << modified_register_index);
(&ex_info->ContextRecord->Rax)[modified_register_index] =
thread_context.int_registers[modified_register_index];
}
uint16_t modified_xmm_registers_remaining = ex.modified_xmm_registers();
while (xe::bit_scan_forward(modified_xmm_registers_remaining,
&modified_register_index)) {
modified_xmm_registers_remaining &=
~(UINT16_C(1) << modified_register_index);
std::memcpy(&ex_info->ContextRecord->Xmm0 + modified_register_index,
&thread_context.xmm_registers[modified_register_index],
sizeof(vec128_t));
}
return EXCEPTION_CONTINUE_EXECUTION; return EXCEPTION_CONTINUE_EXECUTION;
} }
} }

View File

@ -27,10 +27,10 @@ static const char* kRegisterNames[] = {
"x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9",
"x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x18", "x19", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x18", "x19",
"x20", "x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28", "x29", "x20", "x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28", "x29",
"x30", "sp", "pc", "pstate", "fpsr", "fpcr", "q0", "q1", "q2", "q3", "x30", "sp", "pc", "pstate", "fpsr", "fpcr", "v0", "v1", "v2", "v3",
"q4", "q5", "q6", "q7", "q8", "q9", "q10", "q11", "q12", "q13", "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13",
"q14", "q15", "q16", "q17", "q18", "q19", "q20", "q21", "q22", "q23", "v14", "v15", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23",
"q24", "q25", "q26", "q27", "q28", "q29", "q30", "q31", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31",
#endif // XE_ARCH #endif // XE_ARCH
}; };
@ -47,12 +47,12 @@ std::string HostThreadContext::GetStringFromValue(HostRegister reg,
case X64Register::kEflags: case X64Register::kEflags:
return hex ? string_util::to_hex_string(eflags) : std::to_string(eflags); return hex ? string_util::to_hex_string(eflags) : std::to_string(eflags);
default: default:
if (int(reg) >= int(X64Register::kRax) && if (reg >= X64Register::kIntRegisterFirst &&
int(reg) <= int(X64Register::kR15)) { reg <= X64Register::kIntRegisterLast) {
auto value = int_registers[int(reg) - int(X64Register::kRax)]; auto value =
int_registers[int(reg) - int(X64Register::kIntRegisterFirst)];
return hex ? string_util::to_hex_string(value) : std::to_string(value); return hex ? string_util::to_hex_string(value) : std::to_string(value);
} else if (int(reg) >= int(X64Register::kXmm0) && } else if (reg >= X64Register::kXmm0 && reg <= X64Register::kXmm15) {
int(reg) <= int(X64Register::kXmm15)) {
auto value = xmm_registers[int(reg) - int(X64Register::kXmm0)]; auto value = xmm_registers[int(reg) - int(X64Register::kXmm0)];
return hex ? string_util::to_hex_string(value) : xe::to_string(value); return hex ? string_util::to_hex_string(value) : xe::to_string(value);
} else { } else {
@ -73,12 +73,10 @@ std::string HostThreadContext::GetStringFromValue(HostRegister reg,
case Arm64Register::kFpcr: case Arm64Register::kFpcr:
return hex ? string_util::to_hex_string(fpcr) : std::to_string(fpcr); return hex ? string_util::to_hex_string(fpcr) : std::to_string(fpcr);
default: default:
if (int(reg) >= int(Arm64Register::kX0) && if (reg >= Arm64Register::kX0 && reg <= Arm64Register::kX30) {
int(reg) <= int(Arm64Register::kX30)) {
auto value = x[int(reg) - int(Arm64Register::kX0)]; auto value = x[int(reg) - int(Arm64Register::kX0)];
return hex ? string_util::to_hex_string(value) : std::to_string(value); return hex ? string_util::to_hex_string(value) : std::to_string(value);
} else if (int(reg) >= int(Arm64Register::kV0) && } else if (reg >= Arm64Register::kV0 && reg <= Arm64Register::kV31) {
int(reg) <= int(Arm64Register::kV31)) {
auto value = v[int(reg) - int(Arm64Register::kV0)]; auto value = v[int(reg) - int(Arm64Register::kV0)];
return hex ? string_util::to_hex_string(value) : xe::to_string(value); return hex ? string_util::to_hex_string(value) : xe::to_string(value);
} else { } else {

View File

@ -23,12 +23,17 @@
namespace xe { namespace xe {
// NOTE: The order of the registers in the enumerations must match the order in // NOTE: The order of the registers in the enumerations must match the order in
// the string table in host_thread_context.cc. // the string table in host_thread_context.cc, as well as remapping tables in
// exception handler implementations.
enum class X64Register { enum class X64Register {
kRip, kRip,
kEflags, kEflags,
kRax,
kIntRegisterFirst,
// The order matches the indices in the instruction encoding, as well as the
// Windows CONTEXT structure.
kRax = kIntRegisterFirst,
kRcx, kRcx,
kRdx, kRdx,
kRbx, kRbx,
@ -44,6 +49,8 @@ enum class X64Register {
kR13, kR13,
kR14, kR14,
kR15, kR15,
kIntRegisterLast = kR15,
kXmm0, kXmm0,
kXmm1, kXmm1,
kXmm2, kXmm2,
@ -101,8 +108,7 @@ enum class Arm64Register {
kPstate, kPstate,
kFpsr, kFpsr,
kFpcr, kFpcr,
// In assembly, the whole 128 bits of the Neon vector registers are accessible // The whole 128 bits of a Vn register are also known as Qn (quadword).
// as Q# (quadword registers). VFP also uses these registers.
kV0, kV0,
kV1, kV1,
kV2, kV2,

View File

@ -18,6 +18,7 @@
#include "xenia/base/exception_handler.h" #include "xenia/base/exception_handler.h"
#include "xenia/base/logging.h" #include "xenia/base/logging.h"
#include "xenia/base/memory.h" #include "xenia/base/memory.h"
#include "xenia/base/platform.h"
namespace xe { namespace xe {
namespace cpu { namespace cpu {
@ -114,28 +115,10 @@ bool MMIOHandler::CheckStore(uint32_t virtual_address, uint32_t value) {
return false; return false;
} }
struct DecodedMov { bool MMIOHandler::TryDecodeLoadStore(const uint8_t* p,
size_t length; DecodedLoadStore& decoded_out) {
// Inidicates this is a load (or conversely a store). std::memset(&decoded_out, 0, sizeof(decoded_out));
bool is_load; #if XE_ARCH_AMD64
// Indicates the memory must be swapped.
bool byte_swap;
// Source (for store) or target (for load) register.
// AX CX DX BX SP BP SI DI // REX.R=0
// R8 R9 R10 R11 R12 R13 R14 R15 // REX.R=1
uint32_t value_reg;
// [base + (index * scale) + displacement]
bool mem_has_base;
uint8_t mem_base_reg;
bool mem_has_index;
uint8_t mem_index_reg;
uint8_t mem_scale;
int32_t mem_displacement;
bool is_constant;
int32_t constant;
};
bool TryDecodeMov(const uint8_t* p, DecodedMov* mov) {
uint8_t i = 0; // Current byte decode index. uint8_t i = 0; // Current byte decode index.
uint8_t rex = 0; uint8_t rex = 0;
if ((p[i] & 0xF0) == 0x40) { if ((p[i] & 0xF0) == 0x40) {
@ -148,8 +131,8 @@ bool TryDecodeMov(const uint8_t* p, DecodedMov* mov) {
// 44 0f 38 f1 a4 02 00 movbe DWORD PTR [rdx+rax*1+0x0],r12d // 44 0f 38 f1 a4 02 00 movbe DWORD PTR [rdx+rax*1+0x0],r12d
// 42 0f 38 f1 8c 22 00 movbe DWORD PTR [rdx+r12*1+0x0],ecx // 42 0f 38 f1 8c 22 00 movbe DWORD PTR [rdx+r12*1+0x0],ecx
// 0f 38 f1 8c 02 00 00 movbe DWORD PTR [rdx + rax * 1 + 0x0], ecx // 0f 38 f1 8c 02 00 00 movbe DWORD PTR [rdx + rax * 1 + 0x0], ecx
mov->is_load = false; decoded_out.is_load = false;
mov->byte_swap = true; decoded_out.byte_swap = true;
i += 3; i += 3;
} else if (p[i] == 0x0F && p[i + 1] == 0x38 && p[i + 2] == 0xF0) { } else if (p[i] == 0x0F && p[i + 1] == 0x38 && p[i + 2] == 0xF0) {
// MOVBE r32, m32 (load) // MOVBE r32, m32 (load)
@ -159,8 +142,8 @@ bool TryDecodeMov(const uint8_t* p, DecodedMov* mov) {
// 46 0f 38 f0 a4 22 00 movbe r12d,DWORD PTR [rdx+r12*1+0x0] // 46 0f 38 f0 a4 22 00 movbe r12d,DWORD PTR [rdx+r12*1+0x0]
// 0f 38 f0 8c 02 00 00 movbe ecx,DWORD PTR [rdx+rax*1+0x0] // 0f 38 f0 8c 02 00 00 movbe ecx,DWORD PTR [rdx+rax*1+0x0]
// 0F 38 F0 1C 02 movbe ebx,dword ptr [rdx+rax] // 0F 38 F0 1C 02 movbe ebx,dword ptr [rdx+rax]
mov->is_load = true; decoded_out.is_load = true;
mov->byte_swap = true; decoded_out.byte_swap = true;
i += 3; i += 3;
} else if (p[i] == 0x89) { } else if (p[i] == 0x89) {
// MOV m32, r32 (store) // MOV m32, r32 (store)
@ -168,8 +151,8 @@ bool TryDecodeMov(const uint8_t* p, DecodedMov* mov) {
// 44 89 24 02 mov DWORD PTR[rdx + rax * 1], r12d // 44 89 24 02 mov DWORD PTR[rdx + rax * 1], r12d
// 42 89 0c 22 mov DWORD PTR[rdx + r12 * 1], ecx // 42 89 0c 22 mov DWORD PTR[rdx + r12 * 1], ecx
// 89 0c 02 mov DWORD PTR[rdx + rax * 1], ecx // 89 0c 02 mov DWORD PTR[rdx + rax * 1], ecx
mov->is_load = false; decoded_out.is_load = false;
mov->byte_swap = false; decoded_out.byte_swap = false;
++i; ++i;
} else if (p[i] == 0x8B) { } else if (p[i] == 0x8B) {
// MOV r32, m32 (load) // MOV r32, m32 (load)
@ -178,16 +161,16 @@ bool TryDecodeMov(const uint8_t* p, DecodedMov* mov) {
// 42 8b 0c 22 mov ecx, DWORD PTR[rdx + r12 * 1] // 42 8b 0c 22 mov ecx, DWORD PTR[rdx + r12 * 1]
// 46 8b 24 22 mov r12d, DWORD PTR[rdx + r12 * 1] // 46 8b 24 22 mov r12d, DWORD PTR[rdx + r12 * 1]
// 8b 0c 02 mov ecx, DWORD PTR[rdx + rax * 1] // 8b 0c 02 mov ecx, DWORD PTR[rdx + rax * 1]
mov->is_load = true; decoded_out.is_load = true;
mov->byte_swap = false; decoded_out.byte_swap = false;
++i; ++i;
} else if (p[i] == 0xC7) { } else if (p[i] == 0xC7) {
// MOV m32, simm32 // MOV m32, simm32
// https://web.archive.org/web/20161017042413/https://www.asmpedia.org/index.php?title=MOV // https://web.archive.org/web/20161017042413/https://www.asmpedia.org/index.php?title=MOV
// C7 04 02 02 00 00 00 mov dword ptr [rdx+rax],2 // C7 04 02 02 00 00 00 mov dword ptr [rdx+rax],2
mov->is_load = false; decoded_out.is_load = false;
mov->byte_swap = false; decoded_out.byte_swap = false;
mov->is_constant = true; decoded_out.is_constant = true;
++i; ++i;
} else { } else {
return false; return false;
@ -204,13 +187,13 @@ bool TryDecodeMov(const uint8_t* p, DecodedMov* mov) {
uint8_t mod = (modrm & 0b11000000) >> 6; uint8_t mod = (modrm & 0b11000000) >> 6;
uint8_t reg = (modrm & 0b00111000) >> 3; uint8_t reg = (modrm & 0b00111000) >> 3;
uint8_t rm = (modrm & 0b00000111); uint8_t rm = (modrm & 0b00000111);
mov->value_reg = reg + (rex_r ? 8 : 0); decoded_out.value_reg = reg + (rex_r ? 8 : 0);
mov->mem_has_base = false; decoded_out.mem_has_base = false;
mov->mem_base_reg = 0; decoded_out.mem_base_reg = 0;
mov->mem_has_index = false; decoded_out.mem_has_index = false;
mov->mem_index_reg = 0; decoded_out.mem_index_reg = 0;
mov->mem_scale = 1; decoded_out.mem_scale = 1;
mov->mem_displacement = 0; decoded_out.mem_displacement = 0;
bool has_sib = false; bool has_sib = false;
switch (rm) { switch (rm) {
case 0b100: // SIB case 0b100: // SIB
@ -221,17 +204,17 @@ bool TryDecodeMov(const uint8_t* p, DecodedMov* mov) {
// RIP-relative not supported. // RIP-relative not supported.
return false; return false;
} }
mov->mem_has_base = true; decoded_out.mem_has_base = true;
mov->mem_base_reg = rm + (rex_b ? 8 : 0); decoded_out.mem_base_reg = rm + (rex_b ? 8 : 0);
break; break;
default: default:
mov->mem_has_base = true; decoded_out.mem_has_base = true;
mov->mem_base_reg = rm + (rex_b ? 8 : 0); decoded_out.mem_base_reg = rm + (rex_b ? 8 : 0);
break; break;
} }
if (has_sib) { if (has_sib) {
uint8_t sib = p[i++]; uint8_t sib = p[i++];
// The SIB scale field occupies bits 7:6, so the masked value is shifted right by 6 (a shift by 8 always produced a scale of 1).
mov->mem_scale = 1 << ((sib & 0b11000000) >> 8); decoded_out.mem_scale = 1 << ((sib & 0b11000000) >> 6);
uint8_t sib_index = (sib & 0b00111000) >> 3; uint8_t sib_index = (sib & 0b00111000) >> 3;
uint8_t sib_base = (sib & 0b00000111); uint8_t sib_base = (sib & 0b00000111);
switch (sib_index) { switch (sib_index) {
@ -239,8 +222,9 @@ bool TryDecodeMov(const uint8_t* p, DecodedMov* mov) {
// No index. // No index.
break; break;
default: default:
mov->mem_has_index = true; decoded_out.mem_has_index = true;
mov->mem_index_reg = sib_index + (rex_x ? 8 : 0); decoded_out.mem_index_reg = sib_index + (rex_x ? 8 : 0);
decoded_out.mem_index_size = sizeof(uint64_t);
break; break;
} }
switch (sib_base) { switch (sib_base) {
@ -249,29 +233,162 @@ bool TryDecodeMov(const uint8_t* p, DecodedMov* mov) {
assert_zero(mod); assert_zero(mod);
return false; return false;
default: default:
mov->mem_has_base = true; decoded_out.mem_has_base = true;
mov->mem_base_reg = sib_base + (rex_b ? 8 : 0); decoded_out.mem_base_reg = sib_base + (rex_b ? 8 : 0);
break; break;
} }
} }
switch (mod) { switch (mod) {
case 0b00: { case 0b00: {
mov->mem_displacement += 0; decoded_out.mem_displacement += 0;
} break; } break;
case 0b01: { case 0b01: {
mov->mem_displacement += int8_t(p[i++]); decoded_out.mem_displacement += int8_t(p[i++]);
} break; } break;
case 0b10: { case 0b10: {
mov->mem_displacement += xe::load<int32_t>(p + i); decoded_out.mem_displacement += xe::load<int32_t>(p + i);
i += 4; i += 4;
} break; } break;
} }
if (mov->is_constant) { if (decoded_out.is_constant) {
mov->constant = xe::load<int32_t>(p + i); decoded_out.constant = xe::load<int32_t>(p + i);
i += 4; i += 4;
} }
mov->length = i; decoded_out.length = i;
return true; return true;
#elif XE_ARCH_ARM64
decoded_out.length = sizeof(uint32_t);
uint32_t instruction = *reinterpret_cast<const uint32_t*>(p);
// Literal loading (PC-relative) is not handled.
if ((instruction & kArm64LoadStoreAnyFMask) != kArm64LoadStoreAnyFixed) {
// Not a load or a store instruction.
return false;
}
if ((instruction & kArm64LoadStorePairAnyFMask) ==
kArm64LoadStorePairAnyFixed) {
// Handling MMIO only for single 32-bit values, not for pairs.
return false;
}
uint8_t value_reg_base;
switch (Arm64LoadStoreOp(instruction & kArm64LoadStoreMask)) {
case Arm64LoadStoreOp::kSTR_w:
decoded_out.is_load = false;
value_reg_base = DecodedLoadStore::kArm64ValueRegX0;
break;
case Arm64LoadStoreOp::kLDR_w:
decoded_out.is_load = true;
value_reg_base = DecodedLoadStore::kArm64ValueRegX0;
break;
case Arm64LoadStoreOp::kSTR_s:
decoded_out.is_load = false;
value_reg_base = DecodedLoadStore::kArm64ValueRegV0;
break;
case Arm64LoadStoreOp::kLDR_s:
decoded_out.is_load = true;
value_reg_base = DecodedLoadStore::kArm64ValueRegV0;
break;
default:
return false;
}
// `Rt` field (load / store register).
decoded_out.value_reg = value_reg_base + (instruction & 31);
if (!decoded_out.is_load &&
    decoded_out.value_reg == DecodedLoadStore::kArm64ValueRegZero) {
  // A store from the zero register writes a zero constant rather than the
  // result of a register read. For a load, the zero register as the
  // destination only means the loaded value is discarded, which is handled
  // where the loaded value is written to the register.
  decoded_out.is_constant = true;
  decoded_out.constant = 0;
}
decoded_out.mem_has_base = true;
// The base is Xn (for 0...30) or SP (for 31).
// `Rn` field (first source register).
decoded_out.mem_base_reg = (instruction >> 5) & 31;
bool is_unsigned_offset =
(instruction & kArm64LoadStoreUnsignedOffsetFMask) ==
kArm64LoadStoreUnsignedOffsetFixed;
if (is_unsigned_offset) {
// LDR|STR Wt|St, [Xn|SP{, #pimm}]
// pimm (positive immediate) is scaled by the size of the data (4 for
// words).
// `ImmLSUnsigned` field.
uint32_t unsigned_offset = (instruction >> 10) & 4095;
decoded_out.mem_displacement =
ptrdiff_t(sizeof(uint32_t) * unsigned_offset);
} else {
Arm64LoadStoreOffsetFixed offset =
Arm64LoadStoreOffsetFixed(instruction & kArm64LoadStoreOffsetFMask);
// simm (signed immediate) is not scaled.
// Only applicable to kUnscaledOffset, kPostIndex and kPreIndex.
// `ImmLS` field.
int32_t signed_offset = int32_t(instruction << (32 - (9 + 12))) >> (32 - 9);
// For both post- and pre-indexing, the new address is written to the
// register after the data register write, thus if Xt and Xn are the same,
// the final value in the register will be the new address.
// https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/LDR--immediate---Load-Register--immediate--
switch (offset) {
case Arm64LoadStoreOffsetFixed::kUnscaledOffset: {
// LDUR|STUR Wt|St, [Xn|SP{, #simm}]
decoded_out.mem_displacement = signed_offset;
} break;
case Arm64LoadStoreOffsetFixed::kPostIndex: {
// LDR|STR Wt|St, [Xn|SP], #simm
decoded_out.mem_base_writeback = true;
decoded_out.mem_base_writeback_offset = signed_offset;
} break;
case Arm64LoadStoreOffsetFixed::kPreIndex: {
// LDR|STR Wt|St, [Xn|SP, #simm]!
decoded_out.mem_base_writeback = true;
decoded_out.mem_base_writeback_offset = signed_offset;
decoded_out.mem_displacement = signed_offset;
} break;
case Arm64LoadStoreOffsetFixed::kRegisterOffset: {
// LDR|STR Wt|St, [Xn|SP, (Wm|Xm){, extend {amount}}]
// `Rm` field.
decoded_out.mem_index_reg = (instruction >> 16) & 31;
if (decoded_out.mem_index_reg != DecodedLoadStore::kArm64RegZero) {
decoded_out.mem_has_index = true;
// Allowed extend types in the `option` field are UXTW (0b010), LSL
// (0b011 - identical to UXTX), SXTW (0b110), SXTX (0b111).
// The shift (0 or 2 for 32-bit LDR/STR) can be applied regardless of
// the extend type ("LSL" is just a term for assembly readability,
// internally it's treated simply as UXTX).
// If bit 0 of the `option` field is 0 (UXTW, SXTW), the index
// register is treated as 32-bit (Wm) extended to 64-bit. If it's 1
// (LSL aka UXTX, SXTX), the index register is treated as 64-bit (Xm).
// `ExtendMode` (`option`) field.
uint32_t extend_mode = (instruction >> 13) & 0b111;
if (!(extend_mode & 0b010)) {
// Sub-word index - undefined.
return false;
}
decoded_out.mem_index_size =
(extend_mode & 0b001) ? sizeof(uint64_t) : sizeof(uint32_t);
decoded_out.mem_index_sign_extend = (extend_mode & 0b100) != 0;
// Shift is either 0 or log2(sizeof(load or store size)).
// Supporting MMIO only for 4-byte words.
// `ImmShiftLS` field.
decoded_out.mem_scale =
(instruction & (UINT32_C(1) << 12)) ? sizeof(uint32_t) : 1;
}
} break;
default:
return false;
}
}
return true;
#else
#error TryDecodeLoadStore not implemented for the target CPU architecture.
return false;
#endif // XE_ARCH
} }
bool MMIOHandler::ExceptionCallbackThunk(Exception* ex, void* data) { bool MMIOHandler::ExceptionCallbackThunk(Exception* ex, void* data) {
@ -300,11 +417,13 @@ bool MMIOHandler::ExceptionCallback(Exception* ex) {
// Access violations are pretty rare, so we can do a linear search here. // Access violations are pretty rare, so we can do a linear search here.
// Only check if in the virtual range, as we only support virtual ranges. // Only check if in the virtual range, as we only support virtual ranges.
const MMIORange* range = nullptr; const MMIORange* range = nullptr;
uint32_t fault_guest_virtual_address = 0;
if (ex->fault_address() < uint64_t(physical_membase_)) { if (ex->fault_address() < uint64_t(physical_membase_)) {
uint32_t fault_virtual_address = host_to_guest_virtual_( fault_guest_virtual_address = host_to_guest_virtual_(
host_to_guest_virtual_context_, fault_host_address); host_to_guest_virtual_context_, fault_host_address);
for (const auto& test_range : mapped_ranges_) { for (const auto& test_range : mapped_ranges_) {
if ((fault_virtual_address & test_range.mask) == test_range.address) { if ((fault_guest_virtual_address & test_range.mask) ==
test_range.address) {
// Address is within the range of this mapping. // Address is within the range of this mapping.
range = &test_range; range = &test_range;
break; break;
@ -336,44 +455,114 @@ bool MMIOHandler::ExceptionCallback(Exception* ex) {
auto rip = ex->pc(); auto rip = ex->pc();
auto p = reinterpret_cast<const uint8_t*>(rip); auto p = reinterpret_cast<const uint8_t*>(rip);
DecodedMov mov = {0}; DecodedLoadStore decoded_load_store;
bool decoded = TryDecodeMov(p, &mov); if (!TryDecodeLoadStore(p, decoded_load_store)) {
if (!decoded) { XELOGE("Unable to decode MMIO load or store instruction at {}", p);
XELOGE("Unable to decode MMIO mov at {}", p);
assert_always("Unknown MMIO instruction type"); assert_always("Unknown MMIO instruction type");
return false; return false;
} }
if (mov.is_load) { HostThreadContext& thread_context = *ex->thread_context();
#if XE_ARCH_ARM64
// Preserve the base address with the pre- or the post-index offset to write
// it after writing the result (since the base address register and the
// register to load to may be the same, in which case it should receive the
// original base address with the offset).
uintptr_t mem_base_writeback_address = 0;
if (decoded_load_store.mem_has_base &&
decoded_load_store.mem_base_writeback) {
if (decoded_load_store.mem_base_reg ==
DecodedLoadStore::kArm64MemBaseRegSp) {
mem_base_writeback_address = thread_context.sp;
} else {
assert_true(decoded_load_store.mem_base_reg <= 30);
mem_base_writeback_address =
thread_context.x[decoded_load_store.mem_base_reg];
}
mem_base_writeback_address += decoded_load_store.mem_base_writeback_offset;
}
#endif // XE_ARCH_ARM64
uint8_t value_reg = decoded_load_store.value_reg;
if (decoded_load_store.is_load) {
// Load of a memory value - read from range, swap, and store in the // Load of a memory value - read from range, swap, and store in the
// register. // register.
uint32_t value = range->read(nullptr, range->callback_context, uint32_t value = range->read(nullptr, range->callback_context,
static_cast<uint32_t>(ex->fault_address())); fault_guest_virtual_address);
uint64_t* reg_ptr = &ex->thread_context()->int_registers[mov.value_reg]; if (!decoded_load_store.byte_swap) {
if (!mov.byte_swap) {
// We swap only if it's not a movbe, as otherwise we are swapping twice. // We swap only if it's not a movbe, as otherwise we are swapping twice.
value = xe::byte_swap(value); value = xe::byte_swap(value);
} }
*reg_ptr = value; #if XE_ARCH_AMD64
ex->ModifyIntRegister(value_reg) = value;
#elif XE_ARCH_ARM64
if (value_reg >= DecodedLoadStore::kArm64ValueRegX0 &&
value_reg <= (DecodedLoadStore::kArm64ValueRegX0 + 30)) {
ex->ModifyXRegister(value_reg - DecodedLoadStore::kArm64ValueRegX0) =
value;
} else if (value_reg >= DecodedLoadStore::kArm64ValueRegV0 &&
value_reg <= (DecodedLoadStore::kArm64ValueRegV0 + 31)) {
ex->ModifyVRegister(value_reg - DecodedLoadStore::kArm64ValueRegV0)
.u32[0] = value;
} else {
assert_true(value_reg == DecodedLoadStore::kArm64ValueRegZero);
// Register write is ignored for X31.
}
#else
#error Register value writing not implemented for the target CPU architecture.
#endif // XE_ARCH
} else { } else {
// Store of a register value - read register, swap, write to range. // Store of a register value - read register, swap, write to range.
int32_t value; uint32_t value;
if (mov.is_constant) { if (decoded_load_store.is_constant) {
value = uint32_t(mov.constant); value = uint32_t(decoded_load_store.constant);
} else { } else {
uint64_t* reg_ptr = &ex->thread_context()->int_registers[mov.value_reg]; #if XE_ARCH_AMD64
value = static_cast<uint32_t>(*reg_ptr); value = uint32_t(thread_context.int_registers[value_reg]);
if (!mov.byte_swap) { #elif XE_ARCH_ARM64
if (value_reg >= DecodedLoadStore::kArm64ValueRegX0 &&
value_reg <= (DecodedLoadStore::kArm64ValueRegX0 + 30)) {
value = uint32_t(
thread_context.x[value_reg - DecodedLoadStore::kArm64ValueRegX0]);
} else if (value_reg >= DecodedLoadStore::kArm64ValueRegV0 &&
value_reg <= (DecodedLoadStore::kArm64ValueRegV0 + 31)) {
value = thread_context.v[value_reg - DecodedLoadStore::kArm64ValueRegV0]
.u32[0];
} else {
assert_true(value_reg == DecodedLoadStore::kArm64ValueRegZero);
value = 0;
}
#else
#error Register value reading not implemented for the target CPU architecture.
#endif // XE_ARCH
if (!decoded_load_store.byte_swap) {
// We swap only if it's not a movbe, as otherwise we are swapping twice. // We swap only if it's not a movbe, as otherwise we are swapping twice.
value = xe::byte_swap(static_cast<uint32_t>(value)); value = xe::byte_swap(value);
} }
} }
range->write(nullptr, range->callback_context, range->write(nullptr, range->callback_context, fault_guest_virtual_address,
static_cast<uint32_t>(ex->fault_address()), value); value);
} }
#if XE_ARCH_ARM64
// Write the base address with the pre- or the post-index offset, overwriting
// the register to load to if it's the same.
if (decoded_load_store.mem_has_base &&
decoded_load_store.mem_base_writeback) {
if (decoded_load_store.mem_base_reg ==
DecodedLoadStore::kArm64MemBaseRegSp) {
thread_context.sp = mem_base_writeback_address;
} else {
assert_true(decoded_load_store.mem_base_reg <= 30);
ex->ModifyXRegister(decoded_load_store.mem_base_reg) =
mem_base_writeback_address;
}
}
#endif // XE_ARCH_ARM64
// Advance RIP to the next instruction so that we resume properly. // Advance RIP to the next instruction so that we resume properly.
ex->set_resume_pc(rip + mov.length); ex->set_resume_pc(rip + decoded_load_store.length);
return true; return true;
} }

View File

@ -15,6 +15,7 @@
#include <vector> #include <vector>
#include "xenia/base/mutex.h" #include "xenia/base/mutex.h"
#include "xenia/base/platform.h"
namespace xe { namespace xe {
class Exception; class Exception;
@ -93,6 +94,61 @@ class MMIOHandler {
static MMIOHandler* global_handler_; static MMIOHandler* global_handler_;
xe::global_critical_region global_critical_region_; xe::global_critical_region global_critical_region_;
private:
// Description of a single host load or store instruction, filled by
// TryDecodeLoadStore and consumed by the MMIO exception handler to emulate the
// access (reading or writing the value register, applying the AArch64 base
// register writeback, and skipping the instruction).
struct DecodedLoadStore {
// Matches the Xn/Wn register number for 0 reads and ignored writes in many
// usage cases.
static constexpr uint8_t kArm64RegZero = 31;
// Matches the actual register number encoding for an SP base in AArch64
// load and store instructions.
static constexpr uint8_t kArm64MemBaseRegSp = kArm64RegZero;
// First value_reg number of the Xn range (see value_reg below).
static constexpr uint8_t kArm64ValueRegX0 = 0;
// value_reg number denoting the zero register (see value_reg below).
static constexpr uint8_t kArm64ValueRegZero =
kArm64ValueRegX0 + kArm64RegZero;
// First value_reg number of the Vn range (see value_reg below).
static constexpr uint8_t kArm64ValueRegV0 = 32;
// Size of the decoded instruction - the exception handler adds it to the
// faulting instruction address to resume at the next instruction.
size_t length;
// Indicates this is a load (or conversely a store).
bool is_load;
// Indicates the memory must be swapped.
// When set, the exception handler does not byte-swap the value itself (the
// instruction, such as x86 movbe, already implies the swap, and swapping in
// the handler as well would swap twice).
bool byte_swap;
// Source (for store) or target (for load) register.
// For x86-64:
// AX CX DX BX SP BP SI DI // REX.R=0
// R8 R9 R10 R11 R12 R13 R14 R15 // REX.R=1
// For AArch64:
// - kArm64ValueRegX0 + [0...30]: Xn (Wn for 32 bits - upper 32 bits of Xn
// are zeroed on Wn write).
// - kArm64ValueRegZero: Zero constant for register read, ignored register
// write (though memory must still be accessed - a MMIO load may have side
// effects even if the result is discarded).
// - kArm64ValueRegV0 + [0...31]: Vn (Sn for 32 bits).
uint8_t value_reg;
// [base + (index * scale) + displacement]
bool mem_has_base;
// On AArch64, if mem_base_reg is kArm64MemBaseRegSp, the base register is
// SP, not Xn.
uint8_t mem_base_reg;
// For AArch64 pre- and post-indexing. In case of a load, the base register
// is written back after the loaded data is written to the register,
// overwriting the value register if it's the same.
bool mem_base_writeback;
// Offset added to the original base register value to obtain the value
// written back to the base register when mem_base_writeback is set.
int32_t mem_base_writeback_offset;
// Index part of the address expression above.
// NOTE(review): mem_index_size and mem_index_sign_extend presumably describe
// the width and the extension of the index register value - confirm against
// the decoder.
bool mem_has_index;
uint8_t mem_index_reg;
uint8_t mem_index_size;
bool mem_index_sign_extend;
uint8_t mem_scale;
ptrdiff_t mem_displacement;
// For a store, whether the value to write is `constant` rather than the
// contents of value_reg.
bool is_constant;
int32_t constant;
};
// Tries to decode the load or store instruction located at p into decoded_out.
// Returns false if the instruction could not be decoded - the MMIO exception
// handler then logs an error and does not handle the exception.
static bool TryDecodeLoadStore(const uint8_t* p,
DecodedLoadStore& decoded_out);
}; };
} // namespace cpu } // namespace cpu

View File

@ -19,6 +19,7 @@
#include "xenia/base/literals.h" #include "xenia/base/literals.h"
#include "xenia/base/logging.h" #include "xenia/base/logging.h"
#include "xenia/base/memory.h" #include "xenia/base/memory.h"
#include "xenia/base/platform.h"
#include "xenia/base/profiling.h" #include "xenia/base/profiling.h"
#include "xenia/base/threading.h" #include "xenia/base/threading.h"
#include "xenia/cpu/breakpoint.h" #include "xenia/cpu/breakpoint.h"
@ -675,7 +676,13 @@ bool Processor::OnThreadBreakpointHit(Exception* ex) {
// Apply thread context changes. // Apply thread context changes.
// TODO(benvanik): apply to all threads? // TODO(benvanik): apply to all threads?
#if XE_ARCH_AMD64
ex->set_resume_pc(thread_info->host_context.rip); ex->set_resume_pc(thread_info->host_context.rip);
#elif XE_ARCH_ARM64
ex->set_resume_pc(thread_info->host_context.pc);
#else
#error Instruction pointer not specified for the target CPU architecture.
#endif // XE_ARCH
// Resume execution. // Resume execution.
return true; return true;