diff --git a/src/xenia/cpu/cpu.h b/src/xenia/cpu/cpu.h index e9fed4f77..4bd272af0 100644 --- a/src/xenia/cpu/cpu.h +++ b/src/xenia/cpu/cpu.h @@ -13,6 +13,6 @@ #include // TODO(benvanik): conditionally include? -//#include +#include #endif // XENIA_CPU_CPU_H_ diff --git a/src/xenia/cpu/sources.gypi b/src/xenia/cpu/sources.gypi index 97ef7d3a7..846fa0f56 100644 --- a/src/xenia/cpu/sources.gypi +++ b/src/xenia/cpu/sources.gypi @@ -20,5 +20,6 @@ 'includes': [ 'ppc/sources.gypi', 'sdb/sources.gypi', + 'x64/sources.gypi', ], } diff --git a/src/xenia/cpu/x64/sources.gypi b/src/xenia/cpu/x64/sources.gypi new file mode 100644 index 000000000..6a1eed7a1 --- /dev/null +++ b/src/xenia/cpu/x64/sources.gypi @@ -0,0 +1,16 @@ +# Copyright 2013 Ben Vanik. All Rights Reserved. +{ + 'sources': [ + 'x64_backend.cc', + 'x64_backend.h', + 'x64_emit.h', + 'x64_emit_alu.cc', + 'x64_emit_control.cc', + 'x64_emit_fpu.cc', + 'x64_emit_memory.cc', + 'x64_emitter.cc', + 'x64_emitter.h', + 'x64_jit.cc', + 'x64_jit.h', + ], +} diff --git a/src/xenia/cpu/x64/x64_backend.cc b/src/xenia/cpu/x64/x64_backend.cc new file mode 100644 index 000000000..6217822bc --- /dev/null +++ b/src/xenia/cpu/x64/x64_backend.cc @@ -0,0 +1,57 @@ +/** + ****************************************************************************** + * Xenia : Xbox 360 Emulator Research Project * + ****************************************************************************** + * Copyright 2013 Ben Vanik. All rights reserved. * + * Released under the BSD license - see LICENSE in the root for more details. * + ****************************************************************************** + */ + +#include + +#include +#include +#include + + +using namespace xe; +using namespace xe::cpu; +using namespace xe::cpu::sdb; +using namespace xe::cpu::x64; + + +namespace { + void InitializeIfNeeded(); + void CleanupOnShutdown(); + + void InitializeIfNeeded() { + static bool has_initialized = false; + if (has_initialized) { + return; + } + has_initialized = true; + + X64RegisterEmitCategoryALU(); + X64RegisterEmitCategoryControl(); + X64RegisterEmitCategoryFPU(); + X64RegisterEmitCategoryMemory(); + + atexit(CleanupOnShutdown); + } + + void CleanupOnShutdown() { + } +} + + +X64Backend::X64Backend() : + Backend() { + InitializeIfNeeded(); +} + +X64Backend::~X64Backend() { +} + +JIT* X64Backend::CreateJIT(xe_memory_ref memory, SymbolTable* sym_table) { + return new X64JIT(memory, sym_table); +} diff --git a/src/xenia/cpu/x64/x64_backend.h b/src/xenia/cpu/x64/x64_backend.h new file mode 100644 index 000000000..8ac539d14 --- /dev/null +++ b/src/xenia/cpu/x64/x64_backend.h @@ -0,0 +1,39 @@ +/** + ****************************************************************************** + * Xenia : Xbox 360 Emulator Research Project * + ****************************************************************************** + * Copyright 2013 Ben Vanik. All rights reserved. * + * Released under the BSD license - see LICENSE in the root for more details. 
* + ****************************************************************************** + */ + +#ifndef XENIA_CPU_X64_X64_BACKEND_H_ +#define XENIA_CPU_X64_X64_BACKEND_H_ + +#include + +#include + + +namespace xe { +namespace cpu { +namespace x64 { + + +class X64Backend : public Backend { +public: + X64Backend(); + virtual ~X64Backend(); + + virtual JIT* CreateJIT(xe_memory_ref memory, sdb::SymbolTable* sym_table); + +protected: +}; + + +} // namespace x64 +} // namespace cpu +} // namespace xe + + +#endif // XENIA_CPU_X64_X64_BACKEND_H_ diff --git a/src/xenia/cpu/x64/x64_emit.h b/src/xenia/cpu/x64/x64_emit.h new file mode 100644 index 000000000..02eb793df --- /dev/null +++ b/src/xenia/cpu/x64/x64_emit.h @@ -0,0 +1,43 @@ +/** + ****************************************************************************** + * Xenia : Xbox 360 Emulator Research Project * + ****************************************************************************** + * Copyright 2013 Ben Vanik. All rights reserved. * + * Released under the BSD license - see LICENSE in the root for more details. * + ****************************************************************************** + */ + +#ifndef XENIA_CPU_X64_X64_EMIT_H_ +#define XENIA_CPU_X64_X64_EMIT_H_ + +#include +#include +#include + + +namespace xe { +namespace cpu { +namespace x64 { + + +void X64RegisterEmitCategoryALU(); +void X64RegisterEmitCategoryControl(); +void X64RegisterEmitCategoryFPU(); +void X64RegisterEmitCategoryMemory(); + + +#define XEEMITTER(name, opcode, format) int InstrEmit_##name + +#define XEREGISTERINSTR(name, opcode) \ + RegisterInstrEmit(opcode, (InstrEmitFn)InstrEmit_##name); + +#define XEINSTRNOTIMPLEMENTED() +//#define XEINSTRNOTIMPLEMENTED XEASSERTALWAYS + + +} // namespace x64 +} // namespace cpu +} // namespace xe + + +#endif // XENIA_CPU_X64_X64_EMIT_H_ diff --git a/src/xenia/cpu/x64/x64_emit_alu.cc b/src/xenia/cpu/x64/x64_emit_alu.cc new file mode 100644 index 000000000..801476c90 --- /dev/null +++ b/src/xenia/cpu/x64/x64_emit_alu.cc @@ -0,0 +1,1094 @@ +/* + ****************************************************************************** + * Xenia : Xbox 360 Emulator Research Project * + ****************************************************************************** + * Copyright 2013 Ben Vanik. All rights reserved. * + * Released under the BSD license - see LICENSE in the root for more details. * + ****************************************************************************** + */ + +#include + +#include + + +using namespace xe::cpu; +using namespace xe::cpu::ppc; + + +namespace xe { +namespace cpu { +namespace x64 { + + +// Integer arithmetic (A-3) + +XEEMITTER(addx, 0x7C000214, XO )(X64Emitter& e, jit_function_t f, InstrData& i) { + // RD <- (RA) + (RB) + + if (i.XO.OE) { + // With XER update. + // This is a different codepath as we need to use llvm.sadd.with.overflow. + + // TODO(benvanik): handle overflow exception. + jit_value_t v = jit_insn_add_ovf(f, + e.make_signed(e.gpr_value(i.XO.RA)), + e.make_signed(e.gpr_value(i.XO.RB))); + e.update_gpr_value(i.XO.RT, v); + //e.update_xer_with_overflow(b.CreateExtractValue(v, 1)); + + if (i.XO.Rc) { + // With cr0 update. + e.update_cr_with_cond(0, v, e.get_int64(0), true); + } + } else { + // No OE bit setting. + jit_value_t v = jit_insn_add(f, + e.make_signed(e.gpr_value(i.XO.RA)), + e.make_signed(e.gpr_value(i.XO.RB))); + e.update_gpr_value(i.XO.RT, v); + + if (i.XO.Rc) { + // With cr0 update. 
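+      // (Note, not part of the original patch: with Rc=1 the ISA sets CR0 to
+      // LT || GT || EQ || SO from a signed compare of the result with zero,
+      // with SO copied from XER[SO]; update_cr_with_cond(0, v, 0, true) is
+      // assumed to implement exactly that.)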
+ e.update_cr_with_cond(0, v, e.get_int64(0), true); + } + } + return 0; +} + +XEEMITTER(addcx, 0x7C000014, XO )(X64Emitter& e, jit_function_t f, InstrData& i) { + XEINSTRNOTIMPLEMENTED(); + return 1; +} + +XEEMITTER(addex, 0x7C000114, XO )(X64Emitter& e, jit_function_t f, InstrData& i) { + XEINSTRNOTIMPLEMENTED(); + return 1; +} + +XEEMITTER(addi, 0x38000000, D )(X64Emitter& e, jit_function_t f, InstrData& i) { + // if RA = 0 then + // RT <- EXTS(SI) + // else + // RT <- (RA) + EXTS(SI) + + jit_value_t v = e.get_int64(XEEXTS16(i.D.DS)); + if (i.D.RA) { + v = jit_insn_add(f, e.gpr_value(i.D.RA), v); + } + e.update_gpr_value(i.D.RT, v); + + return 0; +} + +XEEMITTER(addic, 0x30000000, D )(X64Emitter& e, jit_function_t f, InstrData& i) { + // RT <- (RA) + EXTS(SI) + + // TODO(benvanik): track exception + jit_value_t v = jit_insn_add_ovf(f, e.make_signed(e.gpr_value(i.D.RA)), + e.get_int64(XEEXTS16(i.D.DS))); + + e.update_gpr_value(i.D.RT, v); + // e.update_xer_with_carry(b.CreateExtractValue(v, 1)); + + return 0; +} + +XEEMITTER(addicx, 0x34000000, D )(X64Emitter& e, jit_function_t f, InstrData& i) { + XEINSTRNOTIMPLEMENTED(); + return 1; +} + +XEEMITTER(addis, 0x3C000000, D )(X64Emitter& e, jit_function_t f, InstrData& i) { + // if RA = 0 then + // RT <- EXTS(SI) || i16.0 + // else + // RT <- (RA) + EXTS(SI) || i16.0 + + jit_value_t v = e.get_int64(XEEXTS16(i.D.DS) << 16); + if (i.D.RA) { + v = jit_insn_add(f, e.gpr_value(i.D.RA), v); + } + e.update_gpr_value(i.D.RT, v); + + return 0; +} + +XEEMITTER(addmex, 0x7C0001D4, XO )(X64Emitter& e, jit_function_t f, InstrData& i) { + XEINSTRNOTIMPLEMENTED(); + return 1; +} + +XEEMITTER(addzex, 0x7C000194, XO )(X64Emitter& e, jit_function_t f, InstrData& i) { + // RT <- (RA) + CA + + // TODO(benvanik): handle overflow exception. + jit_value_t ca = jit_insn_and(f, + jit_insn_ushr(f, e.xer_value(), e.get_uint32(29)), + e.get_uint64(0x1)); + jit_value_t v = jit_insn_add_ovf(f, + e.make_signed(e.gpr_value(i.XO.RA)), + e.make_signed(ca)); + e.update_gpr_value(i.XO.RT, v); + if (i.XO.OE) { + // With XER[SO] update too. + //e.update_xer_with_overflow_and_carry(b.CreateExtractValue(v, 1)); + } else { + // Just CA update. + //e.update_xer_with_carry(b.CreateExtractValue(v, 1)); + } + + if (i.XO.Rc) { + // With cr0 update. + e.update_cr_with_cond(0, v, e.get_int64(0), true); + } + + return 0; +} + +XEEMITTER(divdx, 0x7C0003D2, XO )(X64Emitter& e, jit_function_t f, InstrData& i) { + XEINSTRNOTIMPLEMENTED(); + return 1; +} + +XEEMITTER(divdux, 0x7C000392, XO )(X64Emitter& e, jit_function_t f, InstrData& i) { + XEINSTRNOTIMPLEMENTED(); + return 1; +} + +// XEEMITTER(divwx, 0x7C0003D6, XO )(X64Emitter& e, jit_function_t f, InstrData& i) { +// // dividend[0:31] <- (RA)[32:63] +// // divisor[0:31] <- (RB)[32:63] +// // if divisor = 0 then +// // if OE = 1 then +// // XER[OV] <- 1 +// // return +// // RT[32:63] <- dividend ÷ divisor +// // RT[0:31] <- undefined + +// jit_value_t dividend = e.trunc_to_int(e.gpr_value(i.XO.RA)); +// jit_value_t divisor = e.trunc_to_int(e.gpr_value(i.XO.RB)); + +// // Note that we skip the zero handling block and just avoid the divide if +// // we are OE=0. +// BasicBlock* zero_bb = i.XO.OE ? +// BasicBlock::Create(*e.context(), "", e.fn()) : NULL; +// BasicBlock* nonzero_bb = BasicBlock::Create(*e.context(), "", e.fn()); +// BasicBlock* after_bb = BasicBlock::Create(*e.context(), "", e.fn()); +// b.CreateCondBr(b.CreateICmpEQ(divisor, b.get_int32(0)), +// i.XO.OE ? 
zero_bb : after_bb, nonzero_bb); + +// if (zero_bb) { +// // Divisor was zero - do XER update. +// b.SetInsertPoint(zero_bb); +// e.update_xer_with_overflow(b.getInt1(1)); +// b.CreateBr(after_bb); +// } + +// // Divide. +// b.SetInsertPoint(nonzero_bb); +// jit_value_t v = b.CreateSDiv(dividend, divisor); +// v = e.sign_extend(v, jit_type_nint); +// e.update_gpr_value(i.XO.RT, v); + +// // If we are OE=1 we need to clear the overflow bit. +// if (i.XO.OE) { +// e.update_xer_with_overflow(b.getInt1(0)); +// } + +// if (i.XO.Rc) { +// // With cr0 update. +// e.update_cr_with_cond(0, v, e.get_int64(0), true); +// } + +// b.CreateBr(after_bb); + +// // Resume. +// b.SetInsertPoint(after_bb); + +// return 0; +// } + +// XEEMITTER(divwux, 0x7C000396, XO )(X64Emitter& e, jit_function_t f, InstrData& i) { +// // dividend[0:31] <- (RA)[32:63] +// // divisor[0:31] <- (RB)[32:63] +// // if divisor = 0 then +// // if OE = 1 then +// // XER[OV] <- 1 +// // return +// // RT[32:63] <- dividend ÷ divisor +// // RT[0:31] <- undefined + +// jit_value_t dividend = e.trunc_to_int(e.gpr_value(i.XO.RA)); +// jit_value_t divisor = e.trunc_to_int(e.gpr_value(i.XO.RB)); + +// // Note that we skip the zero handling block and just avoid the divide if +// // we are OE=0. +// BasicBlock* zero_bb = i.XO.OE ? +// BasicBlock::Create(*e.context(), "", e.fn()) : NULL; +// BasicBlock* nonzero_bb = BasicBlock::Create(*e.context(), "", e.fn()); +// BasicBlock* after_bb = BasicBlock::Create(*e.context(), "", e.fn()); +// b.CreateCondBr(b.CreateICmpEQ(divisor, b.get_int32(0)), +// i.XO.OE ? zero_bb : after_bb, nonzero_bb); + +// if (zero_bb) { +// // Divisor was zero - do XER update. +// b.SetInsertPoint(zero_bb); +// e.update_xer_with_overflow(b.getInt1(1)); +// b.CreateBr(after_bb); +// } + +// // Divide. +// b.SetInsertPoint(nonzero_bb); +// jit_value_t v = b.CreateUDiv(dividend, divisor); +// v = e.zero_extend(v, jit_type_nint); +// e.update_gpr_value(i.XO.RT, v); + +// // If we are OE=1 we need to clear the overflow bit. +// if (i.XO.OE) { +// e.update_xer_with_overflow(b.getInt1(0)); +// } + +// if (i.XO.Rc) { +// // With cr0 update. +// e.update_cr_with_cond(0, v, e.get_int64(0), true); +// } + +// b.CreateBr(after_bb); + +// // Resume. +// b.SetInsertPoint(after_bb); + +// return 0; +// } + +XEEMITTER(mulhdx, 0x7C000092, XO )(X64Emitter& e, jit_function_t f, InstrData& i) { + XEINSTRNOTIMPLEMENTED(); + return 1; +} + +XEEMITTER(mulhdux, 0x7C000012, XO )(X64Emitter& e, jit_function_t f, InstrData& i) { + XEINSTRNOTIMPLEMENTED(); + return 1; +} + +XEEMITTER(mulhwx, 0x7C000096, XO )(X64Emitter& e, jit_function_t f, InstrData& i) { + XEINSTRNOTIMPLEMENTED(); + return 1; +} + +XEEMITTER(mulhwux, 0x7C000016, XO )(X64Emitter& e, jit_function_t f, InstrData& i) { + XEINSTRNOTIMPLEMENTED(); + return 1; +} + +XEEMITTER(mulldx, 0x7C0001D2, XO )(X64Emitter& e, jit_function_t f, InstrData& i) { + XEINSTRNOTIMPLEMENTED(); + return 1; +} + +XEEMITTER(mulli, 0x1C000000, D )(X64Emitter& e, jit_function_t f, InstrData& i) { + // prod[0:127] <- (RA) × EXTS(SI) + // RT <- prod[64:127] + + // TODO(benvanik): ensure this has the right behavior when the value + // overflows. It should be truncating the result, but I'm not sure what LLVM + // does. 
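+  // (Note, not part of the original patch: prod[64:127] is simply the low
+  // 64 bits of the 128-bit product, and a plain 64-bit multiply already
+  // truncates to those bits regardless of signedness, so jit_insn_mul on the
+  // 64-bit operands should yield the architecturally correct RT.)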
+ + jit_value_t v = jit_insn_mul(f, e.gpr_value(i.D.RA), + e.get_int64(XEEXTS16(i.D.DS))); + e.update_gpr_value(i.D.RT, v); + + return 0; +} + +XEEMITTER(mullwx, 0x7C0001D6, XO )(X64Emitter& e, jit_function_t f, InstrData& i) { + // RT <- (RA)[32:63] × (RB)[32:63] + + if (i.XO.OE) { + // With XER update. + XEINSTRNOTIMPLEMENTED(); + return 1; + } + + jit_value_t v = jit_insn_mul( + f, e.sign_extend(e.gpr_value(i.XO.RA), jit_type_nint), + e.sign_extend(e.gpr_value(i.XO.RB), jit_type_nint)); + e.update_gpr_value(i.XO.RT, v); + + if (i.XO.Rc) { + // With cr0 update. + e.update_cr_with_cond(0, v, e.get_int64(0), true); + } + + return 0; +} + +// XEEMITTER(negx, 0x7C0000D0, XO )(X64Emitter& e, jit_function_t f, InstrData& i) { +// // RT <- ¬(RA) + 1 + +// if (i.XO.OE) { +// // With XER update. +// // This is a different codepath as we need to use llvm.ssub.with.overflow. + +// // if RA == 0x8000000000000000 then no-op and set OV=1 +// // This may just magically do that... + +// Function* ssub_with_overflow = Intrinsic::getDeclaration( +// e.gen_module(), Intrinsic::ssub_with_overflow, jit_type_nint); +// jit_value_t v = b.CreateCall2(ssub_with_overflow, +// e.get_int64(0), e.gpr_value(i.XO.RA)); +// jit_value_t v0 = b.CreateExtractValue(v, 0); +// e.update_gpr_value(i.XO.RT, v0); +// e.update_xer_with_overflow(b.CreateExtractValue(v, 1)); + +// if (i.XO.Rc) { +// // With cr0 update. +// e.update_cr_with_cond(0, v0, e.get_int64(0), true); +// } + +// return 0; +// } else { +// // No OE bit setting. +// jit_value_t v = b.CreateSub(e.get_int64(0), e.gpr_value(i.XO.RA)); +// e.update_gpr_value(i.XO.RT, v); + +// if (i.XO.Rc) { +// // With cr0 update. +// e.update_cr_with_cond(0, v, e.get_int64(0), true); +// } + +// return 0; +// } +// } + +XEEMITTER(subfx, 0x7C000050, XO )(X64Emitter& e, jit_function_t f, InstrData& i) { + // RT <- ¬(RA) + (RB) + 1 + + if (i.XO.OE) { + // With XER update. + // This is a different codepath as we need to use llvm.ssub.with.overflow. + + // TODO(benvanik): handle overflow exceptions. + jit_value_t v = jit_insn_sub_ovf(f, + e.make_signed(e.gpr_value(i.XO.RB)), + e.make_signed(e.gpr_value(i.XO.RA))); + e.update_gpr_value(i.XO.RT, v); + //e.update_xer_with_overflow(b.CreateExtractValue(v, 1)); + + if (i.XO.Rc) { + // With cr0 update. + e.update_cr_with_cond(0, v, e.get_int64(0), true); + } + + return 0; + } else { + // No OE bit setting. + jit_value_t v = jit_insn_sub(f, + e.make_signed(e.gpr_value(i.XO.RB)), + e.make_signed(e.gpr_value(i.XO.RA))); + e.update_gpr_value(i.XO.RT, v); + + if (i.XO.Rc) { + // With cr0 update. + e.update_cr_with_cond(0, v, e.get_int64(0), true); + } + + return 0; + } +} + +XEEMITTER(subfcx, 0x7C000010, XO )(X64Emitter& e, jit_function_t f, InstrData& i) { + XEINSTRNOTIMPLEMENTED(); + return 1; +} + +// XEEMITTER(subficx, 0x20000000, D )(X64Emitter& e, jit_function_t f, InstrData& i) { +// // RT <- ¬(RA) + EXTS(SI) + 1 + +// Function* ssub_with_overflow = Intrinsic::getDeclaration( +// e.gen_module(), Intrinsic::ssub_with_overflow, jit_type_nint); +// jit_value_t v = b.CreateCall2(ssub_with_overflow, +// e.get_int64(XEEXTS16(i.D.DS)), e.gpr_value(i.D.RA)); +// e.update_gpr_value(i.D.RT, b.CreateExtractValue(v, 0)); +// e.update_xer_with_carry(b.CreateExtractValue(v, 1)); + +// return 0; +// } + +XEEMITTER(subfex, 0x7C000110, XO )(X64Emitter& e, jit_function_t f, InstrData& i) { + // RT <- ¬(RA) + (RB) + CA + + // TODO(benvanik): possible that the add of rb+ca needs to also check for + // overflow! 
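+  // (Note, not part of the original patch: in the low 32 bits of XER the
+  // flag bits sit at SO=31, OV=30, CA=29 counting from the LSB, which is why
+  // the code below extracts CA with a right shift by 29 and a mask of 1.)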
+ + // TODO(benvanik): handle overflow exception + jit_value_t ca = jit_insn_and(f, jit_insn_ushr(f, e.xer_value(), + e.get_uint32(29)), + e.get_uint64(0x1)); + jit_value_t v = jit_insn_add_ovf(f, + e.make_unsigned(jit_insn_neg(f, e.gpr_value(i.XO.RA))), + e.make_unsigned(jit_insn_add(f, e.gpr_value(i.XO.RB), ca))); + e.update_gpr_value(i.XO.RT, v); + + // if (i.XO.OE) { + // // With XER update. + // e.update_xer_with_overflow_and_carry(b.CreateExtractValue(v, 1)); + // } else { + // e.update_xer_with_carry(b.CreateExtractValue(v, 1)); + // } + + if (i.XO.Rc) { + // With cr0 update. + e.update_cr_with_cond(0, v, e.get_int64(0), true); + } + + return 0; +} + +XEEMITTER(subfmex, 0x7C0001D0, XO )(X64Emitter& e, jit_function_t f, InstrData& i) { + XEINSTRNOTIMPLEMENTED(); + return 1; +} + +XEEMITTER(subfzex, 0x7C000190, XO )(X64Emitter& e, jit_function_t f, InstrData& i) { + XEINSTRNOTIMPLEMENTED(); + return 1; +} + + +// Integer compare (A-4) + +XEEMITTER(cmp, 0x7C000000, X )(X64Emitter& e, jit_function_t f, InstrData& i) { + // if L = 0 then + // a <- EXTS((RA)[32:63]) + // b <- EXTS((RB)[32:63]) + // else + // a <- (RA) + // b <- (RB) + // if a < b then + // c <- 0b100 + // else if a > b then + // c <- 0b010 + // else + // c <- 0b001 + // CR[4×BF+32:4×BF+35] <- c || XER[SO] + + uint32_t BF = i.X.RT >> 2; + uint32_t L = i.X.RT & 1; + + jit_value_t lhs = e.gpr_value(i.X.RA); + jit_value_t rhs = e.gpr_value(i.X.RB); + if (!L) { + // 32-bit - truncate and sign extend. + lhs = e.trunc_to_int(lhs); + lhs = e.sign_extend(lhs, jit_type_nint); + rhs = e.trunc_to_int(rhs); + rhs = e.sign_extend(rhs, jit_type_nint); + } + + e.update_cr_with_cond(BF, lhs, rhs, true); + + return 0; +} + +XEEMITTER(cmpi, 0x2C000000, D )(X64Emitter& e, jit_function_t f, InstrData& i) { + // if L = 0 then + // a <- EXTS((RA)[32:63]) + // else + // a <- (RA) + // if a < EXTS(SI) then + // c <- 0b100 + // else if a > EXTS(SI) then + // c <- 0b010 + // else + // c <- 0b001 + // CR[4×BF+32:4×BF+35] <- c || XER[SO] + + uint32_t BF = i.D.RT >> 2; + uint32_t L = i.D.RT & 1; + + jit_value_t lhs = e.gpr_value(i.D.RA); + if (!L) { + // 32-bit - truncate and sign extend. + lhs = e.trunc_to_int(lhs); + lhs = e.sign_extend(lhs, jit_type_nint); + } + + jit_value_t rhs = e.get_int64(XEEXTS16(i.D.DS)); + e.update_cr_with_cond(BF, lhs, rhs, true); + + return 0; +} + +XEEMITTER(cmpl, 0x7C000040, X )(X64Emitter& e, jit_function_t f, InstrData& i) { + // if L = 0 then + // a <- i32.0 || (RA)[32:63] + // b <- i32.0 || (RB)[32:63] + // else + // a <- (RA) + // b <- (RB) + // if a u b then + // c <- 0b010 + // else + // c <- 0b001 + // CR[4×BF+32:4×BF+35] <- c || XER[SO] + + uint32_t BF = i.X.RT >> 2; + uint32_t L = i.X.RT & 1; + + jit_value_t lhs = e.gpr_value(i.X.RA); + jit_value_t rhs = e.gpr_value(i.X.RB); + if (!L) { + // 32-bit - truncate and zero extend. + lhs = e.trunc_to_int(lhs); + lhs = e.zero_extend(lhs, jit_type_nint); + rhs = e.trunc_to_int(rhs); + rhs = e.zero_extend(rhs, jit_type_nint); + } + + e.update_cr_with_cond(BF, lhs, rhs, false); + + return 0; +} + +XEEMITTER(cmpli, 0x28000000, D )(X64Emitter& e, jit_function_t f, InstrData& i) { + // if L = 0 then + // a <- i32.0 || (RA)[32:63] + // else + // a <- (RA) + // if a u i48.0 || SI then + // c <- 0b010 + // else + // c <- 0b001 + // CR[4×BF+32:4×BF+35] <- c || XER[SO] + + uint32_t BF = i.D.RT >> 2; + uint32_t L = i.D.RT & 1; + + jit_value_t lhs = e.gpr_value(i.D.RA); + if (!L) { + // 32-bit - truncate and zero extend. 
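+    // (Note, not part of the original patch: cmpli/cmpl are unsigned
+    // compares, so the 32-bit operand is zero-extended here rather than
+    // sign-extended as in cmp/cmpi above, and update_cr_with_cond is called
+    // with a final `false`, presumably selecting an unsigned comparison.)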
+ lhs = e.trunc_to_int(lhs); + lhs = e.zero_extend(lhs, jit_type_nint); + } + + jit_value_t rhs = e.get_int64(i.D.DS); + e.update_cr_with_cond(BF, lhs, rhs, false); + + return 0; +} + + +// Integer logical (A-5) + +XEEMITTER(andx, 0x7C000038, X )(X64Emitter& e, jit_function_t f, InstrData& i) { + // RA <- (RS) & (RB) + + jit_value_t v = jit_insn_and(f, e.gpr_value(i.X.RT), e.gpr_value(i.X.RB)); + e.update_gpr_value(i.X.RA, v); + + if (i.X.Rc) { + // With cr0 update. + e.update_cr_with_cond(0, v, e.get_int64(0), true); + } + + return 0; +} + +XEEMITTER(andcx, 0x7C000078, X )(X64Emitter& e, jit_function_t f, InstrData& i) { + // RA <- (RS) & ¬(RB) + + jit_value_t v = jit_insn_xor(f, e.gpr_value(i.X.RB), + e.get_int64(-1)); + v = jit_insn_and(f, e.gpr_value(i.X.RT), v); + e.update_gpr_value(i.X.RA, v); + + if (i.X.Rc) { + // With cr0 update. + e.update_cr_with_cond(0, v, e.get_int64(0), true); + } + + return 0; +} + +XEEMITTER(andix, 0x70000000, D )(X64Emitter& e, jit_function_t f, InstrData& i) { + // RA <- (RS) & (i48.0 || UI) + + jit_value_t v = jit_insn_and(f, e.gpr_value(i.D.RT), e.get_uint64(i.D.DS)); + e.update_gpr_value(i.D.RA, v); + + // With cr0 update. + e.update_cr_with_cond(0, v, e.get_int64(0), true); + + return 0; +} + +XEEMITTER(andisx, 0x74000000, D )(X64Emitter& e, jit_function_t f, InstrData& i) { + // RA <- (RS) & (i32.0 || UI || i16.0) + + jit_value_t v = jit_insn_and(f, e.gpr_value(i.D.RT), + e.get_uint64(((uint64_t)i.D.DS) << 16)); + e.update_gpr_value(i.D.RA, v); + + // With cr0 update. + e.update_cr_with_cond(0, v, e.get_int64(0), true); + + return 1; +} + +XEEMITTER(cntlzdx, 0x7C000074, X )(X64Emitter& e, jit_function_t f, InstrData& i) { + XEINSTRNOTIMPLEMENTED(); + return 1; +} + +// XEEMITTER(cntlzwx, 0x7C000034, X )(X64Emitter& e, jit_function_t f, InstrData& i) { +// // n <- 32 +// // do while n < 64 +// // if (RS) = 1 then leave n +// // n <- n + 1 +// // RA <- n - 32 + +// jit_value_t v = e.gpr_value(i.X.RT); +// v = e.trunc_to_int(v); + +// std::vector arg_types; +// arg_types.push_back(b.getInt32Ty()); +// Function* ctlz = Intrinsic::getDeclaration( +// e.fn()->getParent(), Intrinsic::ctlz, arg_types); +// jit_value_t count = b.CreateCall2(ctlz, v, b.getInt1(1)); + +// count = e.zero_extend(count, jit_type_nint); +// e.update_gpr_value(i.X.RA, count); + +// if (i.X.Rc) { +// // With cr0 update. +// e.update_cr_with_cond(0, count, e.get_int64(0), true); +// } + +// return 0; +// } + +XEEMITTER(eqvx, 0x7C000238, X )(X64Emitter& e, jit_function_t f, InstrData& i) { + XEINSTRNOTIMPLEMENTED(); + return 1; +} + +XEEMITTER(extsbx, 0x7C000774, X )(X64Emitter& e, jit_function_t f, InstrData& i) { + // s <- (RS)[56] + // RA[56:63] <- (RS)[56:63] + // RA[0:55] <- i56.s + + jit_value_t v = e.gpr_value(i.X.RT); + v = e.trunc_to_ubyte(v); + v = e.sign_extend(v, jit_type_nint); + e.update_gpr_value(i.X.RA, v); + + if (i.X.Rc) { + // Update cr0. 
+ e.update_cr_with_cond(0, v, e.get_int64(0), true); + } + + return 0; +} + +XEEMITTER(extshx, 0x7C000734, X )(X64Emitter& e, jit_function_t f, InstrData& i) { + XEINSTRNOTIMPLEMENTED(); + return 1; +} + +XEEMITTER(extswx, 0x7C0007B4, X )(X64Emitter& e, jit_function_t f, InstrData& i) { + XEINSTRNOTIMPLEMENTED(); + return 1; +} + +XEEMITTER(nandx, 0x7C0003B8, X )(X64Emitter& e, jit_function_t f, InstrData& i) { + XEINSTRNOTIMPLEMENTED(); + return 1; +} + +XEEMITTER(norx, 0x7C0000F8, X )(X64Emitter& e, jit_function_t f, InstrData& i) { + // RA <- ¬((RS) | (RB)) + + jit_value_t v = jit_insn_or(f, e.gpr_value(i.X.RT), e.gpr_value(i.X.RB)); + v = jit_insn_xor(f, v, e.get_int64(-1)); + e.update_gpr_value(i.X.RA, v); + + if (i.X.Rc) { + // With cr0 update. + e.update_cr_with_cond(0, v, e.get_int64(0), true); + } + + return 0; +} + +XEEMITTER(orx, 0x7C000378, X )(X64Emitter& e, jit_function_t f, InstrData& i) { + // RA <- (RS) | (RB) + + jit_value_t v = jit_insn_or(f, e.gpr_value(i.X.RT), e.gpr_value(i.X.RB)); + e.update_gpr_value(i.X.RA, v); + + if (i.X.Rc) { + // With cr0 update. + e.update_cr_with_cond(0, v, e.get_int64(0), true); + } + + return 0; +} + +XEEMITTER(orcx, 0x7C000338, X )(X64Emitter& e, jit_function_t f, InstrData& i) { + XEINSTRNOTIMPLEMENTED(); + return 1; +} + +XEEMITTER(ori, 0x60000000, D )(X64Emitter& e, jit_function_t f, InstrData& i) { + // RA <- (RS) | (i48.0 || UI) + + jit_value_t v = jit_insn_or(f, e.gpr_value(i.D.RT), + e.get_uint64((uint64_t)i.D.DS)); + e.update_gpr_value(i.D.RA, v); + + return 0; +} + +XEEMITTER(oris, 0x64000000, D )(X64Emitter& e, jit_function_t f, InstrData& i) { + // RA <- (RS) | (i32.0 || UI || i16.0) + + jit_value_t v = jit_insn_or(f, e.gpr_value(i.D.RT), + e.get_uint64(((uint64_t)i.D.DS) << 16)); + e.update_gpr_value(i.D.RA, v); + + return 0; +} + +XEEMITTER(xorx, 0x7C000278, X )(X64Emitter& e, jit_function_t f, InstrData& i) { + // RA <- (RS) XOR (RB) + + jit_value_t v = jit_insn_xor(f, e.gpr_value(i.X.RT), e.gpr_value(i.X.RB)); + e.update_gpr_value(i.X.RA, v); + + if (i.X.Rc) { + // With cr0 update. 
+ e.update_cr_with_cond(0, v, e.get_int64(0), true); + } + + return 0; +} + +XEEMITTER(xori, 0x68000000, D )(X64Emitter& e, jit_function_t f, InstrData& i) { + // RA <- (RS) XOR (i48.0 || UI) + + jit_value_t v = jit_insn_xor(f, e.gpr_value(i.D.RT), + e.get_uint64((uint64_t)i.D.DS)); + e.update_gpr_value(i.D.RA, v); + + return 0; +} + +XEEMITTER(xoris, 0x6C000000, D )(X64Emitter& e, jit_function_t f, InstrData& i) { + // RA <- (RS) XOR (i32.0 || UI || i16.0) + + jit_value_t v = jit_insn_xor(f, e.gpr_value(i.D.RT), + e.get_uint64(((uint64_t)i.D.DS) << 16)); + e.update_gpr_value(i.D.RA, v); + + return 0; +} + + +// Integer rotate (A-6) + +XEEMITTER(rldclx, 0x78000010, MDS)(X64Emitter& e, jit_function_t f, InstrData& i) { + XEINSTRNOTIMPLEMENTED(); + return 1; +} + +XEEMITTER(rldcrx, 0x78000012, MDS)(X64Emitter& e, jit_function_t f, InstrData& i) { + XEINSTRNOTIMPLEMENTED(); + return 1; +} + +XEEMITTER(rldicx, 0x78000008, MD )(X64Emitter& e, jit_function_t f, InstrData& i) { + XEINSTRNOTIMPLEMENTED(); + return 1; +} + +XEEMITTER(rldiclx, 0x78000000, MD )(X64Emitter& e, jit_function_t f, InstrData& i) { + // n <- sh[5] || sh[0:4] + // r <- ROTL64((RS), n) + // b <- mb[5] || mb[0:4] + // m <- MASK(b, 63) + // RA <- r & m + + // uint32_t sh = (i.MD.SH5 << 5) | i.MD.SH; + // uint32_t mb = (i.MD.MB5 << 5) | i.MD.MB; + + // jit_value_t v = e.gpr_value(i.MD.RS); + // if (sh) { + // v = // rotate by sh + // } + // if (mb) { + // v = // mask b mb->63 + // } + // e.update_gpr_value(i.MD.RA, v); + + // if (i.MD.Rc) { + // // With cr0 update. + // e.update_cr_with_cond(0, v, e.get_int64(0), true); + // } + + XEINSTRNOTIMPLEMENTED(); + return 1; +} + +XEEMITTER(rldicrx, 0x78000004, MD )(X64Emitter& e, jit_function_t f, InstrData& i) { + XEINSTRNOTIMPLEMENTED(); + return 1; +} + +XEEMITTER(rldimix, 0x7800000C, MD )(X64Emitter& e, jit_function_t f, InstrData& i) { + XEINSTRNOTIMPLEMENTED(); + return 1; +} + +XEEMITTER(rlwimix, 0x50000000, M )(X64Emitter& e, jit_function_t f, InstrData& i) { + // n <- SH + // r <- ROTL32((RS)[32:63], n) + // m <- MASK(MB+32, ME+32) + // RA <- r&m | (RA)&¬m + + // ROTL32(x, y) = rotl(i64.(x||x), y) + jit_value_t v = jit_insn_and(f, e.gpr_value(i.M.RT), + e.get_uint64(UINT32_MAX)); + v = jit_insn_or(f, jit_insn_shl(f, v, e.get_uint32(32)), v); + // (v << shift) | (v >> (32 - shift)); + v = jit_insn_or(f, jit_insn_shl(f, v, e.get_uint32(i.M.SH)), + jit_insn_ushr(f, v, e.get_uint32(32 - i.M.SH))); + uint64_t m = XEMASK(i.M.MB + 32, i.M.ME + 32); + v = jit_insn_and(f, v, e.get_uint64(m)); + v = jit_insn_or(f, v, jit_insn_and(f, e.gpr_value(i.M.RA), + e.get_uint64(~m))); + e.update_gpr_value(i.M.RA, v); + + if (i.M.Rc) { + // With cr0 update. + e.update_cr_with_cond(0, v, e.get_int64(0), true); + } + + return 0; +} + +XEEMITTER(rlwinmx, 0x54000000, M )(X64Emitter& e, jit_function_t f, InstrData& i) { + // n <- SH + // r <- ROTL32((RS)[32:63], n) + // m <- MASK(MB+32, ME+32) + // RA <- r & m + + // The compiler will generate a bunch of these for the special case of SH=0. + // Which seems to just select some bits and set cr0 for use with a branch. + // We can detect this and do less work. + if (!i.M.SH) { + jit_value_t v = jit_insn_and(f, + e.trunc_to_int(e.gpr_value(i.M.RT)), + e.get_uint32((uint32_t)XEMASK(i.M.MB + 32, i.M.ME + 32))); + v = e.zero_extend(v, jit_type_nint); + e.update_gpr_value(i.M.RA, v); + if (i.M.Rc) { + // With cr0 update. 
+ e.update_cr_with_cond(0, v, e.get_int64(0), true); + } + return 0; + } + + // ROTL32(x, y) = rotl(i64.(x||x), y) + jit_value_t v = jit_insn_and(f, e.gpr_value(i.M.RT), e.get_uint64(UINT32_MAX)); + v = jit_insn_or(f, jit_insn_shl(f, v, e.get_uint32(32)), v); + // (v << shift) | (v >> (32 - shift)); + v = jit_insn_or(f, jit_insn_shl(f, v, e.get_uint32(i.M.SH)), + jit_insn_ushr(f, v, e.get_uint32(32 - i.M.SH))); + v = jit_insn_and(f, v, e.get_uint64(XEMASK(i.M.MB + 32, i.M.ME + 32))); + e.update_gpr_value(i.M.RA, v); + + if (i.M.Rc) { + // With cr0 update. + e.update_cr_with_cond(0, v, e.get_int64(0), true); + } + + return 0; +} + +XEEMITTER(rlwnmx, 0x5C000000, M )(X64Emitter& e, jit_function_t f, InstrData& i) { + XEINSTRNOTIMPLEMENTED(); + return 1; +} + + +// Integer shift (A-7) + +XEEMITTER(sldx, 0x7C000036, X )(X64Emitter& e, jit_function_t f, InstrData& i) { + XEINSTRNOTIMPLEMENTED(); + return 1; +} + +XEEMITTER(slwx, 0x7C000030, X )(X64Emitter& e, jit_function_t f, InstrData& i) { + // n <- (RB)[59:63] + // r <- ROTL32((RS)[32:63], n) + // if (RB)[58] = 0 then + // m <- MASK(32, 63-n) + // else + // m <- i64.0 + // RA <- r & m + + jit_value_t v = jit_insn_shl(f, e.gpr_value(i.X.RT), e.gpr_value(i.X.RB)); + v = jit_insn_and(f, v, e.get_uint64(UINT32_MAX)); + e.update_gpr_value(i.X.RA, v); + + if (i.X.Rc) { + // With cr0 update. + e.update_cr_with_cond(0, v, e.get_int64(0), true); + } + + return 0; +} + +XEEMITTER(sradx, 0x7C000634, X )(X64Emitter& e, jit_function_t f, InstrData& i) { + XEINSTRNOTIMPLEMENTED(); + return 1; +} + +XEEMITTER(sradix, 0x7C000674, XS )(X64Emitter& e, jit_function_t f, InstrData& i) { + XEINSTRNOTIMPLEMENTED(); + return 1; +} + +XEEMITTER(srawx, 0x7C000630, X )(X64Emitter& e, jit_function_t f, InstrData& i) { + XEINSTRNOTIMPLEMENTED(); + return 1; +} + +XEEMITTER(srawix, 0x7C000670, X )(X64Emitter& e, jit_function_t f, InstrData& i) { + // n <- SH + // r <- ROTL32((RS)[32:63], 64-n) + // m <- MASK(n+32, 63) + // s <- (RS)[32] + // RA <- r&m | (i64.s)&¬m + // CA <- s & ((r&¬m)[32:63]≠0) + + jit_value_t rs64 = e.gpr_value(i.X.RT); + jit_value_t rs32 = e.trunc_to_int(rs64); + + jit_value_t v; + jit_value_t ca; + if (!i.X.RB) { + // No shift, just a fancy sign extend and CA clearer. + v = rs32; + ca = e.get_int64(0); + } else { + v = jit_insn_sshr(f, rs32, e.get_uint32(i.X.RB)); + + // CA is set to 1 if the low-order 32 bits of (RS) contain a negative number + // and any 1-bits are shifted out of position 63; otherwise CA is set to 0. + ca = jit_insn_and(f, jit_insn_lt(f, v, e.get_int32(0)), + jit_insn_lt(f, rs64, e.get_int64(0))); + } + v = e.sign_extend(v, jit_type_nint); + e.update_gpr_value(i.X.RA, v); + e.update_xer_with_carry(ca); + + if (i.X.Rc) { + // With cr0 update. 
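+    // (Note, not part of the original patch: the ISA defines CA here as the
+    // sign of (RS)[32:63] AND "at least one 1-bit shifted out"; the `ca`
+    // computed above checks the signs of the shifted result and of the full
+    // 64-bit register instead, so it appears to be an approximation that
+    // does not verify that any 1-bits were actually shifted out.)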
+ e.update_cr_with_cond(0, v, e.get_int64(0), true); + } + + return 0; +} + +XEEMITTER(srdx, 0x7C000436, X )(X64Emitter& e, jit_function_t f, InstrData& i) { + XEINSTRNOTIMPLEMENTED(); + return 1; +} + +XEEMITTER(srwx, 0x7C000430, X )(X64Emitter& e, jit_function_t f, InstrData& i) { + XEINSTRNOTIMPLEMENTED(); + return 1; +} + + +void X64RegisterEmitCategoryALU() { + XEREGISTERINSTR(addx, 0x7C000214); + XEREGISTERINSTR(addcx, 0X7C000014); + XEREGISTERINSTR(addex, 0x7C000114); + XEREGISTERINSTR(addi, 0x38000000); + XEREGISTERINSTR(addic, 0x30000000); + XEREGISTERINSTR(addicx, 0x34000000); + XEREGISTERINSTR(addis, 0x3C000000); + XEREGISTERINSTR(addmex, 0x7C0001D4); + XEREGISTERINSTR(addzex, 0x7C000194); + XEREGISTERINSTR(divdx, 0x7C0003D2); + XEREGISTERINSTR(divdux, 0x7C000392); + //XEREGISTERINSTR(divwx, 0x7C0003D6); + //XEREGISTERINSTR(divwux, 0x7C000396); + XEREGISTERINSTR(mulhdx, 0x7C000092); + XEREGISTERINSTR(mulhdux, 0x7C000012); + XEREGISTERINSTR(mulhwx, 0x7C000096); + XEREGISTERINSTR(mulhwux, 0x7C000016); + XEREGISTERINSTR(mulldx, 0x7C0001D2); + XEREGISTERINSTR(mulli, 0x1C000000); + XEREGISTERINSTR(mullwx, 0x7C0001D6); + //XEREGISTERINSTR(negx, 0x7C0000D0); + XEREGISTERINSTR(subfx, 0x7C000050); + XEREGISTERINSTR(subfcx, 0x7C000010); + //XEREGISTERINSTR(subficx, 0x20000000); + XEREGISTERINSTR(subfex, 0x7C000110); + XEREGISTERINSTR(subfmex, 0x7C0001D0); + XEREGISTERINSTR(subfzex, 0x7C000190); + XEREGISTERINSTR(cmp, 0x7C000000); + XEREGISTERINSTR(cmpi, 0x2C000000); + XEREGISTERINSTR(cmpl, 0x7C000040); + XEREGISTERINSTR(cmpli, 0x28000000); + XEREGISTERINSTR(andx, 0x7C000038); + XEREGISTERINSTR(andcx, 0x7C000078); + XEREGISTERINSTR(andix, 0x70000000); + XEREGISTERINSTR(andisx, 0x74000000); + XEREGISTERINSTR(cntlzdx, 0x7C000074); + //XEREGISTERINSTR(cntlzwx, 0x7C000034); + XEREGISTERINSTR(eqvx, 0x7C000238); + XEREGISTERINSTR(extsbx, 0x7C000774); + XEREGISTERINSTR(extshx, 0x7C000734); + XEREGISTERINSTR(extswx, 0x7C0007B4); + XEREGISTERINSTR(nandx, 0x7C0003B8); + XEREGISTERINSTR(norx, 0x7C0000F8); + XEREGISTERINSTR(orx, 0x7C000378); + XEREGISTERINSTR(orcx, 0x7C000338); + XEREGISTERINSTR(ori, 0x60000000); + XEREGISTERINSTR(oris, 0x64000000); + XEREGISTERINSTR(xorx, 0x7C000278); + XEREGISTERINSTR(xori, 0x68000000); + XEREGISTERINSTR(xoris, 0x6C000000); + XEREGISTERINSTR(rldclx, 0x78000010); + XEREGISTERINSTR(rldcrx, 0x78000012); + XEREGISTERINSTR(rldicx, 0x78000008); + XEREGISTERINSTR(rldiclx, 0x78000000); + XEREGISTERINSTR(rldicrx, 0x78000004); + XEREGISTERINSTR(rldimix, 0x7800000C); + XEREGISTERINSTR(rlwimix, 0x50000000); + XEREGISTERINSTR(rlwinmx, 0x54000000); + XEREGISTERINSTR(rlwnmx, 0x5C000000); + XEREGISTERINSTR(sldx, 0x7C000036); + XEREGISTERINSTR(slwx, 0x7C000030); + XEREGISTERINSTR(sradx, 0x7C000634); + XEREGISTERINSTR(sradix, 0x7C000674); + XEREGISTERINSTR(srawx, 0x7C000630); + XEREGISTERINSTR(srawix, 0x7C000670); + XEREGISTERINSTR(srdx, 0x7C000436); + XEREGISTERINSTR(srwx, 0x7C000430); +} + + +} // namespace x64 +} // namespace cpu +} // namespace xe diff --git a/src/xenia/cpu/x64/x64_emit_control.cc b/src/xenia/cpu/x64/x64_emit_control.cc new file mode 100644 index 000000000..68a0becbb --- /dev/null +++ b/src/xenia/cpu/x64/x64_emit_control.cc @@ -0,0 +1,686 @@ +/* + ****************************************************************************** + * Xenia : Xbox 360 Emulator Research Project * + ****************************************************************************** + * Copyright 2013 Ben Vanik. All rights reserved. 
* + * Released under the BSD license - see LICENSE in the root for more details. * + ****************************************************************************** + */ + +#include + +#include + + +using namespace xe::cpu; +using namespace xe::cpu::ppc; +using namespace xe::cpu::sdb; + + +namespace xe { +namespace cpu { +namespace x64 { + + +int XeEmitIndirectBranchTo( + X64Emitter& e, jit_function_t f, const char* src, uint32_t cia, + bool lk, uint32_t reg) { + // TODO(benvanik): run a DFA pass to see if we can detect whether this is + // a normal function return that is pulling the LR from the stack that + // it set in the prolog. If so, we can omit the dynamic check! + + // NOTE: we avoid spilling registers until we know that the target is not + // a basic block within this function. + + jit_value_t target; + switch (reg) { + case kXEPPCRegLR: + target = e.lr_value(); + break; + case kXEPPCRegCTR: + target = e.ctr_value(); + break; + default: + XEASSERTALWAYS(); + return 1; + } + + // Dynamic test when branching to LR, which is usually used for the return. + // We only do this if LK=0 as returns wouldn't set LR. + // Ideally it's a return and we can just do a simple ret and be done. + // If it's not, we fall through to the full indirection logic. + if (!lk && reg == kXEPPCRegLR) { + // The return block will spill registers for us. + // TODO(benvanik): 'lr_mismatch' debug info. + jit_value_t lr_cmp = jit_insn_eq(f, target, jit_value_get_param(f, 1)); + e.branch_to_return_if(lr_cmp); + } + + // Defer to the generator, which will do fancy things. + bool likely_local = !lk && reg == kXEPPCRegCTR; + return e.GenerateIndirectionBranch(cia, target, lk, likely_local); +} + +int XeEmitBranchTo( + X64Emitter& e, jit_function_t f, const char* src, uint32_t cia, + bool lk, jit_value_t condition) { + FunctionBlock* fn_block = e.fn_block(); + + // Fast-path for branches to other blocks. + // Only valid when not tracing branches. + if (!FLAGS_trace_branches && + fn_block->outgoing_type == FunctionBlock::kTargetBlock) { + e.branch_to_block_if(fn_block->outgoing_address, condition); + return 0; + } + + // Only branch of conditionals when we have one. + jit_label_t post_jump_label = jit_label_undefined; + if (condition) { + // TODO(benvanik): add debug info for this? + // char name[32]; + // xesnprintfa(name, XECOUNT(name), "loc_%.8X_bcx", i.address); + jit_insn_branch_if_not(f, condition, &post_jump_label); + } + + if (FLAGS_trace_branches) { + e.TraceBranch(cia); + } + + // Get the basic block and switch behavior based on outgoing type. + int result = 0; + switch (fn_block->outgoing_type) { + case FunctionBlock::kTargetBlock: + // Taken care of above usually. + e.branch_to_block(fn_block->outgoing_address); + break; + case FunctionBlock::kTargetFunction: + { + // Spill all registers to memory. + // TODO(benvanik): only spill ones used by the target function? Use + // calling convention flags on the function to not spill temp + // registers? + e.SpillRegisters(); + + XEASSERTNOTNULL(fn_block->outgoing_function); + // TODO(benvanik): check to see if this is the last block in the function. + // This would enable tail calls/etc. + bool is_end = false; + if (!lk || is_end) { + // Tail. No need to refill the local register values, just return. + // We optimize this by passing in the LR from our parent instead of the + // next instruction. This allows the return from our callee to pop + // all the way up. 
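+        // (Illustrative note, not part of the original patch: if A calls B
+        // with LK=1 and B ends in an LK=0 branch to C, handing A's return
+        // address straight to C lets C's return pop directly back to A,
+        // skipping B's frame entirely. jit_value_get_param(f, 1) is assumed
+        // to be that incoming LR parameter, as in XeEmitIndirectBranchTo
+        // above.)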
+ e.call_function(fn_block->outgoing_function, + jit_value_get_param(f, 1), true); + jit_insn_return(f, NULL); + } else { + // Will return here eventually. + // Refill registers from state. + e.call_function(fn_block->outgoing_function, + e.get_uint64(cia + 4), false); + e.FillRegisters(); + } + break; + } + case FunctionBlock::kTargetLR: + { + // An indirect jump. + printf("INDIRECT JUMP VIA LR: %.8X\n", cia); + result = XeEmitIndirectBranchTo(e, f, src, cia, lk, kXEPPCRegLR); + break; + } + case FunctionBlock::kTargetCTR: + { + // An indirect jump. + printf("INDIRECT JUMP VIA CTR: %.8X\n", cia); + result = XeEmitIndirectBranchTo(e, f, src, cia, lk, kXEPPCRegCTR); + break; + } + default: + case FunctionBlock::kTargetNone: + XEASSERTALWAYS(); + result = 1; + break; + } + + if (condition) { + jit_insn_label(f, &post_jump_label); + } + + return result; +} + + +XEEMITTER(bx, 0x48000000, I )(X64Emitter& e, jit_function_t f, InstrData& i) { + // if AA then + // NIA <- EXTS(LI || 0b00) + // else + // NIA <- CIA + EXTS(LI || 0b00) + // if LK then + // LR <- CIA + 4 + + uint32_t nia; + if (i.I.AA) { + nia = XEEXTS26(i.I.LI << 2); + } else { + nia = i.address + XEEXTS26(i.I.LI << 2); + } + if (i.I.LK) { + e.update_lr_value(e.get_uint64(i.address + 4)); + } + + return XeEmitBranchTo(e, f, "bx", i.address, i.I.LK, NULL); +} + +XEEMITTER(bcx, 0x40000000, B )(X64Emitter& e, jit_function_t f, InstrData& i) { + // if ¬BO[2] then + // CTR <- CTR - 1 + // ctr_ok <- BO[2] | ((CTR[0:63] != 0) XOR BO[3]) + // cond_ok <- BO[0] | (CR[BI+32] ≡ BO[1]) + // if ctr_ok & cond_ok then + // if AA then + // NIA <- EXTS(BD || 0b00) + // else + // NIA <- CIA + EXTS(BD || 0b00) + // if LK then + // LR <- CIA + 4 + + // NOTE: the condition bits are reversed! + // 01234 (docs) + // 43210 (real) + + // TODO(benvanik): this may be wrong and overwrite LRs when not desired! + // The docs say always, though... + if (i.B.LK) { + e.update_lr_value(e.get_uint64(i.address + 4)); + } + + jit_value_t ctr_ok = NULL; + if (XESELECTBITS(i.B.BO, 2, 2)) { + // Ignore ctr. + } else { + // Decrement counter. + jit_value_t ctr = e.ctr_value(); + ctr = jit_insn_sub(f, ctr, e.get_int64(1)); + e.update_ctr_value(ctr); + + // Ctr check. + if (XESELECTBITS(i.B.BO, 1, 1)) { + ctr_ok = jit_insn_eq(f, ctr, e.get_int64(0)); + } else { + ctr_ok = jit_insn_ne(f, ctr, e.get_int64(0)); + } + } + + jit_value_t cond_ok = NULL; + if (XESELECTBITS(i.B.BO, 4, 4)) { + // Ignore cond. + } else { + jit_value_t cr = e.cr_value(i.B.BI >> 2); + cr = jit_insn_and(f, cr, e.get_uint32(1 << (i.B.BI & 3))); + if (XESELECTBITS(i.B.BO, 3, 3)) { + cond_ok = jit_insn_ne(f, cr, e.get_int64(0)); + } else { + cond_ok = jit_insn_eq(f, cr, e.get_int64(0)); + } + } + + // We do a bit of optimization here to make the llvm assembly easier to read. + jit_value_t ok = NULL; + if (ctr_ok && cond_ok) { + ok = jit_insn_and(f, ctr_ok, cond_ok); + } else if (ctr_ok) { + ok = ctr_ok; + } else if (cond_ok) { + ok = cond_ok; + } + + uint32_t nia; + if (i.B.AA) { + nia = XEEXTS26(i.B.BD << 2); + } else { + nia = i.address + XEEXTS26(i.B.BD << 2); + } + if (XeEmitBranchTo(e, f, "bcx", i.address, i.B.LK, ok)) { + return 1; + } + + return 0; +} + +XEEMITTER(bcctrx, 0x4C000420, XL )(X64Emitter& e, jit_function_t f, InstrData& i) { + // cond_ok <- BO[0] | (CR[BI+32] ≡ BO[1]) + // if cond_ok then + // NIA <- CTR[0:61] || 0b00 + // if LK then + // LR <- CIA + 4 + + // NOTE: the condition bits are reversed! 
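+  // (Note, not part of the original patch: given that reversal,
+  // XESELECTBITS(BO, 4, 4) below tests what the ISA documentation calls
+  // BO[0], the bit that forces cond_ok true regardless of CR[BI].)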
+ // 01234 (docs) + // 43210 (real) + + // TODO(benvanik): this may be wrong and overwrite LRs when not desired! + // The docs say always, though... + if (i.XL.LK) { + e.update_lr_value(e.get_uint64(i.address + 4)); + } + + jit_value_t cond_ok = NULL; + if (XESELECTBITS(i.XL.BO, 4, 4)) { + // Ignore cond. + } else { + jit_value_t cr = e.cr_value(i.XL.BI >> 2); + cr = jit_insn_and(f, cr, e.get_uint64(1 << (i.XL.BI & 3))); + if (XESELECTBITS(i.XL.BO, 3, 3)) { + cond_ok = jit_insn_ne(f, cr, e.get_int64(0)); + } else { + cond_ok = jit_insn_eq(f, cr, e.get_int64(0)); + } + } + + // We do a bit of optimization here to make the llvm assembly easier to read. + jit_value_t ok = NULL; + if (cond_ok) { + ok = cond_ok; + } + + if (XeEmitBranchTo(e, f, "bcctrx", i.address, i.XL.LK, ok)) { + return 1; + } + + return 0; +} + +XEEMITTER(bclrx, 0x4C000020, XL )(X64Emitter& e, jit_function_t f, InstrData& i) { + // if ¬BO[2] then + // CTR <- CTR - 1 + // ctr_ok <- BO[2] | ((CTR[0:63] != 0) XOR BO[3] + // cond_ok <- BO[0] | (CR[BI+32] ≡ BO[1]) + // if ctr_ok & cond_ok then + // NIA <- LR[0:61] || 0b00 + // if LK then + // LR <- CIA + 4 + + // NOTE: the condition bits are reversed! + // 01234 (docs) + // 43210 (real) + + // TODO(benvanik): this may be wrong and overwrite LRs when not desired! + // The docs say always, though... + if (i.XL.LK) { + e.update_lr_value(e.get_uint64(i.address + 4)); + } + + jit_value_t ctr_ok = NULL; + if (XESELECTBITS(i.XL.BO, 2, 2)) { + // Ignore ctr. + } else { + // Decrement counter. + jit_value_t ctr = e.ctr_value(); + ctr = jit_insn_sub(f, ctr, e.get_int64(1)); + + // Ctr check. + if (XESELECTBITS(i.XL.BO, 1, 1)) { + ctr_ok = jit_insn_eq(f, ctr, e.get_int64(0)); + } else { + ctr_ok = jit_insn_ne(f, ctr, e.get_int64(0)); + } + } + + jit_value_t cond_ok = NULL; + if (XESELECTBITS(i.XL.BO, 4, 4)) { + // Ignore cond. + } else { + jit_value_t cr = e.cr_value(i.XL.BI >> 2); + cr = jit_insn_and(f, cr, e.get_uint32(1 << (i.XL.BI & 3))); + if (XESELECTBITS(i.XL.BO, 3, 3)) { + cond_ok = jit_insn_ne(f, cr, e.get_int64(0)); + } else { + cond_ok = jit_insn_eq(f, cr, e.get_int64(0)); + } + } + + // We do a bit of optimization here to make the llvm assembly easier to read. 
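+  // (Note, not part of the original patch: a NULL `ok` below means neither
+  // the CTR test nor the condition test applies, so XeEmitBranchTo is handed
+  // no condition and emits an unconditional branch; otherwise `ok` is the
+  // AND of whichever tests were generated.)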
+ jit_value_t ok = NULL; + if (ctr_ok && cond_ok) { + ok = jit_insn_and(f, ctr_ok, cond_ok); + } else if (ctr_ok) { + ok = ctr_ok; + } else if (cond_ok) { + ok = cond_ok; + } + + if (XeEmitBranchTo(e, f, "bclrx", i.address, i.XL.LK, ok)) { + return 1; + } + + return 0; +} + + +// Condition register logical (A-23) + +XEEMITTER(crand, 0x4C000202, XL )(X64Emitter& e, jit_function_t f, InstrData& i) { + XEINSTRNOTIMPLEMENTED(); + return 1; +} + +XEEMITTER(crandc, 0x4C000102, XL )(X64Emitter& e, jit_function_t f, InstrData& i) { + XEINSTRNOTIMPLEMENTED(); + return 1; +} + +XEEMITTER(creqv, 0x4C000242, XL )(X64Emitter& e, jit_function_t f, InstrData& i) { + XEINSTRNOTIMPLEMENTED(); + return 1; +} + +XEEMITTER(crnand, 0x4C0001C2, XL )(X64Emitter& e, jit_function_t f, InstrData& i) { + XEINSTRNOTIMPLEMENTED(); + return 1; +} + +XEEMITTER(crnor, 0x4C000042, XL )(X64Emitter& e, jit_function_t f, InstrData& i) { + XEINSTRNOTIMPLEMENTED(); + return 1; +} + +XEEMITTER(cror, 0x4C000382, XL )(X64Emitter& e, jit_function_t f, InstrData& i) { + XEINSTRNOTIMPLEMENTED(); + return 1; +} + +XEEMITTER(crorc, 0x4C000342, XL )(X64Emitter& e, jit_function_t f, InstrData& i) { + XEINSTRNOTIMPLEMENTED(); + return 1; +} + +XEEMITTER(crxor, 0x4C000182, XL )(X64Emitter& e, jit_function_t f, InstrData& i) { + XEINSTRNOTIMPLEMENTED(); + return 1; +} + +XEEMITTER(mcrf, 0x4C000000, XL )(X64Emitter& e, jit_function_t f, InstrData& i) { + XEINSTRNOTIMPLEMENTED(); + return 1; +} + + +// System linkage (A-24) + +XEEMITTER(sc, 0x44000002, SC )(X64Emitter& e, jit_function_t f, InstrData& i) { + XEINSTRNOTIMPLEMENTED(); + return 1; +} + + +// Trap (A-25) + +int XeEmitTrap(X64Emitter& e, jit_function_t f, InstrData& i, + jit_value_t va, jit_value_t vb, uint32_t TO) { + // if (a < b) & TO[0] then TRAP + // if (a > b) & TO[1] then TRAP + // if (a = b) & TO[2] then TRAP + // if (a u b) & TO[4] then TRAP + // Bits swapped: + // 01234 + // 43210 + + if (!TO) { + return 0; + } + + // TODO(benvanik): port from LLVM + XEASSERTALWAYS(); + + // BasicBlock* after_bb = BasicBlock::Create(*e.context(), "", e.fn(), + // e.GetNextBasicBlock()); + // BasicBlock* trap_bb = BasicBlock::Create(*e.context(), "", e.fn(), + // after_bb); + + // // Create the basic blocks (so we can chain). + // std::vector bbs; + // if (TO & (1 << 4)) { + // bbs.push_back(BasicBlock::Create(*e.context(), "", e.fn(), trap_bb)); + // } + // if (TO & (1 << 3)) { + // bbs.push_back(BasicBlock::Create(*e.context(), "", e.fn(), trap_bb)); + // } + // if (TO & (1 << 2)) { + // bbs.push_back(BasicBlock::Create(*e.context(), "", e.fn(), trap_bb)); + // } + // if (TO & (1 << 1)) { + // bbs.push_back(BasicBlock::Create(*e.context(), "", e.fn(), trap_bb)); + // } + // if (TO & (1 << 0)) { + // bbs.push_back(BasicBlock::Create(*e.context(), "", e.fn(), trap_bb)); + // } + // bbs.push_back(after_bb); + + // // Jump to the first bb. + // b.CreateBr(bbs.front()); + + // // Setup each basic block. 
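+  // (Porting sketch, not part of the original patch: in libjit the per-bit
+  // BasicBlock chain above could collapse into a single predicate, e.g.
+  // OR-ing jit_insn_lt/jit_insn_gt/jit_insn_eq results for each enabled TO
+  // bit (with make_unsigned operands for the unsigned TO cases) and then
+  // doing one jit_insn_branch_if to a shared trap label.)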
+ // std::vector::iterator it = bbs.begin(); + // if (TO & (1 << 4)) { + // // a < b + // BasicBlock* bb = *(it++); + // b.SetInsertPoint(bb); + // jit_value_t cmp = b.CreateICmpSLT(va, vb); + // b.CreateCondBr(cmp, trap_bb, *it); + // } + // if (TO & (1 << 3)) { + // // a > b + // BasicBlock* bb = *(it++); + // b.SetInsertPoint(bb); + // jit_value_t cmp = b.CreateICmpSGT(va, vb); + // b.CreateCondBr(cmp, trap_bb, *it); + // } + // if (TO & (1 << 2)) { + // // a = b + // BasicBlock* bb = *(it++); + // b.SetInsertPoint(bb); + // jit_value_t cmp = b.CreateICmpEQ(va, vb); + // b.CreateCondBr(cmp, trap_bb, *it); + // } + // if (TO & (1 << 1)) { + // // a u b + // BasicBlock* bb = *(it++); + // b.SetInsertPoint(bb); + // jit_value_t cmp = b.CreateICmpUGT(va, vb); + // b.CreateCondBr(cmp, trap_bb, *it); + // } + + // // Create trap BB. + // b.SetInsertPoint(trap_bb); + // e.SpillRegisters(); + // // TODO(benvanik): use @llvm.debugtrap? could make debugging better + // b.CreateCall2(e.gen_module()->getFunction("XeTrap"), + // e.fn()->arg_begin(), + // e.get_uint64(i.address)); + // b.CreateBr(after_bb); + + // // Resume. + // b.SetInsertPoint(after_bb); + + return 0; +} + +XEEMITTER(td, 0x7C000088, X )(X64Emitter& e, jit_function_t f, InstrData& i) { + // a <- (RA) + // b <- (RB) + // if (a < b) & TO[0] then TRAP + // if (a > b) & TO[1] then TRAP + // if (a = b) & TO[2] then TRAP + // if (a u b) & TO[4] then TRAP + return XeEmitTrap(e, f, i, + e.gpr_value(i.X.RA), + e.gpr_value(i.X.RB), + i.X.RT); +} + +XEEMITTER(tdi, 0x08000000, D )(X64Emitter& e, jit_function_t f, InstrData& i) { + // a <- (RA) + // if (a < EXTS(SI)) & TO[0] then TRAP + // if (a > EXTS(SI)) & TO[1] then TRAP + // if (a = EXTS(SI)) & TO[2] then TRAP + // if (a u EXTS(SI)) & TO[4] then TRAP + return XeEmitTrap(e, f, i, + e.gpr_value(i.D.RA), + e.get_int64(XEEXTS16(i.D.DS)), + i.D.RT); +} + +XEEMITTER(tw, 0x7C000008, X )(X64Emitter& e, jit_function_t f, InstrData& i) { + // a <- EXTS((RA)[32:63]) + // b <- EXTS((RB)[32:63]) + // if (a < b) & TO[0] then TRAP + // if (a > b) & TO[1] then TRAP + // if (a = b) & TO[2] then TRAP + // if (a u b) & TO[4] then TRAP + return XeEmitTrap(e, f, i, + e.sign_extend(e.trunc_to_int(e.gpr_value(i.X.RA)), + jit_type_nint), + e.sign_extend(e.trunc_to_int(e.gpr_value(i.X.RB)), + jit_type_nint), + i.X.RT); +} + +XEEMITTER(twi, 0x0C000000, D )(X64Emitter& e, jit_function_t f, InstrData& i) { + // a <- EXTS((RA)[32:63]) + // if (a < EXTS(SI)) & TO[0] then TRAP + // if (a > EXTS(SI)) & TO[1] then TRAP + // if (a = EXTS(SI)) & TO[2] then TRAP + // if (a u EXTS(SI)) & TO[4] then TRAP + return XeEmitTrap(e, f, i, + e.sign_extend(e.trunc_to_int(e.gpr_value(i.D.RA)), + jit_type_nint), + e.get_int64(XEEXTS16(i.D.DS)), + i.D.RT); +} + + +// Processor control (A-26) + +XEEMITTER(mfcr, 0x7C000026, X )(X64Emitter& e, jit_function_t f, InstrData& i) { + XEINSTRNOTIMPLEMENTED(); + return 1; +} + +XEEMITTER(mfspr, 0x7C0002A6, XFX)(X64Emitter& e, jit_function_t f, InstrData& i) { + // n <- spr[5:9] || spr[0:4] + // if length(SPR(n)) = 64 then + // RT <- SPR(n) + // else + // RT <- i32.0 || SPR(n) + + const uint32_t n = ((i.XFX.spr & 0x1F) << 5) | ((i.XFX.spr >> 5) & 0x1F); + jit_value_t v = NULL; + switch (n) { + case 1: + // XER + v = e.xer_value(); + break; + case 8: + // LR + v = e.lr_value(); + break; + case 9: + // CTR + v = e.ctr_value(); + break; + default: + XEINSTRNOTIMPLEMENTED(); + return 1; + } + + e.update_gpr_value(i.XFX.RT, v); + + return 0; +} + +XEEMITTER(mftb, 0x7C0002E6, XFX)(X64Emitter& e, 
jit_function_t f, InstrData& i) { + XEINSTRNOTIMPLEMENTED(); + return 1; +} + +XEEMITTER(mtcrf, 0x7C000120, XFX)(X64Emitter& e, jit_function_t f, InstrData& i) { + XEINSTRNOTIMPLEMENTED(); + return 1; +} + +XEEMITTER(mtspr, 0x7C0003A6, XFX)(X64Emitter& e, jit_function_t f, InstrData& i) { + // n <- spr[5:9] || spr[0:4] + // if length(SPR(n)) = 64 then + // SPR(n) <- (RS) + // else + // SPR(n) <- (RS)[32:63] + + jit_value_t v = e.gpr_value(i.XFX.RT); + + const uint32_t n = ((i.XFX.spr & 0x1F) << 5) | ((i.XFX.spr >> 5) & 0x1F); + switch (n) { + case 1: + // XER + e.update_xer_value(v); + break; + case 8: + // LR + e.update_lr_value(v); + break; + case 9: + // CTR + e.update_ctr_value(v); + break; + default: + XEINSTRNOTIMPLEMENTED(); + return 1; + } + + return 0; +} + + +void X64RegisterEmitCategoryControl() { + XEREGISTERINSTR(bx, 0x48000000); + XEREGISTERINSTR(bcx, 0x40000000); + XEREGISTERINSTR(bcctrx, 0x4C000420); + XEREGISTERINSTR(bclrx, 0x4C000020); + XEREGISTERINSTR(crand, 0x4C000202); + XEREGISTERINSTR(crandc, 0x4C000102); + XEREGISTERINSTR(creqv, 0x4C000242); + XEREGISTERINSTR(crnand, 0x4C0001C2); + XEREGISTERINSTR(crnor, 0x4C000042); + XEREGISTERINSTR(cror, 0x4C000382); + XEREGISTERINSTR(crorc, 0x4C000342); + XEREGISTERINSTR(crxor, 0x4C000182); + XEREGISTERINSTR(mcrf, 0x4C000000); + XEREGISTERINSTR(sc, 0x44000002); + XEREGISTERINSTR(td, 0x7C000088); + XEREGISTERINSTR(tdi, 0x08000000); + XEREGISTERINSTR(tw, 0x7C000008); + XEREGISTERINSTR(twi, 0x0C000000); + XEREGISTERINSTR(mfcr, 0x7C000026); + XEREGISTERINSTR(mfspr, 0x7C0002A6); + XEREGISTERINSTR(mftb, 0x7C0002E6); + XEREGISTERINSTR(mtcrf, 0x7C000120); + XEREGISTERINSTR(mtspr, 0x7C0003A6); +} + + +} // namespace x64 +} // namespace cpu +} // namespace xe diff --git a/src/xenia/cpu/x64/x64_emit_fpu.cc b/src/xenia/cpu/x64/x64_emit_fpu.cc new file mode 100644 index 000000000..7aa945da3 --- /dev/null +++ b/src/xenia/cpu/x64/x64_emit_fpu.cc @@ -0,0 +1,295 @@ +/* + ****************************************************************************** + * Xenia : Xbox 360 Emulator Research Project * + ****************************************************************************** + * Copyright 2013 Ben Vanik. All rights reserved. * + * Released under the BSD license - see LICENSE in the root for more details. 
* + ****************************************************************************** + */ + +#include + +#include + + +using namespace xe::cpu; +using namespace xe::cpu::ppc; + + +namespace xe { +namespace cpu { +namespace x64 { + + +// Floating-point arithmetic (A-8) + +XEEMITTER(faddx, 0xFC00002A, A )(X64Emitter& e, jit_function_t f, InstrData& i) { + XEINSTRNOTIMPLEMENTED(); + return 1; +} + +XEEMITTER(faddsx, 0xEC00002A, A )(X64Emitter& e, jit_function_t f, InstrData& i) { + XEINSTRNOTIMPLEMENTED(); + return 1; +} + +XEEMITTER(fdivx, 0xFC000024, A )(X64Emitter& e, jit_function_t f, InstrData& i) { + XEINSTRNOTIMPLEMENTED(); + return 1; +} + +XEEMITTER(fdivsx, 0xEC000024, A )(X64Emitter& e, jit_function_t f, InstrData& i) { + XEINSTRNOTIMPLEMENTED(); + return 1; +} + +XEEMITTER(fmulx, 0xFC000032, A )(X64Emitter& e, jit_function_t f, InstrData& i) { + XEINSTRNOTIMPLEMENTED(); + return 1; +} + +XEEMITTER(fmulsx, 0xEC000032, A )(X64Emitter& e, jit_function_t f, InstrData& i) { + XEINSTRNOTIMPLEMENTED(); + return 1; +} + +XEEMITTER(fresx, 0xEC000030, A )(X64Emitter& e, jit_function_t f, InstrData& i) { + XEINSTRNOTIMPLEMENTED(); + return 1; +} + +XEEMITTER(frsqrtex, 0xFC000034, A )(X64Emitter& e, jit_function_t f, InstrData& i) { + XEINSTRNOTIMPLEMENTED(); + return 1; +} + +XEEMITTER(fsubx, 0xFC000028, A )(X64Emitter& e, jit_function_t f, InstrData& i) { + XEINSTRNOTIMPLEMENTED(); + return 1; +} + +XEEMITTER(fsubsx, 0xEC000028, A )(X64Emitter& e, jit_function_t f, InstrData& i) { + XEINSTRNOTIMPLEMENTED(); + return 1; +} + +XEEMITTER(fselx, 0xFC00002E, A )(X64Emitter& e, jit_function_t f, InstrData& i) { + XEINSTRNOTIMPLEMENTED(); + return 1; +} + +XEEMITTER(fsqrtx, 0xFC00002C, A )(X64Emitter& e, jit_function_t f, InstrData& i) { + XEINSTRNOTIMPLEMENTED(); + return 1; +} + +XEEMITTER(fsqrtsx, 0xEC00002C, A )(X64Emitter& e, jit_function_t f, InstrData& i) { + XEINSTRNOTIMPLEMENTED(); + return 1; +} + + +// Floating-point multiply-add (A-9) + +XEEMITTER(fmaddx, 0xFC00003A, A )(X64Emitter& e, jit_function_t f, InstrData& i) { + XEINSTRNOTIMPLEMENTED(); + return 1; +} + +XEEMITTER(fmaddsx, 0xEC00003A, A )(X64Emitter& e, jit_function_t f, InstrData& i) { + XEINSTRNOTIMPLEMENTED(); + return 1; +} + +XEEMITTER(fmsubx, 0xFC000038, A )(X64Emitter& e, jit_function_t f, InstrData& i) { + XEINSTRNOTIMPLEMENTED(); + return 1; +} + +XEEMITTER(fmsubsx, 0xEC000038, A )(X64Emitter& e, jit_function_t f, InstrData& i) { + XEINSTRNOTIMPLEMENTED(); + return 1; +} + +XEEMITTER(fnmaddx, 0xFC00003E, A )(X64Emitter& e, jit_function_t f, InstrData& i) { + XEINSTRNOTIMPLEMENTED(); + return 1; +} + +XEEMITTER(fnmaddsx, 0xEC00003E, A )(X64Emitter& e, jit_function_t f, InstrData& i) { + XEINSTRNOTIMPLEMENTED(); + return 1; +} + +XEEMITTER(fnmsubx, 0xFC00003C, A )(X64Emitter& e, jit_function_t f, InstrData& i) { + XEINSTRNOTIMPLEMENTED(); + return 1; +} + +XEEMITTER(fnmsubsx, 0xEC00003C, A )(X64Emitter& e, jit_function_t f, InstrData& i) { + XEINSTRNOTIMPLEMENTED(); + return 1; +} + + +// Floating-point rounding and conversion (A-10) + +XEEMITTER(fcfidx, 0xFC00069C, X )(X64Emitter& e, jit_function_t f, InstrData& i) { + XEINSTRNOTIMPLEMENTED(); + return 1; +} + +XEEMITTER(fctidx, 0xFC00065C, X )(X64Emitter& e, jit_function_t f, InstrData& i) { + XEINSTRNOTIMPLEMENTED(); + return 1; +} + +XEEMITTER(fctidzx, 0xFC00065E, X )(X64Emitter& e, jit_function_t f, InstrData& i) { + XEINSTRNOTIMPLEMENTED(); + return 1; +} + +XEEMITTER(fctiwx, 0xFC00001C, X )(X64Emitter& e, jit_function_t f, InstrData& i) { + XEINSTRNOTIMPLEMENTED(); 
+ return 1; +} + +XEEMITTER(fctiwzx, 0xFC00001E, X )(X64Emitter& e, jit_function_t f, InstrData& i) { + XEINSTRNOTIMPLEMENTED(); + return 1; +} + +XEEMITTER(frspx, 0xFC000018, X )(X64Emitter& e, jit_function_t f, InstrData& i) { + XEINSTRNOTIMPLEMENTED(); + return 1; +} + + +// Floating-point compare (A-11) + +XEEMITTER(fcmpo, 0xFC000040, X )(X64Emitter& e, jit_function_t f, InstrData& i) { + XEINSTRNOTIMPLEMENTED(); + return 1; +} + +XEEMITTER(fcmpu, 0xFC000000, X )(X64Emitter& e, jit_function_t f, InstrData& i) { + // if (FRA) is a NaN or (FRB) is a NaN then + // c <- 0b0001 + // else if (FRA) < (FRB) then + // c <- 0b1000 + // else if (FRA) > (FRB) then + // c <- 0b0100 + // else { + // c <- 0b0010 + // } + // FPCC <- c + // CR[4*BF:4*BF+3] <- c + // if (FRA) is an SNaN or (FRB) is an SNaN then + // VXSNAN <- 1 + XEINSTRNOTIMPLEMENTED(); + return 1; +} + + +// Floating-point status and control register (A + +XEEMITTER(mcrfs, 0xFC000080, X )(X64Emitter& e, jit_function_t f, InstrData& i) { + XEINSTRNOTIMPLEMENTED(); + return 1; +} + +XEEMITTER(mffsx, 0xFC00048E, X )(X64Emitter& e, jit_function_t f, InstrData& i) { + XEINSTRNOTIMPLEMENTED(); + return 1; +} + +XEEMITTER(mtfsb0x, 0xFC00008C, X )(X64Emitter& e, jit_function_t f, InstrData& i) { + XEINSTRNOTIMPLEMENTED(); + return 1; +} + +XEEMITTER(mtfsb1x, 0xFC00004C, X )(X64Emitter& e, jit_function_t f, InstrData& i) { + XEINSTRNOTIMPLEMENTED(); + return 1; +} + +XEEMITTER(mtfsfx, 0xFC00058E, XFL)(X64Emitter& e, jit_function_t f, InstrData& i) { + XEINSTRNOTIMPLEMENTED(); + return 1; +} + +XEEMITTER(mtfsfix, 0xFC00010C, X )(X64Emitter& e, jit_function_t f, InstrData& i) { + XEINSTRNOTIMPLEMENTED(); + return 1; +} + + +// Floating-point move (A-21) + +XEEMITTER(fabsx, 0xFC000210, X )(X64Emitter& e, jit_function_t f, InstrData& i) { + XEINSTRNOTIMPLEMENTED(); + return 1; +} + +XEEMITTER(fmrx, 0xFC000090, X )(X64Emitter& e, jit_function_t f, InstrData& i) { + XEINSTRNOTIMPLEMENTED(); + return 1; +} + +XEEMITTER(fnabsx, 0xFC000110, X )(X64Emitter& e, jit_function_t f, InstrData& i) { + XEINSTRNOTIMPLEMENTED(); + return 1; +} + +XEEMITTER(fnegx, 0xFC000050, X )(X64Emitter& e, jit_function_t f, InstrData& i) { + XEINSTRNOTIMPLEMENTED(); + return 1; +} + + +void X64RegisterEmitCategoryFPU() { + XEREGISTERINSTR(faddx, 0xFC00002A); + XEREGISTERINSTR(faddsx, 0xEC00002A); + XEREGISTERINSTR(fdivx, 0xFC000024); + XEREGISTERINSTR(fdivsx, 0xEC000024); + XEREGISTERINSTR(fmulx, 0xFC000032); + XEREGISTERINSTR(fmulsx, 0xEC000032); + XEREGISTERINSTR(fresx, 0xEC000030); + XEREGISTERINSTR(frsqrtex, 0xFC000034); + XEREGISTERINSTR(fsubx, 0xFC000028); + XEREGISTERINSTR(fsubsx, 0xEC000028); + XEREGISTERINSTR(fselx, 0xFC00002E); + XEREGISTERINSTR(fsqrtx, 0xFC00002C); + XEREGISTERINSTR(fsqrtsx, 0xEC00002C); + XEREGISTERINSTR(fmaddx, 0xFC00003A); + XEREGISTERINSTR(fmaddsx, 0xEC00003A); + XEREGISTERINSTR(fmsubx, 0xFC000038); + XEREGISTERINSTR(fmsubsx, 0xEC000038); + XEREGISTERINSTR(fnmaddx, 0xFC00003E); + XEREGISTERINSTR(fnmaddsx, 0xEC00003E); + XEREGISTERINSTR(fnmsubx, 0xFC00003C); + XEREGISTERINSTR(fnmsubsx, 0xEC00003C); + XEREGISTERINSTR(fcfidx, 0xFC00069C); + XEREGISTERINSTR(fctidx, 0xFC00065C); + XEREGISTERINSTR(fctidzx, 0xFC00065E); + XEREGISTERINSTR(fctiwx, 0xFC00001C); + XEREGISTERINSTR(fctiwzx, 0xFC00001E); + XEREGISTERINSTR(frspx, 0xFC000018); + XEREGISTERINSTR(fcmpo, 0xFC000040); + XEREGISTERINSTR(fcmpu, 0xFC000000); + XEREGISTERINSTR(mcrfs, 0xFC000080); + XEREGISTERINSTR(mffsx, 0xFC00048E); + XEREGISTERINSTR(mtfsb0x, 0xFC00008C); + XEREGISTERINSTR(mtfsb1x, 
0xFC00004C); + XEREGISTERINSTR(mtfsfx, 0xFC00058E); + XEREGISTERINSTR(mtfsfix, 0xFC00010C); + XEREGISTERINSTR(fabsx, 0xFC000210); + XEREGISTERINSTR(fmrx, 0xFC000090); + XEREGISTERINSTR(fnabsx, 0xFC000110); + XEREGISTERINSTR(fnegx, 0xFC000050); +} + + +} // namespace x64 +} // namespace cpu +} // namespace xe diff --git a/src/xenia/cpu/x64/x64_emit_memory.cc b/src/xenia/cpu/x64/x64_emit_memory.cc new file mode 100644 index 000000000..0a6333a40 --- /dev/null +++ b/src/xenia/cpu/x64/x64_emit_memory.cc @@ -0,0 +1,1173 @@ +/* + ****************************************************************************** + * Xenia : Xbox 360 Emulator Research Project * + ****************************************************************************** + * Copyright 2013 Ben Vanik. All rights reserved. * + * Released under the BSD license - see LICENSE in the root for more details. * + ****************************************************************************** + */ + +#include + +#include + + +using namespace xe::cpu; +using namespace xe::cpu::ppc; + + +namespace xe { +namespace cpu { +namespace x64 { + + +// Integer load (A-13) + +XEEMITTER(lbz, 0x88000000, D )(X64Emitter& e, jit_function_t f, InstrData& i) { + // if RA = 0 then + // b <- 0 + // else + // b <- (RA) + // EA <- b + EXTS(D) + // RT <- i56.0 || MEM(EA, 1) + + jit_value_t ea = e.get_int64(XEEXTS16(i.D.DS)); + if (i.D.RA) { + ea = jit_insn_add(f, e.gpr_value(i.D.RA), ea); + } + jit_value_t v = e.ReadMemory(i.address, ea, 1, false); + e.update_gpr_value(i.D.RT, v); + + return 0; +} + +XEEMITTER(lbzu, 0x8C000000, D )(X64Emitter& e, jit_function_t f, InstrData& i) { + // EA <- (RA) + EXTS(D) + // RT <- i56.0 || MEM(EA, 1) + // RA <- EA + + jit_value_t ea = jit_insn_add(f, e.gpr_value(i.D.RA), + e.get_int64(XEEXTS16(i.D.DS))); + jit_value_t v = e.ReadMemory(i.address, ea, 1, false); + e.update_gpr_value(i.D.RT, v); + e.update_gpr_value(i.D.RA, ea); + + return 0; +} + +XEEMITTER(lbzux, 0x7C0000EE, X )(X64Emitter& e, jit_function_t f, InstrData& i) { + // EA <- (RA) + (RB) + // RT <- i56.0 || MEM(EA, 1) + // RA <- EA + + jit_value_t ea = jit_insn_add(f, e.gpr_value(i.X.RA), e.gpr_value(i.X.RB)); + jit_value_t v = e.ReadMemory(i.address, ea, 1, false); + e.update_gpr_value(i.X.RT, v); + e.update_gpr_value(i.X.RA, ea); + + return 0; +} + +XEEMITTER(lbzx, 0x7C0000AE, X )(X64Emitter& e, jit_function_t f, InstrData& i) { + // if RA = 0 then + // b <- 0 + // else + // b <- (RA) + // EA <- b + (RB) + // RT <- i56.0 || MEM(EA, 1) + + jit_value_t ea = e.gpr_value(i.X.RB); + if (i.X.RA) { + ea = jit_insn_add(f, e.gpr_value(i.X.RA), ea); + } + jit_value_t v = e.ReadMemory(i.address, ea, 1, false); + e.update_gpr_value(i.X.RT, v); + + return 0; +} + +XEEMITTER(ld, 0xE8000000, DS )(X64Emitter& e, jit_function_t f, InstrData& i) { + // if RA = 0 then + // b <- 0 + // else + // b <- (RA) + // EA <- b + EXTS(DS || 0b00) + // RT <- MEM(EA, 8) + + jit_value_t ea = e.get_int64(XEEXTS16(i.DS.DS << 2)); + if (i.DS.RA) { + ea = jit_insn_add(f, e.gpr_value(i.DS.RA), ea); + } + jit_value_t v = e.ReadMemory(i.address, ea, 8, false); + e.update_gpr_value(i.DS.RT, v); + + return 0; +} + +XEEMITTER(ldu, 0xE8000001, DS )(X64Emitter& e, jit_function_t f, InstrData& i) { + // EA <- (RA) + EXTS(DS || 0b00) + // RT <- MEM(EA, 8) + // RA <- EA + + jit_value_t ea = jit_insn_add(f, e.gpr_value(i.DS.RA), + e.get_int64(XEEXTS16(i.DS.DS << 2))); + jit_value_t v = e.ReadMemory(i.address, ea, 8, false); + e.update_gpr_value(i.DS.RT, v); + e.update_gpr_value(i.DS.RA, ea); + + return 0; +} + 
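+// Every integer load/store in this file computes its effective address the
+// same way. As a plain-C sketch (illustrative only: CalcEA is not part of
+// the emitter API; xe_ppc_state_t::r is the GPR array the emitter reaches
+// via offsetof elsewhere):
+//
+//   uint64_t CalcEA(xe_ppc_state_t* state, uint32_t ra, uint64_t offset) {
+//     // RA == 0 selects the literal value 0, not r0.
+//     return (ra ? state->r[ra] : 0) + offset;
+//   }
+//
+// D/DS forms pass EXTS(D) (or EXTS(DS || 0b00)) as the offset, X forms pass
+// (RB), and the "u"/"ux" update variants also write the EA back into RA;
+// the ISA treats RA = 0 as an invalid form there, so they skip the zero
+// check.
+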
+XEEMITTER(ldux, 0x7C00006A, X )(X64Emitter& e, jit_function_t f, InstrData& i) { + XEINSTRNOTIMPLEMENTED(); + return 1; +} + +XEEMITTER(ldx, 0x7C00002A, X )(X64Emitter& e, jit_function_t f, InstrData& i) { + XEINSTRNOTIMPLEMENTED(); + return 1; +} + +XEEMITTER(lha, 0xA8000000, D )(X64Emitter& e, jit_function_t f, InstrData& i) { + // if RA = 0 then + // b <- 0 + // else + // b <- (RA) + // EA <- b + EXTS(D) + // RT <- EXTS(MEM(EA, 2)) + + jit_value_t ea = e.get_int64(XEEXTS16(i.D.DS)); + if (i.D.RA) { + ea = jit_insn_add(f, e.gpr_value(i.D.RA), ea); + } + jit_value_t v = e.sign_extend(e.ReadMemory(i.address, ea, 2, false), + jit_type_nuint); + e.update_gpr_value(i.D.RT, v); + + return 0; +} + +XEEMITTER(lhau, 0xAC000000, D )(X64Emitter& e, jit_function_t f, InstrData& i) { + XEINSTRNOTIMPLEMENTED(); + return 1; +} + +XEEMITTER(lhaux, 0x7C0002EE, X )(X64Emitter& e, jit_function_t f, InstrData& i) { + XEINSTRNOTIMPLEMENTED(); + return 1; +} + +XEEMITTER(lhax, 0x7C0002AE, X )(X64Emitter& e, jit_function_t f, InstrData& i) { + // if RA = 0 then + // b <- 0 + // else + // b <- (RA) + // EA <- b + (RB) + // RT <- EXTS(MEM(EA, 2)) + + jit_value_t ea = e.gpr_value(i.X.RB); + if (i.X.RA) { + ea = jit_insn_add(f, e.gpr_value(i.X.RA), ea); + } + jit_value_t v = e.sign_extend(e.ReadMemory(i.address, ea, 2, false), + jit_type_nuint); + e.update_gpr_value(i.X.RT, v); + + return 0; +} + +XEEMITTER(lhz, 0xA0000000, D )(X64Emitter& e, jit_function_t f, InstrData& i) { + // if RA = 0 then + // b <- 0 + // else + // b <- (RA) + // EA <- b + EXTS(D) + // RT <- i48.0 || MEM(EA, 2) + + jit_value_t ea = e.get_int64(XEEXTS16(i.D.DS)); + if (i.D.RA) { + ea = jit_insn_add(f, e.gpr_value(i.D.RA), ea); + } + jit_value_t v = e.ReadMemory(i.address, ea, 2, false); + e.update_gpr_value(i.D.RT, v); + + return 0; +} + +XEEMITTER(lhzu, 0xA4000000, D )(X64Emitter& e, jit_function_t f, InstrData& i) { + // EA <- (RA) + EXTS(D) + // RT <- i48.0 || MEM(EA, 2) + // RA <- EA + + jit_value_t ea = jit_insn_add(f, e.gpr_value(i.D.RA), + e.get_int64(XEEXTS16(i.D.DS))); + jit_value_t v = e.ReadMemory(i.address, ea, 2, false); + e.update_gpr_value(i.D.RT, v); + e.update_gpr_value(i.D.RA, ea); + + return 0; +} + +XEEMITTER(lhzux, 0x7C00026E, X )(X64Emitter& e, jit_function_t f, InstrData& i) { + // EA <- (RA) + (RB) + // RT <- i48.0 || MEM(EA, 2) + // RA <- EA + + jit_value_t ea = jit_insn_add(f, e.gpr_value(i.X.RA), e.gpr_value(i.X.RB)); + jit_value_t v = e.ReadMemory(i.address, ea, 2, false); + e.update_gpr_value(i.X.RT, v); + e.update_gpr_value(i.X.RA, ea); + + return 0; +} + +XEEMITTER(lhzx, 0x7C00022E, X )(X64Emitter& e, jit_function_t f, InstrData& i) { + // if RA = 0 then + // b <- 0 + // else + // b <- (RA) + // EA <- b + (RB) + // RT <- i48.0 || MEM(EA, 2) + + jit_value_t ea = e.gpr_value(i.X.RB); + if (i.X.RA) { + ea = jit_insn_add(f, e.gpr_value(i.X.RA), ea); + } + jit_value_t v = e.ReadMemory(i.address, ea, 2, false); + e.update_gpr_value(i.X.RT, v); + + return 0; +} + +XEEMITTER(lwa, 0xE8000002, DS )(X64Emitter& e, jit_function_t f, InstrData& i) { + // if RA = 0 then + // b <- 0 + // else + // b <- (RA) + // EA <- b + EXTS(D || 00) + // RT <- EXTS(MEM(EA, 4)) + + jit_value_t ea = e.get_int64(XEEXTS16(i.DS.DS << 2)); + if (i.DS.RA) { + ea = jit_insn_add(f, e.gpr_value(i.DS.RA), ea); + } + jit_value_t v = e.sign_extend(e.ReadMemory(i.address, ea, 4, false), + jit_type_nuint); + e.update_gpr_value(i.DS.RT, v); + + return 0; +} + +XEEMITTER(lwaux, 0x7C0002EA, X )(X64Emitter& e, jit_function_t f, InstrData& i) { + // EA <- 
(RA) + (RB) + // RT <- EXTS(MEM(EA, 4)) + // RA <- EA + + jit_value_t ea = jit_insn_add(f, e.gpr_value(i.X.RA), e.gpr_value(i.X.RB)); + jit_value_t v = e.sign_extend(e.ReadMemory(i.address, ea, 4, false), + jit_type_nuint); + e.update_gpr_value(i.X.RT, v); + e.update_gpr_value(i.X.RA, ea); + + return 0; +} + +XEEMITTER(lwax, 0x7C0002AA, X )(X64Emitter& e, jit_function_t f, InstrData& i) { + // if RA = 0 then + // b <- 0 + // else + // b <- (RA) + // EA <- b + (RB) + // RT <- EXTS(MEM(EA, 4)) + + jit_value_t ea = e.gpr_value(i.X.RB); + if (i.X.RA) { + ea = jit_insn_add(f, e.gpr_value(i.X.RA), ea); + } + jit_value_t v = e.sign_extend(e.ReadMemory(i.address, ea, 4, false), + jit_type_nuint); + e.update_gpr_value(i.X.RT, v); + + return 0; +} + +XEEMITTER(lwz, 0x80000000, D )(X64Emitter& e, jit_function_t f, InstrData& i) { + // if RA = 0 then + // b <- 0 + // else + // b <- (RA) + // EA <- b + EXTS(D) + // RT <- i32.0 || MEM(EA, 4) + + jit_value_t ea = e.get_int64(XEEXTS16(i.D.DS)); + if (i.D.RA) { + ea = jit_insn_add(f, e.gpr_value(i.D.RA), ea); + } + jit_value_t v = e.ReadMemory(i.address, ea, 4, false); + e.update_gpr_value(i.D.RT, v); + + return 0; +} + +XEEMITTER(lwzu, 0x84000000, D )(X64Emitter& e, jit_function_t f, InstrData& i) { + // EA <- (RA) + EXTS(D) + // RT <- i32.0 || MEM(EA, 4) + // RA <- EA + + jit_value_t ea = jit_insn_add(f, e.gpr_value(i.D.RA), + e.get_int64(XEEXTS16(i.D.DS))); + jit_value_t v = e.ReadMemory(i.address, ea, 4, false); + e.update_gpr_value(i.D.RT, v); + e.update_gpr_value(i.D.RA, ea); + + return 0; +} + +XEEMITTER(lwzux, 0x7C00006E, X )(X64Emitter& e, jit_function_t f, InstrData& i) { + // EA <- (RA) + (RB) + // RT <- i32.0 || MEM(EA, 4) + // RA <- EA + + jit_value_t ea = jit_insn_add(f, e.gpr_value(i.X.RA), e.gpr_value(i.X.RB)); + jit_value_t v = e.ReadMemory(i.address, ea, 4, false); + e.update_gpr_value(i.X.RT, v); + e.update_gpr_value(i.X.RA, ea); + + return 0; +} + +XEEMITTER(lwzx, 0x7C00002E, X )(X64Emitter& e, jit_function_t f, InstrData& i) { + // if RA = 0 then + // b <- 0 + // else + // b <- (RA) + // EA <- b + (RB) + // RT <- i32.0 || MEM(EA, 4) + + jit_value_t ea = e.gpr_value(i.X.RB); + if (i.X.RA) { + ea = jit_insn_add(f, e.gpr_value(i.X.RA), ea); + } + jit_value_t v = e.ReadMemory(i.address, ea, 4, false); + e.update_gpr_value(i.X.RT, v); + + return 0; +} + + +// Integer store (A-14) + +XEEMITTER(stb, 0x98000000, D )(X64Emitter& e, jit_function_t f, InstrData& i) { + // if RA = 0 then + // b <- 0 + // else + // b <- (RA) + // EA <- b + EXTS(D) + // MEM(EA, 1) <- (RS)[56:63] + + jit_value_t ea = e.get_int64(XEEXTS16(i.D.DS)); + if (i.D.RA) { + ea = jit_insn_add(f, e.gpr_value(i.D.RA), ea); + } + jit_value_t v = e.gpr_value(i.D.RT); + e.WriteMemory(i.address, ea, 1, v); + + return 0; +} + +XEEMITTER(stbu, 0x9C000000, D )(X64Emitter& e, jit_function_t f, InstrData& i) { + // EA <- (RA) + EXTS(D) + // MEM(EA, 1) <- (RS)[56:63] + // RA <- EA + + jit_value_t ea = jit_insn_add(f, e.gpr_value(i.D.RA), + e.get_int64(XEEXTS16(i.D.DS))); + jit_value_t v = e.gpr_value(i.D.RT); + e.WriteMemory(i.address, ea, 1, v); + e.update_gpr_value(i.D.RA, ea); + + return 0; +} + +XEEMITTER(stbux, 0x7C0001EE, X )(X64Emitter& e, jit_function_t f, InstrData& i) { + // EA <- (RA) + (RB) + // MEM(EA, 1) <- (RS)[56:63] + // RA <- EA + + jit_value_t ea = jit_insn_add(f, e.gpr_value(i.X.RA), e.gpr_value(i.X.RB)); + jit_value_t v = e.gpr_value(i.X.RT); + e.WriteMemory(i.address, ea, 1, v); + e.update_gpr_value(i.X.RA, ea); + + return 0; +} + +XEEMITTER(stbx, 0x7C0001AE, X 
)(X64Emitter& e, jit_function_t f, InstrData& i) { + // if RA = 0 then + // b <- 0 + // else + // b <- (RA) + // EA <- b + (RB) + // MEM(EA, 1) <- (RS)[56:63] + + jit_value_t ea = e.gpr_value(i.X.RB); + if (i.D.RA) { + ea = jit_insn_add(f, e.gpr_value(i.X.RA), ea); + } + jit_value_t v = e.gpr_value(i.X.RT); + e.WriteMemory(i.address, ea, 1, v); + + return 0; +} + +XEEMITTER(std, 0xF8000000, DS )(X64Emitter& e, jit_function_t f, InstrData& i) { + // if RA = 0 then + // b <- 0 + // else + // b <- (RA) + // EA <- b + EXTS(DS || 0b00) + // MEM(EA, 8) <- (RS) + + jit_value_t ea = e.get_int64(XEEXTS16(i.DS.DS << 2)); + if (i.DS.RA) { + ea = jit_insn_add(f, e.gpr_value(i.DS.RA), ea); + } + jit_value_t v = e.gpr_value(i.DS.RT); + e.WriteMemory(i.address, ea, 8, v); + + return 0; +} + +XEEMITTER(stdu, 0xF8000001, DS )(X64Emitter& e, jit_function_t f, InstrData& i) { + // EA <- (RA) + EXTS(DS || 0b00) + // MEM(EA, 8) <- (RS) + // RA <- EA + + jit_value_t ea = jit_insn_add(f, e.gpr_value(i.DS.RA), + e.get_int64(XEEXTS16(i.DS.DS << 2))); + jit_value_t v = e.gpr_value(i.DS.RT); + e.WriteMemory(i.address, ea, 8, v); + e.update_gpr_value(i.DS.RA, ea); + + return 0; +} + +XEEMITTER(stdux, 0x7C00016A, X )(X64Emitter& e, jit_function_t f, InstrData& i) { + // if RA = 0 then + // b <- 0 + // else + // b <- (RA) + // EA <- b + (RB) + // MEM(EA, 8) <- (RS) + + jit_value_t ea = e.gpr_value(i.X.RB); + if (i.X.RA) { + ea = jit_insn_add(f, e.gpr_value(i.X.RA), ea); + } + jit_value_t v = e.gpr_value(i.X.RT); + e.WriteMemory(i.address, ea, 8, v); + + return 0; +} + +XEEMITTER(stdx, 0x7C00012A, X )(X64Emitter& e, jit_function_t f, InstrData& i) { + // EA <- (RA) + (RB) + // MEM(EA, 8) <- (RS) + // RA <- EA + + jit_value_t ea = jit_insn_add(f, e.gpr_value(i.X.RA), e.gpr_value(i.X.RB)); + jit_value_t v = e.gpr_value(i.X.RT); + e.WriteMemory(i.address, ea, 8, v); + e.update_gpr_value(i.X.RA, ea); + + return 0; +} + +XEEMITTER(sth, 0xB0000000, D )(X64Emitter& e, jit_function_t f, InstrData& i) { + // if RA = 0 then + // b <- 0 + // else + // b <- (RA) + // EA <- b + EXTS(D) + // MEM(EA, 2) <- (RS)[48:63] + + jit_value_t ea = e.get_int64(XEEXTS16(i.D.DS)); + if (i.D.RA) { + ea = jit_insn_add(f, e.gpr_value(i.D.RA), ea); + } + jit_value_t v = e.gpr_value(i.D.RT); + e.WriteMemory(i.address, ea, 2, v); + + return 0; +} + +XEEMITTER(sthu, 0xB4000000, D )(X64Emitter& e, jit_function_t f, InstrData& i) { + // EA <- (RA) + EXTS(D) + // MEM(EA, 2) <- (RS)[48:63] + // RA <- EA + + jit_value_t ea = jit_insn_add(f, e.gpr_value(i.D.RA), + e.get_int64(XEEXTS16(i.D.DS))); + jit_value_t v = e.gpr_value(i.D.RT); + e.WriteMemory(i.address, ea, 2, v); + e.update_gpr_value(i.D.RA, ea); + + return 0; +} + +XEEMITTER(sthux, 0x7C00036E, X )(X64Emitter& e, jit_function_t f, InstrData& i) { + // EA <- (RA) + (RB) + // MEM(EA, 2) <- (RS)[48:63] + // RA <- EA + + jit_value_t ea = jit_insn_add(f, e.gpr_value(i.X.RA), e.gpr_value(i.X.RB)); + jit_value_t v = e.gpr_value(i.X.RT); + e.WriteMemory(i.address, ea, 2, v); + e.update_gpr_value(i.X.RA, ea); + + return 0; +} + +XEEMITTER(sthx, 0x7C00032E, X )(X64Emitter& e, jit_function_t f, InstrData& i) { + // if RA = 0 then + // b <- 0 + // else + // b <- (RA) + // EA <- b + (RB) + // MEM(EA, 2) <- (RS)[48:63] + + jit_value_t ea = e.gpr_value(i.X.RB); + if (i.D.RA) { + ea = jit_insn_add(f, e.gpr_value(i.X.RA), ea); + } + jit_value_t v = e.gpr_value(i.X.RT); + e.WriteMemory(i.address, ea, 2, v); + + return 0; +} + +XEEMITTER(stw, 0x90000000, D )(X64Emitter& e, jit_function_t f, InstrData& i) { + // if RA = 
0 then + // b <- 0 + // else + // b <- (RA) + // EA <- b + EXTS(D) + // MEM(EA, 4) <- (RS)[32:63] + + jit_value_t ea = e.get_int64(XEEXTS16(i.D.DS)); + if (i.D.RA) { + ea = jit_insn_add(f, e.gpr_value(i.D.RA), ea); + } + jit_value_t v = e.gpr_value(i.D.RT); + e.WriteMemory(i.address, ea, 4, v); + + return 0; +} + +XEEMITTER(stwu, 0x94000000, D )(X64Emitter& e, jit_function_t f, InstrData& i) { + // EA <- (RA) + EXTS(D) + // MEM(EA, 4) <- (RS)[32:63] + // RA <- EA + + jit_value_t ea = jit_insn_add(f, e.gpr_value(i.D.RA), + e.get_int64(XEEXTS16(i.D.DS))); + jit_value_t v = e.gpr_value(i.D.RT); + e.WriteMemory(i.address, ea, 4, v); + e.update_gpr_value(i.D.RA, ea); + + return 0; +} + +XEEMITTER(stwux, 0x7C00016E, X )(X64Emitter& e, jit_function_t f, InstrData& i) { + // EA <- (RA) + (RB) + // MEM(EA, 4) <- (RS)[32:63] + // RA <- EA + + jit_value_t ea = jit_insn_add(f, e.gpr_value(i.X.RA), e.gpr_value(i.X.RB)); + jit_value_t v = e.gpr_value(i.X.RT); + e.WriteMemory(i.address, ea, 4, v); + e.update_gpr_value(i.X.RA, ea); + + return 0; +} + +XEEMITTER(stwx, 0x7C00012E, X )(X64Emitter& e, jit_function_t f, InstrData& i) { + // if RA = 0 then + // b <- 0 + // else + // b <- (RA) + // EA <- b + (RB) + // MEM(EA, 4) <- (RS)[32:63] + + jit_value_t ea = e.gpr_value(i.X.RB); + if (i.X.RA) { + ea = jit_insn_add(f, e.gpr_value(i.X.RA), ea); + } + jit_value_t v = e.gpr_value(i.X.RT); + e.WriteMemory(i.address, ea, 4, v); + + return 0; +} + + +// Integer load and store with byte reverse (A-1 + +XEEMITTER(lhbrx, 0x7C00062C, X )(X64Emitter& e, jit_function_t f, InstrData& i) { + XEINSTRNOTIMPLEMENTED(); + return 1; +} + +XEEMITTER(lwbrx, 0x7C00042C, X )(X64Emitter& e, jit_function_t f, InstrData& i) { + XEINSTRNOTIMPLEMENTED(); + return 1; +} + +XEEMITTER(ldbrx, 0x7C000428, X )(X64Emitter& e, jit_function_t f, InstrData& i) { + XEINSTRNOTIMPLEMENTED(); + return 1; +} + +XEEMITTER(sthbrx, 0x7C00072C, X )(X64Emitter& e, jit_function_t f, InstrData& i) { + XEINSTRNOTIMPLEMENTED(); + return 1; +} + +XEEMITTER(stwbrx, 0x7C00052C, X )(X64Emitter& e, jit_function_t f, InstrData& i) { + XEINSTRNOTIMPLEMENTED(); + return 1; +} + +XEEMITTER(stdbrx, 0x7C000528, X )(X64Emitter& e, jit_function_t f, InstrData& i) { + XEINSTRNOTIMPLEMENTED(); + return 1; +} + + +// Integer load and store multiple (A-16) + +XEEMITTER(lmw, 0xB8000000, D )(X64Emitter& e, jit_function_t f, InstrData& i) { + XEINSTRNOTIMPLEMENTED(); + return 1; +} + +XEEMITTER(stmw, 0xBC000000, D )(X64Emitter& e, jit_function_t f, InstrData& i) { + XEINSTRNOTIMPLEMENTED(); + return 1; +} + + +// Integer load and store string (A-17) + +XEEMITTER(lswi, 0x7C0004AA, X )(X64Emitter& e, jit_function_t f, InstrData& i) { + XEINSTRNOTIMPLEMENTED(); + return 1; +} + +XEEMITTER(lswx, 0x7C00042A, X )(X64Emitter& e, jit_function_t f, InstrData& i) { + XEINSTRNOTIMPLEMENTED(); + return 1; +} + +XEEMITTER(stswi, 0x7C0005AA, X )(X64Emitter& e, jit_function_t f, InstrData& i) { + XEINSTRNOTIMPLEMENTED(); + return 1; +} + +XEEMITTER(stswx, 0x7C00052A, X )(X64Emitter& e, jit_function_t f, InstrData& i) { + XEINSTRNOTIMPLEMENTED(); + return 1; +} + + +// Memory synchronization (A-18) + +XEEMITTER(eieio, 0x7C0006AC, X )(X64Emitter& e, jit_function_t f, InstrData& i) { + XEINSTRNOTIMPLEMENTED(); + return 1; +} + +XEEMITTER(isync, 0x4C00012C, XL )(X64Emitter& e, jit_function_t f, InstrData& i) { + XEINSTRNOTIMPLEMENTED(); + return 1; +} + +XEEMITTER(ldarx, 0x7C0000A8, X )(X64Emitter& e, jit_function_t f, InstrData& i) { + XEINSTRNOTIMPLEMENTED(); + return 1; +} + +// 
XEEMITTER(lwarx, 0x7C000028, X )(X64Emitter& e, jit_function_t f, InstrData& i) { +// // if RA = 0 then +// // b <- 0 +// // else +// // b <- (RA) +// // EA <- b + (RB) +// // RESERVE <- 1 +// // RESERVE_LENGTH <- 4 +// // RESERVE_ADDR <- real_addr(EA) +// // RT <- i32.0 || MEM(EA, 4) + +// // TODO(benvanik): make this right + +// jit_value_t ea = e.gpr_value(i.X.RB); +// if (i.X.RA) { +// ea = jit_insn_add(f, e.gpr_value(i.X.RA), ea); +// } +// jit_value_t v = e.ReadMemory(i.address, ea, 4, /* acquire */ true); +// e.update_gpr_value(i.X.RT, v); + +// return 0; +// } + +// XEEMITTER(stdcx, 0x7C0001AD, X )(X64Emitter& e, jit_function_t f, InstrData& i) { +// XEINSTRNOTIMPLEMENTED(); +// return 1; +// } + +// XEEMITTER(stwcx, 0x7C00012D, X )(X64Emitter& e, jit_function_t f, InstrData& i) { +// // if RA = 0 then +// // b <- 0 +// // else +// // b <- (RA) +// // EA <- b + (RB) +// // RESERVE stuff... +// // MEM(EA, 4) <- (RS)[32:63] +// // n <- 1 if store performed +// // CR0[LT GT EQ SO] = 0b00 || n || XER[SO] + +// // TODO(benvanik): make this right + +// jit_value_t ea = e.get_int64(XEEXTS16(i.D.DS)); +// if (i.D.RA) { +// ea = jit_insn_add(f, e.gpr_value(i.D.RA), ea); +// } +// jit_value_t v = e.gpr_value(i.D.RT); +// e.WriteMemory(i.address, ea, 4, v, /* release */ true); + +// // We always succeed. +// e.update_cr_value(0, e.get_int64(1 << 2)); + +// return 0; +// } + +XEEMITTER(sync, 0x7C0004AC, X )(X64Emitter& e, jit_function_t f, InstrData& i) { + XEINSTRNOTIMPLEMENTED(); + return 1; +} + + +// // Floating-point load (A-19) + +// XEEMITTER(lfd, 0xC8000000, D )(X64Emitter& e, jit_function_t f, InstrData& i) { +// // if RA = 0 then +// // b <- 0 +// // else +// // b <- (RA) +// // EA <- b + EXTS(D) +// // FRT <- MEM(EA, 8) + +// jit_value_t ea = e.get_int64(XEEXTS16(i.D.DS)); +// if (i.D.RA) { +// ea = jit_insn_add(f, e.gpr_value(i.D.RA), ea); +// } +// jit_value_t v = e.ReadMemory(i.address, ea, 8, false); +// v = b.CreateBitCast(v, jit_type_float64); +// e.update_fpr_value(i.D.RT, v); + +// return 0; +// } + +// XEEMITTER(lfdu, 0xCC000000, D )(X64Emitter& e, jit_function_t f, InstrData& i) { +// // EA <- (RA) + EXTS(D) +// // FRT <- MEM(EA, 8) +// // RA <- EA + +// jit_value_t ea = jit_insn_add(f, e.gpr_value(i.D.RA), e.get_int64(XEEXTS16(i.D.DS))); +// jit_value_t v = e.ReadMemory(i.address, ea, 8, false); +// v = b.CreateBitCast(v, jit_type_float64); +// e.update_fpr_value(i.D.RT, v); +// e.update_gpr_value(i.D.RA, ea); + +// return 0; +// } + +// XEEMITTER(lfdux, 0x7C0004EE, X )(X64Emitter& e, jit_function_t f, InstrData& i) { +// // EA <- (RA) + (RB) +// // FRT <- MEM(EA, 8) +// // RA <- EA + +// jit_value_t ea = jit_insn_add(f, e.gpr_value(i.X.RA), e.gpr_value(i.X.RB)); +// jit_value_t v = e.ReadMemory(i.address, ea, 8, false); +// v = b.CreateBitCast(v, jit_type_float64); +// e.update_fpr_value(i.X.RT, v); +// e.update_gpr_value(i.X.RA, ea); + +// return 0; +// } + +// XEEMITTER(lfdx, 0x7C0004AE, X )(X64Emitter& e, jit_function_t f, InstrData& i) { +// // if RA = 0 then +// // b <- 0 +// // else +// // b <- (RA) +// // EA <- b + (RB) +// // FRT <- MEM(EA, 8) + +// jit_value_t ea = e.gpr_value(i.X.RB); +// if (i.X.RA) { +// ea = jit_insn_add(f, e.gpr_value(i.X.RA), ea); +// } +// jit_value_t v = e.ReadMemory(i.address, ea, 8, false); +// v = b.CreateBitCast(v, jit_type_float64); +// e.update_fpr_value(i.X.RT, v); + +// return 0; +// } + +// XEEMITTER(lfs, 0xC0000000, D )(X64Emitter& e, jit_function_t f, InstrData& i) { +// // if RA = 0 then +// // b <- 0 +// // else +// // b <- 
(RA) +// // EA <- b + EXTS(D) +// // FRT <- DOUBLE(MEM(EA, 4)) + +// jit_value_t ea = e.get_int64(XEEXTS16(i.D.DS)); +// if (i.D.RA) { +// ea = jit_insn_add(f, e.gpr_value(i.D.RA), ea); +// } +// jit_value_t v = e.ReadMemory(i.address, ea, 4, false); +// v = b.CreateFPExt(b.CreateBitCast(v, b.getFloatTy()), jit_type_float64); +// e.update_fpr_value(i.D.RT, v); + +// return 0; +// } + +// XEEMITTER(lfsu, 0xC4000000, D )(X64Emitter& e, jit_function_t f, InstrData& i) { +// // EA <- (RA) + EXTS(D) +// // FRT <- DOUBLE(MEM(EA, 4)) +// // RA <- EA + +// jit_value_t ea = jit_insn_add(f, e.gpr_value(i.D.RA), e.get_int64(XEEXTS16(i.D.DS))); +// jit_value_t v = e.ReadMemory(i.address, ea, 4, false); +// v = b.CreateFPExt(b.CreateBitCast(v, b.getFloatTy()), jit_type_float64); +// e.update_fpr_value(i.D.RT, v); +// e.update_gpr_value(i.D.RA, ea); + +// return 0; +// } + +// XEEMITTER(lfsux, 0x7C00046E, X )(X64Emitter& e, jit_function_t f, InstrData& i) { +// // EA <- (RA) + (RB) +// // FRT <- DOUBLE(MEM(EA, 4)) +// // RA <- EA + +// jit_value_t ea = jit_insn_add(f, e.gpr_value(i.X.RA), e.gpr_value(i.X.RB)); +// jit_value_t v = e.ReadMemory(i.address, ea, 4, false); +// v = b.CreateFPExt(b.CreateBitCast(v, b.getFloatTy()), jit_type_float64); +// e.update_fpr_value(i.X.RT, v); +// e.update_gpr_value(i.X.RA, ea); + +// return 0; +// } + +// XEEMITTER(lfsx, 0x7C00042E, X )(X64Emitter& e, jit_function_t f, InstrData& i) { +// // if RA = 0 then +// // b <- 0 +// // else +// // b <- (RA) +// // EA <- b + (RB) +// // FRT <- DOUBLE(MEM(EA, 4)) + +// jit_value_t ea = e.gpr_value(i.X.RB); +// if (i.X.RA) { +// ea = jit_insn_add(f, e.gpr_value(i.X.RA), ea); +// } +// jit_value_t v = e.ReadMemory(i.address, ea, 4, false); +// v = b.CreateFPExt(b.CreateBitCast(v, b.getFloatTy()), jit_type_float64); +// e.update_fpr_value(i.X.RT, v); + +// return 0; +// } + + +// // Floating-point store (A-20) + +// XEEMITTER(stfd, 0xD8000000, D )(X64Emitter& e, jit_function_t f, InstrData& i) { +// // if RA = 0 then +// // b <- 0 +// // else +// // b <- (RA) +// // EA <- b + EXTS(D) +// // MEM(EA, 8) <- (FRS) + +// jit_value_t ea = e.get_int64(XEEXTS16(i.D.DS)); +// if (i.D.RA) { +// ea = jit_insn_add(f, e.gpr_value(i.D.RA), ea); +// } +// jit_value_t v = e.fpr_value(i.D.RT); +// v = b.CreateBitCast(v, jit_type_nint); +// e.WriteMemory(i.address, ea, 8, v); + +// return 0; +// } + +// XEEMITTER(stfdu, 0xDC000000, D )(X64Emitter& e, jit_function_t f, InstrData& i) { +// // EA <- (RA) + EXTS(D) +// // MEM(EA, 8) <- (FRS) +// // RA <- EA + +// jit_value_t ea = jit_insn_add(f, e.gpr_value(i.D.RA), +// e.get_int64(XEEXTS16(i.D.DS))); +// jit_value_t v = e.fpr_value(i.D.RT); +// v = b.CreateBitCast(v, jit_type_nint); +// e.WriteMemory(i.address, ea, 8, v); +// e.update_gpr_value(i.D.RA, ea); + +// return 0; +// } + +// XEEMITTER(stfdux, 0x7C0005EE, X )(X64Emitter& e, jit_function_t f, InstrData& i) { +// // EA <- (RA) + (RB) +// // MEM(EA, 8) <- (FRS) +// // RA <- EA + +// jit_value_t ea = jit_insn_add(f, e.gpr_value(i.X.RA), e.gpr_value(i.X.RB)); +// jit_value_t v = e.fpr_value(i.X.RT); +// v = b.CreateBitCast(v, jit_type_nint); +// e.WriteMemory(i.address, ea, 8, v); +// e.update_gpr_value(i.X.RA, ea); + +// return 0; +// } + +// XEEMITTER(stfdx, 0x7C0005AE, X )(X64Emitter& e, jit_function_t f, InstrData& i) { +// // if RA = 0 then +// // b <- 0 +// // else +// // b <- (RA) +// // EA <- b + (RB) +// // MEM(EA, 8) <- (FRS) + +// jit_value_t ea = e.gpr_value(i.X.RB); +// if (i.X.RA) { +// ea = jit_insn_add(f, e.gpr_value(i.X.RA), ea); 
+// } +// jit_value_t v = e.fpr_value(i.X.RT); +// v = b.CreateBitCast(v, jit_type_nint); +// e.WriteMemory(i.address, ea, 8, v); + +// return 0; +// } + +// XEEMITTER(stfiwx, 0x7C0007AE, X )(X64Emitter& e, jit_function_t f, InstrData& i) { +// // if RA = 0 then +// // b <- 0 +// // else +// // b <- (RA) +// // EA <- b + (RB) +// // MEM(EA, 4) <- (FRS)[32:63] + +// jit_value_t ea = e.gpr_value(i.X.RB); +// if (i.X.RA) { +// ea = jit_insn_add(f, e.gpr_value(i.D.RA), ea); +// } +// jit_value_t v = e.fpr_value(i.X.RT); +// v = b.CreateBitCast(v, jit_type_nint); +// e.WriteMemory(i.address, ea, 4, v); + +// return 0; +// } + +// XEEMITTER(stfs, 0xD0000000, D )(X64Emitter& e, jit_function_t f, InstrData& i) { +// // if RA = 0 then +// // b <- 0 +// // else +// // b <- (RA) +// // EA <- b + EXTS(D) +// // MEM(EA, 4) <- SINGLE(FRS) + +// jit_value_t ea = e.get_int64(XEEXTS16(i.D.DS)); +// if (i.D.RA) { +// ea = jit_insn_add(f, e.gpr_value(i.D.RA), ea); +// } +// jit_value_t v = e.fpr_value(i.D.RT); +// v = b.CreateBitCast(b.CreateFPTrunc(v, b.getFloatTy()), b.getInt32Ty()); +// e.WriteMemory(i.address, ea, 4, v); + +// return 0; +// } + +// XEEMITTER(stfsu, 0xD4000000, D )(X64Emitter& e, jit_function_t f, InstrData& i) { +// // EA <- (RA) + EXTS(D) +// // MEM(EA, 4) <- SINGLE(FRS) +// // RA <- EA + +// jit_value_t ea = jit_insn_add(f, e.gpr_value(i.D.RA), +// e.get_int64(XEEXTS16(i.D.DS))); +// jit_value_t v = e.fpr_value(i.D.RT); +// v = b.CreateBitCast(b.CreateFPTrunc(v, b.getFloatTy()), b.getInt32Ty()); +// e.WriteMemory(i.address, ea, 4, v); +// e.update_gpr_value(i.D.RA, ea); + +// return 0; +// } + +// XEEMITTER(stfsux, 0x7C00056E, X )(X64Emitter& e, jit_function_t f, InstrData& i) { +// // EA <- (RA) + (RB) +// // MEM(EA, 4) <- SINGLE(FRS) +// // RA <- EA + +// jit_value_t ea = jit_insn_add(f, e.gpr_value(i.X.RA), e.gpr_value(i.X.RB)); +// jit_value_t v = e.fpr_value(i.X.RT); +// v = b.CreateBitCast(b.CreateFPTrunc(v, b.getFloatTy()), b.getInt32Ty()); +// e.WriteMemory(i.address, ea, 4, v); +// e.update_gpr_value(i.X.RA, ea); + +// return 0; +// } + +// XEEMITTER(stfsx, 0x7C00052E, X )(X64Emitter& e, jit_function_t f, InstrData& i) { +// // if RA = 0 then +// // b <- 0 +// // else +// // b <- (RA) +// // EA <- b + (RB) +// // MEM(EA, 4) <- SINGLE(FRS) + +// jit_value_t ea = e.gpr_value(i.X.RB); +// if (i.X.RA) { +// ea = jit_insn_add(f, e.gpr_value(i.X.RA), ea); +// } +// jit_value_t v = e.fpr_value(i.X.RT); +// v = b.CreateBitCast(b.CreateFPTrunc(v, b.getFloatTy()), b.getInt32Ty()); +// e.WriteMemory(i.address, ea, 4, v); + +// return 0; +// } + + +// Cache management (A-27) + +XEEMITTER(dcbf, 0x7C0000AC, X )(X64Emitter& e, jit_function_t f, InstrData& i) { + XEINSTRNOTIMPLEMENTED(); + return 1; +} + +XEEMITTER(dcbst, 0x7C00006C, X )(X64Emitter& e, jit_function_t f, InstrData& i) { + XEINSTRNOTIMPLEMENTED(); + return 1; +} + +XEEMITTER(dcbt, 0x7C00022C, X )(X64Emitter& e, jit_function_t f, InstrData& i) { + // No-op for now. + // TODO(benvanik): use @llvm.prefetch + return 0; +} + +XEEMITTER(dcbtst, 0x7C0001EC, X )(X64Emitter& e, jit_function_t f, InstrData& i) { + // No-op for now. 
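+  // (Like dcbt above, this is only a cache-touch hint: dropping it can cost
+  // performance but never correctness, so emitting nothing here is safe.)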
+ // TODO(benvanik): use @llvm.prefetch + return 0; +} + +XEEMITTER(dcbz, 0x7C0007EC, X )(X64Emitter& e, jit_function_t f, InstrData& i) { + // or dcbz128 0x7C2007EC + XEINSTRNOTIMPLEMENTED(); + return 1; +} + +XEEMITTER(icbi, 0x7C0007AC, X )(X64Emitter& e, jit_function_t f, InstrData& i) { + XEINSTRNOTIMPLEMENTED(); + return 1; +} + + +void X64RegisterEmitCategoryMemory() { + XEREGISTERINSTR(lbz, 0x88000000); + XEREGISTERINSTR(lbzu, 0x8C000000); + XEREGISTERINSTR(lbzux, 0x7C0000EE); + XEREGISTERINSTR(lbzx, 0x7C0000AE); + XEREGISTERINSTR(ld, 0xE8000000); + XEREGISTERINSTR(ldu, 0xE8000001); + XEREGISTERINSTR(ldux, 0x7C00006A); + XEREGISTERINSTR(ldx, 0x7C00002A); + XEREGISTERINSTR(lha, 0xA8000000); + XEREGISTERINSTR(lhau, 0xAC000000); + XEREGISTERINSTR(lhaux, 0x7C0002EE); + XEREGISTERINSTR(lhax, 0x7C0002AE); + XEREGISTERINSTR(lhz, 0xA0000000); + XEREGISTERINSTR(lhzu, 0xA4000000); + XEREGISTERINSTR(lhzux, 0x7C00026E); + XEREGISTERINSTR(lhzx, 0x7C00022E); + XEREGISTERINSTR(lwa, 0xE8000002); + XEREGISTERINSTR(lwaux, 0x7C0002EA); + XEREGISTERINSTR(lwax, 0x7C0002AA); + XEREGISTERINSTR(lwz, 0x80000000); + XEREGISTERINSTR(lwzu, 0x84000000); + XEREGISTERINSTR(lwzux, 0x7C00006E); + XEREGISTERINSTR(lwzx, 0x7C00002E); + XEREGISTERINSTR(stb, 0x98000000); + XEREGISTERINSTR(stbu, 0x9C000000); + XEREGISTERINSTR(stbux, 0x7C0001EE); + XEREGISTERINSTR(stbx, 0x7C0001AE); + XEREGISTERINSTR(std, 0xF8000000); + XEREGISTERINSTR(stdu, 0xF8000001); + XEREGISTERINSTR(stdux, 0x7C00016A); + XEREGISTERINSTR(stdx, 0x7C00012A); + XEREGISTERINSTR(sth, 0xB0000000); + XEREGISTERINSTR(sthu, 0xB4000000); + XEREGISTERINSTR(sthux, 0x7C00036E); + XEREGISTERINSTR(sthx, 0x7C00032E); + XEREGISTERINSTR(stw, 0x90000000); + XEREGISTERINSTR(stwu, 0x94000000); + XEREGISTERINSTR(stwux, 0x7C00016E); + XEREGISTERINSTR(stwx, 0x7C00012E); + XEREGISTERINSTR(lhbrx, 0x7C00062C); + XEREGISTERINSTR(lwbrx, 0x7C00042C); + XEREGISTERINSTR(ldbrx, 0x7C000428); + XEREGISTERINSTR(sthbrx, 0x7C00072C); + XEREGISTERINSTR(stwbrx, 0x7C00052C); + XEREGISTERINSTR(stdbrx, 0x7C000528); + XEREGISTERINSTR(lmw, 0xB8000000); + XEREGISTERINSTR(stmw, 0xBC000000); + XEREGISTERINSTR(lswi, 0x7C0004AA); + XEREGISTERINSTR(lswx, 0x7C00042A); + XEREGISTERINSTR(stswi, 0x7C0005AA); + XEREGISTERINSTR(stswx, 0x7C00052A); + XEREGISTERINSTR(eieio, 0x7C0006AC); + XEREGISTERINSTR(isync, 0x4C00012C); + XEREGISTERINSTR(ldarx, 0x7C0000A8); + // XEREGISTERINSTR(lwarx, 0x7C000028); + // XEREGISTERINSTR(stdcx, 0x7C0001AD); + // XEREGISTERINSTR(stwcx, 0x7C00012D); + XEREGISTERINSTR(sync, 0x7C0004AC); + // XEREGISTERINSTR(lfd, 0xC8000000); + // XEREGISTERINSTR(lfdu, 0xCC000000); + // XEREGISTERINSTR(lfdux, 0x7C0004EE); + // XEREGISTERINSTR(lfdx, 0x7C0004AE); + // XEREGISTERINSTR(lfs, 0xC0000000); + // XEREGISTERINSTR(lfsu, 0xC4000000); + // XEREGISTERINSTR(lfsux, 0x7C00046E); + // XEREGISTERINSTR(lfsx, 0x7C00042E); + // XEREGISTERINSTR(stfd, 0xD8000000); + // XEREGISTERINSTR(stfdu, 0xDC000000); + // XEREGISTERINSTR(stfdux, 0x7C0005EE); + // XEREGISTERINSTR(stfdx, 0x7C0005AE); + // XEREGISTERINSTR(stfiwx, 0x7C0007AE); + // XEREGISTERINSTR(stfs, 0xD0000000); + // XEREGISTERINSTR(stfsu, 0xD4000000); + // XEREGISTERINSTR(stfsux, 0x7C00056E); + // XEREGISTERINSTR(stfsx, 0x7C00052E); + XEREGISTERINSTR(dcbf, 0x7C0000AC); + XEREGISTERINSTR(dcbst, 0x7C00006C); + XEREGISTERINSTR(dcbt, 0x7C00022C); + XEREGISTERINSTR(dcbtst, 0x7C0001EC); + XEREGISTERINSTR(dcbz, 0x7C0007EC); + XEREGISTERINSTR(icbi, 0x7C0007AC); +} + + +} // namespace x64 +} // namespace cpu +} // namespace xe diff --git 
a/src/xenia/cpu/x64/x64_emitter.cc b/src/xenia/cpu/x64/x64_emitter.cc new file mode 100644 index 000000000..a3814af10 --- /dev/null +++ b/src/xenia/cpu/x64/x64_emitter.cc @@ -0,0 +1,1408 @@ +/** + ****************************************************************************** + * Xenia : Xbox 360 Emulator Research Project * + ****************************************************************************** + * Copyright 2013 Ben Vanik. All rights reserved. * + * Released under the BSD license - see LICENSE in the root for more details. * + ****************************************************************************** + */ + +#include + +#include +#include + +#include + + +using namespace xe::cpu::ppc; +using namespace xe::cpu::sdb; +using namespace xe::cpu::x64; + + +DEFINE_bool(memory_address_verification, false, + "Whether to add additional checks to generated memory load/stores."); +DEFINE_bool(log_codegen, false, + "Log codegen to stdout."); + + +/** + * This generates function code. + * One context is created and shared for each function to generate. + * Each basic block in the function is created and stashed in one pass, then + * filled in the next. + * + * This context object is a stateful representation of the current machine state + * and all accessors to registers should occur through it. By doing so it's + * possible to exploit the SSA nature of LLVM to reuse register values within + * a function without needing to flush to memory. + * + * Function calls (any branch outside of the function) will result in an + * expensive flush of registers. + * + * TODO(benvanik): track arguments by looking for register reads without writes + * TODO(benvanik): avoid flushing registers for leaf nodes + * TODO(benvnaik): pass return value in LLVM return, not by memory + */ + + +X64Emitter::X64Emitter(xe_memory_ref memory, jit_context_t context) { + memory_ = memory; + context_ = context; + + // Grab global exports. + cpu::GetGlobalExports(&global_exports_); + + // Function type for all functions. 
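+  // Every generated function has the same shape, roughly:
+  //   void fn(xe_ppc_state_t* state, uint64_t lr);
+  // where param 0 is the PPC register/state block and param 1 is the
+  // link-register value supplied by the caller; that is what the
+  // void_ptr + nuint parameter list below encodes.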
+ // TODO(benvanik): evaluate using jit_abi_fastcall + jit_type_t fn_params[] = { + jit_type_void_ptr, + jit_type_nuint, + }; + fn_signature_ = jit_type_create_signature( + jit_abi_cdecl, + jit_type_void, + fn_params, XECOUNT(fn_params), + 0); + + jit_type_t shim_params[] = { + jit_type_void_ptr, + jit_type_void_ptr, + }; + shim_signature_ = jit_type_create_signature( + jit_abi_cdecl, + jit_type_void, + shim_params, XECOUNT(shim_params), + 0); + + jit_type_t global_export_params_2[] = { + jit_type_void_ptr, + jit_type_ulong, + }; + global_export_signature_2_ = jit_type_create_signature( + jit_abi_cdecl, + jit_type_void, + global_export_params_2, XECOUNT(global_export_params_2), + 0); + jit_type_t global_export_params_3[] = { + jit_type_void_ptr, + jit_type_ulong, + jit_type_ulong, + }; + global_export_signature_3_ = jit_type_create_signature( + jit_abi_cdecl, + jit_type_void, + global_export_params_3, XECOUNT(global_export_params_3), + 0); + jit_type_t global_export_params_4[] = { + jit_type_void_ptr, + jit_type_ulong, + jit_type_ulong, + jit_type_void_ptr, + }; + global_export_signature_4_ = jit_type_create_signature( + jit_abi_cdecl, + jit_type_void, + global_export_params_4, XECOUNT(global_export_params_4), + 0); +} + +X64Emitter::~X64Emitter() { + jit_type_free(fn_signature_); + jit_type_free(shim_signature_); + jit_type_free(global_export_signature_2_); + jit_type_free(global_export_signature_3_); + jit_type_free(global_export_signature_4_); +} + +jit_context_t X64Emitter::context() { + return context_; +} + +namespace { +int libjit_on_demand_compile(jit_function_t fn) { + X64Emitter* emitter = (X64Emitter*)jit_function_get_meta(fn, 0x1000); + FunctionSymbol* symbol = (FunctionSymbol*)jit_function_get_meta(fn, 0x1001); + XELOGE("Compile(%s): beginning on-demand compilation...", symbol->name()); + int result_code = emitter->MakeFunction(symbol, fn); + if (result_code) { + XELOGCPU("Compile(%s): failed to make function", symbol->name()); + return JIT_RESULT_COMPILE_ERROR; + } + return JIT_RESULT_OK; +} +} + +int X64Emitter::PrepareFunction(FunctionSymbol* symbol) { + if (symbol->impl_value) { + return 0; + } + + jit_context_build_start(context_); + + // Create the function and setup for on-demand compilation. + jit_function_t fn = jit_function_create(context_, fn_signature_); + jit_function_set_meta(fn, 0x1000, this, NULL, 0); + jit_function_set_meta(fn, 0x1001, symbol, NULL, 0); + jit_function_set_on_demand_compiler(fn, libjit_on_demand_compile); + + // Set optimization options. + // TODO(benvanik): add gflags + uint32_t max_level = jit_function_get_max_optimization_level(); + uint32_t opt_level = max_level; // 0 + opt_level = MIN(max_level, MAX(0, opt_level)); + jit_function_set_optimization_level(fn, opt_level); + + // Stash for later. 
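+  // (symbol->impl_value doubles as the cache: PrepareFunction returns early
+  // when it is already set, and call_function casts it back to a
+  // jit_function_t when emitting direct calls.)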
+ symbol->impl_value = fn; + jit_context_build_end(context_); + + return 0; +} + +int X64Emitter::MakeFunction(FunctionSymbol* symbol, jit_function_t fn) { + symbol_ = symbol; + fn_ = fn; + + fn_block_ = NULL; + return_block_ = jit_label_undefined; + internal_indirection_block_ = jit_label_undefined; + external_indirection_block_ = jit_label_undefined; + + bbs_.clear(); + + cia_ = 0; + + access_bits_.Clear(); + + locals_.indirection_target = NULL; + locals_.indirection_cia = NULL; + + locals_.xer = NULL; + locals_.lr = NULL; + locals_.ctr = NULL; + for (size_t n = 0; n < XECOUNT(locals_.cr); n++) { + locals_.cr[n] = NULL; + } + for (size_t n = 0; n < XECOUNT(locals_.gpr); n++) { + locals_.gpr[n] = NULL; + } + for (size_t n = 0; n < XECOUNT(locals_.fpr); n++) { + locals_.fpr[n] = NULL; + } + + if (FLAGS_log_codegen) { + printf("%s:\n", symbol->name()); + } + + int result_code = 0; + switch (symbol->type) { + case FunctionSymbol::User: + result_code = MakeUserFunction(); + break; + case FunctionSymbol::Kernel: + if (symbol->kernel_export && symbol->kernel_export->is_implemented) { + result_code = MakePresentImportFunction(); + } else { + result_code = MakeMissingImportFunction(); + } + break; + default: + XEASSERTALWAYS(); + result_code = 1; + break; + } + + if (!result_code) { + // libjit opcodes. + if (FLAGS_log_codegen) { + jit_dump_function(stdout, fn_, symbol->name()); + } + + // Compile right now. + jit_function_compile(fn_); + + // x64 instructions. + if (FLAGS_log_codegen) { + jit_dump_function(stdout, fn_, symbol->name()); + } + + XELOGE("Compile(%s): compiled to 0x%p - 0x%p (%db)", + symbol->name(), + jit_function_get_code_start_address(fn_), + jit_function_get_code_end_address(fn_), + (uint32_t)( + (intptr_t)jit_function_get_code_end_address(fn_) - + (intptr_t)jit_function_get_code_start_address(fn_))); + } + + return result_code; +} + +int X64Emitter::MakeUserFunction() { + if (FLAGS_trace_user_calls) { + jit_value_t trace_args[] = { + jit_value_get_param(fn_, 0), + jit_value_create_long_constant(fn_, jit_type_ulong, + (jit_ulong)symbol_->start_address), + jit_value_get_param(fn_, 1), + jit_value_create_long_constant(fn_, jit_type_ulong, + (jit_ulong)symbol_), + }; + jit_insn_call_native( + fn_, + "XeTraceUserCall", + global_exports_.XeTraceUserCall, + global_export_signature_4_, + trace_args, XECOUNT(trace_args), + 0); + } + + // Emit. 
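+  // GenerateBasicBlocks runs two passes over the SDB blocks: pass 1 creates
+  // labels and accumulates register access bits, pass 2 emits instructions,
+  // and the shared return/indirection blocks are appended at the end.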
+ GenerateBasicBlocks(); + return 0; +} + +int X64Emitter::MakePresentImportFunction() { + if (FLAGS_trace_kernel_calls) { + jit_value_t trace_args[] = { + jit_value_get_param(fn_, 0), + jit_value_create_long_constant(fn_, jit_type_ulong, + (jit_ulong)symbol_->start_address), + jit_value_get_param(fn_, 1), + jit_value_create_long_constant(fn_, jit_type_ulong, + (jit_ulong)symbol_->kernel_export), + }; + jit_insn_call_native( + fn_, + "XeTraceKernelCall", + global_exports_.XeTraceKernelCall, + global_export_signature_4_, + trace_args, XECOUNT(trace_args), + 0); + } + + // void shim(ppc_state*, shim_data*) + jit_value_t shim_args[] = { + jit_value_get_param(fn_, 0), + jit_value_create_long_constant(fn_, jit_type_ulong, + (jit_ulong)symbol_->kernel_export->function_data.shim_data), + }; + jit_insn_call_native( + fn_, + symbol_->kernel_export->name, + symbol_->kernel_export->function_data.shim, + shim_signature_, + shim_args, XECOUNT(shim_args), + 0); + + jit_insn_return(fn_, NULL); + + return 0; +} + +int X64Emitter::MakeMissingImportFunction() { + if (FLAGS_trace_kernel_calls) { + jit_value_t trace_args[] = { + jit_value_get_param(fn_, 0), + jit_value_create_long_constant(fn_, jit_type_ulong, + (jit_ulong)symbol_->start_address), + jit_value_get_param(fn_, 1), + jit_value_create_long_constant(fn_, jit_type_ulong, + (jit_ulong)symbol_->kernel_export), + }; + jit_insn_call_native( + fn_, + "XeTraceKernelCall", + global_exports_.XeTraceKernelCall, + global_export_signature_4_, + trace_args, XECOUNT(trace_args), + 0); + } + + jit_insn_return(fn_, NULL); + + return 0; +} + +FunctionSymbol* X64Emitter::symbol() { + return symbol_; +} + +jit_function_t X64Emitter::fn() { + return fn_; +} + +FunctionBlock* X64Emitter::fn_block() { + return fn_block_; +} + +void X64Emitter::GenerateBasicBlocks() { + // If this function is empty, abort! + if (!symbol_->blocks.size()) { + jit_insn_return(fn_, NULL); + return; + } + + // Pass 1 creates all of the labels - this way we can branch to them. + // We also track registers used so that when know which ones to fill/spill. + // No actual blocks or instructions are created here. + // TODO(benvanik): move this to SDB? would remove an entire pass over the + // code. + for (std::map::iterator it = + symbol_->blocks.begin(); it != symbol_->blocks.end(); ++it) { + FunctionBlock* block = it->second; + XEIGNORE(PrepareBasicBlock(block)); + } + + // Setup all local variables now that we know what we need. + // This happens in the entry block. + SetupLocals(); + + // Setup initial register fill in the entry block. + // We can only do this once all the locals have been created. + FillRegisters(); + + // Pass 2 fills in instructions. + for (std::map::iterator it = symbol_->blocks.begin(); + it != symbol_->blocks.end(); ++it) { + FunctionBlock* block = it->second; + GenerateBasicBlock(block); + } + + // Setup the shared return/indirection/etc blocks now that we know all the + // blocks we need and all the registers used. + GenerateSharedBlocks(); +} + +void X64Emitter::GenerateSharedBlocks() { + // Create a return block. + // This spills registers and returns. All non-tail returns should branch + // here to do the return and ensure registers are spilled. + // This will be moved to the end after all the other blocks are created. + jit_insn_label(fn_, &return_block_); + SpillRegisters(); + jit_insn_return(fn_, NULL); + +// jit_value_t indirect_branch = gen_module_->getFunction("XeIndirectBranch"); +// +// // Build indirection block on demand. 
+// // We have already prepped all basic blocks, so we can build these tables now. +// if (external_indirection_block_) { +// // This will spill registers and call the external function. +// // It is only meant for LK=0. +// b.SetInsertPoint(external_indirection_block_); +// SpillRegisters(); +// b.CreateCall3(indirect_branch, +// fn_->arg_begin(), +// b.CreateLoad(locals_.indirection_target), +// b.CreateLoad(locals_.indirection_cia)); +// b.CreateRetVoid(); +// } +// +// if (internal_indirection_block_) { +// // This will not spill registers and instead try to switch on local blocks. +// // If it fails then the external indirection path is taken. +// // NOTE: we only generate this if a likely local branch is taken. +// b.SetInsertPoint(internal_indirection_block_); +// SwitchInst* switch_i = b.CreateSwitch( +// b.CreateLoad(locals_.indirection_target), +// external_indirection_block_, +// static_cast(bbs_.size())); +// for (std::map::iterator it = bbs_.begin(); +// it != bbs_.end(); ++it) { +// switch_i->addCase(b.getInt64(it->first), it->second); +// } +// } +} + +int X64Emitter::PrepareBasicBlock(FunctionBlock* block) { + // Add an undefined entry in the table. + // The label will be created on-demand. + bbs_.insert(std::pair( + block->start_address, jit_label_undefined)); + + // TODO(benvanik): set label name? would help debugging disasm + // char name[32]; + // xesnprintfa(name, XECOUNT(name), "loc_%.8X", block->start_address); + + // Scan and disassemble each instruction in the block to get accurate + // register access bits. In the future we could do other optimization checks + // in this pass. + // TODO(benvanik): perhaps we want to stash this for each basic block? + // We could use this for faster checking of cr/ca checks/etc. + InstrAccessBits access_bits; + uint8_t* p = xe_memory_addr(memory_, 0); + for (uint32_t ia = block->start_address; ia <= block->end_address; ia += 4) { + InstrData i; + i.address = ia; + i.code = XEGETUINT32BE(p + ia); + i.type = ppc::GetInstrType(i.code); + + // Ignore unknown or ones with no disassembler fn. + if (!i.type || !i.type->disassemble) { + continue; + } + + // We really need to know the registers modified, so die if we've been lazy + // and haven't implemented the disassemble method yet. + ppc::InstrDisasm d; + XEASSERTNOTNULL(i.type->disassemble); + int result_code = i.type->disassemble(i, d); + XEASSERTZERO(result_code); + if (result_code) { + return result_code; + } + + // Accumulate access bits. + access_bits.Extend(d.access_bits); + } + + // Add in access bits to function access bits. + access_bits_.Extend(access_bits); + + return 0; +} + +void X64Emitter::GenerateBasicBlock(FunctionBlock* block) { + fn_block_ = block; + + // Create new block. + // This will create a label if it hasn't already been done. + std::map::iterator label_it = + bbs_.find(block->start_address); + XEASSERT(label_it != bbs_.end()); + jit_insn_label(fn_, &label_it->second); + + if (FLAGS_log_codegen) { + printf(" bb %.8X-%.8X:\n", block->start_address, block->end_address); + } + + // Walk instructions in block. 
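+  // Each PPC instruction is a fixed 4-byte big-endian word: fetch it with
+  // XEGETUINT32BE, look up its InstrType, optionally emit trace calls, then
+  // dispatch to the registered emitter. Decode or emission failures fall
+  // back to a register spill plus an XeInvalidInstruction call so execution
+  // can continue past the bad instruction.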
+ uint8_t* p = xe_memory_addr(memory_, 0); + for (uint32_t ia = block->start_address; ia <= block->end_address; ia += 4) { + InstrData i; + i.address = ia; + i.code = XEGETUINT32BE(p + ia); + i.type = ppc::GetInstrType(i.code); + + jit_value_t trace_args[] = { + jit_value_get_param(fn_, 0), + jit_value_create_long_constant(fn_, jit_type_ulong, + (jit_ulong)i.address), + jit_value_create_long_constant(fn_, jit_type_ulong, + (jit_ulong)i.code), + }; + + // Add debugging tag. + // TODO(benvanik): mark type. + //jit_insn_mark_breakpoint(fn_, 1, ia); + + if (FLAGS_trace_instructions) { + SpillRegisters(); + jit_insn_call_native( + fn_, + "XeTraceInstruction", + global_exports_.XeTraceInstruction, + global_export_signature_3_, + trace_args, XECOUNT(trace_args), + 0); + } + + if (!i.type) { + XELOGCPU("Invalid instruction %.8X %.8X", ia, i.code); + SpillRegisters(); + jit_insn_call_native( + fn_, + "XeInvalidInstruction", + global_exports_.XeInvalidInstruction, + global_export_signature_3_, + trace_args, XECOUNT(trace_args), + 0); + continue; + } + + if (FLAGS_log_codegen) { + if (i.type->disassemble) { + ppc::InstrDisasm d; + i.type->disassemble(i, d); + std::string disasm; + d.Dump(disasm); + printf(" %.8X: %.8X %s\n", ia, i.code, disasm.c_str()); + } else { + printf(" %.8X: %.8X %s ???\n", ia, i.code, i.type->name); + } + } + + typedef int (*InstrEmitter)(X64Emitter& g, jit_function_t f, + InstrData& i); + InstrEmitter emit = (InstrEmitter)i.type->emit; + if (!i.type->emit || emit(*this, fn_, i)) { + // This printf is handy for sort/uniquify to find instructions. + //printf("unimplinstr %s\n", i.type->name); + + XELOGCPU("Unimplemented instr %.8X %.8X %s", + ia, i.code, i.type->name); + SpillRegisters(); + jit_insn_call_native( + fn_, + "XeInvalidInstruction", + global_exports_.XeInvalidInstruction, + global_export_signature_3_, + trace_args, XECOUNT(trace_args), + 0); + } + } + + // If we fall through, create the branch. + if (block->outgoing_type == FunctionBlock::kTargetNone) { + // BasicBlock* next_bb = GetNextBasicBlock(); + // XEASSERTNOTNULL(next_bb); + // b.CreateBr(next_bb); + } else if (block->outgoing_type == FunctionBlock::kTargetUnknown) { + // Hrm. + // TODO(benvanik): assert this doesn't occur - means a bad sdb run! 
+ XELOGCPU("SDB function scan error in %.8X: bb %.8X has unknown exit", + symbol_->start_address, block->start_address); + jit_insn_return(fn_, NULL); + } + + // TODO(benvanik): finish up BB +} + +jit_value_t X64Emitter::get_int32(int32_t value) { + return jit_value_create_nint_constant(fn_, jit_type_int, value); +} + +jit_value_t X64Emitter::get_uint32(uint32_t value) { + return jit_value_create_nint_constant(fn_, jit_type_uint, value); +} + +jit_value_t X64Emitter::get_int64(int64_t value) { + return jit_value_create_nint_constant(fn_, jit_type_nint, value); +} + +jit_value_t X64Emitter::get_uint64(uint64_t value) { + return jit_value_create_nint_constant(fn_, jit_type_nuint, value); +} + +jit_value_t X64Emitter::make_signed(jit_value_t value) { + jit_type_t source_type = jit_value_get_type(value); + source_type = jit_type_normalize(source_type); + jit_type_t signed_source_type = source_type; + switch (jit_type_get_kind(source_type)) { + case JIT_TYPE_UBYTE: signed_source_type = jit_type_sbyte; break; + case JIT_TYPE_USHORT: signed_source_type = jit_type_short; break; + case JIT_TYPE_UINT: signed_source_type = jit_type_int; break; + case JIT_TYPE_NUINT: signed_source_type = jit_type_nint; break; + case JIT_TYPE_ULONG: signed_source_type = jit_type_long; break; + } + if (signed_source_type != source_type) { + value = jit_insn_convert(fn_, value, signed_source_type, 0); + } + return value; +} + +jit_value_t X64Emitter::make_unsigned(jit_value_t value) { + jit_type_t source_type = jit_value_get_type(value); + source_type = jit_type_normalize(source_type); + jit_type_t unsigned_source_type = source_type; + switch (jit_type_get_kind(source_type)) { + case JIT_TYPE_SBYTE: unsigned_source_type = jit_type_ubyte; break; + case JIT_TYPE_SHORT: unsigned_source_type = jit_type_ushort; break; + case JIT_TYPE_INT: unsigned_source_type = jit_type_uint; break; + case JIT_TYPE_NINT: unsigned_source_type = jit_type_nuint; break; + case JIT_TYPE_LONG: unsigned_source_type = jit_type_ulong; break; + } + if (unsigned_source_type != source_type) { + value = jit_insn_convert(fn_, value, unsigned_source_type, 0); + } + return value; +} + +jit_value_t X64Emitter::sign_extend(jit_value_t value, + jit_type_t target_type) { + // TODO(benvanik): better conversion checking. + // X64 follows the C rules, which is that the source type indicates whether + // sign extension occurs. + // For example, int -> ulong is sign extended, + // uint -> ulong is zero extended. + // We convert to the same type with the expected sign and then use the built + // in convert, only if needed. + + // No-op if the same types. + jit_type_t source_type = jit_value_get_type(value); + source_type = jit_type_normalize(source_type); + target_type = jit_type_normalize(target_type); + if (source_type == target_type) { + return value; + } + + // If just a sign change, simple conversion. + if (jit_type_get_size(source_type) == jit_type_get_size(target_type)) { + return jit_insn_convert(fn_, value, target_type, 0); + } + + // Otherwise, need to convert to signed of the current type then extend. + value = make_signed(value); + return jit_insn_convert(fn_, value, target_type, 0); +} + +jit_value_t X64Emitter::zero_extend(jit_value_t value, + jit_type_t target_type) { + // See the comment in ::sign_extend for more information. + + // No-op if the same types. 
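+  // (For example, a 32-bit 0xFFFFFFFF must widen to 0x00000000FFFFFFFF here,
+  // while the same bit pattern run through sign_extend becomes
+  // 0xFFFFFFFFFFFFFFFF; forcing the value unsigned via make_unsigned before
+  // jit_insn_convert is what guarantees the zero fill.)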
+ jit_type_t source_type = jit_value_get_type(value); + source_type = jit_type_normalize(source_type); + target_type = jit_type_normalize(target_type); + if (source_type == target_type) { + return value; + } + + // If just a sign change, simple conversion. + if (jit_type_get_size(source_type) == jit_type_get_size(target_type)) { + return jit_insn_convert(fn_, value, target_type, 0); + } + + // Otherwise, need to convert to signed of the current type then extend. + value = make_unsigned(value); + return jit_insn_convert(fn_, value, target_type, 0); +} + +jit_value_t X64Emitter::trunc_to_sbyte(jit_value_t value) { + jit_type_t source_type = jit_value_get_type(value); + source_type = jit_type_normalize(source_type); + if (source_type == jit_type_sbyte) { + return value; + } + return jit_insn_convert(fn_, value, jit_type_sbyte, 0); +} + +jit_value_t X64Emitter::trunc_to_ubyte(jit_value_t value) { + jit_type_t source_type = jit_value_get_type(value); + source_type = jit_type_normalize(source_type); + if (source_type == jit_type_ubyte) { + return value; + } + return jit_insn_convert(fn_, value, jit_type_ubyte, 0); +} + +jit_value_t X64Emitter::trunc_to_short(jit_value_t value) { + jit_type_t source_type = jit_value_get_type(value); + source_type = jit_type_normalize(source_type); + if (source_type == jit_type_sbyte) { + return value; + } + return jit_insn_convert(fn_, value, jit_type_short, 0); +} + +jit_value_t X64Emitter::trunc_to_int(jit_value_t value) { + jit_type_t source_type = jit_value_get_type(value); + source_type = jit_type_normalize(source_type); + if (source_type == jit_type_sbyte) { + return value; + } + return jit_insn_convert(fn_, value, jit_type_int, 0); +} + +int X64Emitter::branch_to_block(uint32_t address) { + std::map::iterator it = bbs_.find(address); + return jit_insn_branch(fn_, &it->second); +} + +int X64Emitter::branch_to_block_if(uint32_t address, jit_value_t value) { + std::map::iterator it = bbs_.find(address); + if (value) { + return jit_insn_branch_if(fn_, value, &it->second); + } else { + return jit_insn_branch(fn_, &it->second); + } +} + +int X64Emitter::branch_to_block_if_not(uint32_t address, jit_value_t value) { + XEASSERTNOTNULL(value); + std::map::iterator it = bbs_.find(address); + return jit_insn_branch_if_not(fn_, value, &it->second); +} + +int X64Emitter::branch_to_return() { + return jit_insn_branch(fn_, &return_block_); +} + +int X64Emitter::branch_to_return_if(jit_value_t value) { + return jit_insn_branch_if(fn_, value, &return_block_); +} + +int X64Emitter::branch_to_return_if_not(jit_value_t value) { + return jit_insn_branch_if_not(fn_, value, &return_block_); +} + +int X64Emitter::call_function(FunctionSymbol* target_symbol, + jit_value_t lr, bool tail) { + PrepareFunction(target_symbol); + jit_function_t target_fn = (jit_function_t)target_symbol->impl_value; + XEASSERTNOTNULL(target_fn); + int flags = 0; + if (tail) { + flags |= JIT_CALL_TAIL; + } + jit_value_t args[] = {jit_value_get_param(fn_, 0), lr}; + jit_insn_call(fn_, target_symbol->name(), target_fn, fn_signature_, + args, XECOUNT(args), flags); + return 1; +} + +void X64Emitter::TraceBranch(uint32_t cia) { + SpillRegisters(); + + // Pick target. If it's an indirection the tracing function will handle it. 
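+  // kXEPPCRegLR / kXEPPCRegCTR act as sentinel target values so that
+  // XeTraceBranch can substitute the live LR/CTR from ppc_state itself.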
+ uint64_t target = 0; + switch (fn_block_->outgoing_type) { + case FunctionBlock::kTargetBlock: + target = fn_block_->outgoing_address; + break; + case FunctionBlock::kTargetFunction: + target = fn_block_->outgoing_function->start_address; + break; + case FunctionBlock::kTargetLR: + target = kXEPPCRegLR; + break; + case FunctionBlock::kTargetCTR: + target = kXEPPCRegCTR; + break; + default: + case FunctionBlock::kTargetNone: + XEASSERTALWAYS(); + break; + } + + jit_value_t trace_args[] = { + jit_value_get_param(fn_, 0), + jit_value_create_long_constant(fn_, jit_type_ulong, cia), + jit_value_create_long_constant(fn_, jit_type_ulong, target), + }; + jit_insn_call_native( + fn_, + "XeTraceBranch", + global_exports_.XeTraceBranch, + global_export_signature_3_, + trace_args, XECOUNT(trace_args), + 0); +} + +int X64Emitter::GenerateIndirectionBranch(uint32_t cia, jit_value_t target, + bool lk, bool likely_local) { + // This function is called by the control emitters when they know that an + // indirect branch is required. + // It first tries to see if the branch is to an address within the function + // and, if so, uses a local switch table. If that fails because we don't know + // the block the function is regenerated (ACK!). If the target is external + // then an external call occurs. + + // TODO(benvanik): port indirection. + //XEASSERTALWAYS(); + + // BasicBlock* next_block = GetNextBasicBlock(); + + // PushInsertPoint(); + + // // Request builds of the indirection blocks on demand. + // // We can't build here because we don't know what registers will be needed + // // yet, so we just create the blocks and let GenerateSharedBlocks handle it + // // after we are done with all user instructions. + // if (!external_indirection_block_) { + // // Setup locals in the entry block. + // b.SetInsertPoint(&fn_->getEntryBlock()); + // locals_.indirection_target = b.CreateAlloca( + // jit_type_nuint, 0, "indirection_target"); + // locals_.indirection_cia = b.CreateAlloca( + // jit_type_nuint, 0, "indirection_cia"); + + // external_indirection_block_ = BasicBlock::Create( + // *context_, "external_indirection_block", fn_, return_block_); + // } + // if (likely_local && !internal_indirection_block_) { + // internal_indirection_block_ = BasicBlock::Create( + // *context_, "internal_indirection_block", fn_, return_block_); + // } + + // PopInsertPoint(); + + // // Check to see if the target address is within the function. + // // If it is jump to that basic block. If the basic block is not found it means + // // we have a jump inside the function that wasn't identified via static + // // analysis. These are bad as they require function regeneration. + // if (likely_local) { + // // Note that we only support LK=0, as we are using shared tables. + // XEASSERT(!lk); + // b.CreateStore(target, locals_.indirection_target); + // b.CreateStore(b.getInt64(cia), locals_.indirection_cia); + // jit_value_t symbol_ge_cmp = b.CreateICmpUGE(target, b.getInt64(symbol_->start_address)); + // jit_value_t symbol_l_cmp = b.CreateICmpULT(target, b.getInt64(symbol_->end_address)); + // jit_value_t symbol_target_cmp = jit_insn_and(fn_, symbol_ge_cmp, symbol_l_cmp); + // b.CreateCondBr(symbol_target_cmp, + // internal_indirection_block_, external_indirection_block_); + // return 0; + // } + + // // If we are LK=0 jump to the shared indirection block. This prevents us + // // from needing to fill the registers again after the call and shares more + // // code. 
+ // if (!lk) { + // b.CreateStore(target, locals_.indirection_target); + // b.CreateStore(b.getInt64(cia), locals_.indirection_cia); + // b.CreateBr(external_indirection_block_); + // } else { + // // Slowest path - spill, call the external function, and fill. + // // We should avoid this at all costs. + + // // Spill registers. We could probably share this. + // SpillRegisters(); + + // // Issue the full indirection branch. + // jit_value_t branch_args[] = { + // jit_value_get_param(fn_, 0), + // target, + // get_uint64(cia), + // }; + // jit_insn_call_native( + // fn_, + // "XeIndirectBranch", + // global_exports_.XeIndirectBranch, + // global_export_signature_3_, + // branch_args, XECOUNT(branch_args), + // 0); + + // if (next_block) { + // // Only refill if not a tail call. + // FillRegisters(); + // b.CreateBr(next_block); + // } else { + // jit_insn_return(fn_, NULL); + // } + // } + + return 0; +} + +jit_value_t X64Emitter::LoadStateValue(size_t offset, jit_type_t type, + const char* name) { + // Load from ppc_state[offset]. + // TODO(benvanik): tag with debug info? + return jit_insn_load_relative( + fn_, jit_value_get_param(fn_, 0), offset, type); +} + +void X64Emitter::StoreStateValue(size_t offset, jit_type_t type, + jit_value_t value) { + // Store to ppc_state[offset]. + jit_insn_store_relative( + fn_, jit_value_get_param(fn_, 0), offset, value); +} + +void X64Emitter::SetupLocals() { + uint64_t spr_t = access_bits_.spr; + if (spr_t & 0x3) { + locals_.xer = SetupLocal(jit_type_nuint, "xer"); + } + spr_t >>= 2; + if (spr_t & 0x3) { + locals_.lr = SetupLocal(jit_type_nuint, "lr"); + } + spr_t >>= 2; + if (spr_t & 0x3) { + locals_.ctr = SetupLocal(jit_type_nuint, "ctr"); + } + spr_t >>= 2; + // TODO: FPCSR + + char name[32]; + + uint64_t cr_t = access_bits_.cr; + for (int n = 0; n < 8; n++) { + if (cr_t & 3) { + //xesnprintfa(name, XECOUNT(name), "cr%d", n); + locals_.cr[n] = SetupLocal(jit_type_ubyte, name); + } + cr_t >>= 2; + } + + uint64_t gpr_t = access_bits_.gpr; + for (int n = 0; n < 32; n++) { + if (gpr_t & 3) { + //xesnprintfa(name, XECOUNT(name), "r%d", n); + locals_.gpr[n] = SetupLocal(jit_type_nuint, name); + } + gpr_t >>= 2; + } + + uint64_t fpr_t = access_bits_.fpr; + for (int n = 0; n < 32; n++) { + if (fpr_t & 3) { + //xesnprintfa(name, XECOUNT(name), "f%d", n); + locals_.fpr[n] = SetupLocal(jit_type_float64, name); + } + fpr_t >>= 2; + } +} + +jit_value_t X64Emitter::SetupLocal(jit_type_t type, const char* name) { + // Note that the value is created in the current block, but will be pushed + // up to function level if used in another block. + jit_value_t value = jit_value_create(fn_, type); + // TODO(benvanik): set a name? + return value; +} + +void X64Emitter::FillRegisters() { + // This updates all of the local register values from the state memory. + // It should be called on function entry for initial setup and after any + // calls that may modify the registers. + + // TODO(benvanik): use access flags to see if we need to do reads/writes. + + if (locals_.xer) { + jit_insn_store(fn_, + locals_.xer, + LoadStateValue(offsetof(xe_ppc_state_t, xer), jit_type_nuint)); + } + + if (locals_.lr) { + jit_insn_store(fn_, + locals_.lr, + LoadStateValue(offsetof(xe_ppc_state_t, lr), jit_type_nuint)); + } + + if (locals_.ctr) { + jit_insn_store(fn_, + locals_.ctr, + LoadStateValue(offsetof(xe_ppc_state_t, ctr), jit_type_nuint)); + } + + // Fill the split CR values by extracting each one from the CR. + // This could probably be done faster via an extractvalues or something. 
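
LoadStateValue and StoreStateValue above treat parameter 0 of every generated function as a pointer to the PPC context and address its fields with compile-time offsets via jit_insn_load_relative/jit_insn_store_relative. A sketch of that addressing scheme against a stand-in struct (GuestState and emit_incr are illustrative; the real layout is xe_ppc_state_t):

    #include <jit/jit.h>
    #include <stddef.h>
    #include <stdint.h>

    struct GuestState {
      uint64_t lr;
      uint64_t r[32];
    };

    // Emits: state->r[dest] = state->r[src] + 1, where state is param 0.
    static void emit_incr(jit_function_t fn, int src, int dest) {
      jit_value_t state = jit_value_get_param(fn, 0);
      jit_value_t v = jit_insn_load_relative(
          fn, state, offsetof(GuestState, r) + 8 * src, jit_type_ulong);
      v = jit_insn_add(fn, v,
          jit_value_create_nint_constant(fn, jit_type_ulong, 1));
      jit_insn_store_relative(
          fn, state, offsetof(GuestState, r) + 8 * dest, v);
    }
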
+ // Perhaps we could also change it to be a vector<8*i8>.
+ jit_value_t cr = NULL;
+ for (size_t n = 0; n < XECOUNT(locals_.cr); n++) {
+ jit_value_t cr_n = locals_.cr[n];
+ if (!cr_n) {
+ continue;
+ }
+ if (!cr) {
+ // Only fetch once. Doing it here prevents us from having to
+ // always fetch even if unused.
+ cr = LoadStateValue(offsetof(xe_ppc_state_t, cr), jit_type_nuint);
+ }
+ // (cr >> (28 - n * 4)) & 0xF
+ jit_value_t shamt = jit_value_create_nint_constant(
+ fn_, jit_type_nuint, 28 - n * 4);
+ jit_insn_store(fn_, cr_n,
+ jit_insn_and(fn_,
+ jit_insn_ushr(fn_, cr, shamt),
+ jit_value_create_nint_constant(fn_, jit_type_ubyte, 0xF)));
+ }
+
+ for (size_t n = 0; n < XECOUNT(locals_.gpr); n++) {
+ if (locals_.gpr[n]) {
+ jit_insn_store(fn_,
+ locals_.gpr[n],
+ LoadStateValue(offsetof(xe_ppc_state_t, r) + 8 * n, jit_type_nuint));
+ }
+ }
+
+ for (size_t n = 0; n < XECOUNT(locals_.fpr); n++) {
+ if (locals_.fpr[n]) {
+ jit_insn_store(fn_,
+ locals_.fpr[n],
+ LoadStateValue(offsetof(xe_ppc_state_t, f) + 8 * n,
+ jit_type_float64));
+ }
+ }
+}
+
+void X64Emitter::SpillRegisters() {
+ // This flushes all local registers (if written) to the register bank and
+ // resets their values.
+
+ // TODO(benvanik): only flush if actually required, or selective flushes.
+
+ if (locals_.xer) {
+ StoreStateValue(
+ offsetof(xe_ppc_state_t, xer),
+ jit_type_nuint,
+ jit_insn_load(fn_, locals_.xer));
+ }
+
+ if (locals_.lr) {
+ StoreStateValue(
+ offsetof(xe_ppc_state_t, lr),
+ jit_type_nuint,
+ jit_insn_load(fn_, locals_.lr));
+ }
+
+ if (locals_.ctr) {
+ StoreStateValue(
+ offsetof(xe_ppc_state_t, ctr),
+ jit_type_nuint,
+ jit_insn_load(fn_, locals_.ctr));
+ }
+
+ // Stitch together all split CR values.
+ // TODO(benvanik): don't flush across calls?
+ jit_value_t cr = NULL;
+ for (size_t n = 0; n < XECOUNT(locals_.cr); n++) {
+ jit_value_t cr_n = locals_.cr[n];
+ if (!cr_n) {
+ continue;
+ }
+ // cr |= (cr_n << (28 - n * 4)) -- cr0 lives in the top nibble, matching
+ // the shift used in FillRegisters.
+ jit_value_t shamt = jit_value_create_nint_constant(
+ fn_, jit_type_nuint, 28 - n * 4);
+ cr_n = jit_insn_convert(fn_, jit_insn_load(fn_, cr_n), jit_type_nuint, 0);
+ cr_n = jit_insn_shl(fn_, cr_n, shamt);
+ if (!cr) {
+ cr = cr_n;
+ } else {
+ cr = jit_insn_or(fn_, cr, cr_n);
+ }
+ }
+ if (cr) {
+ StoreStateValue(
+ offsetof(xe_ppc_state_t, cr),
+ jit_type_nuint,
+ cr);
+ }
+
+ for (uint32_t n = 0; n < XECOUNT(locals_.gpr); n++) {
+ jit_value_t v = locals_.gpr[n];
+ if (v) {
+ StoreStateValue(
+ offsetof(xe_ppc_state_t, r) + 8 * n,
+ jit_type_nuint,
+ jit_insn_load(fn_, v));
+ }
+ }
+
+ for (uint32_t n = 0; n < XECOUNT(locals_.fpr); n++) {
+ jit_value_t v = locals_.fpr[n];
+ if (v) {
+ StoreStateValue(
+ offsetof(xe_ppc_state_t, f) + 8 * n,
+ jit_type_float64,
+ jit_insn_load(fn_, v));
+ }
+ }
+}
+
+jit_value_t X64Emitter::xer_value() {
+ XEASSERTNOTNULL(locals_.xer);
+ return jit_insn_load(fn_, locals_.xer);
+}
+
+void X64Emitter::update_xer_value(jit_value_t value) {
+ XEASSERTNOTNULL(locals_.xer);
+
+ // Extend to 64bits if needed.
+ value = zero_extend(value, jit_type_nuint);
+ jit_insn_store(fn_, locals_.xer, value);
+}
+
+void X64Emitter::update_xer_with_overflow(jit_value_t value) {
+ XEASSERTNOTNULL(locals_.xer);
+
+ // Expects an i1 indicating overflow.
+ // Trust the caller that if it's larger than that it's already truncated.
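
FillRegisters splits the 32-bit CR image into eight per-field locals and SpillRegisters stitches them back together; cr0 sits in the most significant nibble, so both directions shift by 28 - 4*n. A host-side check of that round trip in plain C++, independent of libjit:

    #include <stdint.h>
    #include <assert.h>

    int main() {
      uint32_t cr = 0x12345678u;
      uint8_t field[8];
      for (int n = 0; n < 8; n++) {
        field[n] = (cr >> (28 - 4 * n)) & 0xF;           // fill: split into locals
      }
      uint32_t repacked = 0;
      for (int n = 0; n < 8; n++) {
        repacked |= (uint32_t)field[n] << (28 - 4 * n);  // spill: stitch back
      }
      assert(repacked == cr);
      return 0;
    }
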
+ value = zero_extend(value, jit_type_nuint);
+
+ jit_value_t xer = xer_value();
+ xer = jit_insn_and(fn_, xer, get_uint64(0xFFFFFFFFBFFFFFFF)); // clear bit 30
+ xer = jit_insn_or(fn_, xer, jit_insn_shl(fn_, value, get_uint32(31)));
+ xer = jit_insn_or(fn_, xer, jit_insn_shl(fn_, value, get_uint32(30)));
+ jit_insn_store(fn_, locals_.xer, xer);
+}
+
+void X64Emitter::update_xer_with_carry(jit_value_t value) {
+ XEASSERTNOTNULL(locals_.xer);
+
+ // Expects an i1 indicating carry.
+ // Trust the caller that if it's larger than that it's already truncated.
+ value = zero_extend(value, jit_type_nuint);
+
+ jit_value_t xer = xer_value();
+ xer = jit_insn_and(fn_, xer, get_uint64(0xFFFFFFFFDFFFFFFF)); // clear bit 29
+ xer = jit_insn_or(fn_, xer, jit_insn_shl(fn_, value, get_uint32(29)));
+ jit_insn_store(fn_, locals_.xer, xer);
+}
+
+void X64Emitter::update_xer_with_overflow_and_carry(jit_value_t value) {
+ XEASSERTNOTNULL(locals_.xer);
+
+ // Expects an i1 indicating overflow.
+ // Trust the caller that if it's larger than that it's already truncated.
+ value = zero_extend(value, jit_type_nuint);
+
+ // This is effectively an update_xer_with_overflow followed by an
+ // update_xer_with_carry, but since the logic is largely the same it is shared.
+ jit_value_t xer = xer_value();
+ // clear bit 30 & 29
+ xer = jit_insn_and(fn_, xer, get_uint64(0xFFFFFFFF9FFFFFFF));
+ xer = jit_insn_or(fn_, xer, jit_insn_shl(fn_, value, get_uint32(31)));
+ xer = jit_insn_or(fn_, xer, jit_insn_shl(fn_, value, get_uint32(30)));
+ xer = jit_insn_or(fn_, xer, jit_insn_shl(fn_, value, get_uint32(29)));
+ jit_insn_store(fn_, locals_.xer, xer);
+}
+
+jit_value_t X64Emitter::lr_value() {
+ XEASSERTNOTNULL(locals_.lr);
+ return jit_insn_load(fn_, locals_.lr);
+}
+
+void X64Emitter::update_lr_value(jit_value_t value) {
+ XEASSERTNOTNULL(locals_.lr);
+
+ // Extend to 64bits if needed.
+ value = zero_extend(value, jit_type_nuint);
+ jit_insn_store(fn_, locals_.lr, value);
+}
+
+jit_value_t X64Emitter::ctr_value() {
+ XEASSERTNOTNULL(locals_.ctr);
+ return jit_insn_load(fn_, locals_.ctr);
+}
+
+void X64Emitter::update_ctr_value(jit_value_t value) {
+ XEASSERTNOTNULL(locals_.ctr);
+
+ // Extend to 64bits if needed.
+ value = zero_extend(value, jit_type_nuint);
+ jit_insn_store(fn_, locals_.ctr, value);
+}
+
+jit_value_t X64Emitter::cr_value(uint32_t n) {
+ XEASSERT(n >= 0 && n < 8);
+ XEASSERTNOTNULL(locals_.cr[n]);
+
+ jit_value_t value = jit_insn_load(fn_, locals_.cr[n]);
+ value = zero_extend(value, jit_type_nuint);
+ return value;
+}
+
+void X64Emitter::update_cr_value(uint32_t n, jit_value_t value) {
+ XEASSERT(n >= 0 && n < 8);
+ XEASSERTNOTNULL(locals_.cr[n]);
+
+ // Truncate to 8 bits if needed.
+ // TODO(benvanik): also widen?
+ value = trunc_to_ubyte(value);
+
+ jit_insn_store(fn_, locals_.cr[n], value);
+}
+
+void X64Emitter::update_cr_with_cond(
+ uint32_t n, jit_value_t lhs, jit_value_t rhs, bool is_signed) {
+ // bit0 = RA < RB
+ // bit1 = RA > RB
+ // bit2 = RA = RB
+ // bit3 = XER[SO]
+
+ // TODO(benvanik): inline this using the x86 cmp instruction - this prevents
+ // the need for a lot of the compares and ensures we lower to the best
+ // possible x86.
+ // jit_value_t cmp = InlineAsm::get(
+ // FunctionType::get(),
+ // "cmp $0, $1 \n"
+ // "mov from compare registers \n",
+ // "r,r", ??
+ // true);
+
+ // Convert input signs, if needed.
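
The update_xer_with_* helpers above keep XER in a 64-bit local and poke SO, OV and CA at bit positions 31, 30 and 29. The same masking written host-side for reference; the helper names are illustrative:

    #include <stdint.h>

    // ov/ca are 0 or 1, as the emitter assumes for its "i1" inputs.
    static uint64_t xer_set_overflow(uint64_t xer, uint64_t ov) {
      xer &= 0xFFFFFFFFBFFFFFFFull;  // clear OV; SO stays sticky
      xer |= ov << 31;               // accumulate into SO
      xer |= ov << 30;               // set OV for this operation
      return xer;
    }

    static uint64_t xer_set_carry(uint64_t xer, uint64_t ca) {
      xer &= 0xFFFFFFFFDFFFFFFFull;  // clear CA
      xer |= ca << 29;
      return xer;
    }
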
+ if (is_signed) { + lhs = make_signed(lhs); + rhs = make_signed(rhs); + } else { + lhs = make_unsigned(lhs); + rhs = make_unsigned(rhs); + } + jit_value_t c = jit_insn_lt(fn_, lhs, rhs); + c = jit_insn_or(fn_, c, + jit_insn_shl(fn_, jit_insn_gt(fn_, lhs, rhs), get_uint32(1))); + c = jit_insn_or(fn_, c, + jit_insn_shl(fn_, jit_insn_eq(fn_, lhs, rhs), get_uint32(2))); + + // TODO(benvanik): set bit 4 to XER[SO] + + // Insert the 4 bits into their location in the CR. + update_cr_value(n, c); +} + +jit_value_t X64Emitter::gpr_value(uint32_t n) { + XEASSERT(n >= 0 && n < 32); + XEASSERTNOTNULL(locals_.gpr[n]); + + // Actually r0 is writable, even though nobody should ever do that. + // Perhaps we can check usage and enable this if safe? + // if (n == 0) { + // return get_uint64(0); + // } + + return jit_insn_load(fn_, locals_.gpr[n]); +} + +void X64Emitter::update_gpr_value(uint32_t n, jit_value_t value) { + XEASSERT(n >= 0 && n < 32); + XEASSERTNOTNULL(locals_.gpr[n]); + + // See above - r0 can be written. + // if (n == 0) { + // // Ignore writes to zero. + // return; + // } + + // Extend to 64bits if needed. + value = zero_extend(value, jit_type_nuint); + + jit_insn_store(fn_, locals_.gpr[n], value); +} + +jit_value_t X64Emitter::fpr_value(uint32_t n) { + XEASSERT(n >= 0 && n < 32); + XEASSERTNOTNULL(locals_.fpr[n]); + return jit_insn_load(fn_, locals_.fpr[n]); +} + +void X64Emitter::update_fpr_value(uint32_t n, jit_value_t value) { + XEASSERT(n >= 0 && n < 32); + XEASSERTNOTNULL(locals_.fpr[n]); + jit_insn_store(fn_, locals_.fpr[n], value); +} + +jit_value_t X64Emitter::TouchMemoryAddress(uint32_t cia, jit_value_t addr) { + // Input address is always in 32-bit space. + addr = jit_insn_and(fn_, + zero_extend(addr, jit_type_nuint), + jit_value_create_nint_constant(fn_, jit_type_uint, UINT_MAX)); + + // Add runtime memory address checks, if needed. + // if (FLAGS_memory_address_verification) { + // BasicBlock* invalid_bb = BasicBlock::Create(*context_, "", fn_); + // BasicBlock* valid_bb = BasicBlock::Create(*context_, "", fn_); + + // // The heap starts at 0x1000 - if we write below that we're boned. + // jit_value_t gt = b.CreateICmpUGE(addr, b.getInt64(0x00001000)); + // b.CreateCondBr(gt, valid_bb, invalid_bb); + + // b.SetInsertPoint(invalid_bb); + // jit_value_t access_violation = gen_module_->getFunction("XeAccessViolation"); + // SpillRegisters(); + // b.CreateCall3(access_violation, + // fn_->arg_begin(), + // b.getInt32(cia), + // addr); + // b.CreateBr(valid_bb); + + // b.SetInsertPoint(valid_bb); + // } + + // Rebase off of memory pointer. + addr = jit_insn_add(fn_, + addr, + jit_value_create_nint_constant(fn_, + jit_type_nuint, (jit_nuint)xe_memory_addr(memory_, 0))); + + return addr; +} + +jit_value_t X64Emitter::ReadMemory( + uint32_t cia, jit_value_t addr, uint32_t size, bool acquire) { + jit_type_t data_type = NULL; + bool needs_swap = false; + switch (size) { + case 1: + data_type = jit_type_ubyte; + break; + case 2: + data_type = jit_type_ushort; + needs_swap = true; + break; + case 4: + data_type = jit_type_uint; + needs_swap = true; + break; + case 8: + data_type = jit_type_ulong; + needs_swap = true; + break; + default: + XEASSERTALWAYS(); + return NULL; + } + + // Rebase off of memory base pointer. + jit_value_t address = TouchMemoryAddress(cia, addr); + jit_value_t value = jit_insn_load_relative(fn_, address, 0, data_type); + if (acquire) { + // TODO(benvanik): acquire semantics. 
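
TouchMemoryAddress above forces the guest address into the 32-bit space and rebases it onto the host mapping returned by xe_memory_addr before any relative load or store. The equivalent host-side computation, assuming a 64-bit host; membase and translate are illustrative names:

    #include <stdint.h>

    // membase stands in for xe_memory_addr(memory_, 0).
    static uint8_t* translate(uint8_t* membase, uint64_t guest_addr) {
      guest_addr &= 0xFFFFFFFFull;   // guest addresses live in a 32-bit space
      return membase + guest_addr;   // rebase onto the host allocation
    }
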
+ // load_value->setAlignment(size); + // load_value->setVolatile(true); + // load_value->setAtomic(Acquire); + jit_value_set_volatile(value); + } + + // Swap after loading. + // TODO(benvanik): find a way to avoid this! + if (needs_swap) { + value = jit_insn_bswap(fn_, value); + } + + return value; +} + +void X64Emitter::WriteMemory( + uint32_t cia, jit_value_t addr, uint32_t size, jit_value_t value, + bool release) { + jit_type_t data_type = NULL; + bool needs_swap = false; + switch (size) { + case 1: + data_type = jit_type_ubyte; + break; + case 2: + data_type = jit_type_ushort; + needs_swap = true; + break; + case 4: + data_type = jit_type_uint; + needs_swap = true; + break; + case 8: + data_type = jit_type_ulong; + needs_swap = true; + break; + default: + XEASSERTALWAYS(); + return; + } + + // Truncate, if required. + if (jit_value_get_type(value) != data_type) { + value = jit_insn_convert(fn_, value, data_type, 0); + } + + // Swap before storing. + // TODO(benvanik): find a way to avoid this! + if (needs_swap) { + value = jit_insn_bswap(fn_, value); + } + + // TODO(benvanik): release semantics + // if (release) { + // store_value->setAlignment(size); + // store_value->setVolatile(true); + // store_value->setAtomic(Release); + // } + + // Rebase off of memory base pointer. + jit_value_t address = TouchMemoryAddress(cia, addr); + jit_insn_store_relative(fn_, address, 0, value); +} diff --git a/src/xenia/cpu/x64/x64_emitter.h b/src/xenia/cpu/x64/x64_emitter.h new file mode 100644 index 000000000..b3de00534 --- /dev/null +++ b/src/xenia/cpu/x64/x64_emitter.h @@ -0,0 +1,154 @@ +/** + ****************************************************************************** + * Xenia : Xbox 360 Emulator Research Project * + ****************************************************************************** + * Copyright 2013 Ben Vanik. All rights reserved. * + * Released under the BSD license - see LICENSE in the root for more details. 
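
ReadMemory and WriteMemory above bracket every 2/4/8-byte access with a byte swap because guest memory is big-endian while the host is little-endian. A host-side sketch of the 32-bit case; load_be32/store_be32 are illustrative names:

    #include <stdint.h>

    static uint32_t load_be32(const uint8_t* p) {
      return ((uint32_t)p[0] << 24) | ((uint32_t)p[1] << 16) |
             ((uint32_t)p[2] << 8)  |  (uint32_t)p[3];
    }

    static void store_be32(uint8_t* p, uint32_t v) {
      p[0] = (uint8_t)(v >> 24);
      p[1] = (uint8_t)(v >> 16);
      p[2] = (uint8_t)(v >> 8);
      p[3] = (uint8_t)v;
    }
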
* + ****************************************************************************** + */ + +#ifndef XENIA_CPU_X64_X64_EMITTER_H_ +#define XENIA_CPU_X64_X64_EMITTER_H_ + +#include +#include +#include + +#include + + +namespace xe { +namespace cpu { +namespace x64 { + + +class X64Emitter { +public: + X64Emitter(xe_memory_ref memory, jit_context_t context); + ~X64Emitter(); + + jit_context_t context(); + + int PrepareFunction(sdb::FunctionSymbol* symbol); + int MakeFunction(sdb::FunctionSymbol* symbol, jit_function_t fn); + + sdb::FunctionSymbol* symbol(); + jit_function_t fn(); + sdb::FunctionBlock* fn_block(); + + jit_value_t get_int32(int32_t value); + jit_value_t get_uint32(uint32_t value); + jit_value_t get_int64(int64_t value); + jit_value_t get_uint64(uint64_t value); + jit_value_t make_signed(jit_value_t value); + jit_value_t make_unsigned(jit_value_t value); + jit_value_t sign_extend(jit_value_t value, jit_type_t target_type); + jit_value_t zero_extend(jit_value_t value, jit_type_t target_type); + jit_value_t trunc_to_sbyte(jit_value_t value); + jit_value_t trunc_to_ubyte(jit_value_t value); + jit_value_t trunc_to_short(jit_value_t value); + jit_value_t trunc_to_int(jit_value_t value); + + int branch_to_block(uint32_t address); + int branch_to_block_if(uint32_t address, jit_value_t value); + int branch_to_block_if_not(uint32_t address, jit_value_t value); + int branch_to_return(); + int branch_to_return_if(jit_value_t value); + int branch_to_return_if_not(jit_value_t value); + int call_function(sdb::FunctionSymbol* target_symbol, jit_value_t lr, + bool tail); + + void TraceBranch(uint32_t cia); + int GenerateIndirectionBranch(uint32_t cia, jit_value_t target, + bool lk, bool likely_local); + + jit_value_t LoadStateValue(size_t offset, jit_type_t type, + const char* name = ""); + void StoreStateValue(size_t offset, jit_type_t type, jit_value_t value); + + jit_value_t SetupLocal(jit_type_t type, const char* name); + void FillRegisters(); + void SpillRegisters(); + + jit_value_t xer_value(); + void update_xer_value(jit_value_t value); + void update_xer_with_overflow(jit_value_t value); + void update_xer_with_carry(jit_value_t value); + void update_xer_with_overflow_and_carry(jit_value_t value); + + jit_value_t lr_value(); + void update_lr_value(jit_value_t value); + + jit_value_t ctr_value(); + void update_ctr_value(jit_value_t value); + + jit_value_t cr_value(uint32_t n); + void update_cr_value(uint32_t n, jit_value_t value); + void update_cr_with_cond(uint32_t n, jit_value_t lhs, jit_value_t rhs, + bool is_signed); + + jit_value_t gpr_value(uint32_t n); + void update_gpr_value(uint32_t n, jit_value_t value); + jit_value_t fpr_value(uint32_t n); + void update_fpr_value(uint32_t n, jit_value_t value); + + jit_value_t TouchMemoryAddress(uint32_t cia, jit_value_t addr); + jit_value_t ReadMemory( + uint32_t cia, jit_value_t addr, uint32_t size, bool acquire = false); + void WriteMemory( + uint32_t cia, jit_value_t addr, uint32_t size, jit_value_t value, + bool release = false); + +private: + int MakeUserFunction(); + int MakePresentImportFunction(); + int MakeMissingImportFunction(); + + void GenerateBasicBlocks(); + void GenerateSharedBlocks(); + int PrepareBasicBlock(sdb::FunctionBlock* block); + void GenerateBasicBlock(sdb::FunctionBlock* block); + void SetupLocals(); + + xe_memory_ref memory_; + jit_context_t context_; + jit_type_t fn_signature_; + jit_type_t shim_signature_; + GlobalExports global_exports_; + jit_type_t global_export_signature_2_; + jit_type_t 
global_export_signature_3_; + jit_type_t global_export_signature_4_; + + sdb::FunctionSymbol* symbol_; + jit_function_t fn_; + sdb::FunctionBlock* fn_block_; + jit_label_t return_block_; + jit_label_t internal_indirection_block_; + jit_label_t external_indirection_block_; + + std::map bbs_; + + // Address of the instruction being generated. + uint32_t cia_; + + ppc::InstrAccessBits access_bits_; + struct { + jit_value_t indirection_target; + jit_value_t indirection_cia; + + jit_value_t xer; + jit_value_t lr; + jit_value_t ctr; + jit_value_t cr[8]; + jit_value_t gpr[32]; + jit_value_t fpr[32]; + } locals_; +}; + + +} // namespace x64 +} // namespace cpu +} // namespace xe + + +#endif // XENIA_CPU_X64_X64_EMITTER_H_ diff --git a/src/xenia/cpu/x64/x64_jit.cc b/src/xenia/cpu/x64/x64_jit.cc new file mode 100644 index 000000000..683e91b46 --- /dev/null +++ b/src/xenia/cpu/x64/x64_jit.cc @@ -0,0 +1,93 @@ +/** + ****************************************************************************** + * Xenia : Xbox 360 Emulator Research Project * + ****************************************************************************** + * Copyright 2013 Ben Vanik. All rights reserved. * + * Released under the BSD license - see LICENSE in the root for more details. * + ****************************************************************************** + */ + +#include + +#include +#include +#include + + +using namespace xe; +using namespace xe::cpu; +using namespace xe::cpu::sdb; +using namespace xe::cpu::x64; + + +X64JIT::X64JIT(xe_memory_ref memory, SymbolTable* sym_table) : + JIT(memory, sym_table), + context_(NULL), emitter_(NULL) { +} + +X64JIT::~X64JIT() { + delete emitter_; + if (context_) { + jit_context_destroy(context_); + } +} + +int X64JIT::Setup() { + int result_code = 1; + + // Shared libjit context. + context_ = jit_context_create(); + XEEXPECTNOTNULL(context_); + + // Create the emitter used to generate functions. + emitter_ = new X64Emitter(memory_, context_); + + // Inject global functions/variables/etc. + XEEXPECTZERO(InjectGlobals()); + + result_code = 0; +XECLEANUP: + return result_code; +} + +int X64JIT::InjectGlobals() { + return 0; +} + +int X64JIT::InitModule(ExecModule* module) { + return 0; +} + +int X64JIT::UninitModule(ExecModule* module) { + return 0; +} + +int X64JIT::Execute(xe_ppc_state_t* ppc_state, FunctionSymbol* fn_symbol) { + XELOGCPU("Execute(%.8X): %s...", fn_symbol->start_address, fn_symbol->name()); + + // Check function. + jit_function_t jit_fn = (jit_function_t)fn_symbol->impl_value; + if (!jit_fn) { + // Function hasn't been prepped yet - prep it. + if (emitter_->PrepareFunction(fn_symbol)) { + XELOGCPU("Execute(%.8X): unable to make function %s", + fn_symbol->start_address, fn_symbol->name()); + return 1; + } + jit_fn = (jit_function_t)fn_symbol->impl_value; + XEASSERTNOTNULL(jit_fn); + } + + // Call into the function. This will compile it if needed. 
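
X64JIT::Execute, which continues below, calls the generated function through jit_function_apply: the args array holds a pointer to each argument slot, the return value is written through the last parameter, and a zero return signals that the call failed. A sketch of that calling convention; call_generated and the argument types are stand-ins:

    #include <jit/jit.h>
    #include <stdint.h>

    static uint64_t call_generated(jit_function_t fn, void* state, uint64_t lr) {
      void* args[] = { &state, &lr };  // pointer to each argument slot
      uint64_t ret = 0;
      // jit_function_apply returns zero on failure.
      if (!jit_function_apply(fn, args, &ret)) {
        return 0;
      }
      return ret;
    }
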
+ jit_nuint lr = ppc_state->lr; + void* args[] = {&ppc_state, &lr}; + uint64_t return_value; + int apply_result = jit_function_apply(jit_fn, (void**)&args, &return_value); + if (!apply_result) { + XELOGCPU("Execute(%.8X): apply failed with %d", + fn_symbol->start_address, apply_result); + return 1; + } + + return 0; +} diff --git a/src/xenia/cpu/x64/x64_jit.h b/src/xenia/cpu/x64/x64_jit.h new file mode 100644 index 000000000..7eeec5832 --- /dev/null +++ b/src/xenia/cpu/x64/x64_jit.h @@ -0,0 +1,54 @@ +/** + ****************************************************************************** + * Xenia : Xbox 360 Emulator Research Project * + ****************************************************************************** + * Copyright 2013 Ben Vanik. All rights reserved. * + * Released under the BSD license - see LICENSE in the root for more details. * + ****************************************************************************** + */ + +#ifndef XENIA_CPU_X64_X64_JIT_H_ +#define XENIA_CPU_X64_X64_JIT_H_ + +#include + +#include +#include +#include +#include + +#include + + +namespace xe { +namespace cpu { +namespace x64 { + + +class X64JIT : public JIT { +public: + X64JIT(xe_memory_ref memory, sdb::SymbolTable* sym_table); + virtual ~X64JIT(); + + virtual int Setup(); + + virtual int InitModule(ExecModule* module); + virtual int UninitModule(ExecModule* module); + + virtual int Execute(xe_ppc_state_t* ppc_state, + sdb::FunctionSymbol* fn_symbol); + +protected: + int InjectGlobals(); + + jit_context_t context_; + X64Emitter* emitter_; +}; + + +} // namespace x64 +} // namespace cpu +} // namespace xe + + +#endif // XENIA_CPU_X64_X64_JIT_H_ diff --git a/tools/xenia-run/xenia-run.cc b/tools/xenia-run/xenia-run.cc index f41cd4ebd..f5f1904d2 100644 --- a/tools/xenia-run/xenia-run.cc +++ b/tools/xenia-run/xenia-run.cc @@ -46,7 +46,7 @@ int Run::Setup() { xe_zero_struct(&pal_options, sizeof(pal_options)); XEEXPECTZERO(xe_pal_init(pal_options)); - //backend_ = shared_ptr(new xe::cpu::libjit::LibjitBackend()); + backend_ = shared_ptr(new xe::cpu::x64::X64Backend()); debugger_ = shared_ptr(new Debugger()); diff --git a/tools/xenia-test/xenia-test.cc b/tools/xenia-test/xenia-test.cc index 98bab1d50..c20a5da2e 100644 --- a/tools/xenia-test/xenia-test.cc +++ b/tools/xenia-test/xenia-test.cc @@ -120,7 +120,7 @@ int run_test(string& src_file_path) { memory = xe_memory_create(memory_options); XEEXPECTNOTNULL(memory); - backend_ = shared_ptr(new xe::cpu::libjit::LibjitBackend()); + backend_ = shared_ptr(new xe::cpu::x64::X64Backend()); processor = shared_ptr(new Processor(memory, backend)); XEEXPECTZERO(processor->Setup());
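
The two tool changes above are the only call sites of the new backend; the intended flow, using only classes added in this diff, is roughly the sketch below. Include lines are omitted because the exact include paths are elided in this diff, and it assumes the JIT base class exposes the Setup() that X64JIT overrides:

    using xe::cpu::Backend;
    using xe::cpu::JIT;
    using xe::cpu::sdb::SymbolTable;

    // Build a JIT over guest memory and the symbol table; NULL on failure.
    static JIT* make_x64_jit(xe_memory_ref memory, SymbolTable* sym_table) {
      Backend* backend = new xe::cpu::x64::X64Backend();
      JIT* jit = backend->CreateJIT(memory, sym_table);  // hands back an X64JIT
      if (jit->Setup()) {
        // Setup() returns non-zero on failure.
        delete jit;
        delete backend;
        return NULL;
      }
      return jit;
    }
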