From 5a85263e5f1089f496eeb86540c21ca106324a47 Mon Sep 17 00:00:00 2001 From: Ben Vanik Date: Mon, 26 May 2014 20:28:21 -0700 Subject: [PATCH] Trying out a new style of JIT pattern matching. --- src/alloy/backend/ivm/ivm_intcode.cc | 42 +- .../x64/lowering/lowering_sequences.cc | 3257 ------------ .../backend/x64/lowering/lowering_table.cc | 71 - .../backend/x64/lowering/lowering_table.h | 58 - src/alloy/backend/x64/lowering/op_utils.inl | 1063 ---- src/alloy/backend/x64/lowering/sources.gypi | 12 - src/alloy/backend/x64/sources.gypi | 9 +- src/alloy/backend/x64/x64_backend.cc | 12 +- src/alloy/backend/x64/x64_backend.h | 5 - src/alloy/backend/x64/x64_emitter.cc | 351 +- src/alloy/backend/x64/x64_emitter.h | 110 +- src/alloy/backend/x64/x64_sequence.inl | 714 +++ src/alloy/backend/x64/x64_sequences.cc | 4488 +++++++++++++++++ .../lowering_sequences.h => x64_sequences.h} | 20 +- .../{lowering/tracers.cc => x64_tracers.cc} | 22 +- .../x64/{lowering/tracers.h => x64_tracers.h} | 17 +- .../passes/constant_propagation_pass.cc | 7 + .../compiler/passes/context_promotion_pass.cc | 16 +- .../passes/control_flow_analysis_pass.cc | 6 - .../passes/data_flow_analysis_pass.cc | 2 - .../passes/register_allocation_pass.cc | 760 +-- .../passes/register_allocation_pass.h | 67 +- src/alloy/compiler/passes/validation_pass.cc | 14 +- src/alloy/core.h | 4 + src/alloy/frontend/ppc/ppc_emit_alu.cc | 54 +- src/alloy/frontend/ppc/ppc_hir_builder.cc | 18 +- src/alloy/hir/block.cc | 39 + src/alloy/hir/block.h | 2 + src/alloy/hir/hir_builder.cc | 32 +- src/alloy/hir/hir_builder.h | 3 +- src/alloy/hir/instr.cc | 13 - src/alloy/hir/instr.h | 25 - src/alloy/hir/opcodes.inl | 204 +- src/alloy/hir/sources.gypi | 1 + src/alloy/hir/value.cc | 20 + src/alloy/hir/value.h | 10 +- third_party/xbyak | 2 +- xenia.gyp | 13 + 38 files changed, 6403 insertions(+), 5160 deletions(-) delete mode 100644 src/alloy/backend/x64/lowering/lowering_sequences.cc delete mode 100644 src/alloy/backend/x64/lowering/lowering_table.cc delete mode 100644 src/alloy/backend/x64/lowering/lowering_table.h delete mode 100644 src/alloy/backend/x64/lowering/op_utils.inl delete mode 100644 src/alloy/backend/x64/lowering/sources.gypi create mode 100644 src/alloy/backend/x64/x64_sequence.inl create mode 100644 src/alloy/backend/x64/x64_sequences.cc rename src/alloy/backend/x64/{lowering/lowering_sequences.h => x64_sequences.h} (59%) rename src/alloy/backend/x64/{lowering/tracers.cc => x64_tracers.cc} (96%) rename src/alloy/backend/x64/{lowering/tracers.h => x64_tracers.h} (89%) create mode 100644 src/alloy/hir/block.cc diff --git a/src/alloy/backend/ivm/ivm_intcode.cc b/src/alloy/backend/ivm/ivm_intcode.cc index 211f466c7..6001cb15b 100644 --- a/src/alloy/backend/ivm/ivm_intcode.cc +++ b/src/alloy/backend/ivm/ivm_intcode.cc @@ -40,10 +40,10 @@ namespace ivm { #define DPRINT #define DFLUSH() -//#define IPRINT if (ics.thread_state->thread_id() == 1) printf -//#define IFLUSH() fflush(stdout) -//#define DPRINT if (ics.thread_state->thread_id() == 1) printf -//#define DFLUSH() fflush(stdout) +#define IPRINT if (ics.thread_state->thread_id() == 1) printf +#define IFLUSH() fflush(stdout) +#define DPRINT if (ics.thread_state->thread_id() == 1) printf +#define DFLUSH() fflush(stdout) #if XE_CPU_BIGENDIAN #define VECB16(v,n) (v.b16[n]) @@ -1364,31 +1364,31 @@ int Translate_LOAD_CLOCK(TranslationContext& ctx, Instr* i) { } uint32_t IntCode_LOAD_LOCAL_I8(IntCodeState& ics, const IntCode* i) { - ics.rf[i->dest_reg].i8 = *((int8_t*)(ics.locals + ics.rf[i->src1_reg].u64)); + 
ics.rf[i->dest_reg].i8 = *((int8_t*)(ics.locals + ics.rf[i->src1_reg].u32)); return IA_NEXT; } uint32_t IntCode_LOAD_LOCAL_I16(IntCodeState& ics, const IntCode* i) { - ics.rf[i->dest_reg].i16 = *((int16_t*)(ics.locals + ics.rf[i->src1_reg].u64)); + ics.rf[i->dest_reg].i16 = *((int16_t*)(ics.locals + ics.rf[i->src1_reg].u32)); return IA_NEXT; } uint32_t IntCode_LOAD_LOCAL_I32(IntCodeState& ics, const IntCode* i) { - ics.rf[i->dest_reg].i32 = *((int32_t*)(ics.locals + ics.rf[i->src1_reg].u64)); + ics.rf[i->dest_reg].i32 = *((int32_t*)(ics.locals + ics.rf[i->src1_reg].u32)); return IA_NEXT; } uint32_t IntCode_LOAD_LOCAL_I64(IntCodeState& ics, const IntCode* i) { - ics.rf[i->dest_reg].i64 = *((int64_t*)(ics.locals + ics.rf[i->src1_reg].u64)); + ics.rf[i->dest_reg].i64 = *((int64_t*)(ics.locals + ics.rf[i->src1_reg].u32)); return IA_NEXT; } uint32_t IntCode_LOAD_LOCAL_F32(IntCodeState& ics, const IntCode* i) { - ics.rf[i->dest_reg].f32 = *((float*)(ics.locals + ics.rf[i->src1_reg].u64)); + ics.rf[i->dest_reg].f32 = *((float*)(ics.locals + ics.rf[i->src1_reg].u32)); return IA_NEXT; } uint32_t IntCode_LOAD_LOCAL_F64(IntCodeState& ics, const IntCode* i) { - ics.rf[i->dest_reg].f64 = *((double*)(ics.locals + ics.rf[i->src1_reg].u64)); + ics.rf[i->dest_reg].f64 = *((double*)(ics.locals + ics.rf[i->src1_reg].u32)); return IA_NEXT; } uint32_t IntCode_LOAD_LOCAL_V128(IntCodeState& ics, const IntCode* i) { - ics.rf[i->dest_reg].v128 = *((vec128_t*)(ics.locals + ics.rf[i->src1_reg].u64)); + ics.rf[i->dest_reg].v128 = *((vec128_t*)(ics.locals + ics.rf[i->src1_reg].u32)); return IA_NEXT; } int Translate_LOAD_LOCAL(TranslationContext& ctx, Instr* i) { @@ -1405,31 +1405,31 @@ int Translate_LOAD_LOCAL(TranslationContext& ctx, Instr* i) { } uint32_t IntCode_STORE_LOCAL_I8(IntCodeState& ics, const IntCode* i) { - *((int8_t*)(ics.locals + ics.rf[i->src1_reg].u64)) = ics.rf[i->src2_reg].i8; + *((int8_t*)(ics.locals + ics.rf[i->src1_reg].u32)) = ics.rf[i->src2_reg].i8; return IA_NEXT; } uint32_t IntCode_STORE_LOCAL_I16(IntCodeState& ics, const IntCode* i) { - *((int16_t*)(ics.locals + ics.rf[i->src1_reg].u64)) = ics.rf[i->src2_reg].i16; + *((int16_t*)(ics.locals + ics.rf[i->src1_reg].u32)) = ics.rf[i->src2_reg].i16; return IA_NEXT; } uint32_t IntCode_STORE_LOCAL_I32(IntCodeState& ics, const IntCode* i) { - *((int32_t*)(ics.locals + ics.rf[i->src1_reg].u64)) = ics.rf[i->src2_reg].i32; + *((int32_t*)(ics.locals + ics.rf[i->src1_reg].u32)) = ics.rf[i->src2_reg].i32; return IA_NEXT; } uint32_t IntCode_STORE_LOCAL_I64(IntCodeState& ics, const IntCode* i) { - *((int64_t*)(ics.locals + ics.rf[i->src1_reg].u64)) = ics.rf[i->src2_reg].i64; + *((int64_t*)(ics.locals + ics.rf[i->src1_reg].u32)) = ics.rf[i->src2_reg].i64; return IA_NEXT; } uint32_t IntCode_STORE_LOCAL_F32(IntCodeState& ics, const IntCode* i) { - *((float*)(ics.locals + ics.rf[i->src1_reg].u64)) = ics.rf[i->src2_reg].f32; + *((float*)(ics.locals + ics.rf[i->src1_reg].u32)) = ics.rf[i->src2_reg].f32; return IA_NEXT; } uint32_t IntCode_STORE_LOCAL_F64(IntCodeState& ics, const IntCode* i) { - *((double*)(ics.locals + ics.rf[i->src1_reg].u64)) = ics.rf[i->src2_reg].f64; + *((double*)(ics.locals + ics.rf[i->src1_reg].u32)) = ics.rf[i->src2_reg].f64; return IA_NEXT; } uint32_t IntCode_STORE_LOCAL_V128(IntCodeState& ics, const IntCode* i) { - *((vec128_t*)(ics.locals + ics.rf[i->src1_reg].u64)) = ics.rf[i->src2_reg].v128; + *((vec128_t*)(ics.locals + ics.rf[i->src1_reg].u32)) = ics.rf[i->src2_reg].v128; return IA_NEXT; } int Translate_STORE_LOCAL(TranslationContext& 
ctx, Instr* i) { @@ -3715,17 +3715,17 @@ int Translate_CNTLZ(TranslationContext& ctx, Instr* i) { uint32_t IntCode_EXTRACT_INT8_V128(IntCodeState& ics, const IntCode* i) { const vec128_t& src1 = ics.rf[i->src1_reg].v128; - ics.rf[i->dest_reg].i8 = VECB16(src1,ics.rf[i->src2_reg].i64); + ics.rf[i->dest_reg].i8 = VECB16(src1,ics.rf[i->src2_reg].i8); return IA_NEXT; } uint32_t IntCode_EXTRACT_INT16_V128(IntCodeState& ics, const IntCode* i) { const vec128_t& src1 = ics.rf[i->src1_reg].v128; - ics.rf[i->dest_reg].i16 = VECS8(src1,ics.rf[i->src2_reg].i64); + ics.rf[i->dest_reg].i16 = VECS8(src1,ics.rf[i->src2_reg].i8); return IA_NEXT; } uint32_t IntCode_EXTRACT_INT32_V128(IntCodeState& ics, const IntCode* i) { const vec128_t& src1 = ics.rf[i->src1_reg].v128; - ics.rf[i->dest_reg].i32 = VECI4(src1,ics.rf[i->src2_reg].i64); + ics.rf[i->dest_reg].i32 = VECI4(src1,ics.rf[i->src2_reg].i8); return IA_NEXT; } int Translate_EXTRACT(TranslationContext& ctx, Instr* i) { diff --git a/src/alloy/backend/x64/lowering/lowering_sequences.cc b/src/alloy/backend/x64/lowering/lowering_sequences.cc deleted file mode 100644 index 5ab38f41f..000000000 --- a/src/alloy/backend/x64/lowering/lowering_sequences.cc +++ /dev/null @@ -1,3257 +0,0 @@ -/** - ****************************************************************************** - * Xenia : Xbox 360 Emulator Research Project * - ****************************************************************************** - * Copyright 2013 Ben Vanik. All rights reserved. * - * Released under the BSD license - see LICENSE in the root for more details. * - ****************************************************************************** - */ - -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -// TODO(benvanik): reimplement packing functions -#include - -using namespace alloy; -using namespace alloy::backend::x64; -using namespace alloy::backend::x64::lowering; -using namespace alloy::hir; -using namespace alloy::runtime; - -using namespace Xbyak; - -namespace { - -// Make loads/stores to ints check to see if they are doing a register value. -// This is slow, and with proper constant propagation we may be able to always -// avoid it. -// TODO(benvanik): make a compile time flag? -#define DYNAMIC_REGISTER_ACCESS_CHECK 1 - -#define UNIMPLEMENTED_SEQ() __debugbreak() -#define ASSERT_INVALID_TYPE() XEASSERTALWAYS() - -#define ITRACE 1 -#define DTRACE 1 - -#define SHUFPS_SWAP_DWORDS 0x1B - - -// Major templating foo lives in here. 
-#include - - -enum XmmConst { - XMMZero = 0, - XMMOne = 1, - XMMNegativeOne = 2, - XMMMaskX16Y16 = 3, - XMMFlipX16Y16 = 4, - XMMFixX16Y16 = 5, - XMMNormalizeX16Y16 = 6, - XMM3301 = 7, - XMMSignMaskPS = 8, - XMMSignMaskPD = 9, - XMMByteSwapMask = 10, - XMMPermuteControl15 = 11, - XMMUnpackD3DCOLOR = 12, - XMMOneOver255 = 13, -}; -static const vec128_t xmm_consts[] = { - /* XMMZero */ vec128f(0.0f, 0.0f, 0.0f, 0.0f), - /* XMMOne */ vec128f(1.0f, 1.0f, 1.0f, 1.0f), - /* XMMNegativeOne */ vec128f(-1.0f, -1.0f, -1.0f, -1.0f), - /* XMMMaskX16Y16 */ vec128i(0x0000FFFF, 0xFFFF0000, 0x00000000, 0x00000000), - /* XMMFlipX16Y16 */ vec128i(0x00008000, 0x00000000, 0x00000000, 0x00000000), - /* XMMFixX16Y16 */ vec128f(-32768.0f, 0.0f, 0.0f, 0.0f), - /* XMMNormalizeX16Y16 */ vec128f(1.0f / 32767.0f, 1.0f / (32767.0f * 65536.0f), 0.0f, 0.0f), - /* XMM3301 */ vec128f(3.0f, 3.0f, 0.0f, 1.0f), - /* XMMSignMaskPS */ vec128i(0x80000000u, 0x80000000u, 0x80000000u, 0x80000000u), - /* XMMSignMaskPD */ vec128i(0x00000000u, 0x80000000u, 0x00000000u, 0x80000000u), - /* XMMByteSwapMask */ vec128i(0x00010203u, 0x04050607u, 0x08090A0Bu, 0x0C0D0E0Fu), - /* XMMPermuteControl15 */ vec128b(15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15), - /* XMMUnpackD3DCOLOR */ vec128i(0xFFFFFF02, 0xFFFFFF01, 0xFFFFFF00, 0xFFFFFF02), - /* XMMOneOver255 */ vec128f(1.0f / 255.0f, 1.0f / 255.0f, 1.0f / 255.0f, 1.0f / 255.0f), -}; -// Use consts by first loading the base register then accessing memory: -// e.mov(e.rax, XMMCONSTBASE) -// e.andps(reg, XMMCONST(XMM3303)) -// TODO(benvanik): find a way to do this without the base register. -#define XMMCONSTBASE (uint64_t)&xmm_consts[0] -#define XMMCONST(base_reg, name) e.ptr[base_reg + name * 16] - -static vec128_t lvsl_table[17] = { - vec128b( 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15), - vec128b( 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16), - vec128b( 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17), - vec128b( 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18), - vec128b( 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19), - vec128b( 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20), - vec128b( 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21), - vec128b( 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22), - vec128b( 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23), - vec128b( 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24), - vec128b(10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25), - vec128b(11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26), - vec128b(12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27), - vec128b(13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28), - vec128b(14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29), - vec128b(15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30), - vec128b(16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31), -}; -static vec128_t lvsr_table[17] = { - vec128b(16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31), - vec128b(15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30), - vec128b(14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29), - vec128b(13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28), - vec128b(12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27), - vec128b(11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26), - vec128b(10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25), 
- vec128b( 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24), - vec128b( 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23), - vec128b( 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22), - vec128b( 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21), - vec128b( 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20), - vec128b( 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19), - vec128b( 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18), - vec128b( 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17), - vec128b( 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16), - vec128b( 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15), -}; -static vec128_t extract_table_32[4] = { - vec128b( 3, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), - vec128b( 7, 6, 5, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), - vec128b(11, 10, 9, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), - vec128b(15, 14, 13, 12, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), -}; - -// A note about vectors: -// Alloy represents vectors as xyzw pairs, with indices 0123. -// XMM registers are xyzw pairs with indices 3210, making them more like wzyx. -// This makes things somewhat confusing. It'd be nice to just shuffle the -// registers around on load/store, however certain operations require that -// data be in the right offset. -// Basically, this identity must hold: -// shuffle(vec, b00011011) -> {x,y,z,w} => {x,y,z,w} -// All indices and operations must respect that. -// -// Memory (big endian): -// [00 01 02 03] [04 05 06 07] [08 09 0A 0B] [0C 0D 0E 0F] (x, y, z, w) -// load into xmm register: -// [0F 0E 0D 0C] [0B 0A 09 08] [07 06 05 04] [03 02 01 00] (w, z, y, x) - -void Dummy() { - // -} - -void UndefinedCallExtern(void* raw_context, FunctionInfo* symbol_info) { - XELOGW("undefined extern call to %.8X %s", - symbol_info->address(), - symbol_info->name()); -} - -uint64_t DynamicRegisterLoad(void* raw_context, uint32_t address) { - auto thread_state = *((ThreadState**)raw_context); - auto cbs = thread_state->runtime()->access_callbacks(); - while (cbs) { - if (cbs->handles(cbs->context, address)) { - return cbs->read(cbs->context, address); - } - } - return 0; -} - -void DynamicRegisterStore(void* raw_context, uint32_t address, uint64_t value) { - auto thread_state = *((ThreadState**)raw_context); - auto cbs = thread_state->runtime()->access_callbacks(); - while (cbs) { - if (cbs->handles(cbs->context, address)) { - cbs->write(cbs->context, address, value); - return; - } - } -} - -void Unpack_FLOAT16_2(void* raw_context, __m128& v) { - uint32_t src = v.m128_i32[3]; - v.m128_f32[0] = DirectX::PackedVector::XMConvertHalfToFloat((uint16_t)src); - v.m128_f32[1] = DirectX::PackedVector::XMConvertHalfToFloat((uint16_t)(src >> 16)); - v.m128_f32[2] = 0.0f; - v.m128_f32[3] = 1.0f; -} - -uint64_t LoadClock(void* raw_context) { - LARGE_INTEGER counter; - uint64_t time = 0; - if (QueryPerformanceCounter(&counter)) { - time = counter.QuadPart; - } - return time; -} - -// TODO(benvanik): fancy stuff. -void* ResolveFunctionSymbol(void* raw_context, FunctionInfo* symbol_info) { - // TODO(benvanik): generate this thunk at runtime? or a shim? 
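  // In practice this is the slow path taken when the callee was not known at
  // emit time: IssueCall below loads the FunctionInfo* into rdx, reaches this
  // helper through CallNative(), and then jmp/calls the host machine-code
  // pointer returned in rax.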
- auto thread_state = *((ThreadState**)raw_context); - - Function* fn = NULL; - thread_state->runtime()->ResolveFunction(symbol_info->address(), &fn); - XEASSERTNOTNULL(fn); - auto x64_fn = (X64Function*)fn; - return x64_fn->machine_code(); -} -void* ResolveFunctionAddress(void* raw_context, uint32_t target_address) { - // TODO(benvanik): generate this thunk at runtime? or a shim? - auto thread_state = *((ThreadState**)raw_context); - - Function* fn = NULL; - thread_state->runtime()->ResolveFunction(target_address, &fn); - XEASSERTNOTNULL(fn); - auto x64_fn = (X64Function*)fn; - return x64_fn->machine_code(); -} -void TransitionToHost(X64Emitter& e) { - // Expects: - // rcx = context - // rdx = target host function - // r8 = arg0 - // r9 = arg1 - // Returns: - // rax = host return - auto thunk = e.backend()->guest_to_host_thunk(); - e.mov(e.rax, (uint64_t)thunk); - e.call(e.rax); -} -void IssueCall(X64Emitter& e, FunctionInfo* symbol_info, uint32_t flags) { - auto fn = (X64Function*)symbol_info->function(); - // Resolve address to the function to call and store in rax. - // TODO(benvanik): caching/etc. For now this makes debugging easier. - if (fn) { - e.mov(e.rax, (uint64_t)fn->machine_code()); - } else { - e.mov(e.rdx, (uint64_t)symbol_info); - CallNative(e, ResolveFunctionSymbol); - } - - // Actually jump/call to rax. - if (flags & CALL_TAIL) { - // Pass the callers return address over. - e.mov(e.rdx, e.qword[e.rsp + StackLayout::GUEST_RET_ADDR]); - - e.add(e.rsp, (uint32_t)e.stack_size()); - e.jmp(e.rax); - } else { - // Return address is from the previous SET_RETURN_ADDRESS. - e.mov(e.rdx, e.qword[e.rsp + StackLayout::GUEST_CALL_RET_ADDR]); - - e.call(e.rax); - } -} -void IssueCallIndirect(X64Emitter& e, Value* target, uint32_t flags) { - Reg64 r; - e.BeginOp(target, r, 0); - - // Check if return. - if (flags & CALL_POSSIBLE_RETURN) { - e.cmp(r.cvt32(), e.dword[e.rsp + StackLayout::GUEST_RET_ADDR]); - e.je("epilog", CodeGenerator::T_NEAR); - } - - // Resolve address to the function to call and store in rax. - // TODO(benvanik): caching/etc. For now this makes debugging easier. - if (r != e.rdx) { - e.mov(e.rdx, r); - } - e.EndOp(r); - CallNative(e, ResolveFunctionAddress); - - // Actually jump/call to rax. - if (flags & CALL_TAIL) { - // Pass the callers return address over. - e.mov(e.rdx, e.qword[e.rsp + StackLayout::GUEST_RET_ADDR]); - - e.add(e.rsp, (uint32_t)e.stack_size()); - e.jmp(e.rax); - } else { - // Return address is from the previous SET_RETURN_ADDRESS. - e.mov(e.rdx, e.qword[e.rsp + StackLayout::GUEST_CALL_RET_ADDR]); - - e.call(e.rax); - } -} - -} // namespace - - -void alloy::backend::x64::lowering::RegisterSequences(LoweringTable* table) { -// -------------------------------------------------------------------------- -// General -// -------------------------------------------------------------------------- - -table->AddSequence(OPCODE_COMMENT, [](X64Emitter& e, Instr*& i) { -#if ITRACE - // TODO(benvanik): pass through. - // TODO(benvanik): don't just leak this memory. - auto str = (const char*)i->src1.offset; - auto str_copy = xestrdupa(str); - e.mov(e.rdx, (uint64_t)str_copy); - CallNative(e, TraceString); -#endif // ITRACE - i = e.Advance(i); - return true; -}); - -table->AddSequence(OPCODE_NOP, [](X64Emitter& e, Instr*& i) { - // If we got this, chances are we want it. 
- e.nop(); - i = e.Advance(i); - return true; -}); - -// -------------------------------------------------------------------------- -// Debugging -// -------------------------------------------------------------------------- - -table->AddSequence(OPCODE_SOURCE_OFFSET, [](X64Emitter& e, Instr*& i) { -#if XE_DEBUG - e.nop(); - e.nop(); - e.mov(e.eax, (uint32_t)i->src1.offset); - e.nop(); - e.nop(); -#endif // XE_DEBUG - - e.MarkSourceOffset(i); - i = e.Advance(i); - return true; -}); - -table->AddSequence(OPCODE_DEBUG_BREAK, [](X64Emitter& e, Instr*& i) { - // TODO(benvanik): insert a call to the debug break function to let the - // debugger know. - e.db(0xCC); - i = e.Advance(i); - return true; -}); - -table->AddSequence(OPCODE_DEBUG_BREAK_TRUE, [](X64Emitter& e, Instr*& i) { - e.inLocalLabel(); - CheckBoolean(e, i->src1.value); - e.jz(".x", e.T_SHORT); - // TODO(benvanik): insert a call to the debug break function to let the - // debugger know. - e.db(0xCC); - e.L(".x"); - e.outLocalLabel(); - i = e.Advance(i); - return true; -}); - -table->AddSequence(OPCODE_TRAP, [](X64Emitter& e, Instr*& i) { - // TODO(benvanik): insert a call to the trap function to let the - // debugger know. - e.db(0xCC); - i = e.Advance(i); - return true; -}); - -table->AddSequence(OPCODE_TRAP_TRUE, [](X64Emitter& e, Instr*& i) { - e.inLocalLabel(); - CheckBoolean(e, i->src1.value); - e.jz(".x", e.T_SHORT); - // TODO(benvanik): insert a call to the trap function to let the - // debugger know. - e.db(0xCC); - e.L(".x"); - e.outLocalLabel(); - i = e.Advance(i); - return true; -}); - -// -------------------------------------------------------------------------- -// Calls -// -------------------------------------------------------------------------- - -table->AddSequence(OPCODE_CALL, [](X64Emitter& e, Instr*& i) { - IssueCall(e, i->src1.symbol_info, i->flags); - i = e.Advance(i); - return true; -}); - -table->AddSequence(OPCODE_CALL_TRUE, [](X64Emitter& e, Instr*& i) { - e.inLocalLabel(); - CheckBoolean(e, i->src1.value); - e.jz(".x", e.T_SHORT); - IssueCall(e, i->src2.symbol_info, i->flags); - e.L(".x"); - e.outLocalLabel(); - i = e.Advance(i); - return true; -}); - -table->AddSequence(OPCODE_CALL_INDIRECT, [](X64Emitter& e, Instr*& i) { - IssueCallIndirect(e, i->src1.value, i->flags); - i = e.Advance(i); - return true; -}); - -table->AddSequence(OPCODE_CALL_INDIRECT_TRUE, [](X64Emitter& e, Instr*& i) { - e.inLocalLabel(); - CheckBoolean(e, i->src1.value); - e.jz(".x", e.T_SHORT); - IssueCallIndirect(e, i->src2.value, i->flags); - e.L(".x"); - e.outLocalLabel(); - i = e.Advance(i); - return true; -}); - -table->AddSequence(OPCODE_CALL_EXTERN, [](X64Emitter& e, Instr*& i) { - auto symbol_info = i->src1.symbol_info; - XEASSERT(symbol_info->behavior() == FunctionInfo::BEHAVIOR_EXTERN); - if (!symbol_info->extern_handler()) { - e.mov(e.rdx, (uint64_t)symbol_info); - CallNative(e, UndefinedCallExtern); - } else { - // rdx = target host function - // r8 = arg0 - // r9 = arg1 - e.mov(e.rdx, (uint64_t)symbol_info->extern_handler()); - e.mov(e.r8, (uint64_t)symbol_info->extern_arg0()); - e.mov(e.r9, (uint64_t)symbol_info->extern_arg1()); - TransitionToHost(e); - ReloadRDX(e); - } - i = e.Advance(i); - return true; -}); - -table->AddSequence(OPCODE_RETURN, [](X64Emitter& e, Instr*& i) { - // If this is the last instruction in the last block, just let us - // fall through. 
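  // That is, the explicit jmp to the shared "epilog" label is only needed when
  // more instructions or blocks follow; for the final return of the function,
  // execution simply falls through into the epilog emitted after the last
  // block.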
- if (i->next || i->block->next) { - e.jmp("epilog", CodeGenerator::T_NEAR); - } - i = e.Advance(i); - return true; -}); - -table->AddSequence(OPCODE_RETURN_TRUE, [](X64Emitter& e, Instr*& i) { - CheckBoolean(e, i->src1.value); - e.jnz("epilog", CodeGenerator::T_NEAR); - i = e.Advance(i); - return true; -}); - -table->AddSequence(OPCODE_SET_RETURN_ADDRESS, [](X64Emitter& e, Instr*& i) { - XEASSERT(i->src1.value->IsConstant()); - e.mov(e.qword[e.rsp + StackLayout::GUEST_CALL_RET_ADDR], - i->src1.value->AsUint64()); - i = e.Advance(i); - return true; -}); - -// -------------------------------------------------------------------------- -// Branches -// -------------------------------------------------------------------------- - -table->AddSequence(OPCODE_BRANCH, [](X64Emitter& e, Instr*& i) { - auto target = i->src1.label; - e.jmp(target->name, e.T_NEAR); - i = e.Advance(i); - return true; -}); - -table->AddSequence(OPCODE_BRANCH_TRUE, [](X64Emitter& e, Instr*& i) { - CheckBoolean(e, i->src1.value); - auto target = i->src2.label; - e.jnz(target->name, e.T_NEAR); - i = e.Advance(i); - return true; -}); - -table->AddSequence(OPCODE_BRANCH_FALSE, [](X64Emitter& e, Instr*& i) { - CheckBoolean(e, i->src1.value); - auto target = i->src2.label; - e.jz(target->name, e.T_NEAR); - i = e.Advance(i); - return true; -}); - -// -------------------------------------------------------------------------- -// Types -// -------------------------------------------------------------------------- - -table->AddSequence(OPCODE_ASSIGN, [](X64Emitter& e, Instr*& i) { - if (IsIntType(i->dest->type)) { - IntUnaryOp( - e, i, - [](X64Emitter& e, Instr& i, const Reg& dest_src) { - // nop - the mov will have happened. - }); - } else if (IsFloatType(i->dest->type)) { - UNIMPLEMENTED_SEQ(); - } else if (IsVecType(i->dest->type)) { - UNIMPLEMENTED_SEQ(); - } else { - ASSERT_INVALID_TYPE(); - } - i = e.Advance(i); - return true; -}); - -table->AddSequence(OPCODE_CAST, [](X64Emitter& e, Instr*& i) { - if (i->dest->type == INT32_TYPE) { - if (i->src1.value->type == FLOAT32_TYPE) { - Reg32 dest; - Xmm src; - e.BeginOp(i->dest, dest, REG_DEST, - i->src1.value, src, 0); - e.vmovd(dest, src); - e.EndOp(dest, src); - } else { - UNIMPLEMENTED_SEQ(); - } - } else if (i->dest->type == INT64_TYPE) { - if (i->src1.value->type == FLOAT64_TYPE) { - Reg64 dest; - Xmm src; - e.BeginOp(i->dest, dest, REG_DEST, - i->src1.value, src, 0); - e.vmovq(dest, src); - e.EndOp(dest, src); - } else { - UNIMPLEMENTED_SEQ(); - } - } else if (i->dest->type == FLOAT32_TYPE) { - if (i->src1.value->type == INT32_TYPE) { - Xmm dest; - Reg32 src; - e.BeginOp(i->dest, dest, REG_DEST, - i->src1.value, src, 0); - e.vmovd(dest, src); - e.EndOp(dest, src); - } else { - UNIMPLEMENTED_SEQ(); - } - } else if (i->dest->type == FLOAT64_TYPE) { - if (i->src1.value->type == INT64_TYPE) { - Xmm dest; - Reg64 src; - e.BeginOp(i->dest, dest, REG_DEST, - i->src1.value, src, 0); - e.vmovq(dest, src); - e.EndOp(dest, src); - } else { - UNIMPLEMENTED_SEQ(); - } - } else if (IsVecType(i->dest->type)) { - UNIMPLEMENTED_SEQ(); - } else { - ASSERT_INVALID_TYPE(); - } - i = e.Advance(i); - return true; -}); - -table->AddSequence(OPCODE_ZERO_EXTEND, [](X64Emitter& e, Instr*& i) { - if (i->Match(SIG_TYPE_I16, SIG_TYPE_I8)) { - Reg16 dest; - Reg8 src; - e.BeginOp(i->dest, dest, REG_DEST, - i->src1.value, src, 0); - e.movzx(dest, src); - e.EndOp(dest, src); - } else if (i->Match(SIG_TYPE_I32, SIG_TYPE_I8)) { - Reg32 dest; - Reg8 src; - e.BeginOp(i->dest, dest, REG_DEST, - i->src1.value, 
src, 0); - e.movzx(dest, src); - e.EndOp(dest, src); - } else if (i->Match(SIG_TYPE_I32, SIG_TYPE_I16)) { - Reg32 dest; - Reg16 src; - e.BeginOp(i->dest, dest, REG_DEST, - i->src1.value, src, 0); - e.movzx(dest, src); - e.EndOp(dest, src); - } else if (i->Match(SIG_TYPE_I64, SIG_TYPE_I8)) { - Reg64 dest; - Reg8 src; - e.BeginOp(i->dest, dest, REG_DEST, - i->src1.value, src, 0); - e.movzx(dest, src); - e.EndOp(dest, src); - } else if (i->Match(SIG_TYPE_I64, SIG_TYPE_I16)) { - Reg64 dest; - Reg16 src; - e.BeginOp(i->dest, dest, REG_DEST, - i->src1.value, src, 0); - e.movzx(dest, src); - e.EndOp(dest, src); - } else if (i->Match(SIG_TYPE_I64, SIG_TYPE_I32)) { - Reg64 dest; - Reg32 src; - e.BeginOp(i->dest, dest, REG_DEST, - i->src1.value, src, 0); - e.mov(dest.cvt32(), src.cvt32()); - e.EndOp(dest, src); - } else { - UNIMPLEMENTED_SEQ(); - } - i = e.Advance(i); - return true; -}); - -table->AddSequence(OPCODE_SIGN_EXTEND, [](X64Emitter& e, Instr*& i) { - if (i->Match(SIG_TYPE_I16, SIG_TYPE_I8)) { - Reg16 dest; - Reg8 src; - e.BeginOp(i->dest, dest, REG_DEST, - i->src1.value, src, 0); - e.movsx(dest, src); - e.EndOp(dest, src); - } else if (i->Match(SIG_TYPE_I32, SIG_TYPE_I8)) { - Reg32 dest; - Reg8 src; - e.BeginOp(i->dest, dest, REG_DEST, - i->src1.value, src, 0); - e.movsx(dest, src); - e.EndOp(dest, src); - } else if (i->Match(SIG_TYPE_I32, SIG_TYPE_I16)) { - Reg32 dest; - Reg16 src; - e.BeginOp(i->dest, dest, REG_DEST, - i->src1.value, src, 0); - e.movsx(dest, src); - e.EndOp(dest, src); - } else if (i->Match(SIG_TYPE_I64, SIG_TYPE_I8)) { - Reg64 dest; - Reg8 src; - e.BeginOp(i->dest, dest, REG_DEST, - i->src1.value, src, 0); - e.movsx(dest, src); - e.EndOp(dest, src); - } else if (i->Match(SIG_TYPE_I64, SIG_TYPE_I16)) { - Reg64 dest; - Reg16 src; - e.BeginOp(i->dest, dest, REG_DEST, - i->src1.value, src, 0); - e.movsx(dest, src); - e.EndOp(dest, src); - } else if (i->Match(SIG_TYPE_I64, SIG_TYPE_I32)) { - Reg64 dest; - Reg32 src; - e.BeginOp(i->dest, dest, REG_DEST, - i->src1.value, src, 0); - e.movsxd(dest, src); - e.EndOp(dest, src); - } else { - UNIMPLEMENTED_SEQ(); - } - i = e.Advance(i); - return true; -}); - -table->AddSequence(OPCODE_TRUNCATE, [](X64Emitter& e, Instr*& i) { - if (i->Match(SIG_TYPE_I8, SIG_TYPE_I16)) { - Reg8 dest; - Reg16 src; - e.BeginOp(i->dest, dest, REG_DEST, - i->src1.value, src, 0); - e.mov(dest, src.cvt8()); - e.EndOp(dest, src); - } else if (i->Match(SIG_TYPE_I8, SIG_TYPE_I32)) { - Reg8 dest; - Reg16 src; - e.BeginOp(i->dest, dest, REG_DEST, - i->src1.value, src, 0); - e.mov(dest, src.cvt8()); - e.EndOp(dest, src); - } else if (i->Match(SIG_TYPE_I8, SIG_TYPE_I64)) { - Reg8 dest; - Reg64 src; - e.BeginOp(i->dest, dest, REG_DEST, - i->src1.value, src, 0); - e.mov(dest, src.cvt8()); - e.EndOp(dest, src); - } else if (i->Match(SIG_TYPE_I16, SIG_TYPE_I32)) { - Reg16 dest; - Reg32 src; - e.BeginOp(i->dest, dest, REG_DEST, - i->src1.value, src, 0); - e.mov(dest, src.cvt16()); - e.EndOp(dest, src); - } else if (i->Match(SIG_TYPE_I16, SIG_TYPE_I64)) { - Reg16 dest; - Reg64 src; - e.BeginOp(i->dest, dest, REG_DEST, - i->src1.value, src, 0); - e.mov(dest, src.cvt16()); - e.EndOp(dest, src); - } else if (i->Match(SIG_TYPE_I32, SIG_TYPE_I64)) { - Reg32 dest; - Reg64 src; - e.BeginOp(i->dest, dest, REG_DEST, - i->src1.value, src, 0); - e.mov(dest, src.cvt32()); - e.EndOp(dest, src); - } else { - UNIMPLEMENTED_SEQ(); - } - i = e.Advance(i); - return true; -}); - -table->AddSequence(OPCODE_CONVERT, [](X64Emitter& e, Instr*& i) { - if (i->Match(SIG_TYPE_I32, SIG_TYPE_F32)) 
{ - Reg32 dest; - Xmm src; - e.BeginOp(i->dest, dest, REG_DEST, - i->src1.value, src, 0); - // TODO(benvanik): additional checks for saturation/etc? cvtt* (trunc?) - e.cvttss2si(dest, src); - e.EndOp(dest, src); - } else if (i->Match(SIG_TYPE_I32, SIG_TYPE_F64)) { - Reg32 dest; - Xmm src; - e.BeginOp(i->dest, dest, REG_DEST, - i->src1.value, src, 0); - // TODO(benvanik): additional checks for saturation/etc? cvtt* (trunc?) - e.cvtsd2ss(e.xmm0, src); - e.cvttss2si(dest, e.xmm0); - e.EndOp(dest, src); - } else if (i->Match(SIG_TYPE_I64, SIG_TYPE_F64)) { - Reg64 dest; - Xmm src; - e.BeginOp(i->dest, dest, REG_DEST, - i->src1.value, src, 0); - // TODO(benvanik): additional checks for saturation/etc? cvtt* (trunc?) - e.cvttsd2si(dest, src); - e.EndOp(dest, src); - } else if (i->Match(SIG_TYPE_F32, SIG_TYPE_I32)) { - Xmm dest; - Reg32 src; - e.BeginOp(i->dest, dest, REG_DEST, - i->src1.value, src, 0); - // TODO(benvanik): additional checks for saturation/etc? - e.cvtsi2ss(dest, src); - e.EndOp(dest, src); - } else if (i->Match(SIG_TYPE_F32, SIG_TYPE_F64)) { - Xmm dest, src; - e.BeginOp(i->dest, dest, REG_DEST, - i->src1.value, src, 0); - // TODO(benvanik): additional checks for saturation/etc? - e.cvtsd2ss(dest, src); - e.EndOp(dest, src); - } else if (i->Match(SIG_TYPE_F64, SIG_TYPE_I64)) { - Xmm dest; - Reg64 src; - e.BeginOp(i->dest, dest, REG_DEST, - i->src1.value, src, 0); - // TODO(benvanik): additional checks for saturation/etc? - e.cvtsi2sd(dest, src); - e.EndOp(dest, src); - } else if (i->Match(SIG_TYPE_F64, SIG_TYPE_F32)) { - Xmm dest, src; - e.BeginOp(i->dest, dest, REG_DEST, - i->src1.value, src, 0); - e.cvtss2sd(dest, src); - e.EndOp(dest, src); - } else { - UNIMPLEMENTED_SEQ(); - } - i = e.Advance(i); - return true; -}); - -table->AddSequence(OPCODE_ROUND, [](X64Emitter& e, Instr*& i) { - // flags = ROUND_TO_* - if (IsFloatType(i->dest->type)) { - XmmUnaryOp(e, i, 0, [](X64Emitter& e, Instr& i, const Xmm& dest, const Xmm& src) { - if (i.src1.value->type == FLOAT32_TYPE) { - switch (i.flags) { - case ROUND_TO_ZERO: - e.roundss(dest, src, B00000011); - break; - case ROUND_TO_NEAREST: - e.roundss(dest, src, B00000000); - break; - case ROUND_TO_MINUS_INFINITY: - e.roundss(dest, src, B00000001); - break; - case ROUND_TO_POSITIVE_INFINITY: - e.roundss(dest, src, B00000010); - break; - } - } else { - switch (i.flags) { - case ROUND_TO_ZERO: - e.roundsd(dest, src, B00000011); - break; - case ROUND_TO_NEAREST: - e.roundsd(dest, src, B00000000); - break; - case ROUND_TO_MINUS_INFINITY: - e.roundsd(dest, src, B00000001); - break; - case ROUND_TO_POSITIVE_INFINITY: - e.roundsd(dest, src, B00000010); - break; - } - } - }); - } else if (IsVecType(i->dest->type)) { - XmmUnaryOp(e, i, 0, [](X64Emitter& e, Instr& i, const Xmm& dest, const Xmm& src) { - switch (i.flags) { - case ROUND_TO_ZERO: - e.roundps(dest, src, B00000011); - break; - case ROUND_TO_NEAREST: - e.roundps(dest, src, B00000000); - break; - case ROUND_TO_MINUS_INFINITY: - e.roundps(dest, src, B00000001); - break; - case ROUND_TO_POSITIVE_INFINITY: - e.roundps(dest, src, B00000010); - break; - } - }); - } else { - ASSERT_INVALID_TYPE(); - } - i = e.Advance(i); - return true; -}); - -table->AddSequence(OPCODE_VECTOR_CONVERT_I2F, [](X64Emitter& e, Instr*& i) { - // flags = ARITHMETIC_UNSIGNED - XmmUnaryOp(e, i, i->flags, [](X64Emitter& e, Instr& i, const Xmm& dest, const Xmm& src) { - // TODO(benvanik): are these really the same? VC++ thinks so. 
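  // The answer to the TODO above: not quite -- cvtdq2ps always interprets each
  // lane as a signed int32, so the "unsigned" path is only exact for inputs
  // below 2^31. A minimal standalone illustration (plain SSE2 intrinsics, not
  // part of this file):
  #include <emmintrin.h>
  #include <cstdint>

  // Bit pattern 0x80000000 is 2147483648 when read as unsigned, but cvtdq2ps
  // sees -2^31, so e.g. ConvertLane0(0x80000000u) == -2147483648.0f.
  inline float ConvertLane0(uint32_t bits) {
    __m128 f = _mm_cvtepi32_ps(_mm_set1_epi32((int32_t)bits));  // cvtdq2ps
    return _mm_cvtss_f32(f);
  }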
- if (i.flags & ARITHMETIC_UNSIGNED) { - e.cvtdq2ps(dest, src); - } else { - e.cvtdq2ps(dest, src); - } - }); - i = e.Advance(i); - return true; -}); - -table->AddSequence(OPCODE_VECTOR_CONVERT_F2I, [](X64Emitter& e, Instr*& i) { - // flags = ARITHMETIC_SATURATE | ARITHMETIC_UNSIGNED - XmmUnaryOp(e, i, i->flags, [](X64Emitter& e, Instr& i, const Xmm& dest, const Xmm& src) { - // TODO(benvanik): are these really the same? VC++ thinks so. - if (i.flags & ARITHMETIC_UNSIGNED) { - e.cvttps2dq(dest, src); - } else { - e.cvttps2dq(dest, src); - } - if (i.flags & ARITHMETIC_SATURATE) { - UNIMPLEMENTED_SEQ(); - } - }); - i = e.Advance(i); - return true; -}); - -// -------------------------------------------------------------------------- -// Constants -// -------------------------------------------------------------------------- - -// specials for zeroing/etc (xor/etc) - -table->AddSequence(OPCODE_LOAD_VECTOR_SHL, [](X64Emitter& e, Instr*& i) { - XEASSERT(i->dest->type == VEC128_TYPE); - if (i->src1.value->IsConstant()) { - Xmm dest; - e.BeginOp(i->dest, dest, REG_DEST); - auto sh = MIN(16, i->src1.value->AsUint32()); - e.mov(e.rax, (uintptr_t)&lvsl_table[sh]); - e.movaps(dest, e.ptr[e.rax]); - e.EndOp(dest); - } else { - Xmm dest; - Reg8 src; - e.BeginOp(i->dest, dest, REG_DEST, - i->src1.value, src, 0); - // TODO(benvanik): probably a way to do this with addressing. - e.mov(TEMP_REG, 16); - e.movzx(e.rax, src); - e.cmp(src, 16); - e.cmovb(TEMP_REG, e.rax); - e.shl(TEMP_REG, 4); - e.mov(e.rax, (uintptr_t)lvsl_table); - e.movaps(dest, e.ptr[e.rax + TEMP_REG]); - e.EndOp(dest, src); - } - i = e.Advance(i); - return true; -}); - -table->AddSequence(OPCODE_LOAD_VECTOR_SHR, [](X64Emitter& e, Instr*& i) { - XEASSERT(i->dest->type == VEC128_TYPE); - if (i->src1.value->IsConstant()) { - Xmm dest; - e.BeginOp(i->dest, dest, REG_DEST); - auto sh = MIN(16, i->src1.value->AsUint32()); - e.mov(e.rax, (uintptr_t)&lvsr_table[sh]); - e.movaps(dest, e.ptr[e.rax]); - e.EndOp(dest); - } else { - Xmm dest; - Reg8 src; - e.BeginOp(i->dest, dest, REG_DEST, - i->src1.value, src, 0); - // TODO(benvanik): probably a way to do this with addressing. - e.mov(TEMP_REG, 16); - e.movzx(e.rax, src); - e.cmp(src, 16); - e.cmovb(TEMP_REG, e.rax); - e.shl(TEMP_REG, 4); - e.mov(e.rax, (uintptr_t)lvsr_table); - e.movaps(dest, e.ptr[e.rax + TEMP_REG]); - e.EndOp(dest, src); - } - i = e.Advance(i); - return true; -}); - -table->AddSequence(OPCODE_LOAD_CLOCK, [](X64Emitter& e, Instr*& i) { - // It'd be cool to call QueryPerformanceCounter directly, but w/e. 
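  // Calling QueryPerformanceCounter straight from generated code would mean
  // emitting the full Win64 call sequence inline; instead this leans on the
  // LoadClock helper defined above via the usual CallNative() path, with the
  // counter value coming back in rax.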
- CallNative(e, LoadClock); - Reg64 dest; - e.BeginOp(i->dest, dest, REG_DEST); - e.mov(dest, e.rax); - e.EndOp(dest); - i = e.Advance(i); - return true; -}); - -// -------------------------------------------------------------------------- -// Stack Locals -// -------------------------------------------------------------------------- - -table->AddSequence(OPCODE_LOAD_LOCAL, [](X64Emitter& e, Instr*& i) { - auto addr = e.rsp + i->src1.value->AsUint32(); - if (i->Match(SIG_TYPE_I8, SIG_TYPE_IGNORE)) { - Reg8 dest; - e.BeginOp(i->dest, dest, REG_DEST); - e.mov(dest, e.byte[addr]); - e.EndOp(dest); - } else if (i->Match(SIG_TYPE_I16, SIG_TYPE_IGNORE)) { - Reg16 dest; - e.BeginOp(i->dest, dest, REG_DEST); - e.mov(dest, e.word[addr]); - e.EndOp(dest); - } else if (i->Match(SIG_TYPE_I32, SIG_TYPE_IGNORE)) { - Reg32 dest; - e.BeginOp(i->dest, dest, REG_DEST); - e.mov(dest, e.dword[addr]); - e.EndOp(dest); - } else if (i->Match(SIG_TYPE_I64, SIG_TYPE_IGNORE)) { - Reg64 dest; - e.BeginOp(i->dest, dest, REG_DEST); - e.mov(dest, e.qword[addr]); - e.EndOp(dest); - } else if (i->Match(SIG_TYPE_F32, SIG_TYPE_IGNORE)) { - Xmm dest; - e.BeginOp(i->dest, dest, REG_DEST); - e.movss(dest, e.dword[addr]); - e.EndOp(dest); - } else if (i->Match(SIG_TYPE_F64, SIG_TYPE_IGNORE)) { - Xmm dest; - e.BeginOp(i->dest, dest, REG_DEST); - e.movsd(dest, e.qword[addr]); - e.EndOp(dest); - } else if (i->Match(SIG_TYPE_V128, SIG_TYPE_IGNORE)) { - Xmm dest; - e.BeginOp(i->dest, dest, REG_DEST); - // NOTE: we always know we are aligned. - e.movaps(dest, e.ptr[addr]); - e.EndOp(dest); - } else { - ASSERT_INVALID_TYPE(); - } - i = e.Advance(i); - return true; -}); - -table->AddSequence(OPCODE_STORE_LOCAL, [](X64Emitter& e, Instr*& i) { - auto addr = e.rsp + i->src1.value->AsUint32(); - if (i->Match(SIG_TYPE_X, SIG_TYPE_IGNORE, SIG_TYPE_I8)) { - Reg8 src; - e.BeginOp(i->src2.value, src, 0); - e.mov(e.byte[addr], src); - e.EndOp(src); - } else if (i->Match(SIG_TYPE_X, SIG_TYPE_IGNORE, SIG_TYPE_I8C)) { - e.mov(e.byte[addr], i->src2.value->constant.i8); - } else if (i->Match(SIG_TYPE_X, SIG_TYPE_IGNORE, SIG_TYPE_I16)) { - Reg16 src; - e.BeginOp(i->src2.value, src, 0); - e.mov(e.word[addr], src); - e.EndOp(src); - } else if (i->Match(SIG_TYPE_X, SIG_TYPE_IGNORE, SIG_TYPE_I16C)) { - e.mov(e.word[addr], i->src2.value->constant.i16); - } else if (i->Match(SIG_TYPE_X, SIG_TYPE_IGNORE, SIG_TYPE_I32)) { - Reg32 src; - e.BeginOp(i->src2.value, src, 0); - e.mov(e.dword[addr], src); - e.EndOp(src); - } else if (i->Match(SIG_TYPE_X, SIG_TYPE_IGNORE, SIG_TYPE_I32C)) { - e.mov(e.dword[addr], i->src2.value->constant.i32); - } else if (i->Match(SIG_TYPE_X, SIG_TYPE_IGNORE, SIG_TYPE_I64)) { - Reg64 src; - e.BeginOp(i->src2.value, src, 0); - e.mov(e.qword[addr], src); - e.EndOp(src); - } else if (i->Match(SIG_TYPE_X, SIG_TYPE_IGNORE, SIG_TYPE_I64C)) { - MovMem64(e, addr, i->src2.value->constant.i64); - } else if (i->Match(SIG_TYPE_X, SIG_TYPE_IGNORE, SIG_TYPE_F32)) { - Xmm src; - e.BeginOp(i->src2.value, src, 0); - e.movss(e.dword[addr], src); - e.EndOp(src); - } else if (i->Match(SIG_TYPE_X, SIG_TYPE_IGNORE, SIG_TYPE_F32C)) { - e.mov(e.dword[addr], i->src2.value->constant.i32); - } else if (i->Match(SIG_TYPE_X, SIG_TYPE_IGNORE, SIG_TYPE_F64)) { - Xmm src; - e.BeginOp(i->src2.value, src, 0); - e.movsd(e.qword[addr], src); - e.EndOp(src); - } else if (i->Match(SIG_TYPE_X, SIG_TYPE_IGNORE, SIG_TYPE_F64C)) { - MovMem64(e, addr, i->src2.value->constant.i64); - } else if (i->Match(SIG_TYPE_X, SIG_TYPE_IGNORE, SIG_TYPE_V128)) { - Xmm src; - 
e.BeginOp(i->src2.value, src, 0); - // NOTE: we always know we are aligned. - e.movaps(e.ptr[addr], src); - e.EndOp(src); - } else if (i->Match(SIG_TYPE_X, SIG_TYPE_IGNORE, SIG_TYPE_V128C)) { - // TODO(benvanik): check zero - // TODO(benvanik): correct order? - MovMem64(e, addr, i->src2.value->constant.v128.low); - MovMem64(e, addr + 8, i->src2.value->constant.v128.high); - } else { - ASSERT_INVALID_TYPE(); - } - i = e.Advance(i); - return true; -}); - -// -------------------------------------------------------------------------- -// Context -// -------------------------------------------------------------------------- - -table->AddSequence(OPCODE_LOAD_CONTEXT, [](X64Emitter& e, Instr*& i) { - auto addr = e.rcx + i->src1.offset; - if (i->Match(SIG_TYPE_I8, SIG_TYPE_IGNORE)) { - Reg8 dest; - e.BeginOp(i->dest, dest, REG_DEST); - e.mov(dest, e.byte[addr]); - e.EndOp(dest); -#if DTRACE - e.mov(e.rdx, i->src1.offset); - e.mov(e.r8b, dest); - CallNative(e, TraceContextLoadI8); -#endif // DTRACE - } else if (i->Match(SIG_TYPE_I16, SIG_TYPE_IGNORE)) { - Reg16 dest; - e.BeginOp(i->dest, dest, REG_DEST); - e.mov(dest, e.word[addr]); - e.EndOp(dest); -#if DTRACE - e.mov(e.rdx, i->src1.offset); - e.mov(e.r8w, dest); - CallNative(e, TraceContextLoadI16); -#endif // DTRACE - } else if (i->Match(SIG_TYPE_I32, SIG_TYPE_IGNORE)) { - Reg32 dest; - e.BeginOp(i->dest, dest, REG_DEST); - e.mov(dest, e.dword[addr]); - e.EndOp(dest); -#if DTRACE - e.mov(e.rdx, i->src1.offset); - e.mov(e.r8d, dest); - CallNative(e, TraceContextLoadI32); -#endif // DTRACE - } else if (i->Match(SIG_TYPE_I64, SIG_TYPE_IGNORE)) { - Reg64 dest; - e.BeginOp(i->dest, dest, REG_DEST); - e.mov(dest, e.qword[addr]); - e.EndOp(dest); -#if DTRACE - e.mov(e.rdx, i->src1.offset); - e.mov(e.r8, dest); - CallNative(e, TraceContextLoadI64); -#endif // DTRACE - } else if (i->Match(SIG_TYPE_F32, SIG_TYPE_IGNORE)) { - Xmm dest; - e.BeginOp(i->dest, dest, REG_DEST); - e.movss(dest, e.dword[addr]); - e.EndOp(dest); -#if DTRACE - e.mov(e.rdx, i->src1.offset); - e.lea(e.r8, Stash(e, dest)); - CallNative(e, TraceContextLoadF32); -#endif // DTRACE - } else if (i->Match(SIG_TYPE_F64, SIG_TYPE_IGNORE)) { - Xmm dest; - e.BeginOp(i->dest, dest, REG_DEST); - e.movsd(dest, e.qword[addr]); - e.EndOp(dest); -#if DTRACE - e.mov(e.rdx, i->src1.offset); - e.lea(e.r8, Stash(e, dest)); - CallNative(e, TraceContextLoadF64); -#endif // DTRACE - } else if (i->Match(SIG_TYPE_V128, SIG_TYPE_IGNORE)) { - Xmm dest; - e.BeginOp(i->dest, dest, REG_DEST); - // NOTE: we always know we are aligned. 
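  // Presumably safe because context offsets and stack-local slots are kept
  // 16-byte aligned, so the aligned movaps form can be used; contrast the
  // guest-memory OPCODE_LOAD below, which falls back to movups.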
- e.movaps(dest, e.ptr[addr]); - e.EndOp(dest); -#if DTRACE - e.mov(e.rdx, i->src1.offset); - e.lea(e.r8, Stash(e, dest)); - CallNative(e, TraceContextLoadV128); -#endif // DTRACE - } else { - ASSERT_INVALID_TYPE(); - } - i = e.Advance(i); - return true; -}); - -table->AddSequence(OPCODE_STORE_CONTEXT, [](X64Emitter& e, Instr*& i) { - auto addr = e.rcx + i->src1.offset; - if (i->Match(SIG_TYPE_X, SIG_TYPE_IGNORE, SIG_TYPE_I8)) { - Reg8 src; - e.BeginOp(i->src2.value, src, 0); - e.mov(e.byte[addr], src); - e.EndOp(src); -#if DTRACE - e.mov(e.rdx, i->src1.offset); - e.mov(e.r8b, src); - CallNative(e, TraceContextStoreI8); -#endif // DTRACE - } else if (i->Match(SIG_TYPE_X, SIG_TYPE_IGNORE, SIG_TYPE_I8C)) { - e.mov(e.byte[addr], i->src2.value->constant.i8); -#if DTRACE - e.mov(e.rdx, i->src1.offset); - e.mov(e.r8b, i->src2.value->constant.i8); - CallNative(e, TraceContextStoreI8); -#endif // DTRACE - } else if (i->Match(SIG_TYPE_X, SIG_TYPE_IGNORE, SIG_TYPE_I16)) { - Reg16 src; - e.BeginOp(i->src2.value, src, 0); - e.mov(e.word[addr], src); - e.EndOp(src); -#if DTRACE - e.mov(e.rdx, i->src1.offset); - e.mov(e.r8w, src); - CallNative(e, TraceContextStoreI16); -#endif // DTRACE - } else if (i->Match(SIG_TYPE_X, SIG_TYPE_IGNORE, SIG_TYPE_I16C)) { - e.mov(e.word[addr], i->src2.value->constant.i16); -#if DTRACE - e.mov(e.rdx, i->src1.offset); - e.mov(e.r8w, i->src2.value->constant.i16); - CallNative(e, TraceContextStoreI16); -#endif // DTRACE - } else if (i->Match(SIG_TYPE_X, SIG_TYPE_IGNORE, SIG_TYPE_I32)) { - Reg32 src; - e.BeginOp(i->src2.value, src, 0); - e.mov(e.dword[addr], src); - e.EndOp(src); -#if DTRACE - e.mov(e.rdx, i->src1.offset); - e.mov(e.r8d, src); - CallNative(e, TraceContextStoreI32); -#endif // DTRACE - } else if (i->Match(SIG_TYPE_X, SIG_TYPE_IGNORE, SIG_TYPE_I32C)) { - e.mov(e.dword[addr], i->src2.value->constant.i32); -#if DTRACE - e.mov(e.rdx, i->src1.offset); - e.mov(e.r8d, i->src2.value->constant.i32); - CallNative(e, TraceContextStoreI32); -#endif // DTRACE - } else if (i->Match(SIG_TYPE_X, SIG_TYPE_IGNORE, SIG_TYPE_I64)) { - Reg64 src; - e.BeginOp(i->src2.value, src, 0); - e.mov(e.qword[addr], src); - e.EndOp(src); -#if DTRACE - e.mov(e.rdx, i->src1.offset); - e.mov(e.r8, src); - CallNative(e, TraceContextStoreI64); -#endif // DTRACE - } else if (i->Match(SIG_TYPE_X, SIG_TYPE_IGNORE, SIG_TYPE_I64C)) { - MovMem64(e, addr, i->src2.value->constant.i64); -#if DTRACE - e.mov(e.rdx, i->src1.offset); - e.mov(e.r8, i->src2.value->constant.i64); - CallNative(e, TraceContextStoreI64); -#endif // DTRACE - } else if (i->Match(SIG_TYPE_X, SIG_TYPE_IGNORE, SIG_TYPE_F32)) { - Xmm src; - e.BeginOp(i->src2.value, src, 0); - e.movss(e.dword[addr], src); - e.EndOp(src); -#if DTRACE - e.mov(e.rdx, i->src1.offset); - e.lea(e.r8, Stash(e, src)); - CallNative(e, TraceContextStoreF32); -#endif // DTRACE - } else if (i->Match(SIG_TYPE_X, SIG_TYPE_IGNORE, SIG_TYPE_F32C)) { - e.mov(e.dword[addr], i->src2.value->constant.i32); -#if DTRACE - e.mov(e.rdx, i->src1.offset); - e.mov(e.eax, i->src2.value->constant.i32); - e.vmovd(e.xmm0, e.eax); - e.lea(e.r8, Stash(e, e.xmm0)); - CallNative(e, TraceContextStoreF32); -#endif // DTRACE - } else if (i->Match(SIG_TYPE_X, SIG_TYPE_IGNORE, SIG_TYPE_F64)) { - Xmm src; - e.BeginOp(i->src2.value, src, 0); - e.movsd(e.qword[addr], src); - e.EndOp(src); -#if DTRACE - e.mov(e.rdx, i->src1.offset); - e.lea(e.r8, Stash(e, src)); - CallNative(e, TraceContextStoreF64); -#endif // DTRACE - } else if (i->Match(SIG_TYPE_X, SIG_TYPE_IGNORE, SIG_TYPE_F64C)) { - MovMem64(e, 
addr, i->src2.value->constant.i64); -#if DTRACE - e.mov(e.rdx, i->src1.offset); - e.mov(e.rax, i->src2.value->constant.i64); - e.vmovq(e.xmm0, e.rax); - e.lea(e.r8, Stash(e, e.xmm0)); - CallNative(e, TraceContextStoreF64); -#endif // DTRACE - } else if (i->Match(SIG_TYPE_X, SIG_TYPE_IGNORE, SIG_TYPE_V128)) { - Xmm src; - e.BeginOp(i->src2.value, src, 0); - // NOTE: we always know we are aligned. - e.movaps(e.ptr[addr], src); - e.EndOp(src); -#if DTRACE - e.mov(e.rdx, i->src1.offset); - e.lea(e.r8, Stash(e, src)); - CallNative(e, TraceContextStoreV128); -#endif // DTRACE - } else if (i->Match(SIG_TYPE_X, SIG_TYPE_IGNORE, SIG_TYPE_V128C)) { - // TODO(benvanik): check zero - // TODO(benvanik): correct order? - MovMem64(e, addr, i->src2.value->constant.v128.low); - MovMem64(e, addr + 8, i->src2.value->constant.v128.high); -#if DTRACE - e.mov(e.rdx, i->src1.offset); - e.lea(e.r8, e.ptr[addr]); - CallNative(e, TraceContextStoreV128); -#endif // DTRACE - } else { - ASSERT_INVALID_TYPE(); - } - i = e.Advance(i); - return true; -}); - -// -------------------------------------------------------------------------- -// Memory -// -------------------------------------------------------------------------- - -table->AddSequence(OPCODE_LOAD, [](X64Emitter& e, Instr*& i) { - // If this is a constant address load, check to see if it's in a register - // range. We'll also probably want a dynamic check for unverified loads. - // So far, most games use constants. - if (i->src1.value->IsConstant()) { - uint64_t address = i->src1.value->AsUint64(); - auto cbs = e.runtime()->access_callbacks(); - while (cbs) { - if (cbs->handles(cbs->context, address)) { - // Eh, hacking lambdas. - i->src3.offset = (uint64_t)cbs; - IntUnaryOp(e, i, [](X64Emitter& e, Instr& i, const Reg& dest_src) { - auto cbs = (RegisterAccessCallbacks*)i.src3.offset; - e.mov(e.rcx, (uint64_t)cbs->context); - e.mov(e.rdx, i.src1.value->AsUint64()); - CallNative(e, cbs->read); - switch (i.dest->type) { - case INT8_TYPE: - break; - case INT16_TYPE: - e.xchg(e.al, e.ah); - break; - case INT32_TYPE: - e.bswap(e.eax); - break; - case INT64_TYPE: - e.bswap(e.rax); - break; - default: ASSERT_INVALID_TYPE(); break; - } - e.mov(dest_src, e.rax); - }); - i = e.Advance(i); - return true; - } - cbs = cbs->next; - } - } - - // mov reg, [membase + address.32] - if (i->src1.value->IsConstant()) { - e.mov(e.eax, i->src1.value->AsUint32()); - } else { - Reg64 addr_off; - e.BeginOp(i->src1.value, addr_off, 0); - e.mov(e.eax, addr_off.cvt32()); // trunc to 32bits - e.EndOp(addr_off); - } - auto addr = e.rdx + e.rax; - -#if DYNAMIC_REGISTER_ACCESS_CHECK - e.inLocalLabel(); - // if ((address & 0xFF000000) == 0x7F000000) do check; - e.lea(e.r8d, e.ptr[addr]); - e.and(e.r8d, 0xFF000000); - e.cmp(e.r8d, 0x7F000000); - e.jne(".normal_addr"); - if (IsIntType(i->dest->type)) { - e.mov(e.rdx, e.rax); - CallNative(e, DynamicRegisterLoad); - Reg64 dyn_dest; - e.BeginOp(i->dest, dyn_dest, REG_DEST); - switch (i->dest->type) { - case INT8_TYPE: - e.movzx(dyn_dest, e.al); - break; - case INT16_TYPE: - e.xchg(e.al, e.ah); - e.movzx(dyn_dest, e.ax); - break; - case INT32_TYPE: - e.bswap(e.eax); - e.mov(dyn_dest.cvt32(), e.eax); - break; - case INT64_TYPE: - e.bswap(e.rax); - e.mov(dyn_dest, e.rax); - break; - default: - e.db(0xCC); - break; - } - e.EndOp(dyn_dest); - } else { - e.db(0xCC); - } - e.jmp(".skip_access"); - e.L(".normal_addr"); -#endif // DYNAMIC_REGISTER_ACCESS_CHECK - - if (i->Match(SIG_TYPE_I8, SIG_TYPE_IGNORE)) { - Reg8 dest; - e.BeginOp(i->dest, dest, REG_DEST); 
- e.mov(dest, e.byte[addr]); - e.EndOp(dest); -#if DTRACE - e.lea(e.rdx, e.ptr[addr]); - e.mov(e.r8b, dest); - CallNative(e, TraceMemoryLoadI8); -#endif // DTRACE - } else if (i->Match(SIG_TYPE_I16, SIG_TYPE_IGNORE)) { - Reg16 dest; - e.BeginOp(i->dest, dest, REG_DEST); - e.mov(dest, e.word[addr]); - e.EndOp(dest); -#if DTRACE - e.lea(e.rdx, e.ptr[addr]); - e.mov(e.r8w, dest); - CallNative(e, TraceMemoryLoadI16); -#endif // DTRACE - } else if (i->Match(SIG_TYPE_I32, SIG_TYPE_IGNORE)) { - Reg32 dest; - e.BeginOp(i->dest, dest, REG_DEST); - e.mov(dest, e.dword[addr]); - e.EndOp(dest); -#if DTRACE - e.lea(e.rdx, e.ptr[addr]); - e.mov(e.r8d, dest); - CallNative(e, TraceMemoryLoadI32); -#endif // DTRACE - } else if (i->Match(SIG_TYPE_I64, SIG_TYPE_IGNORE)) { - Reg64 dest; - e.BeginOp(i->dest, dest, REG_DEST); - e.mov(dest, e.qword[addr]); - e.EndOp(dest); -#if DTRACE - e.lea(e.rdx, e.ptr[addr]); - e.mov(e.r8, dest); - CallNative(e, TraceMemoryLoadI64); -#endif // DTRACE - } else if (i->Match(SIG_TYPE_F32, SIG_TYPE_IGNORE)) { - Xmm dest; - e.BeginOp(i->dest, dest, REG_DEST); - e.movss(dest, e.dword[addr]); - e.EndOp(dest); -#if DTRACE - e.lea(e.rdx, e.ptr[addr]); - e.lea(e.r8, Stash(e, dest)); - CallNative(e, TraceMemoryLoadF32); -#endif // DTRACE - } else if (i->Match(SIG_TYPE_F64, SIG_TYPE_IGNORE)) { - Xmm dest; - e.BeginOp(i->dest, dest, REG_DEST); - e.movsd(dest, e.qword[addr]); - e.EndOp(dest); -#if DTRACE - e.lea(e.rdx, e.ptr[addr]); - e.lea(e.r8, Stash(e, dest)); - CallNative(e, TraceMemoryLoadF64); -#endif // DTRACE - } else if (i->Match(SIG_TYPE_V128, SIG_TYPE_IGNORE)) { - Xmm dest; - e.BeginOp(i->dest, dest, REG_DEST); - // TODO(benvanik): we should try to stick to movaps if possible. - e.movups(dest, e.ptr[addr]); - e.EndOp(dest); -#if DTRACE - e.lea(e.rdx, e.ptr[addr]); - e.lea(e.r8, Stash(e, dest)); - CallNative(e, TraceMemoryLoadV128); -#endif // DTRACE - } else { - ASSERT_INVALID_TYPE(); - } - -#if DYNAMIC_REGISTER_ACCESS_CHECK - e.L(".skip_access"); - e.outLocalLabel(); -#endif // DYNAMIC_REGISTER_ACCESS_CHECK - - i = e.Advance(i); - return true; -}); - -table->AddSequence(OPCODE_STORE, [](X64Emitter& e, Instr*& i) { - // If this is a constant address store, check to see if it's in a - // register range. We'll also probably want a dynamic check for - // unverified stores. So far, most games use constants. 
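  // "Register range" here means an address claimed by one of the runtime's
  // RegisterAccessCallbacks (MMIO-style handlers): when a callback handles the
  // constant address, the value is byte-swapped to big-endian and handed to
  // cbs->write below instead of being written through to guest memory.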
- if (i->src1.value->IsConstant()) { - uint64_t address = i->src1.value->AsUint64(); - auto cbs = e.runtime()->access_callbacks(); - while (cbs) { - if (cbs->handles(cbs->context, address)) { - e.mov(e.rcx, (uint64_t)cbs->context); - e.mov(e.rdx, address); - if (i->src2.value->IsConstant()) { - e.mov(e.r8, i->src2.value->AsUint64()); - } else { - Reg64 src2; - e.BeginOp(i->src2.value, src2, 0); - switch (i->src2.value->type) { - case INT8_TYPE: - e.movzx(e.r8d, src2.cvt8()); - break; - case INT16_TYPE: - e.movzx(e.rax, src2.cvt16()); - e.xchg(e.al, e.ah); - e.mov(e.r8, e.rax); - break; - case INT32_TYPE: - e.movzx(e.r8, src2.cvt32()); - e.bswap(e.r8d); - break; - case INT64_TYPE: - e.mov(e.r8, src2); - e.bswap(e.r8); - break; - default: ASSERT_INVALID_TYPE(); break; - } - e.EndOp(src2); - } - CallNative(e, cbs->write); - i = e.Advance(i); - return true; - } - cbs = cbs->next; - } - } - - // mov [membase + address.32], reg - if (i->src1.value->IsConstant()) { - e.mov(e.eax, i->src1.value->AsUint32()); - } else { - Reg64 addr_off; - e.BeginOp(i->src1.value, addr_off, 0); - e.mov(e.eax, addr_off.cvt32()); // trunc to 32bits - e.EndOp(addr_off); - } - auto addr = e.rdx + e.rax; - -#if DYNAMIC_REGISTER_ACCESS_CHECK - // if ((address & 0xFF000000) == 0x7F000000) do check; - e.lea(e.r8d, e.ptr[addr]); - e.and(e.r8d, 0xFF000000); - e.cmp(e.r8d, 0x7F000000); - e.inLocalLabel(); - e.jne(".normal_addr"); - if (IsIntType(i->src2.value->type)) { - Reg64 dyn_src; - e.BeginOp(i->src2.value, dyn_src, 0); - switch (i->src2.value->type) { - case INT8_TYPE: - e.movzx(e.r8, dyn_src.cvt8()); - break; - case INT16_TYPE: - e.movzx(e.rax, dyn_src.cvt16()); - e.xchg(e.al, e.ah); - e.mov(e.r8, e.rax); - break; - case INT32_TYPE: - e.mov(e.r8d, dyn_src.cvt32()); - e.bswap(e.r8d); - break; - case INT64_TYPE: - e.mov(e.r8, dyn_src); - e.bswap(e.r8); - break; - default: - e.db(0xCC); - break; - } - e.EndOp(dyn_src); - e.mov(e.rdx, e.rax); - CallNative(e, DynamicRegisterStore); - } else { - e.db(0xCC); - } - e.jmp(".skip_access"); - e.L(".normal_addr"); -#endif // DYNAMIC_REGISTER_ACCESS_CHECK - - if (i->Match(SIG_TYPE_X, SIG_TYPE_IGNORE, SIG_TYPE_I8)) { - Reg8 src; - e.BeginOp(i->src2.value, src, 0); - e.mov(e.byte[addr], src); - e.EndOp(src); -#if DTRACE - e.lea(e.rdx, e.ptr[addr]); - e.mov(e.r8b, src); - CallNative(e, TraceMemoryStoreI8); -#endif // DTRACE - } else if (i->Match(SIG_TYPE_X, SIG_TYPE_IGNORE, SIG_TYPE_I8C)) { - e.mov(e.byte[addr], i->src2.value->constant.i8); -#if DTRACE - e.lea(e.rdx, e.ptr[addr]); - e.mov(e.r8b, i->src2.value->constant.i8); - CallNative(e, TraceMemoryStoreI8); -#endif // DTRACE - } else if (i->Match(SIG_TYPE_X, SIG_TYPE_IGNORE, SIG_TYPE_I16)) { - Reg16 src; - e.BeginOp(i->src2.value, src, 0); - e.mov(e.word[addr], src); - e.EndOp(src); -#if DTRACE - e.lea(e.rdx, e.ptr[addr]); - e.mov(e.r8w, src); - CallNative(e, TraceMemoryStoreI16); -#endif // DTRACE - } else if (i->Match(SIG_TYPE_X, SIG_TYPE_IGNORE, SIG_TYPE_I16C)) { - e.mov(e.word[addr], i->src2.value->constant.i16); -#if DTRACE - e.lea(e.rdx, e.ptr[addr]); - e.mov(e.r8w, i->src2.value->constant.i16); - CallNative(e, TraceMemoryStoreI16); -#endif // DTRACE - } else if (i->Match(SIG_TYPE_X, SIG_TYPE_IGNORE, SIG_TYPE_I32)) { - Reg32 src; - e.BeginOp(i->src2.value, src, 0); - e.mov(e.dword[addr], src); - e.EndOp(src); -#if DTRACE - e.lea(e.rdx, e.ptr[addr]); - e.mov(e.r8d, src); - CallNative(e, TraceMemoryStoreI32); -#endif // DTRACE - } else if (i->Match(SIG_TYPE_X, SIG_TYPE_IGNORE, SIG_TYPE_I32C)) { - e.mov(e.dword[addr], 
i->src2.value->constant.i32); -#if DTRACE - e.lea(e.rdx, e.ptr[addr]); - e.mov(e.r8d, i->src2.value->constant.i32); - CallNative(e, TraceMemoryStoreI32); -#endif // DTRACE - } else if (i->Match(SIG_TYPE_X, SIG_TYPE_IGNORE, SIG_TYPE_I64)) { - Reg64 src; - e.BeginOp(i->src2.value, src, 0); - e.mov(e.qword[addr], src); - e.EndOp(src); -#if DTRACE - e.lea(e.rdx, e.ptr[addr]); - e.mov(e.r8, src); - CallNative(e, TraceMemoryStoreI64); -#endif // DTRACE - } else if (i->Match(SIG_TYPE_X, SIG_TYPE_IGNORE, SIG_TYPE_I64C)) { - MovMem64(e, addr, i->src2.value->constant.i64); -#if DTRACE - e.lea(e.rdx, e.ptr[addr]); - e.mov(e.r8, i->src2.value->constant.i64); - CallNative(e, TraceMemoryStoreI64); -#endif // DTRACE - } else if (i->Match(SIG_TYPE_X, SIG_TYPE_IGNORE, SIG_TYPE_F32)) { - Xmm src; - e.BeginOp(i->src2.value, src, 0); - e.movss(e.dword[addr], src); - e.EndOp(src); -#if DTRACE - e.lea(e.rdx, e.ptr[addr]); - e.lea(e.r8, Stash(e, src)); - CallNative(e, TraceMemoryStoreF32); -#endif // DTRACE - } else if (i->Match(SIG_TYPE_X, SIG_TYPE_IGNORE, SIG_TYPE_F32C)) { - e.mov(e.dword[addr], i->src2.value->constant.i32); -#if DTRACE - e.lea(e.rdx, e.ptr[addr]); - e.mov(e.eax, i->src2.value->constant.i32); - e.vmovd(e.xmm0, e.eax); - e.lea(e.r8, Stash(e, e.xmm0)); - CallNative(e, TraceMemoryStoreF32); -#endif // DTRACE - } else if (i->Match(SIG_TYPE_X, SIG_TYPE_IGNORE, SIG_TYPE_F64)) { - Xmm src; - e.BeginOp(i->src2.value, src, 0); - e.movsd(e.qword[addr], src); - e.EndOp(src); -#if DTRACE - e.lea(e.rdx, e.ptr[addr]); - e.lea(e.r8, Stash(e, src)); - CallNative(e, TraceMemoryStoreF64); -#endif // DTRACE - } else if (i->Match(SIG_TYPE_X, SIG_TYPE_IGNORE, SIG_TYPE_F64C)) { - MovMem64(e, addr, i->src2.value->constant.i64); -#if DTRACE - e.lea(e.rdx, e.ptr[addr]); - e.movsd(e.xmm0, e.ptr[addr]); - CallNative(e, TraceMemoryStoreF64); -#endif // DTRACE - } else if (i->Match(SIG_TYPE_X, SIG_TYPE_IGNORE, SIG_TYPE_V128)) { - Xmm src; - e.BeginOp(i->src2.value, src, 0); - // TODO(benvanik): we should try to stick to movaps if possible. - e.movups(e.ptr[addr], src); - e.EndOp(src); -#if DTRACE - e.lea(e.rdx, e.ptr[addr]); - e.lea(e.r8, Stash(e, src)); - CallNative(e, TraceMemoryStoreV128); -#endif // DTRACE - } else if (i->Match(SIG_TYPE_X, SIG_TYPE_IGNORE, SIG_TYPE_V128C)) { - // TODO(benvanik): check zero - // TODO(benvanik): correct order? 
- MovMem64(e, addr, i->src2.value->constant.v128.low); - MovMem64(e, addr + 8, i->src2.value->constant.v128.high); -#if DTRACE - e.lea(e.rdx, e.ptr[addr]); - e.lea(e.r8, e.ptr[addr]); - CallNative(e, TraceMemoryStoreV128); -#endif // DTRACE - } else { - ASSERT_INVALID_TYPE(); - } - -#if DYNAMIC_REGISTER_ACCESS_CHECK - e.L(".skip_access"); - e.outLocalLabel(); -#endif // DYNAMIC_REGISTER_ACCESS_CHECK - - i = e.Advance(i); - return true; -}); - -table->AddSequence(OPCODE_PREFETCH, [](X64Emitter& e, Instr*& i) { - UNIMPLEMENTED_SEQ(); - i = e.Advance(i); - return true; -}); - -// -------------------------------------------------------------------------- -// Comparisons -// -------------------------------------------------------------------------- - -table->AddSequence(OPCODE_MAX, [](X64Emitter& e, Instr*& i) { - if (IsIntType(i->dest->type)) { - UNIMPLEMENTED_SEQ(); - } else if (IsFloatType(i->dest->type)) { - XmmBinaryOp(e, i, i->flags, [](X64Emitter& e, Instr& i, const Xmm& dest_src, const Xmm& src) { - if (i.src1.value->type == FLOAT32_TYPE) { - e.maxss(dest_src, src); - } else { - e.maxsd(dest_src, src); - } - }); - } else if (IsVecType(i->dest->type)) { - XmmBinaryOp(e, i, i->flags, [](X64Emitter& e, Instr& i, const Xmm& dest_src, const Xmm& src) { - e.maxps(dest_src, src); - }); - } else { - ASSERT_INVALID_TYPE(); - } - i = e.Advance(i); - return true; -}); - -table->AddSequence(OPCODE_MIN, [](X64Emitter& e, Instr*& i) { - if (IsIntType(i->dest->type)) { - UNIMPLEMENTED_SEQ(); - } else if (IsFloatType(i->dest->type)) { - XmmBinaryOp(e, i, i->flags, [](X64Emitter& e, Instr& i, const Xmm& dest_src, const Xmm& src) { - if (i.src1.value->type == FLOAT32_TYPE) { - e.minss(dest_src, src); - } else { - e.minsd(dest_src, src); - } - }); - } else if (IsVecType(i->dest->type)) { - XmmBinaryOp(e, i, i->flags, [](X64Emitter& e, Instr& i, const Xmm& dest_src, const Xmm& src) { - e.minps(dest_src, src); - }); - } else { - ASSERT_INVALID_TYPE(); - } - i = e.Advance(i); - return true; -}); - -table->AddSequence(OPCODE_SELECT, [](X64Emitter& e, Instr*& i) { - CheckBoolean(e, i->src1.value); - if (IsIntType(i->dest->type)) { - UNIMPLEMENTED_SEQ(); - } else if (IsFloatType(i->dest->type) || IsVecType(i->dest->type)) { - Xmm dest, src2, src3; - e.BeginOp(i->dest, dest, REG_DEST, - i->src2.value, src2, 0, - i->src3.value, src3, 0); - // TODO(benvanik): find a way to do this without branches. 
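- // Branchy select for now: dest starts as src3 and is overwritten with src2 when the condition is non-zero (ZF clear). - // A branchless form would materialize the condition into a per-lane mask and blend, roughly e.vblendvps(dest, src3, src2, mask); sketch only, the mask build is the cost being weighed here.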
- e.inLocalLabel(); - e.movaps(dest, src3); - e.jz(".skip"); - e.movaps(dest, src2); - e.L(".skip"); - e.outLocalLabel(); - e.EndOp(dest, src2, src3); - } else { - ASSERT_INVALID_TYPE(); - } - i = e.Advance(i); - return true; -}); - -table->AddSequence(OPCODE_IS_TRUE, [](X64Emitter& e, Instr*& i) { - CheckBoolean(e, i->src1.value); - Reg8 dest; - e.BeginOp(i->dest, dest, REG_DEST); - e.setnz(dest); - e.EndOp(dest); - i = e.Advance(i); - return true; -}); - -table->AddSequence(OPCODE_IS_FALSE, [](X64Emitter& e, Instr*& i) { - CheckBoolean(e, i->src1.value); - Reg8 dest; - e.BeginOp(i->dest, dest, REG_DEST); - e.setz(dest); - e.EndOp(dest); - i = e.Advance(i); - return true; -}); - -table->AddSequence(OPCODE_COMPARE_EQ, [](X64Emitter& e, Instr*& i) { - CompareXX(e, i, [](X64Emitter& e, Reg8& dest, bool invert) { - if (!invert) { - e.sete(dest); - } else { - e.setne(dest); - } - }); - i = e.Advance(i); - return true; -}); - -table->AddSequence(OPCODE_COMPARE_NE, [](X64Emitter& e, Instr*& i) { - CompareXX(e, i, [](X64Emitter& e, Reg8& dest, bool invert) { - if (!invert) { - e.setne(dest); - } else { - e.sete(dest); - } - }); - i = e.Advance(i); - return true; -}); - -table->AddSequence(OPCODE_COMPARE_SLT, [](X64Emitter& e, Instr*& i) { - CompareXX(e, i, [](X64Emitter& e, Reg8& dest, bool invert) { - if (!invert) { - e.setl(dest); - } else { - e.setge(dest); - } - }); - i = e.Advance(i); - return true; -}); - -table->AddSequence(OPCODE_COMPARE_SLE, [](X64Emitter& e, Instr*& i) { - CompareXX(e, i, [](X64Emitter& e, Reg8& dest, bool invert) { - if (!invert) { - e.setle(dest); - } else { - e.setg(dest); - } - }); - i = e.Advance(i); - return true; -}); - -table->AddSequence(OPCODE_COMPARE_SGT, [](X64Emitter& e, Instr*& i) { - CompareXX(e, i, [](X64Emitter& e, Reg8& dest, bool invert) { - if (!invert) { - e.setg(dest); - } else { - e.setle(dest); - } - }); - i = e.Advance(i); - return true; -}); - -table->AddSequence(OPCODE_COMPARE_SGE, [](X64Emitter& e, Instr*& i) { - CompareXX(e, i, [](X64Emitter& e, Reg8& dest, bool invert) { - if (!invert) { - e.setge(dest); - } else { - e.setl(dest); - } - }); - i = e.Advance(i); - return true; -}); - -table->AddSequence(OPCODE_COMPARE_ULT, [](X64Emitter& e, Instr*& i) { - CompareXX(e, i, [](X64Emitter& e, Reg8& dest, bool invert) { - if (!invert) { - e.setb(dest); - } else { - e.setae(dest); - } - }); - i = e.Advance(i); - return true; -}); - -table->AddSequence(OPCODE_COMPARE_ULE, [](X64Emitter& e, Instr*& i) { - CompareXX(e, i, [](X64Emitter& e, Reg8& dest, bool invert) { - if (!invert) { - e.setbe(dest); - } else { - e.seta(dest); - } - }); - i = e.Advance(i); - return true; -}); - -table->AddSequence(OPCODE_COMPARE_UGT, [](X64Emitter& e, Instr*& i) { - CompareXX(e, i, [](X64Emitter& e, Reg8& dest, bool invert) { - if (!invert) { - e.seta(dest); - } else { - e.setbe(dest); - } - }); - i = e.Advance(i); - return true; -}); - -table->AddSequence(OPCODE_COMPARE_UGE, [](X64Emitter& e, Instr*& i) { - CompareXX(e, i, [](X64Emitter& e, Reg8& dest, bool invert) { - if (!invert) { - e.setae(dest); - } else { - e.setb(dest); - } - }); - i = e.Advance(i); - return true; -}); - -table->AddSequence(OPCODE_DID_CARRY, [](X64Emitter& e, Instr*& i) { - Reg8 dest; - e.BeginOp(i->dest, dest, REG_DEST); - LoadEflags(e); - e.setc(dest); - e.EndOp(dest); - i = e.Advance(i); - return true; -}); - -table->AddSequence(OPCODE_DID_OVERFLOW, [](X64Emitter& e, Instr*& i) { - Reg8 dest; - e.BeginOp(i->dest, dest, REG_DEST); - LoadEflags(e); - e.seto(dest); - e.EndOp(dest); - i = 
e.Advance(i); - return true; -}); - -table->AddSequence(OPCODE_DID_SATURATE, [](X64Emitter& e, Instr*& i) { - UNIMPLEMENTED_SEQ(); - i = e.Advance(i); - return true; -}); - -table->AddSequence(OPCODE_VECTOR_COMPARE_EQ, [](X64Emitter& e, Instr*& i) { - if (IsVecType(i->dest->type)) { - VectorCompareXX(e, i, VECTOR_CMP_EQ, true); - } else { - ASSERT_INVALID_TYPE(); - } - i = e.Advance(i); - return true; -}); - -table->AddSequence(OPCODE_VECTOR_COMPARE_SGT, [](X64Emitter& e, Instr*& i) { - if (IsVecType(i->dest->type)) { - VectorCompareXX(e, i, VECTOR_CMP_GT, true); - } else { - ASSERT_INVALID_TYPE(); - } - i = e.Advance(i); - return true; -}); - -table->AddSequence(OPCODE_VECTOR_COMPARE_SGE, [](X64Emitter& e, Instr*& i) { - if (IsVecType(i->dest->type)) { - VectorCompareXX(e, i, VECTOR_CMP_GE, true); - } else { - ASSERT_INVALID_TYPE(); - } - i = e.Advance(i); - return true; -}); - -table->AddSequence(OPCODE_VECTOR_COMPARE_UGT, [](X64Emitter& e, Instr*& i) { - if (IsVecType(i->dest->type)) { - VectorCompareXX(e, i, VECTOR_CMP_GT, false); - } else { - ASSERT_INVALID_TYPE(); - } - i = e.Advance(i); - return true; -}); - -table->AddSequence(OPCODE_VECTOR_COMPARE_UGE, [](X64Emitter& e, Instr*& i) { - if (IsVecType(i->dest->type)) { - VectorCompareXX(e, i, VECTOR_CMP_GE, false); - } else { - ASSERT_INVALID_TYPE(); - } - i = e.Advance(i); - return true; -}); - -// -------------------------------------------------------------------------- -// Math -// -------------------------------------------------------------------------- - -table->AddSequence(OPCODE_ADD, [](X64Emitter& e, Instr*& i) { - if (IsIntType(i->dest->type)) { - IntBinaryOp(e, i, [](X64Emitter& e, Instr& i, const Reg& dest_src, const Operand& src) { - e.add(dest_src, src); - }, [](X64Emitter& e, Instr& i, const Reg& dest_src, uint32_t src) { - e.add(dest_src, src); - }); - } else if (IsFloatType(i->dest->type)) { - XmmBinaryOp(e, i, i->flags, [](X64Emitter& e, Instr& i, const Xmm& dest_src, const Xmm& src) { - if (i.src1.value->type == FLOAT32_TYPE) { - e.addss(dest_src, src); - } else { - e.addsd(dest_src, src); - } - }); - } else if (IsVecType(i->dest->type)) { - XmmBinaryOp(e, i, i->flags, [](X64Emitter& e, Instr& i, const Xmm& dest_src, const Xmm& src) { - e.addps(dest_src, src); - }); - } else { - ASSERT_INVALID_TYPE(); - } - i = e.Advance(i); - return true; -}); - -table->AddSequence(OPCODE_ADD_CARRY, [](X64Emitter& e, Instr*& i) { - if (IsIntType(i->dest->type)) { - // dest = src1 + src2 + src3.i8 - IntTernaryOp(e, i, [](X64Emitter& e, Instr& i, const Reg& dest_src, const Operand& src2, const Operand& src3) { - Reg8 src3_8(src3.getIdx()); - if (src3.getIdx() <= 4) { - e.mov(e.ah, src3_8); - } else { - e.mov(e.al, src3_8); - e.mov(e.ah, e.al); - } - e.sahf(); - e.adc(dest_src, src2); - }, [](X64Emitter& e, Instr& i, const Reg& dest_src, const Operand& src2, uint32_t src3) { - e.mov(e.eax, src3); - e.mov(e.ah, e.al); - e.sahf(); - e.adc(dest_src, src2); - }, [](X64Emitter& e, Instr& i, const Reg& dest_src, uint32_t src2, const Operand& src3) { - Reg8 src3_8(src3.getIdx()); - if (src3.getIdx() <= 4) { - e.mov(e.ah, src3_8); - } else { - e.mov(e.al, src3_8); - e.mov(e.ah, e.al); - } - e.sahf(); - e.adc(dest_src, src2); - }); - } else { - UNIMPLEMENTED_SEQ(); - } - i = e.Advance(i); - return true; -}); - -table->AddSequence(OPCODE_VECTOR_ADD, [](X64Emitter& e, Instr*& i) { - if (IsVecType(i->dest->type)) { - if (i->flags == INT8_TYPE) { - UNIMPLEMENTED_SEQ(); - } else if (i->flags == INT16_TYPE) { - UNIMPLEMENTED_SEQ(); - } else if 
(i->flags == INT32_TYPE) { - UNIMPLEMENTED_SEQ(); - } else if (i->flags == FLOAT32_TYPE) { - UNIMPLEMENTED_SEQ(); - } else { - ASSERT_INVALID_TYPE(); - } - } else { - ASSERT_INVALID_TYPE(); - } - i = e.Advance(i); - return true; -}); - -table->AddSequence(OPCODE_SUB, [](X64Emitter& e, Instr*& i) { - if (IsIntType(i->dest->type)) { - IntBinaryOp(e, i, [](X64Emitter& e, Instr& i, const Reg& dest_src, const Operand& src) { - if (i.flags & ARITHMETIC_SET_CARRY) { - auto Nax = LIKE_REG(e.rax, src); - e.mov(Nax, src); - e.not(Nax); - e.stc(); - e.adc(dest_src, Nax); - } else { - e.sub(dest_src, src); - } - }, [](X64Emitter& e, Instr& i, const Reg& dest_src, uint32_t src) { - if (i.flags & ARITHMETIC_SET_CARRY) { - auto Nax = LIKE_REG(e.rax, dest_src); - e.mov(Nax, src); - e.not(Nax); - e.stc(); - e.adc(dest_src, Nax); - } else { - e.sub(dest_src, src); - } - }); - } else if (IsFloatType(i->dest->type)) { - XmmBinaryOp(e, i, i->flags, [](X64Emitter& e, Instr& i, const Xmm& dest_src, const Xmm& src) { - if (i.src1.value->type == FLOAT32_TYPE) { - e.subss(dest_src, src); - } else { - e.subsd(dest_src, src); - } - }); - } else if (IsVecType(i->dest->type)) { - XmmBinaryOp(e, i, i->flags, [](X64Emitter& e, Instr& i, const Xmm& dest_src, const Xmm& src) { - e.subps(dest_src, src); - }); - } else { - ASSERT_INVALID_TYPE(); - } - i = e.Advance(i); - return true; -}); - -table->AddSequence(OPCODE_MUL, [](X64Emitter& e, Instr*& i) { - if (IsIntType(i->dest->type)) { - IntBinaryOp(e, i, [](X64Emitter& e, Instr& i, const Reg& dest_src, const Operand& src) { - // RAX = value, RDX = clobbered - // TODO(benvanik): make the register allocator put dest_src in RAX? - auto Nax = LIKE_REG(e.rax, dest_src); - e.mov(Nax, dest_src); - if (i.flags & ARITHMETIC_UNSIGNED) { - e.mul(src); - } else { - e.imul(src); - } - e.mov(dest_src, Nax); - ReloadRDX(e); - }, [](X64Emitter& e, Instr& i, const Reg& dest_src, uint32_t src) { - // RAX = value, RDX = clobbered - // TODO(benvanik): make the register allocator put dest_src in RAX? - auto Nax = LIKE_REG(e.rax, dest_src); - auto Ndx = LIKE_REG(e.rdx, dest_src); - e.mov(Nax, dest_src); - e.mov(Ndx, src); - if (i.flags & ARITHMETIC_UNSIGNED) { - e.mul(Ndx); - } else { - e.imul(Ndx); - } - e.mov(dest_src, Nax); - ReloadRDX(e); - }); - } else if (IsFloatType(i->dest->type)) { - XmmBinaryOp(e, i, i->flags, [](X64Emitter& e, Instr& i, const Xmm& dest_src, const Xmm& src) { - if (i.flags & ARITHMETIC_UNSIGNED) { UNIMPLEMENTED_SEQ(); } - if (i.src1.value->type == FLOAT32_TYPE) { - e.mulss(dest_src, src); - } else { - e.mulsd(dest_src, src); - } - }); - } else if (IsVecType(i->dest->type)) { - XmmBinaryOp(e, i, i->flags, [](X64Emitter& e, Instr& i, const Xmm& dest_src, const Xmm& src) { - if (i.flags & ARITHMETIC_UNSIGNED) { UNIMPLEMENTED_SEQ(); } - e.mulps(dest_src, src); - }); - } else { - ASSERT_INVALID_TYPE(); - } - i = e.Advance(i); - return true; -}); - -table->AddSequence(OPCODE_MUL_HI, [](X64Emitter& e, Instr*& i) { - if (IsIntType(i->dest->type)) { - IntBinaryOp(e, i, [](X64Emitter& e, Instr& i, const Reg& dest_src, const Operand& src) { - // RAX = value, RDX = clobbered - // TODO(benvanik): make the register allocator put dest_src in RAX? 
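- // mul/imul leave the low half of the product in RAX and the high half in RDX; MUL_HI wants the high half, so the result is copied out of Ndx below and RDX (membase) is reloaded afterwards.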
- auto Nax = LIKE_REG(e.rax, dest_src); - auto Ndx = LIKE_REG(e.rdx, dest_src); - e.mov(Nax, dest_src); - if (i.flags & ARITHMETIC_UNSIGNED) { - e.mul(src); - } else { - e.imul(src); - } - e.mov(dest_src, Ndx); - ReloadRDX(e); - }, [](X64Emitter& e, Instr& i, const Reg& dest_src, uint32_t src) { - // RAX = value, RDX = clobbered - // TODO(benvanik): make the register allocator put dest_src in RAX? - auto Nax = LIKE_REG(e.rax, dest_src); - auto Ndx = LIKE_REG(e.rdx, dest_src); - e.mov(Nax, dest_src); - e.mov(Ndx, src); - if (i.flags & ARITHMETIC_UNSIGNED) { - e.mul(Ndx); - } else { - e.imul(Ndx); - } - e.mov(dest_src, Ndx); - ReloadRDX(e); - }); - } else { - UNIMPLEMENTED_SEQ(); - } - i = e.Advance(i); - return true; -}); - -table->AddSequence(OPCODE_DIV, [](X64Emitter& e, Instr*& i) { - if (IsIntType(i->dest->type)) { - IntBinaryOp(e, i, [](X64Emitter& e, Instr& i, const Reg& dest_src, const Operand& src) { - // RAX = value, RDX = clobbered - // TODO(benvanik): make the register allocator put dest_src in RAX? - auto Nax = LIKE_REG(e.rax, dest_src); - auto Ndx = LIKE_REG(e.rdx, dest_src); - e.mov(Nax, dest_src); - e.xor(Ndx, Ndx); - if (i.flags & ARITHMETIC_UNSIGNED) { - e.div(src); - } else { - e.idiv(src); - } - e.mov(dest_src, Nax); - ReloadRDX(e); - }, [](X64Emitter& e, Instr& i, const Reg& dest_src, uint32_t src) { - // RAX = value, RDX = clobbered - // TODO(benvanik): make the register allocator put dest_src in RAX? - auto Nax = LIKE_REG(e.rax, dest_src); - auto Ndx = LIKE_REG(e.rdx, dest_src); - e.mov(Nax, dest_src); - e.mov(Ndx, src); - if (i.flags & ARITHMETIC_UNSIGNED) { - e.div(Ndx); - } else { - e.idiv(Ndx); - } - e.mov(dest_src, Nax); - ReloadRDX(e); - }); - } else if (IsFloatType(i->dest->type)) { - XmmBinaryOp(e, i, i->flags, [](X64Emitter& e, Instr& i, const Xmm& dest_src, const Xmm& src) { - if (i.flags & ARITHMETIC_UNSIGNED) { UNIMPLEMENTED_SEQ(); } - if (i.src1.value->type == FLOAT32_TYPE) { - e.divss(dest_src, src); - } else { - e.divsd(dest_src, src); - } - }); - } else if (IsVecType(i->dest->type)) { - XmmBinaryOp(e, i, i->flags, [](X64Emitter& e, Instr& i, const Xmm& dest_src, const Xmm& src) { - if (i.flags & ARITHMETIC_UNSIGNED) { UNIMPLEMENTED_SEQ(); } - e.divps(dest_src, src); - }); - } else { - ASSERT_INVALID_TYPE(); - } - i = e.Advance(i); - return true; -}); - -table->AddSequence(OPCODE_MUL_ADD, [](X64Emitter& e, Instr*& i) { - if (IsIntType(i->dest->type)) { - UNIMPLEMENTED_SEQ(); - } else if (IsFloatType(i->dest->type)) { - XmmTernaryOp(e, i, i->flags, [](X64Emitter& e, Instr& i, const Xmm& dest_src, const Xmm& src2, const Xmm& src3) { - if (i.dest->type == FLOAT32_TYPE) { - e.vfmadd132ss(dest_src, src3, src2); - } else { - e.vfmadd132sd(dest_src, src3, src2); - } - }); - } else if (IsVecType(i->dest->type)) { - XmmTernaryOp(e, i, i->flags, [](X64Emitter& e, Instr& i, const Xmm& dest_src, const Xmm& src2, const Xmm& src3) { - e.vfmadd132ps(dest_src, src3, src2); - }); - } else { - ASSERT_INVALID_TYPE(); - } - i = e.Advance(i); - return true; -}); - -table->AddSequence(OPCODE_MUL_SUB, [](X64Emitter& e, Instr*& i) { - if (IsIntType(i->dest->type)) { - UNIMPLEMENTED_SEQ(); - } else if (IsFloatType(i->dest->type)) { - XmmTernaryOp(e, i, i->flags, [](X64Emitter& e, Instr& i, const Xmm& dest_src, const Xmm& src2, const Xmm& src3) { - if (i.dest->type == FLOAT32_TYPE) { - e.vfmsub132ss(dest_src, src3, src2); - } else { - e.vfmsub132sd(dest_src, src3, src2); - } - }); - } else if (IsVecType(i->dest->type)) { - XmmTernaryOp(e, i, i->flags, [](X64Emitter& e, Instr& i, 
const Xmm& dest_src, const Xmm& src2, const Xmm& src3) { - e.vfmsub132ps(dest_src, src3, src2); - }); - } else { - ASSERT_INVALID_TYPE(); - } - i = e.Advance(i); - return true; -}); - -table->AddSequence(OPCODE_NEG, [](X64Emitter& e, Instr*& i) { - if (IsIntType(i->dest->type)) { - IntUnaryOp(e, i, [](X64Emitter& e, Instr& i, const Reg& dest_src) { - e.neg(dest_src); - }); - } else if (IsFloatType(i->dest->type)) { - XmmUnaryOp(e, i, i->flags, [](X64Emitter& e, Instr& i, const Xmm& dest, const Xmm& src) { - if (i.src1.value->type == FLOAT32_TYPE) { - e.mov(e.rax, XMMCONSTBASE); - e.vpxor(dest, src, XMMCONST(e.rax, XMMSignMaskPS)); - } else { - e.mov(e.rax, XMMCONSTBASE); - e.vpxor(dest, src, XMMCONST(e.rax, XMMSignMaskPD)); - } - }); - } else if (IsVecType(i->dest->type)) { - XmmUnaryOp(e, i, i->flags, [](X64Emitter& e, Instr& i, const Xmm& dest, const Xmm& src) { - e.mov(e.rax, XMMCONSTBASE); - e.vpxor(dest, src, XMMCONST(e.rax, XMMSignMaskPS)); - }); - } else { - ASSERT_INVALID_TYPE(); - } - i = e.Advance(i); - return true; -}); - -table->AddSequence(OPCODE_ABS, [](X64Emitter& e, Instr*& i) { - if (IsIntType(i->dest->type)) { - UNIMPLEMENTED_SEQ(); - } else if (IsFloatType(i->dest->type)) { - XmmUnaryOp(e, i, i->flags, [](X64Emitter& e, Instr& i, const Xmm& dest, const Xmm& src) { - if (i.src1.value->type == FLOAT32_TYPE) { - e.mov(e.rax, XMMCONSTBASE); - e.movaps(e.xmm0, XMMCONST(e.rax, XMMSignMaskPS)); - e.vpandn(dest, e.xmm0, src); - } else { - e.mov(e.rax, XMMCONSTBASE); - e.movaps(e.xmm0, XMMCONST(e.rax, XMMSignMaskPD));; - e.vpandn(dest, e.xmm0, src); - } - }); - } else if (IsVecType(i->dest->type)) { - XmmUnaryOp(e, i, i->flags, [](X64Emitter& e, Instr& i, const Xmm& dest, const Xmm& src) { - e.mov(e.rax, XMMCONSTBASE); - e.movaps(e.xmm0, XMMCONST(e.rax, XMMSignMaskPS));; - e.vpandn(dest, e.xmm0, src); - }); - } else { - ASSERT_INVALID_TYPE(); - } - i = e.Advance(i); - return true; -}); - -table->AddSequence(OPCODE_SQRT, [](X64Emitter& e, Instr*& i) { - if (IsFloatType(i->dest->type)) { - XmmUnaryOp(e, i, i->flags, [](X64Emitter& e, Instr& i, const Xmm& dest, const Xmm& src) { - if (i.dest->type == FLOAT32_TYPE) { - e.sqrtss(dest, src); - } else { - e.sqrtsd(dest, src); - } - }); - } else if (IsVecType(i->dest->type)) { - XmmUnaryOp(e, i, i->flags, [](X64Emitter& e, Instr& i, const Xmm& dest, const Xmm& src) { - e.sqrtps(dest, src); - }); - } else { - ASSERT_INVALID_TYPE(); - } - i = e.Advance(i); - return true; -}); - -table->AddSequence(OPCODE_RSQRT, [](X64Emitter& e, Instr*& i) { - if (IsFloatType(i->dest->type)) { - XmmUnaryOp(e, i, i->flags, [](X64Emitter& e, Instr& i, const Xmm& dest, const Xmm& src) { - if (i.dest->type == FLOAT32_TYPE) { - e.rsqrtss(dest, src); - } else { - e.cvtsd2ss(dest, src); - e.rsqrtss(dest, dest); - e.cvtss2sd(dest, dest); - } - }); - } else if (IsVecType(i->dest->type)) { - XmmUnaryOp(e, i, i->flags, [](X64Emitter& e, Instr& i, const Xmm& dest, const Xmm& src) { - e.rsqrtps(dest, src); - }); - } else { - ASSERT_INVALID_TYPE(); - } - i = e.Advance(i); - return true; -}); - -table->AddSequence(OPCODE_POW2, [](X64Emitter& e, Instr*& i) { - if (IsFloatType(i->dest->type)) { - UNIMPLEMENTED_SEQ(); - } else if (IsVecType(i->dest->type)) { - UNIMPLEMENTED_SEQ(); - } else { - ASSERT_INVALID_TYPE(); - } - i = e.Advance(i); - return true; -}); - -table->AddSequence(OPCODE_LOG2, [](X64Emitter& e, Instr*& i) { - if (IsFloatType(i->dest->type)) { - UNIMPLEMENTED_SEQ(); - } else if (IsVecType(i->dest->type)) { - UNIMPLEMENTED_SEQ(); - } else { - 
ASSERT_INVALID_TYPE(); - } - i = e.Advance(i); - return true; -}); - -table->AddSequence(OPCODE_DOT_PRODUCT_3, [](X64Emitter& e, Instr*& i) { - if (IsVecType(i->src1.value->type)) { - XmmBinaryOp(e, i, i->flags, [](X64Emitter& e, Instr& i, const Xmm& dest_src, const Xmm& src) { - // http://msdn.microsoft.com/en-us/library/bb514054(v=vs.90).aspx - // TODO(benvanik): verify ordering - e.dpps(dest_src, src, B01110001); - }); - } else { - ASSERT_INVALID_TYPE(); - } - i = e.Advance(i); - return true; -}); - -table->AddSequence(OPCODE_DOT_PRODUCT_4, [](X64Emitter& e, Instr*& i) { - if (IsVecType(i->src1.value->type)) { - XmmBinaryOp(e, i, i->flags, [](X64Emitter& e, Instr& i, const Xmm& dest_src, const Xmm& src) { - // http://msdn.microsoft.com/en-us/library/bb514054(v=vs.90).aspx - // TODO(benvanik): verify ordering - e.dpps(dest_src, src, B11110001); - }); - } else { - ASSERT_INVALID_TYPE(); - } - i = e.Advance(i); - return true; -}); - -table->AddSequence(OPCODE_AND, [](X64Emitter& e, Instr*& i) { - if (IsIntType(i->dest->type)) { - IntBinaryOp(e, i, [](X64Emitter& e, Instr& i, const Reg& dest_src, const Operand& src) { - e.and(dest_src, src); - }, [](X64Emitter& e, Instr& i, const Reg& dest_src, uint32_t src) { - e.and(dest_src, src); - }); - } else if (IsVecType(i->dest->type)) { - XmmBinaryOp(e, i, i->flags, [](X64Emitter& e, Instr& i, const Xmm& dest_src, const Xmm& src) { - e.pand(dest_src, src); - }); - } else { - ASSERT_INVALID_TYPE(); - } - i = e.Advance(i); - return true; -}); - -table->AddSequence(OPCODE_OR, [](X64Emitter& e, Instr*& i) { - if (IsIntType(i->dest->type)) { - IntBinaryOp(e, i, [](X64Emitter& e, Instr& i, const Reg& dest_src, const Operand& src) { - e.or(dest_src, src); - }, [](X64Emitter& e, Instr& i, const Reg& dest_src, uint32_t src) { - e.or(dest_src, src); - }); - } else if (IsVecType(i->dest->type)) { - XmmBinaryOp(e, i, i->flags, [](X64Emitter& e, Instr& i, const Xmm& dest_src, const Xmm& src) { - e.por(dest_src, src); - }); - } else { - ASSERT_INVALID_TYPE(); - } - i = e.Advance(i); - return true; -}); - -table->AddSequence(OPCODE_XOR, [](X64Emitter& e, Instr*& i) { - if (IsIntType(i->dest->type)) { - IntBinaryOp(e, i, [](X64Emitter& e, Instr& i, const Reg& dest_src, const Operand& src) { - e.xor(dest_src, src); - }, [](X64Emitter& e, Instr& i, const Reg& dest_src, uint32_t src) { - e.xor(dest_src, src); - }); - } else if (IsVecType(i->dest->type)) { - XmmBinaryOp(e, i, i->flags, [](X64Emitter& e, Instr& i, const Xmm& dest_src, const Xmm& src) { - e.pxor(dest_src, src); - }); - } else { - ASSERT_INVALID_TYPE(); - } - i = e.Advance(i); - return true; -}); - -table->AddSequence(OPCODE_NOT, [](X64Emitter& e, Instr*& i) { - if (IsIntType(i->dest->type)) { - IntUnaryOp(e, i, [](X64Emitter& e, Instr& i, const Reg& dest_src) { - e.not(dest_src); - }); - } else if (IsVecType(i->dest->type)) { - XmmUnaryOp(e, i, i->flags, [](X64Emitter& e, Instr& i, const Xmm& dest, const Xmm& src) { - // dest_src ^= 0xFFFF... - if (dest.getIdx() != src.getIdx()) { - e.movaps(dest, src); - } - e.mov(e.rax, XMMCONSTBASE); - e.pxor(dest, XMMCONST(e.rax, XMMOne)); - }); - } else { - ASSERT_INVALID_TYPE(); - } - i = e.Advance(i); - return true; -}); - -table->AddSequence(OPCODE_SHL, [](X64Emitter& e, Instr*& i) { - if (IsIntType(i->dest->type)) { - // TODO(benvanik): use shlx if available. - IntBinaryOp(e, i, [](X64Emitter& e, Instr& i, const Reg& dest_src, const Operand& src) { - // Can only shl by cl. Eww x86. 
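- // rcx is live across the sequence, so it is parked in rax while cl carries the shift amount and restored afterwards; shlx (the commented-out form below) would avoid the shuffle.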
- Reg8 shamt(src.getIdx()); - e.mov(e.rax, e.rcx); - e.mov(e.cl, shamt); - e.shl(dest_src, e.cl); - e.mov(e.rcx, e.rax); - // BeaEngine can't disasm this, boo. - /*Reg32e dest_src_e(dest_src.getIdx(), MAX(dest_src.getBit(), 32)); - Reg32e src_e(src.getIdx(), MAX(dest_src.getBit(), 32)); - e.and(src_e, 0x3F); - e.shlx(dest_src_e, dest_src_e, src_e);*/ - }, [](X64Emitter& e, Instr& i, const Reg& dest_src, uint32_t src) { - e.shl(dest_src, src); - }); - } else { - UNIMPLEMENTED_SEQ(); - } - i = e.Advance(i); - return true; -}); - -table->AddSequence(OPCODE_SHR, [](X64Emitter& e, Instr*& i) { - if (IsIntType(i->dest->type)) { - // TODO(benvanik): use shrx if available. - IntBinaryOp(e, i, [](X64Emitter& e, Instr& i, const Reg& dest_src, const Operand& src) { - // Can only sar by cl. Eww x86. - Reg8 shamt(src.getIdx()); - e.mov(e.rax, e.rcx); - e.mov(e.cl, shamt); - e.shr(dest_src, e.cl); - e.mov(e.rcx, e.rax); - }, [](X64Emitter& e, Instr& i, const Reg& dest_src, uint32_t src) { - e.shr(dest_src, src); - }); - } else { - UNIMPLEMENTED_SEQ(); - } - i = e.Advance(i); - return true; -}); - -table->AddSequence(OPCODE_SHA, [](X64Emitter& e, Instr*& i) { - if (IsIntType(i->dest->type)) { - // TODO(benvanik): use sarx if available. - IntBinaryOp(e, i, [](X64Emitter& e, Instr& i, const Reg& dest_src, const Operand& src) { - // Can only sar by cl. Eww x86. - Reg8 shamt(src.getIdx()); - e.mov(e.rax, e.rcx); - e.mov(e.cl, shamt); - e.sar(dest_src, e.cl); - e.mov(e.rcx, e.rax); - }, [](X64Emitter& e, Instr& i, const Reg& dest_src, uint32_t src) { - e.sar(dest_src, src); - }); - } else { - UNIMPLEMENTED_SEQ(); - } - i = e.Advance(i); - return true; -}); - -table->AddSequence(OPCODE_VECTOR_SHL, [](X64Emitter& e, Instr*& i) { - if (IsVecType(i->dest->type)) { - if (i->flags == INT8_TYPE) { - UNIMPLEMENTED_SEQ(); - } else if (i->flags == INT16_TYPE) { - UNIMPLEMENTED_SEQ(); - } else if (i->flags == INT32_TYPE) { - XmmBinaryOp(e, i, i->flags, [](X64Emitter& e, Instr& i, const Xmm& dest_src, const Xmm& src) { - // src shift mask may have values >31, and x86 sets to zero when - // that happens so we mask. - e.mov(e.eax, 0x1F); - e.vmovd(e.xmm0, e.eax); - e.vpbroadcastd(e.xmm0, e.xmm0); - e.vandps(e.xmm0, src, e.xmm0); - e.vpsllvd(dest_src, dest_src, e.xmm0); - }); - } else { - ASSERT_INVALID_TYPE(); - } - } else { - ASSERT_INVALID_TYPE(); - } - i = e.Advance(i); - return true; -}); - -table->AddSequence(OPCODE_VECTOR_SHR, [](X64Emitter& e, Instr*& i) { - if (IsVecType(i->dest->type)) { - if (i->flags == INT8_TYPE) { - UNIMPLEMENTED_SEQ(); - } else if (i->flags == INT16_TYPE) { - UNIMPLEMENTED_SEQ(); - } else if (i->flags == INT32_TYPE) { - XmmBinaryOp(e, i, i->flags, [](X64Emitter& e, Instr& i, const Xmm& dest_src, const Xmm& src) { - // src shift mask may have values >31, and x86 sets to zero when - // that happens so we mask. 
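- // (The masked counts end up in xmm0; note the variable shift below takes src directly, unlike VECTOR_SHL above which shifts by the masked xmm0 value.)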
- e.mov(e.eax, 0x1F); - e.vmovd(e.xmm0, e.eax); - e.vpbroadcastd(e.xmm0, e.xmm0); - e.vandps(e.xmm0, src, e.xmm0); - e.vpsrlvd(dest_src, dest_src, src); - }); - } else { - ASSERT_INVALID_TYPE(); - } - } else { - ASSERT_INVALID_TYPE(); - } - i = e.Advance(i); - return true; -}); - -table->AddSequence(OPCODE_VECTOR_SHA, [](X64Emitter& e, Instr*& i) { - if (IsVecType(i->dest->type)) { - if (i->flags == INT8_TYPE) { - UNIMPLEMENTED_SEQ(); - } else if (i->flags == INT16_TYPE) { - UNIMPLEMENTED_SEQ(); - } else if (i->flags == INT32_TYPE) { - XmmBinaryOp(e, i, i->flags, [](X64Emitter& e, Instr& i, const Xmm& dest_src, const Xmm& src) { - // src shift mask may have values >31, and x86 sets to zero when - // that happens so we mask. - e.mov(e.eax, 0x1F); - e.vmovd(e.xmm0, e.eax); - e.vpbroadcastd(e.xmm0, e.xmm0); - e.vandps(e.xmm0, src, e.xmm0); - e.vpsravd(dest_src, dest_src, src); - }); - } else { - ASSERT_INVALID_TYPE(); - } - } else { - ASSERT_INVALID_TYPE(); - } - i = e.Advance(i); - return true; -}); - -table->AddSequence(OPCODE_ROTATE_LEFT, [](X64Emitter& e, Instr*& i) { - if (IsIntType(i->dest->type)) { - IntBinaryOp(e, i, [](X64Emitter& e, Instr& i, const Reg& dest_src, const Operand& src) { - // Can only rol by cl. Eww x86. - Reg8 shamt(src.getIdx()); - e.mov(e.rax, e.rcx); - e.mov(e.cl, shamt); - e.rol(dest_src, e.cl); - e.mov(e.rcx, e.rax); - }, [](X64Emitter& e, Instr& i, const Reg& dest_src, uint32_t src) { - e.rol(dest_src, src); - }); - } else { - UNIMPLEMENTED_SEQ(); - } - i = e.Advance(i); - return true; -}); - -table->AddSequence(OPCODE_BYTE_SWAP, [](X64Emitter& e, Instr*& i) { - if (i->Match(SIG_TYPE_I16, SIG_TYPE_I16)) { - Reg16 dest, src1; - // TODO(benvanik): fix register allocator to put the value in ABCD - //e.BeginOp(i->dest, d, REG_DEST | REG_ABCD, - // i->src1.value, s1, 0); - //if (d != s1) { - // e.mov(d, s1); - // e.xchg(d.cvt8(), Reg8(d.getIdx() + 4)); - //} else { - // e.xchg(d.cvt8(), Reg8(d.getIdx() + 4)); - //} - e.BeginOp(i->dest, dest, REG_DEST, - i->src1.value, src1, 0); - e.mov(e.ax, src1); - e.xchg(e.ah, e.al); - e.mov(dest, e.ax); - e.EndOp(dest, src1); - } else if (i->Match(SIG_TYPE_I32, SIG_TYPE_I32)) { - Reg32 dest, src1; - e.BeginOp(i->dest, dest, REG_DEST, - i->src1.value, src1, 0); - if (dest.getIdx() != src1.getIdx()) { - e.mov(dest, src1); - e.bswap(dest); - } else { - e.bswap(dest); - } - e.EndOp(dest, src1); - } else if (i->Match(SIG_TYPE_I64, SIG_TYPE_I64)) { - Reg64 dest, src1; - e.BeginOp(i->dest, dest, REG_DEST, - i->src1.value, src1, 0); - if (dest.getIdx() != src1.getIdx()) { - e.mov(dest, src1); - e.bswap(dest); - } else { - e.bswap(dest); - } - e.EndOp(dest, src1); - } else if (i->Match(SIG_TYPE_V128, SIG_TYPE_V128)) { - Xmm dest, src1; - e.BeginOp(i->dest, dest, REG_DEST, - i->src1.value, src1, 0); - // TODO(benvanik): find a way to do this without the memory load. 
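- // A single vpshufb against the XMMByteSwapMask constant does the whole swap in-register; the constant is addressed off XMMCONSTBASE, which is the memory access the TODO refers to.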
- e.mov(e.rax, XMMCONSTBASE); - e.vpshufb(dest, src1, XMMCONST(e.rax, XMMByteSwapMask)); - e.EndOp(dest, src1); - } else { - ASSERT_INVALID_TYPE(); - } - i = e.Advance(i); - return true; -}); - -table->AddSequence(OPCODE_CNTLZ, [](X64Emitter& e, Instr*& i) { - if (i->Match(SIG_TYPE_IGNORE, SIG_TYPE_I8)) { - Reg8 dest; - Reg8 src; - e.BeginOp(i->dest, dest, REG_DEST, - i->src1.value, src, 0); - e.bsr(dest.cvt16(), src.cvt16()); - // ZF = 1 if zero - e.mov(e.eax, 16 ^ 0x7); - e.cmovz(dest.cvt32(), e.eax); - e.sub(dest, 8); - e.xor(dest, 0x7); - e.EndOp(dest, src); - } else if (i->Match(SIG_TYPE_IGNORE, SIG_TYPE_I16)) { - Reg8 dest; - Reg16 src; - e.BeginOp(i->dest, dest, REG_DEST, - i->src1.value, src, 0); - e.bsr(dest.cvt16(), src); - // ZF = 1 if zero - e.mov(e.eax, 16 ^ 0xF); - e.cmovz(dest.cvt32(), e.eax); - e.xor(dest, 0xF); - e.EndOp(dest, src); - } else if (i->Match(SIG_TYPE_IGNORE, SIG_TYPE_I32)) { - Reg8 dest; - Reg32 src; - e.BeginOp(i->dest, dest, REG_DEST, - i->src1.value, src, 0); - e.bsr(dest.cvt32(), src); - // ZF = 1 if zero - e.mov(e.eax, 32 ^ 0x1F); - e.cmovz(dest.cvt32(), e.eax); - e.xor(dest, 0x1F); - e.EndOp(dest, src); - } else if (i->Match(SIG_TYPE_IGNORE, SIG_TYPE_I64)) { - Reg8 dest; - Reg64 src; - e.BeginOp(i->dest, dest, REG_DEST, - i->src1.value, src, 0); - e.bsr(dest, src); - // ZF = 1 if zero - e.mov(e.eax, 64 ^ 0x3F); - e.cmovz(dest.cvt32(), e.eax); - e.xor(dest, 0x3F); - e.EndOp(dest, src); - } else { - UNIMPLEMENTED_SEQ(); - } - i = e.Advance(i); - return true; -}); - -table->AddSequence(OPCODE_INSERT, [](X64Emitter& e, Instr*& i) { - if (IsVecType(i->dest->type)) { - if (i->src3.value->type == INT8_TYPE) { - UNIMPLEMENTED_SEQ(); - } else if (i->src3.value->type == INT16_TYPE) { - UNIMPLEMENTED_SEQ(); - } else if (i->src3.value->type == INT32_TYPE) { - UNIMPLEMENTED_SEQ(); - } else { - ASSERT_INVALID_TYPE(); - } - } else { - ASSERT_INVALID_TYPE(); - } - i = e.Advance(i); - return true; -}); - -// TODO(benvanik): sequence extract/splat: -// v0.i32 = extract v0.v128, 0 -// v0.v128 = splat v0.i32 -// This can be a single broadcast. - -table->AddSequence(OPCODE_EXTRACT, [](X64Emitter& e, Instr*& i) { - if (IsVecType(i->src1.value->type)) { - if (i->dest->type == INT8_TYPE) { - Reg8 dest; - Xmm src; - e.BeginOp(i->dest, dest, REG_DEST, - i->src1.value, src, 0); - if (i->src2.value->IsConstant()) { - e.pextrb(dest, src, i->src2.value->constant.i8); - } else { - UNIMPLEMENTED_SEQ(); - } - e.EndOp(dest, src); - } else if (i->dest->type == INT16_TYPE) { - Reg16 dest; - Xmm src; - e.BeginOp(i->dest, dest, REG_DEST, - i->src1.value, src, 0); - if (i->src2.value->IsConstant()) { - e.pextrw(dest, src, i->src2.value->constant.i8); - } else { - UNIMPLEMENTED_SEQ(); - } - e.EndOp(dest, src); - } else if (i->dest->type == INT32_TYPE) { - if (i->src2.value->IsConstant()) { - Reg32 dest; - Xmm src; - e.BeginOp(i->dest, dest, REG_DEST, - i->src1.value, src, 0); - e.pextrd(dest, src, i->src2.value->constant.i8); - e.EndOp(dest, src); - } else { - Reg32 dest; - Xmm src; - Reg8 sel; - e.BeginOp(i->dest, dest, REG_DEST, - i->src1.value, src, 0, - i->src2.value, sel, 0); - // Get the desired word in xmm0, then extract that. 
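- // sel & 3 picks one of four 16-byte pshufb masks in extract_table_32 (hence the shl by 4); the shuffle moves the requested dword into element 0 so a constant-index pextrd can read it.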
- e.mov(TEMP_REG, sel); - e.and(TEMP_REG, 0x03); - e.shl(TEMP_REG, 4); - e.mov(e.rax, (uintptr_t)extract_table_32); - e.movaps(e.xmm0, e.ptr[e.rax + TEMP_REG]); - e.vpshufb(e.xmm0, src, e.xmm0); - e.pextrd(dest, e.xmm0, 0); - e.EndOp(dest, src, sel); - } - } else if (i->dest->type == FLOAT32_TYPE) { - Reg32 dest; - Xmm src; - e.BeginOp(i->dest, dest, REG_DEST, - i->src1.value, src, 0); - if (i->src2.value->IsConstant()) { - e.extractps(dest, src, i->src2.value->constant.i8); - } else { - UNIMPLEMENTED_SEQ(); - } - e.EndOp(dest, src); - } else { - ASSERT_INVALID_TYPE(); - } - } else { - ASSERT_INVALID_TYPE(); - } - i = e.Advance(i); - return true; -}); - -table->AddSequence(OPCODE_SPLAT, [](X64Emitter& e, Instr*& i) { - if (IsVecType(i->dest->type)) { - if (i->Match(SIG_TYPE_V128, SIG_TYPE_I8)) { - Xmm dest; - Reg8 src; - e.BeginOp(i->dest, dest, REG_DEST, - i->src1.value, src, 0); - e.vmovd(e.xmm0, src.cvt32()); - e.vpbroadcastb(dest, e.xmm0); - e.EndOp(dest, src); - } else if (i->Match(SIG_TYPE_V128, SIG_TYPE_I8C)) { - Xmm dest; - e.BeginOp(i->dest, dest, REG_DEST); - // TODO(benvanik): faster constant splats. - e.mov(e.eax, i->src1.value->constant.i8); - e.vmovd(e.xmm0, e.eax); - e.vpbroadcastb(dest, e.xmm0); - e.EndOp(dest); - } else if (i->Match(SIG_TYPE_V128, SIG_TYPE_I16)) { - Xmm dest; - Reg16 src; - e.BeginOp(i->dest, dest, REG_DEST, - i->src1.value, src, 0); - e.vmovd(e.xmm0, src.cvt32()); - e.vpbroadcastw(dest, e.xmm0); - e.EndOp(dest, src); - } else if (i->Match(SIG_TYPE_V128, SIG_TYPE_I16C)) { - Xmm dest; - e.BeginOp(i->dest, dest, REG_DEST); - // TODO(benvanik): faster constant splats. - e.mov(e.eax, i->src1.value->constant.i16); - e.vmovd(e.xmm0, e.eax); - e.vpbroadcastw(dest, e.xmm0); - e.EndOp(dest); - } else if (i->Match(SIG_TYPE_V128, SIG_TYPE_I32)) { - Xmm dest; - Reg32 src; - e.BeginOp(i->dest, dest, REG_DEST, - i->src1.value, src, 0); - e.vmovd(e.xmm0, src); - e.vpbroadcastd(dest, e.xmm0); - e.EndOp(dest, src); - } else if (i->Match(SIG_TYPE_V128, SIG_TYPE_I32C)) { - Xmm dest; - e.BeginOp(i->dest, dest, REG_DEST); - // TODO(benvanik): faster constant splats. - e.mov(e.eax, i->src1.value->constant.i32); - e.vmovd(e.xmm0, e.eax); - e.vpbroadcastd(dest, e.xmm0); - e.EndOp(dest); - } else if (i->Match(SIG_TYPE_V128, SIG_TYPE_F32)) { - Xmm dest, src; - e.BeginOp(i->dest, dest, REG_DEST, - i->src1.value, src, 0); - e.vbroadcastss(dest, src); - e.EndOp(dest, src); - } else if (i->Match(SIG_TYPE_V128, SIG_TYPE_F32C)) { - Xmm dest; - e.BeginOp(i->dest, dest, REG_DEST); - e.mov(e.eax, i->src1.value->constant.i32); - e.vmovd(e.xmm0, e.eax); - e.vbroadcastss(dest, e.xmm0); - e.EndOp(dest); - } else { - ASSERT_INVALID_TYPE(); - } - } else { - ASSERT_INVALID_TYPE(); - } - i = e.Advance(i); - return true; -}); - -table->AddSequence(OPCODE_PERMUTE, [](X64Emitter& e, Instr*& i) { - if (IsVecType(i->dest->type)) { - if (i->src1.value->type == INT32_TYPE) { - // Permute words between src2 and src3. - // TODO(benvanik): check src3 for zero. if 0, we can use pshufb. - if (i->src1.value->IsConstant()) { - uint32_t control = i->src1.value->AsUint32(); - Xmm dest, src2, src3; - e.BeginOp(i->dest, dest, REG_DEST, - i->src2.value, src2, 0, - i->src3.value, src3, 0); - // Shuffle things into the right places in dest & xmm0, - // then we blend them together. 
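- // Each byte of the 32-bit control selects a source lane in its low two bits and, in bit 2, whether that lane comes from src2 or src3; the repack below folds these into a pshufd immediate (src_control) and a blendps immediate (blend_control).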
- uint32_t src_control = - (((control >> 24) & 0x3) << 0) | - (((control >> 16) & 0x3) << 2) | - (((control >> 8) & 0x3) << 4) | - (((control >> 0) & 0x3) << 6); - uint32_t blend_control = - (((control >> 26) & 0x1) << 0) | - (((control >> 18) & 0x1) << 1) | - (((control >> 10) & 0x1) << 2) | - (((control >> 2) & 0x1) << 3); - if (dest.getIdx() != src3.getIdx()) { - e.pshufd(dest, src2, src_control); - e.pshufd(e.xmm0, src3, src_control); - e.blendps(dest, e.xmm0, blend_control); - } else { - e.movaps(e.xmm0, src3); - e.pshufd(dest, src2, src_control); - e.pshufd(e.xmm0, e.xmm0, src_control); - e.blendps(dest, e.xmm0, blend_control); - } - e.EndOp(dest, src2, src3); - } else { - Reg32 control; - Xmm dest, src2, src3; - e.BeginOp(i->dest, dest, REG_DEST, - i->src1.value, control, 0, - i->src2.value, src2, 0, - i->src3.value, src3, 0); - UNIMPLEMENTED_SEQ(); - e.EndOp(dest, control, src2, src3); - } - } else if (i->src1.value->type == VEC128_TYPE) { - // Permute bytes between src2 and src3. - if (i->src3.value->IsConstantZero()) { - // Permuting with src2/zero, so just shuffle/mask. - Xmm dest, control, src2; - e.BeginOp(i->dest, dest, REG_DEST, - i->src1.value, control, 0, - i->src2.value, src2, 0); - if (i->src2.value->IsConstantZero()) { - e.vpxor(dest, src2, src2); - } else { - if (i->src2.value->IsConstant()) { - LoadXmmConstant(e, src2, i->src2.value->constant.v128); - } - // Control mask needs to be shuffled. - e.mov(e.rax, XMMCONSTBASE); - e.vpshufb(e.xmm0, control, XMMCONST(e.rax, XMMByteSwapMask)); - e.vpshufb(dest, src2, e.xmm0); - // Build a mask with values in src2 having 0 and values in src3 having 1. - e.vpcmpgtb(e.xmm0, e.xmm0, XMMCONST(e.rax, XMMPermuteControl15)); - e.vpandn(dest, e.xmm0, dest); - } - e.EndOp(dest, control, src2); - } else { - // General permute. - Xmm dest, control, src2, src3; - e.BeginOp(i->dest, dest, REG_DEST, - i->src1.value, control, 0, - i->src2.value, src2, 0, - i->src3.value, src3, 0); - e.mov(e.rax, XMMCONSTBASE); - // Control mask needs to be shuffled. - e.vpshufb(e.xmm1, control, XMMCONST(e.rax, XMMByteSwapMask)); - // Build a mask with values in src2 having 0 and values in src3 having 1. - e.vpcmpgtb(dest, e.xmm1, XMMCONST(e.rax, XMMPermuteControl15)); - Xmm src2_shuf, src3_shuf; - if (i->src2.value->IsConstantZero()) { - e.vpxor(src2, src2); - src2_shuf = src2; - } else { - if (i->src2.value->IsConstant()) { - LoadXmmConstant(e, src2, i->src2.value->constant.v128); - } - src2_shuf = e.xmm0; - e.vpshufb(src2_shuf, src2, e.xmm1); - } - if (i->src3.value->IsConstantZero()) { - e.vpxor(src3, src3); - src3_shuf = src3; - } else { - if (i->src3.value->IsConstant()) { - LoadXmmConstant(e, src3, i->src3.value->constant.v128); - } - // NOTE: reusing xmm1 here. 
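- // xmm1 still holds the byte-swapped control; using it as both the shuffle control and the destination of src3's shuffle is safe with the non-destructive three-operand VEX form.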
- src3_shuf = e.xmm1; - e.vpshufb(src3_shuf, src3, e.xmm1); - } - e.vpblendvb(dest, src2_shuf, src3_shuf, dest); - e.EndOp(dest, control, src2, src3); - } - } else { - ASSERT_INVALID_TYPE(); - } - } else { - ASSERT_INVALID_TYPE(); - } - i = e.Advance(i); - return true; -}); - -table->AddSequence(OPCODE_SWIZZLE, [](X64Emitter& e, Instr*& i) { - if (IsVecType(i->dest->type)) { - // Defined by SWIZZLE_MASK() - if (i->flags == INT32_TYPE || i->flags == FLOAT32_TYPE) { - uint8_t swizzle_mask = (uint8_t)i->src2.offset; - swizzle_mask = - (((swizzle_mask >> 6) & 0x3) << 0) | - (((swizzle_mask >> 4) & 0x3) << 2) | - (((swizzle_mask >> 2) & 0x3) << 4) | - (((swizzle_mask >> 0) & 0x3) << 6); - Xmm dest, src1; - e.BeginOp(i->dest, dest, REG_DEST, - i->src1.value, src1, 0); - e.pshufd(dest, src1, swizzle_mask); - e.EndOp(dest, src1); - } else { - UNIMPLEMENTED_SEQ(); - } - } else { - ASSERT_INVALID_TYPE(); - } - i = e.Advance(i); - return true; -}); - -table->AddSequence(OPCODE_PACK, [](X64Emitter& e, Instr*& i) { - if (i->flags == PACK_TYPE_D3DCOLOR) { - UNIMPLEMENTED_SEQ(); - } else if (i->flags == PACK_TYPE_FLOAT16_2) { - UNIMPLEMENTED_SEQ(); - } else if (i->flags == PACK_TYPE_FLOAT16_4) { - UNIMPLEMENTED_SEQ(); - } else if (i->flags == PACK_TYPE_SHORT_2) { - UNIMPLEMENTED_SEQ(); - } else if (i->flags == PACK_TYPE_S8_IN_16_LO) { - UNIMPLEMENTED_SEQ(); - } else if (i->flags == PACK_TYPE_S8_IN_16_HI) { - UNIMPLEMENTED_SEQ(); - } else if (i->flags == PACK_TYPE_S16_IN_32_LO) { - UNIMPLEMENTED_SEQ(); - } else if (i->flags == PACK_TYPE_S16_IN_32_HI) { - UNIMPLEMENTED_SEQ(); - } else { - ASSERT_INVALID_TYPE(); - } - i = e.Advance(i); - return true; -}); - -table->AddSequence(OPCODE_UNPACK, [](X64Emitter& e, Instr*& i) { - if (i->flags == PACK_TYPE_D3DCOLOR) { - // ARGB (WXYZ) -> RGBA (XYZW) - // XMLoadColor - // int32_t src = (int32_t)src1.iw; - // dest.f4[0] = (float)((src >> 16) & 0xFF) * (1.0f / 255.0f); - // dest.f4[1] = (float)((src >> 8) & 0xFF) * (1.0f / 255.0f); - // dest.f4[2] = (float)(src & 0xFF) * (1.0f / 255.0f); - // dest.f4[3] = (float)((src >> 24) & 0xFF) * (1.0f / 255.0f); - XmmUnaryOp(e, i, 0, [](X64Emitter& e, Instr& i, const Xmm& dest, const Xmm& src) { - // src = ZZYYXXWW - // unpack to 000000ZZ,000000YY,000000XX,000000WW - e.mov(e.rax, XMMCONSTBASE); - e.vpshufb(dest, src, XMMCONST(e.rax, XMMUnpackD3DCOLOR)); - // mult by 1/255 - e.vmulps(dest, XMMCONST(e.rax, XMMOneOver255)); - }); - } else if (i->flags == PACK_TYPE_FLOAT16_2) { - // 1 bit sign, 5 bit exponent, 10 bit mantissa - // D3D10 half float format - // TODO(benvanik): http://blogs.msdn.com/b/chuckw/archive/2012/09/11/directxmath-f16c-and-fma.aspx - // Use _mm_cvtph_ps -- requires very modern processors (SSE5+) - // Unpacking half floats: http://fgiesen.wordpress.com/2012/03/28/half-to-float-done-quic/ - // Packing half floats: https://gist.github.com/rygorous/2156668 - // Load source, move from tight pack of X16Y16.... to X16...Y16... - // Also zero out the high end. - // TODO(benvanik): special case constant unpacks that just get 0/1/etc. - XmmUnaryOp(e, i, 0, [](X64Emitter& e, Instr& i, const Xmm& dest, const Xmm& src) { - // sx = src.iw >> 16; - // sy = src.iw & 0xFFFF; - // dest = { XMConvertHalfToFloat(sx), - // XMConvertHalfToFloat(sy), - // 0.0, - // 1.0 }; - auto addr = Stash(e, src); - e.lea(e.rdx, addr); - CallNative(e, Unpack_FLOAT16_2); - e.movaps(dest, addr); - }); - } else if (i->flags == PACK_TYPE_FLOAT16_4) { - // Could be shared with FLOAT16_2. 
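- // (Presumably the same Stash + CallNative thunk as FLOAT16_2 above, widened to four half-floats.)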
- UNIMPLEMENTED_SEQ(); - } else if (i->flags == PACK_TYPE_SHORT_2) { - // (VD.x) = 3.0 + (VB.x>>16)*2^-22 - // (VD.y) = 3.0 + (VB.x)*2^-22 - // (VD.z) = 0.0 - // (VD.w) = 1.0 - XmmUnaryOp(e, i, 0, [](X64Emitter& e, Instr& i, const Xmm& dest, const Xmm& src) { - // XMLoadShortN2 plus 3,3,0,3 (for some reason) - // src is (xx,xx,xx,VALUE) - e.mov(e.rax, XMMCONSTBASE); - // (VALUE,VALUE,VALUE,VALUE) - e.vbroadcastss(dest, src); - // (VALUE&0xFFFF,VALUE&0xFFFF0000,0,0) - e.andps(dest, XMMCONST(e.rax, XMMMaskX16Y16)); - // Sign extend. - e.xorps(dest, XMMCONST(e.rax, XMMFlipX16Y16)); - // Convert int->float. - e.cvtpi2ps(dest, Stash(e, dest)); - // 0x8000 to undo sign. - e.addps(dest, XMMCONST(e.rax, XMMFixX16Y16)); - // Normalize. - e.mulps(dest, XMMCONST(e.rax, XMMNormalizeX16Y16)); - // Clamp. - e.maxps(dest, XMMCONST(e.rax, XMMNegativeOne)); - // Add 3,3,0,1. - e.addps(dest, XMMCONST(e.rax, XMM3301)); - }); - } else if (i->flags == PACK_TYPE_S8_IN_16_LO) { - UNIMPLEMENTED_SEQ(); - } else if (i->flags == PACK_TYPE_S8_IN_16_HI) { - UNIMPLEMENTED_SEQ(); - } else if (i->flags == PACK_TYPE_S16_IN_32_LO) { - UNIMPLEMENTED_SEQ(); - } else if (i->flags == PACK_TYPE_S16_IN_32_HI) { - UNIMPLEMENTED_SEQ(); - } else { - ASSERT_INVALID_TYPE(); - } - i = e.Advance(i); - return true; -}); - -// -------------------------------------------------------------------------- -// Atomic -// -------------------------------------------------------------------------- - -table->AddSequence(OPCODE_COMPARE_EXCHANGE, [](X64Emitter& e, Instr*& i) { - UNIMPLEMENTED_SEQ(); - i = e.Advance(i); - return true; -}); - -table->AddSequence(OPCODE_ATOMIC_EXCHANGE, [](X64Emitter& e, Instr*& i) { - // dest = old_value = InterlockedExchange(src1 = address, src2 = new_value); - if (i->Match(SIG_TYPE_I32, SIG_TYPE_I64, SIG_TYPE_I32)) { - Reg32 dest, src2; - Reg64 src1; - e.BeginOp(i->dest, dest, REG_DEST, - i->src1.value, src1, 0, - i->src2.value, src2, 0); - Reg64 real_src1 = src1; - if (dest.getIdx() == src1.getIdx()) { - e.mov(TEMP_REG, src1); - real_src1 = TEMP_REG; - } - e.mov(dest, src2); - e.lock(); - e.xchg(e.dword[real_src1], dest); - e.EndOp(dest, src1, src2); - } else if (i->Match(SIG_TYPE_I32, SIG_TYPE_I64, SIG_TYPE_I32C)) { - Reg32 dest; - Reg64 src1; - e.BeginOp(i->dest, dest, REG_DEST, - i->src1.value, src1, 0); - Reg64 real_src1 = src1; - if (dest.getIdx() == src1.getIdx()) { - e.mov(TEMP_REG, src1); - real_src1 = TEMP_REG; - } - e.mov(dest, i->src2.value->constant.i32); - e.lock(); - e.xchg(e.dword[real_src1], dest); - e.EndOp(dest, src1); - } else { - ASSERT_INVALID_TYPE(); - } - i = e.Advance(i); - return true; -}); - -table->AddSequence(OPCODE_ATOMIC_ADD, [](X64Emitter& e, Instr*& i) { - UNIMPLEMENTED_SEQ(); - i = e.Advance(i); - return true; -}); - -table->AddSequence(OPCODE_ATOMIC_SUB, [](X64Emitter& e, Instr*& i) { - UNIMPLEMENTED_SEQ(); - i = e.Advance(i); - return true; -}); -} diff --git a/src/alloy/backend/x64/lowering/lowering_table.cc b/src/alloy/backend/x64/lowering/lowering_table.cc deleted file mode 100644 index 6c5c8468b..000000000 --- a/src/alloy/backend/x64/lowering/lowering_table.cc +++ /dev/null @@ -1,71 +0,0 @@ -/** - ****************************************************************************** - * Xenia : Xbox 360 Emulator Research Project * - ****************************************************************************** - * Copyright 2013 Ben Vanik. All rights reserved. * - * Released under the BSD license - see LICENSE in the root for more details. 
* - ****************************************************************************** - */ - -#include - -#include -#include - -using namespace alloy; -using namespace alloy::backend::x64; -using namespace alloy::backend::x64::lowering; - - -LoweringTable::LoweringTable(X64Backend* backend) : - backend_(backend) { - xe_zero_struct(lookup_, sizeof(lookup_)); -} - -LoweringTable::~LoweringTable() { - for (size_t n = 0; n < XECOUNT(lookup_); n++) { - auto entry = lookup_[n]; - while (entry) { - auto next = entry->next; - delete entry; - entry = next; - } - } -} - -int LoweringTable::Initialize() { - RegisterSequences(this); - return 0; -} - -void LoweringTable::AddSequence(hir::Opcode starting_opcode, sequence_fn_t fn) { - auto existing_entry = lookup_[starting_opcode]; - auto new_entry = new sequence_fn_entry_t(); - new_entry->fn = fn; - new_entry->next = existing_entry; - lookup_[starting_opcode] = new_entry; -} - -int LoweringTable::ProcessBlock(X64Emitter& e, hir::Block* block) { - // Process instructions. - auto instr = block->instr_head; - while (instr) { - bool processed = false; - auto entry = lookup_[instr->opcode->num]; - while (entry) { - if ((*entry->fn)(e, instr)) { - processed = true; - break; - } - entry = entry->next; - } - if (!processed) { - // No sequence found! - XELOGE("Unable to process HIR opcode %s", instr->opcode->name); - return 1; - instr = e.Advance(instr); - } - } - - return 0; -} \ No newline at end of file diff --git a/src/alloy/backend/x64/lowering/lowering_table.h b/src/alloy/backend/x64/lowering/lowering_table.h deleted file mode 100644 index f62bfd777..000000000 --- a/src/alloy/backend/x64/lowering/lowering_table.h +++ /dev/null @@ -1,58 +0,0 @@ -/** - ****************************************************************************** - * Xenia : Xbox 360 Emulator Research Project * - ****************************************************************************** - * Copyright 2013 Ben Vanik. All rights reserved. * - * Released under the BSD license - see LICENSE in the root for more details. * - ****************************************************************************** - */ - -#ifndef ALLOY_BACKEND_X64_X64_LOWERING_LOWERING_TABLE_H_ -#define ALLOY_BACKEND_X64_X64_LOWERING_LOWERING_TABLE_H_ - -#include -#include - - -namespace alloy { -namespace backend { -namespace x64 { -class X64Backend; -class X64Emitter; -namespace lowering { - - -class LoweringTable { -public: - LoweringTable(X64Backend* backend); - ~LoweringTable(); - - int Initialize(); - - int ProcessBlock(X64Emitter& e, hir::Block* block); - -public: - typedef bool(*sequence_fn_t)(X64Emitter& e, hir::Instr*& instr); - void AddSequence(hir::Opcode starting_opcode, sequence_fn_t fn); - -private: - class sequence_fn_entry_t { - public: - sequence_fn_t fn; - sequence_fn_entry_t* next; - }; - - // NOTE: this class is shared by multiple threads and is not thread safe. - // Do not modify anything after init. 
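- // lookup_ keeps one singly linked chain of sequence functions per opcode; AddSequence prepends, so the most recently registered handler is tried first by ProcessBlock.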
- X64Backend* backend_; - sequence_fn_entry_t* lookup_[hir::__OPCODE_MAX_VALUE]; -}; - - -} // namespace lowering -} // namespace x64 -} // namespace backend -} // namespace alloy - - -#endif // ALLOY_BACKEND_X64_X64_LOWERING_LOWERING_TABLE_H_ diff --git a/src/alloy/backend/x64/lowering/op_utils.inl b/src/alloy/backend/x64/lowering/op_utils.inl deleted file mode 100644 index 749e84901..000000000 --- a/src/alloy/backend/x64/lowering/op_utils.inl +++ /dev/null @@ -1,1063 +0,0 @@ -/** - ****************************************************************************** - * Xenia : Xbox 360 Emulator Research Project * - ****************************************************************************** - * Copyright 2014 Ben Vanik. All rights reserved. * - * Released under the BSD license - see LICENSE in the root for more details. * - ****************************************************************************** - */ - -// NOTE: this file is only designed to be included by lowering_sequencies.cc! - -#ifndef ALLOY_BACKEND_X64_X64_LOWERING_OP_UTILS_INL_ -#define ALLOY_BACKEND_X64_X64_LOWERING_OP_UTILS_INL_ - -namespace { - -#define LIKE_REG(dest, like) Reg(dest.getIdx(), dest.getKind(), like.getBit(), false) -#define TEMP_REG e.r8 -#define TEMP_LIKE(like) Reg(TEMP_REG.getIdx(), TEMP_REG.getKind(), like.getBit(), false) - -#define STASH_OFFSET 32 - -// If we are running with tracing on we have to store the EFLAGS in the stack, -// otherwise our calls out to C to print will clear it before DID_CARRY/etc -// can get the value. -#define STORE_EFLAGS 1 - -void LoadEflags(X64Emitter& e) { -#if STORE_EFLAGS - e.mov(e.eax, e.dword[e.rsp + STASH_OFFSET]); - e.push(e.rax); - e.popf(); -#else - // EFLAGS already present. -#endif // STORE_EFLAGS -} -void StoreEflags(X64Emitter& e) { -#if STORE_EFLAGS - e.pushf(); - e.pop(e.qword[e.rsp + STASH_OFFSET]); -#else - // EFLAGS should have CA set? - // (so long as we don't fuck with it) -#endif // STORE_EFLAGS -} - -Address Stash(X64Emitter& e, const Xmm& r) { - // TODO(benvanik): ensure aligned. - auto addr = e.ptr[e.rsp + STASH_OFFSET]; - e.movups(addr, r); - return addr; -} - -void LoadXmmConstant(X64Emitter& e, const Xmm& dest, const vec128_t& v) { - if (!v.low && !v.high) { - // zero - e.vpxor(dest, dest); - //} else if (v.low == ~0ull && v.high == ~0ull) { - // one - // TODO(benvanik): XMMCONST? - } else { - // TODO(benvanik): more efficient loading of partial values? - e.mov(e.qword[e.rsp + STASH_OFFSET], v.low); - e.mov(e.qword[e.rsp + STASH_OFFSET + 8], v.high); - e.vmovaps(dest, e.ptr[e.rsp + STASH_OFFSET]); - } -} - -// Moves a 64bit immediate into memory. -void MovMem64(X64Emitter& e, RegExp& addr, uint64_t v) { - if ((v & ~0x7FFFFFFF) == 0) { - // Fits under 31 bits, so just load using normal mov. - e.mov(e.qword[addr], v); - } else if ((v & ~0x7FFFFFFF) == ~0x7FFFFFFF) { - // Negative number that fits in 32bits. - e.mov(e.qword[addr], v); - } else { - // 64bit number that needs double movs. - e.mov(e.rax, v); - e.mov(e.qword[addr], e.rax); - } -} - -void CallNative(X64Emitter& e, void* target) { - e.mov(e.rax, (uint64_t)target); - e.call(e.rax); - e.mov(e.rcx, e.qword[e.rsp + StackLayout::GUEST_RCX_HOME]); - e.mov(e.rdx, e.qword[e.rcx + 8]); // membase -} - -void ReloadRDX(X64Emitter& e) { - e.mov(e.rdx, e.qword[e.rcx + 8]); // membase -} - -// Sets EFLAGs with zf for the given value. -// ZF = 1 if false, 0 = true (so jz = jump if false) -void CheckBoolean(X64Emitter& e, Value* v) { - if (v->IsConstant()) { - e.mov(e.ah, (v->IsConstantZero() ? 
1 : 0) << 6); - e.sahf(); - } else if (v->type == INT8_TYPE) { - Reg8 src; - e.BeginOp(v, src, 0); - e.test(src, src); - e.EndOp(src); - } else if (v->type == INT16_TYPE) { - Reg16 src; - e.BeginOp(v, src, 0); - e.test(src, src); - e.EndOp(src); - } else if (v->type == INT32_TYPE) { - Reg32 src; - e.BeginOp(v, src, 0); - e.test(src, src); - e.EndOp(src); - } else if (v->type == INT64_TYPE) { - Reg64 src; - e.BeginOp(v, src, 0); - e.test(src, src); - e.EndOp(src); - } else if (v->type == FLOAT32_TYPE) { - // TODO(benvanik): mask? - Xmm src; - e.BeginOp(v, src, 0); - e.ptest(src, src); - e.EndOp(src); - } else if (v->type == FLOAT64_TYPE) { - // TODO(benvanik): mask? - Xmm src; - e.BeginOp(v, src, 0); - e.ptest(src, src); - e.EndOp(src); - } else if (v->type == VEC128_TYPE) { - Xmm src; - e.BeginOp(v, src, 0); - e.ptest(src, src); - e.EndOp(src); - } else { - ASSERT_INVALID_TYPE(); - } -} - -// Compares src1 and src2 and calls the given fn to set a byte based on EFLAGS. -void CompareXX(X64Emitter& e, Instr*& i, void(set_fn)(X64Emitter& e, Reg8& dest, bool invert)) { - if (i->Match(SIG_TYPE_IGNORE, SIG_TYPE_I8, SIG_TYPE_I8)) { - Reg8 dest; - Reg8 src1, src2; - e.BeginOp(i->dest, dest, REG_DEST, - i->src1.value, src1, 0, - i->src2.value, src2, 0); - e.cmp(src1, src2); - set_fn(e, dest, false); - e.EndOp(dest, src1, src2); - } else if (i->Match(SIG_TYPE_IGNORE, SIG_TYPE_I8, SIG_TYPE_I8C)) { - Reg8 dest; - Reg8 src1; - e.BeginOp(i->dest, dest, REG_DEST, - i->src1.value, src1, 0); - e.cmp(src1, i->src2.value->constant.i8); - set_fn(e, dest, false); - e.EndOp(dest, src1); - } else if (i->Match(SIG_TYPE_IGNORE, SIG_TYPE_I8C, SIG_TYPE_I8)) { - Reg8 dest; - Reg8 src2; - e.BeginOp(i->dest, dest, REG_DEST, - i->src2.value, src2, 0); - e.cmp(src2, i->src1.value->constant.i8); - set_fn(e, dest, true); - e.EndOp(dest, src2); - } else if (i->Match(SIG_TYPE_IGNORE, SIG_TYPE_I16, SIG_TYPE_I16)) { - Reg8 dest; - Reg16 src1, src2; - e.BeginOp(i->dest, dest, REG_DEST, - i->src1.value, src1, 0, - i->src2.value, src2, 0); - e.cmp(src1, src2); - set_fn(e, dest, false); - e.EndOp(dest, src1, src2); - } else if (i->Match(SIG_TYPE_IGNORE, SIG_TYPE_I16, SIG_TYPE_I16C)) { - Reg8 dest; - Reg16 src1; - e.BeginOp(i->dest, dest, REG_DEST, - i->src1.value, src1, 0); - e.cmp(src1, i->src2.value->constant.i16); - set_fn(e, dest, false); - e.EndOp(dest, src1); - } else if (i->Match(SIG_TYPE_IGNORE, SIG_TYPE_I16C, SIG_TYPE_I16)) { - Reg8 dest; - Reg16 src2; - e.BeginOp(i->dest, dest, REG_DEST, - i->src2.value, src2, 0); - e.cmp(src2, i->src1.value->constant.i16); - e.sete(dest); - set_fn(e, dest, true); - e.EndOp(dest, src2); - } else if (i->Match(SIG_TYPE_IGNORE, SIG_TYPE_I32, SIG_TYPE_I32)) { - Reg8 dest; - Reg32 src1, src2; - e.BeginOp(i->dest, dest, REG_DEST, - i->src1.value, src1, 0, - i->src2.value, src2, 0); - e.cmp(src1, src2); - set_fn(e, dest, false); - e.EndOp(dest, src1, src2); - } else if (i->Match(SIG_TYPE_IGNORE, SIG_TYPE_I32, SIG_TYPE_I32C)) { - Reg8 dest; - Reg32 src1; - e.BeginOp(i->dest, dest, REG_DEST, - i->src1.value, src1, 0); - e.cmp(src1, i->src2.value->constant.i32); - set_fn(e, dest, false); - e.EndOp(dest, src1); - } else if (i->Match(SIG_TYPE_IGNORE, SIG_TYPE_I32C, SIG_TYPE_I32)) { - Reg8 dest; - Reg32 src2; - e.BeginOp(i->dest, dest, REG_DEST, - i->src2.value, src2, 0); - e.cmp(src2, i->src1.value->constant.i32); - set_fn(e, dest, true); - e.EndOp(dest, src2); - } else if (i->Match(SIG_TYPE_IGNORE, SIG_TYPE_I64, SIG_TYPE_I64)) { - Reg8 dest; - Reg64 src1, src2; - e.BeginOp(i->dest, dest, REG_DEST, - 
i->src1.value, src1, 0, - i->src2.value, src2, 0); - e.cmp(src1, src2); - set_fn(e, dest, false); - e.EndOp(dest, src1, src2); - } else if (i->Match(SIG_TYPE_IGNORE, SIG_TYPE_I64, SIG_TYPE_I64C)) { - Reg8 dest; - Reg64 src1; - e.BeginOp(i->dest, dest, REG_DEST, - i->src1.value, src1, 0); - e.mov(e.rax, i->src2.value->constant.i64); - e.cmp(src1, e.rax); - set_fn(e, dest, false); - e.EndOp(dest, src1); - } else if (i->Match(SIG_TYPE_IGNORE, SIG_TYPE_I64C, SIG_TYPE_I64)) { - Reg8 dest; - Reg64 src2; - e.BeginOp(i->dest, dest, REG_DEST, - i->src2.value, src2, 0); - e.mov(e.rax, i->src1.value->constant.i64); - e.cmp(src2, e.rax); - set_fn(e, dest, true); - e.EndOp(dest, src2); - } else if (i->Match(SIG_TYPE_IGNORE, SIG_TYPE_F32, SIG_TYPE_F32)) { - Reg8 dest; - Xmm src1, src2; - e.BeginOp(i->dest, dest, REG_DEST, - i->src1.value, src1, 0, - i->src2.value, src2, 0); - e.comiss(src1, src2); - set_fn(e, dest, false); - e.EndOp(dest, src1, src2); - } else if (i->Match(SIG_TYPE_IGNORE, SIG_TYPE_F32, SIG_TYPE_F32C)) { - Reg8 dest; - Xmm src1; - e.BeginOp(i->dest, dest, REG_DEST, - i->src1.value, src1, 0); - if (i->src2.value->IsConstantZero()) { - e.pxor(e.xmm0, e.xmm0); - } else { - e.mov(e.eax, (uint32_t)i->src2.value->constant.i32); - e.pinsrd(e.xmm0, e.eax, 0); - } - e.comiss(src1, e.xmm0); - set_fn(e, dest, false); - e.EndOp(dest, src1); - } else if (i->Match(SIG_TYPE_IGNORE, SIG_TYPE_F64, SIG_TYPE_F64)) { - Reg8 dest; - Xmm src1, src2; - e.BeginOp(i->dest, dest, REG_DEST, - i->src1.value, src1, 0, - i->src2.value, src2, 0); - e.comisd(src1, src2); - set_fn(e, dest, false); - e.EndOp(dest, src1, src2); - } else if (i->Match(SIG_TYPE_IGNORE, SIG_TYPE_F64, SIG_TYPE_F64C)) { - Reg8 dest; - Xmm src1; - e.BeginOp(i->dest, dest, REG_DEST, - i->src1.value, src1, 0); - if (i->src2.value->IsConstantZero()) { - e.pxor(e.xmm0, e.xmm0); - } else { - e.mov(e.rax, (uint64_t)i->src2.value->constant.i64); - e.pinsrq(e.xmm0, e.rax, 0); - } - e.comisd(src1, e.xmm0); - set_fn(e, dest, false); - e.EndOp(dest, src1); - } else { - UNIMPLEMENTED_SEQ(); - } -}; - -enum VectoreCompareOp { - VECTOR_CMP_EQ, - VECTOR_CMP_GT, - VECTOR_CMP_GE, -}; -// Compares src1 to src2 with the given op and sets the dest. -// Dest will have each part set to all ones if the compare passes. -void VectorCompareXX(X64Emitter& e, Instr*& i, VectoreCompareOp op, bool as_signed) { - Xmm dest, src1, src2; - e.BeginOp(i->dest, dest, REG_DEST, - i->src1.value, src1, 0, - i->src2.value, src2, 0); - if (op == VECTOR_CMP_EQ) { - // Commutative, so simple. - Xmm real_src; - if (dest.getIdx() == src1.getIdx()) { - real_src = src2; - } else if (dest.getIdx() == src2.getIdx()) { - real_src = src1; - } else { - e.movaps(dest, src1); - real_src = src2; - } - if (i->flags == INT8_TYPE) { - e.pcmpeqb(dest, real_src); - } else if (i->flags == INT16_TYPE) { - e.pcmpeqw(dest, real_src); - } else if (i->flags == INT32_TYPE) { - e.pcmpeqd(dest, real_src); - } else if (i->flags == FLOAT32_TYPE) { - e.cmpeqps(dest, real_src); - } else { - ASSERT_INVALID_TYPE(); - } - } else if (i->flags == FLOAT32_TYPE) { - // Float GT/GE must be emulated. - if (op == VECTOR_CMP_GT) { - // Have to swap: src2 < src1. - if (dest.getIdx() == src2.getIdx()) { - e.cmpltps(dest, src1); - } else if (dest.getIdx() == src1.getIdx()) { - e.movaps(e.xmm0, src1); - e.movaps(dest, src2); - e.cmpltps(dest, e.xmm0); - } else { - e.movaps(dest, src2); - e.cmpltps(dest, src1); - } - } else if (op == VECTOR_CMP_GE) { - // Have to swap: src2 <= src1. 
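- // cmpps only provides lt/le style predicates, so a >= b is emitted as b <= a; the cases below just keep the operands straight when dest aliases src1 or src2.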
- if (dest.getIdx() == src2.getIdx()) { - e.cmpleps(dest, src1); - } else if (dest.getIdx() == src1.getIdx()) { - e.movaps(e.xmm0, src1); - e.movaps(dest, src2); - e.cmpleps(dest, e.xmm0); - } else { - e.movaps(dest, src2); - e.cmpleps(dest, src1); - } - } else { - ASSERT_INVALID_TYPE(); - } - } else { - // Integer types are easier. - Xmm real_src; - if (dest.getIdx() == src1.getIdx()) { - real_src = src2; - } else if (dest.getIdx() == src2.getIdx()) { - e.movaps(e.xmm0, src2); - e.movaps(dest, src1); - real_src = e.xmm0; - } else { - e.movaps(dest, src1); - real_src = src2; - } - if (op == VECTOR_CMP_GT) { - if (i->flags == INT8_TYPE) { - if (as_signed) { - e.pcmpgtb(dest, real_src); - } else { - UNIMPLEMENTED_SEQ(); - } - } else if (i->flags == INT16_TYPE) { - if (as_signed) { - e.pcmpgtw(dest, real_src); - } else { - UNIMPLEMENTED_SEQ(); - } - } else if (i->flags == INT32_TYPE) { - if (as_signed) { - e.pcmpgtd(dest, real_src); - } else { - UNIMPLEMENTED_SEQ(); - } - } else { - ASSERT_INVALID_TYPE(); - } - } else if (op == VECTOR_CMP_GE) { - if (i->flags == INT8_TYPE) { - if (as_signed) { - UNIMPLEMENTED_SEQ(); - } else { - UNIMPLEMENTED_SEQ(); - } - } else if (i->flags == INT16_TYPE) { - if (as_signed) { - UNIMPLEMENTED_SEQ(); - } else { - UNIMPLEMENTED_SEQ(); - } - } else if (i->flags == INT32_TYPE) { - if (as_signed) { - UNIMPLEMENTED_SEQ(); - } else { - UNIMPLEMENTED_SEQ(); - } - } else { - ASSERT_INVALID_TYPE(); - } - } else { - ASSERT_INVALID_TYPE(); - } - } - e.EndOp(dest, src1, src2); -}; - -typedef void(v_fn)(X64Emitter& e, Instr& i, const Reg& dest_src); -template -void IntUnaryOpV(X64Emitter& e, Instr*& i, v_fn v_fn, - T& dest, T& src1) { - e.BeginOp(i->dest, dest, REG_DEST, - i->src1.value, src1, 0); - if (dest.getIdx() == src1.getIdx()) { - v_fn(e, *i, dest); - } else { - e.mov(dest, src1); - v_fn(e, *i, dest); - } - e.EndOp(dest, src1); -} -template -void IntUnaryOpC(X64Emitter& e, Instr*& i, v_fn v_fn, - T& dest, Value* src1) { - e.BeginOp(i->dest, dest, REG_DEST); - e.mov(dest, (uint64_t)src1->get_constant(CT())); - v_fn(e, *i, dest); - e.EndOp(dest); -} -void IntUnaryOp(X64Emitter& e, Instr*& i, v_fn v_fn) { - if (i->Match(SIG_TYPE_I8, SIG_TYPE_I8)) { - Reg8 dest, src1; - IntUnaryOpV(e, i, v_fn, dest, src1); - } else if (i->Match(SIG_TYPE_I8, SIG_TYPE_I8C)) { - Reg8 dest; - IntUnaryOpC(e, i, v_fn, dest, i->src1.value); - } else if (i->Match(SIG_TYPE_I16, SIG_TYPE_I16)) { - Reg16 dest, src1; - IntUnaryOpV(e, i, v_fn, dest, src1); - } else if (i->Match(SIG_TYPE_I16, SIG_TYPE_I16C)) { - Reg16 dest; - IntUnaryOpC(e, i, v_fn, dest, i->src1.value); - } else if (i->Match(SIG_TYPE_I32, SIG_TYPE_I32)) { - Reg32 dest, src1; - IntUnaryOpV(e, i, v_fn, dest, src1); - } else if (i->Match(SIG_TYPE_I32, SIG_TYPE_I32C)) { - Reg32 dest; - IntUnaryOpC(e, i, v_fn, dest, i->src1.value); - } else if (i->Match(SIG_TYPE_I64, SIG_TYPE_I64)) { - Reg64 dest, src1; - IntUnaryOpV(e, i, v_fn, dest, src1); - } else if (i->Match(SIG_TYPE_I64, SIG_TYPE_I64C)) { - Reg64 dest; - IntUnaryOpC(e, i, v_fn, dest, i->src1.value); - } else { - ASSERT_INVALID_TYPE(); - } - if (i->flags & ARITHMETIC_SET_CARRY) { - StoreEflags(e); - } -}; - -typedef void(vv_fn)(X64Emitter& e, Instr& i, const Reg& dest_src, const Operand& src); -typedef void(vc_fn)(X64Emitter& e, Instr& i, const Reg& dest_src, uint32_t src); -template -void IntBinaryOpVV(X64Emitter& e, Instr*& i, vv_fn vv_fn, - TD& dest, TS1& src1, TS2& src2) { - e.BeginOp(i->dest, dest, REG_DEST, - i->src1.value, src1, 0, - i->src2.value, src2, 0); - if 
(dest.getIdx() == src1.getIdx()) { - vv_fn(e, *i, dest, src2); - } else if (dest.getIdx() == src2.getIdx()) { - if (i->opcode->flags & OPCODE_FLAG_COMMUNATIVE) { - vv_fn(e, *i, dest, src1); - } else { - // Eww. - auto Ntx = TEMP_LIKE(src1); - e.mov(Ntx, src1); - vv_fn(e, *i, Ntx, src2); - e.mov(dest, Ntx); - } - } else { - e.mov(dest, src1); - vv_fn(e, *i, dest, src2); - } - e.EndOp(dest, src1, src2); -} -template -void IntBinaryOpVC(X64Emitter& e, Instr*& i, vv_fn vv_fn, vc_fn vc_fn, - TD& dest, TS1& src1, Value* src2) { - e.BeginOp(i->dest, dest, REG_DEST, - i->src1.value, src1, 0); - if (dest.getBit() <= 32) { - // 32-bit. - if (dest.getIdx() == src1.getIdx()) { - vc_fn(e, *i, dest, (uint32_t)src2->get_constant(CT())); - } else { - e.mov(dest, src1); - vc_fn(e, *i, dest, (uint32_t)src2->get_constant(CT())); - } - } else { - // 64-bit. - if (dest.getIdx() == src1.getIdx()) { - e.mov(TEMP_REG, src2->constant.i64); - vv_fn(e, *i, dest, TEMP_REG); - } else { - e.mov(TEMP_REG, src2->constant.i64); - e.mov(dest, src1); - vv_fn(e, *i, dest, TEMP_REG); - } - } - e.EndOp(dest, src1); -} -template -void IntBinaryOpCV(X64Emitter& e, Instr*& i, vv_fn vv_fn, vc_fn vc_fn, - TD& dest, Value* src1, TS2& src2) { - e.BeginOp(i->dest, dest, REG_DEST, - i->src2.value, src2, 0); - if (dest.getBit() <= 32) { - // 32-bit. - if (dest.getIdx() == src2.getIdx()) { - if (i->opcode->flags & OPCODE_FLAG_COMMUNATIVE) { - vc_fn(e, *i, dest, (uint32_t)src1->get_constant(CT())); - } else { - // Eww. - auto Ntx = TEMP_LIKE(src2); - e.mov(Ntx, src2); - e.mov(dest, (uint32_t)src1->get_constant(CT())); - vv_fn(e, *i, dest, Ntx); - } - } else { - if (i->opcode->flags & OPCODE_FLAG_COMMUNATIVE) { - e.mov(dest, src2); - vc_fn(e, *i, dest, (uint32_t)src1->get_constant(CT())); - } else { - // Need a cv_fn. Or a better way to do all of this. - e.mov(dest, (uint32_t)src1->get_constant(CT())); - vv_fn(e, *i, dest, src2); - } - } - } else { - // 64-bit. - if (dest.getIdx() == src2.getIdx()) { - if (i->opcode->flags & OPCODE_FLAG_COMMUNATIVE) { - e.mov(TEMP_REG, src1->constant.i64); - vv_fn(e, *i, dest, TEMP_REG); - } else { - // Eww. - e.mov(TEMP_REG, src1->constant.i64); - vv_fn(e, *i, TEMP_REG, src2); - e.mov(dest, TEMP_REG); - } - } else { - e.mov(TEMP_REG, src2); - e.mov(dest, src1->constant.i64); - vv_fn(e, *i, dest, TEMP_REG); - } - } - e.EndOp(dest, src2); -} -void IntBinaryOp(X64Emitter& e, Instr*& i, vv_fn vv_fn, vc_fn vc_fn) { - // TODO(benvanik): table lookup. This linear scan is slow. - // Note: we assume DEST.type = SRC1.type, but that SRC2.type may vary. 
- XEASSERT(i->dest->type == i->src1.value->type); - if (i->Match(SIG_TYPE_I8, SIG_TYPE_I8, SIG_TYPE_I8)) { - Reg8 dest, src1, src2; - IntBinaryOpVV(e, i, vv_fn, dest, src1, src2); - } else if (i->Match(SIG_TYPE_I8, SIG_TYPE_I8, SIG_TYPE_I8C)) { - Reg8 dest, src1; - IntBinaryOpVC(e, i, vv_fn, vc_fn, dest, src1, i->src2.value); - } else if (i->Match(SIG_TYPE_I8, SIG_TYPE_I8C, SIG_TYPE_I8)) { - Reg8 dest, src2; - IntBinaryOpCV(e, i, vv_fn, vc_fn, dest, i->src1.value, src2); - } else if (i->Match(SIG_TYPE_I16, SIG_TYPE_I16, SIG_TYPE_I16)) { - Reg16 dest, src1, src2; - IntBinaryOpVV(e, i, vv_fn, dest, src1, src2); - } else if (i->Match(SIG_TYPE_I16, SIG_TYPE_I16, SIG_TYPE_I16C)) { - Reg16 dest, src1; - IntBinaryOpVC(e, i, vv_fn, vc_fn, dest, src1, i->src2.value); - } else if (i->Match(SIG_TYPE_I16, SIG_TYPE_I16C, SIG_TYPE_I16)) { - Reg16 dest, src2; - IntBinaryOpCV(e, i, vv_fn, vc_fn, dest, i->src1.value, src2); - } else if (i->Match(SIG_TYPE_I32, SIG_TYPE_I32, SIG_TYPE_I32)) { - Reg32 dest, src1, src2; - IntBinaryOpVV(e, i, vv_fn, dest, src1, src2); - } else if (i->Match(SIG_TYPE_I32, SIG_TYPE_I32, SIG_TYPE_I32C)) { - Reg32 dest, src1; - IntBinaryOpVC(e, i, vv_fn, vc_fn, dest, src1, i->src2.value); - } else if (i->Match(SIG_TYPE_I32, SIG_TYPE_I32C, SIG_TYPE_I32)) { - Reg32 dest, src2; - IntBinaryOpCV(e, i, vv_fn, vc_fn, dest, i->src1.value, src2); - } else if (i->Match(SIG_TYPE_I64, SIG_TYPE_I64, SIG_TYPE_I64)) { - Reg64 dest, src1, src2; - IntBinaryOpVV(e, i, vv_fn, dest, src1, src2); - } else if (i->Match(SIG_TYPE_I64, SIG_TYPE_I64, SIG_TYPE_I64C)) { - Reg64 dest, src1; - IntBinaryOpVC(e, i, vv_fn, vc_fn, dest, src1, i->src2.value); - } else if (i->Match(SIG_TYPE_I64, SIG_TYPE_I64C, SIG_TYPE_I64)) { - Reg64 dest, src2; - IntBinaryOpCV(e, i, vv_fn, vc_fn, dest, i->src1.value, src2); - // Start forced src2=i8 - } else if (i->Match(SIG_TYPE_I16, SIG_TYPE_I16, SIG_TYPE_I8)) { - Reg16 dest, src1; - Reg8 src2; - IntBinaryOpVV(e, i, vv_fn, dest, src1, src2); - } else if (i->Match(SIG_TYPE_I16, SIG_TYPE_I16, SIG_TYPE_I8C)) { - Reg16 dest, src1; - IntBinaryOpVC(e, i, vv_fn, vc_fn, dest, src1, i->src2.value); - } else if (i->Match(SIG_TYPE_I16, SIG_TYPE_I16C, SIG_TYPE_I8)) { - Reg16 dest; - Reg8 src2; - IntBinaryOpCV(e, i, vv_fn, vc_fn, dest, i->src1.value, src2); - } else if (i->Match(SIG_TYPE_I32, SIG_TYPE_I32, SIG_TYPE_I8)) { - Reg32 dest, src1; - Reg8 src2; - IntBinaryOpVV(e, i, vv_fn, dest, src1, src2); - } else if (i->Match(SIG_TYPE_I32, SIG_TYPE_I32, SIG_TYPE_I8C)) { - Reg32 dest, src1; - IntBinaryOpVC(e, i, vv_fn, vc_fn, dest, src1, i->src2.value); - } else if (i->Match(SIG_TYPE_I32, SIG_TYPE_I32C, SIG_TYPE_I8)) { - Reg32 dest; - Reg8 src2; - IntBinaryOpCV(e, i, vv_fn, vc_fn, dest, i->src1.value, src2); - } else if (i->Match(SIG_TYPE_I64, SIG_TYPE_I64, SIG_TYPE_I8)) { - Reg64 dest, src1; - Reg8 src2; - IntBinaryOpVV(e, i, vv_fn, dest, src1, src2); - } else if (i->Match(SIG_TYPE_I64, SIG_TYPE_I64, SIG_TYPE_I8C)) { - Reg64 dest, src1; - IntBinaryOpVC(e, i, vv_fn, vc_fn, dest, src1, i->src2.value); - } else if (i->Match(SIG_TYPE_I64, SIG_TYPE_I64C, SIG_TYPE_I8)) { - Reg64 dest; - Reg8 src2; - IntBinaryOpCV(e, i, vv_fn, vc_fn, dest, i->src1.value, src2); - } else { - ASSERT_INVALID_TYPE(); - } - if (i->flags & ARITHMETIC_SET_CARRY) { - StoreEflags(e); - } -}; - -typedef void(vvv_fn)(X64Emitter& e, Instr& i, const Reg& dest_src1, const Operand& src2, const Operand& src3); -typedef void(vvc_fn)(X64Emitter& e, Instr& i, const Reg& dest_src1, const Operand& src2, uint32_t src3); -typedef 
void(vcv_fn)(X64Emitter& e, Instr& i, const Reg& dest_src1, uint32_t src2, const Operand& src3); -template -void IntTernaryOpVVV(X64Emitter& e, Instr*& i, vvv_fn vvv_fn, - TD& dest, TS1& src1, TS2& src2, TS3& src3) { - e.BeginOp(i->dest, dest, REG_DEST, - i->src1.value, src1, 0, - i->src2.value, src2, 0, - i->src3.value, src3, 0); - if (dest.getIdx() == src1.getIdx()) { - vvv_fn(e, *i, dest, src2, src3); - } else if (dest.getIdx() == src2.getIdx()) { - if (i->opcode->flags & OPCODE_FLAG_COMMUNATIVE) { - vvv_fn(e, *i, dest, src1, src3); - } else { - UNIMPLEMENTED_SEQ(); - } - } else if (dest.getIdx() == src3.getIdx()) { - auto Ntx = TEMP_LIKE(src3); - e.mov(Ntx, src3); - e.mov(dest, src1); - vvv_fn(e, *i, dest, src2, Ntx); - } else { - e.mov(dest, src1); - vvv_fn(e, *i, dest, src2, src3); - } - e.EndOp(dest, src1, src2, src3); -} -template -void IntTernaryOpVVC(X64Emitter& e, Instr*& i, vvv_fn vvv_fn, vvc_fn vvc_fn, - TD& dest, TS1& src1, TS2& src2, Value* src3) { - e.BeginOp(i->dest, dest, REG_DEST, - i->src1.value, src1, 0, - i->src2.value, src2, 0); - if (dest.getBit() <= 32) { - // 32-bit. - if (dest.getIdx() == src1.getIdx()) { - vvc_fn(e, *i, dest, src2, (uint32_t)src3->get_constant(CT())); - } else if (dest == src2) { - if (i->opcode->flags & OPCODE_FLAG_COMMUNATIVE) { - vvc_fn(e, *i, dest, src1, (uint32_t)src3->get_constant(CT())); - } else { - // Eww. - auto Ntx = TEMP_LIKE(src2); - e.mov(Ntx, src2); - e.mov(dest, src1); - vvc_fn(e, *i, dest, Ntx, (uint32_t)src3->get_constant(CT())); - } - } else { - e.mov(dest, src1); - vvc_fn(e, *i, dest, src2, (uint32_t)src3->get_constant(CT())); - } - } else { - // 64-bit. - if (dest.getIdx() == src1.getIdx()) { - e.mov(TEMP_REG, src3->constant.i64); - vvv_fn(e, *i, dest, src2, TEMP_REG); - } else if (dest.getIdx() == src2.getIdx()) { - if (i->opcode->flags & OPCODE_FLAG_COMMUNATIVE) { - e.mov(TEMP_REG, src3->constant.i64); - vvv_fn(e, *i, dest, src1, TEMP_REG); - } else { - // Eww. - e.mov(TEMP_REG, src1); - e.mov(src1, src2); - e.mov(dest, TEMP_REG); - e.mov(TEMP_REG, src3->constant.i64); - vvv_fn(e, *i, dest, src1, TEMP_REG); - } - } else { - e.mov(TEMP_REG, src3->constant.i64); - e.mov(dest, src1); - vvv_fn(e, *i, dest, src2, TEMP_REG); - } - } - e.EndOp(dest, src1, src2); -} -template -void IntTernaryOpVCV(X64Emitter& e, Instr*& i, vvv_fn vvv_fn, vcv_fn vcv_fn, - TD& dest, TS1& src1, Value* src2, TS3& src3) { - e.BeginOp(i->dest, dest, REG_DEST, - i->src1.value, src1, 0, - i->src3.value, src3, 0); - if (dest.getBit() <= 32) { - // 32-bit. - if (dest.getIdx() == src1.getIdx()) { - vcv_fn(e, *i, dest, (uint32_t)src2->get_constant(CT()), src3); - } else if (dest.getIdx() == src3.getIdx()) { - if (i->opcode->flags & OPCODE_FLAG_COMMUNATIVE) { - vcv_fn(e, *i, dest, (uint32_t)src2->get_constant(CT()), src1); - } else { - // Eww. - auto Ntx = TEMP_LIKE(src3); - e.mov(Ntx, src3); - e.mov(dest, src1); - vcv_fn(e, *i, dest, (uint32_t)src2->get_constant(CT()), Ntx); - } - } else { - e.mov(dest, src1); - vcv_fn(e, *i, dest, (uint32_t)src2->get_constant(CT()), src3); - } - } else { - // 64-bit. - if (dest.getIdx() == src1.getIdx()) { - e.mov(TEMP_REG, src2->constant.i64); - vvv_fn(e, *i, dest, TEMP_REG, src3); - } else if (dest.getIdx() == src3.getIdx()) { - if (i->opcode->flags & OPCODE_FLAG_COMMUNATIVE) { - e.mov(TEMP_REG, src2->constant.i64); - vvv_fn(e, *i, dest, src1, TEMP_REG); - } else { - // Eww. 
- e.mov(TEMP_REG, src1); - e.mov(src1, src3); - e.mov(dest, TEMP_REG); - e.mov(TEMP_REG, src2->constant.i64); - vvv_fn(e, *i, dest, TEMP_REG, src1); - } - } else { - e.mov(TEMP_REG, src2->constant.i64); - e.mov(dest, src1); - vvv_fn(e, *i, dest, TEMP_REG, src3); - } - } - e.EndOp(dest, src1, src3); -} -void IntTernaryOp(X64Emitter& e, Instr*& i, vvv_fn vvv_fn, vvc_fn vvc_fn, vcv_fn vcv_fn) { - // TODO(benvanik): table lookup. This linear scan is slow. - // Note: we assume DEST.type = SRC1.type = SRC2.type, but that SRC3.type may vary. - XEASSERT(i->dest->type == i->src1.value->type && - i->dest->type == i->src2.value->type); - // TODO(benvanik): table lookup. - if (i->Match(SIG_TYPE_IGNORE, SIG_TYPE_I8, SIG_TYPE_I8, SIG_TYPE_I8)) { - Reg8 dest, src1, src2; - Reg8 src3; - IntTernaryOpVVV(e, i, vvv_fn, dest, src1, src2, src3); - } else if (i->Match(SIG_TYPE_IGNORE, SIG_TYPE_I8, SIG_TYPE_I8, SIG_TYPE_I8C)) { - Reg8 dest, src1, src2; - IntTernaryOpVVC(e, i, vvv_fn, vvc_fn, dest, src1, src2, i->src3.value); - } else if (i->Match(SIG_TYPE_IGNORE, SIG_TYPE_I16, SIG_TYPE_I16, SIG_TYPE_I8)) { - Reg16 dest, src1, src2; - Reg8 src3; - IntTernaryOpVVV(e, i, vvv_fn, dest, src1, src2, src3); - } else if (i->Match(SIG_TYPE_IGNORE, SIG_TYPE_I16, SIG_TYPE_I16, SIG_TYPE_I8C)) { - Reg16 dest, src1, src2; - IntTernaryOpVVC(e, i, vvv_fn, vvc_fn, dest, src1, src2, i->src3.value); - } else if (i->Match(SIG_TYPE_IGNORE, SIG_TYPE_I32, SIG_TYPE_I32, SIG_TYPE_I8)) { - Reg32 dest, src1, src2; - Reg8 src3; - IntTernaryOpVVV(e, i,vvv_fn, dest, src1, src2, src3); - } else if (i->Match(SIG_TYPE_IGNORE, SIG_TYPE_I32, SIG_TYPE_I32, SIG_TYPE_I8C)) { - Reg32 dest, src1, src2; - IntTernaryOpVVC(e, i, vvv_fn, vvc_fn, dest, src1, src2, i->src3.value); - } else if (i->Match(SIG_TYPE_IGNORE, SIG_TYPE_I64, SIG_TYPE_I64, SIG_TYPE_I8)) { - Reg64 dest, src1, src2; - Reg8 src3; - IntTernaryOpVVV(e, i, vvv_fn, dest, src1, src2, src3); - } else if (i->Match(SIG_TYPE_IGNORE, SIG_TYPE_I64, SIG_TYPE_I64, SIG_TYPE_I8C)) { - Reg64 dest, src1, src2; - IntTernaryOpVVC(e, i, vvv_fn, vvc_fn, dest, src1, src2, i->src3.value); - // - } else if (i->Match(SIG_TYPE_IGNORE, SIG_TYPE_I8, SIG_TYPE_I8C, SIG_TYPE_I8)) { - Reg8 dest, src1; - Reg8 src3; - IntTernaryOpVCV(e, i, vvv_fn, vcv_fn, dest, src1, i->src2.value, src3); - } else if (i->Match(SIG_TYPE_IGNORE, SIG_TYPE_I16, SIG_TYPE_I16C, SIG_TYPE_I8)) { - Reg16 dest, src1; - Reg8 src3; - IntTernaryOpVCV(e, i, vvv_fn, vcv_fn, dest, src1, i->src2.value, src3); - } else if (i->Match(SIG_TYPE_IGNORE, SIG_TYPE_I32, SIG_TYPE_I32C, SIG_TYPE_I8)) { - Reg32 dest, src1; - Reg8 src3; - IntTernaryOpVCV(e, i, vvv_fn, vcv_fn, dest, src1, i->src2.value, src3); - } else if (i->Match(SIG_TYPE_IGNORE, SIG_TYPE_I64, SIG_TYPE_I64C, SIG_TYPE_I8)) { - Reg64 dest, src1; - Reg8 src3; - IntTernaryOpVCV(e, i, vvv_fn, vcv_fn, dest, src1, i->src2.value, src3); - } else { - ASSERT_INVALID_TYPE(); - } - if (i->flags & ARITHMETIC_SET_CARRY) { - StoreEflags(e); - } -} - -// Since alot of SSE ops can take dest + src, just do that. -// Worst case the callee can dedupe. 
-typedef void(xmm_v_fn)(X64Emitter& e, Instr& i, const Xmm& dest, const Xmm& src); -void XmmUnaryOpV(X64Emitter& e, Instr*& i, xmm_v_fn v_fn, - Xmm& dest, Xmm& src1) { - e.BeginOp(i->dest, dest, REG_DEST, - i->src1.value, src1, 0); - v_fn(e, *i, dest, src1); - e.EndOp(dest, src1); -} -void XmmUnaryOpC(X64Emitter& e, Instr*& i, xmm_v_fn v_fn, - Xmm& dest, Value* src1) { - e.BeginOp(i->dest, dest, REG_DEST); - if (src1->type == FLOAT32_TYPE) { - e.mov(e.eax, (uint32_t)src1->constant.i32); - e.movd(dest, e.eax); - } else if (src1->type == FLOAT64_TYPE) { - e.mov(e.rax, (uint64_t)src1->constant.i64); - e.movq(dest, e.rax); - } else { - LoadXmmConstant(e, dest, src1->constant.v128); - } - v_fn(e, *i, dest, dest); - e.EndOp(dest); -} -void XmmUnaryOp(X64Emitter& e, Instr*& i, uint32_t flags, xmm_v_fn v_fn) { - if (IsFloatType(i->src1.value->type)) { - if (i->Match(SIG_TYPE_F32, SIG_TYPE_F32)) { - Xmm dest, src1; - XmmUnaryOpV(e, i, v_fn, dest, src1); - } else if (i->Match(SIG_TYPE_F32, SIG_TYPE_F32C)) { - Xmm dest; - XmmUnaryOpC(e, i, v_fn, dest, i->src1.value); - } else if (i->Match(SIG_TYPE_F64, SIG_TYPE_F64)) { - Xmm dest, src1; - XmmUnaryOpV(e, i, v_fn, dest, src1); - } else if (i->Match(SIG_TYPE_F64, SIG_TYPE_F64C)) { - Xmm dest; - XmmUnaryOpC(e, i, v_fn, dest, i->src1.value); - } else { - ASSERT_INVALID_TYPE(); - } - } else if (IsVecType(i->src1.value->type)) { - if (i->Match(SIG_TYPE_V128, SIG_TYPE_V128)) { - Xmm dest, src1; - XmmUnaryOpV(e, i, v_fn, dest, src1); - } else if (i->Match(SIG_TYPE_V128, SIG_TYPE_V128C)) { - Xmm dest; - XmmUnaryOpC(e, i, v_fn, dest, i->src1.value); - } else { - ASSERT_INVALID_TYPE(); - } - } else { - ASSERT_INVALID_TYPE(); - } -}; - -// TODO(benvanik): allow a vvv form for dest = src1 + src2 that new SSE -// ops support. -typedef void(xmm_vv_fn)(X64Emitter& e, Instr& i, const Xmm& dest_src, const Xmm& src); -void XmmBinaryOpVV(X64Emitter& e, Instr*& i, xmm_vv_fn vv_fn, - Xmm& dest, Xmm& src1, Xmm& src2) { - e.BeginOp(i->dest, dest, REG_DEST, - i->src1.value, src1, 0, - i->src2.value, src2, 0); - if (dest.getIdx() == src1.getIdx()) { - vv_fn(e, *i, dest, src2); - } else if (dest.getIdx() == src2.getIdx()) { - if (i->opcode->flags & OPCODE_FLAG_COMMUNATIVE) { - vv_fn(e, *i, dest, src1); - } else { - // Eww. 
- e.movaps(e.xmm0, src1); - vv_fn(e, *i, e.xmm0, src2); - e.movaps(dest, e.xmm0); - } - } else { - e.movaps(dest, src1); - vv_fn(e, *i, dest, src2); - } - e.EndOp(dest, src1, src2); -} -void XmmBinaryOpVC(X64Emitter& e, Instr*& i, xmm_vv_fn vv_fn, - Xmm& dest, Xmm& src1, Value* src2) { - e.BeginOp(i->dest, dest, REG_DEST, - i->src1.value, src1, 0); - if (i->opcode->flags & OPCODE_FLAG_COMMUNATIVE) { - if (src2->type == FLOAT32_TYPE) { - e.mov(e.eax, (uint32_t)src2->constant.i32); - e.movss(dest, e.eax); - } else if (src2->type == FLOAT64_TYPE) { - e.mov(e.rax, (uint64_t)src2->constant.i64); - e.movsd(dest, e.rax); - } else { - LoadXmmConstant(e, dest, src2->constant.v128); - } - vv_fn(e, *i, dest, src1); - } else { - if (dest.getIdx() != src1.getIdx()) { - e.movaps(dest, src1); - } - if (src2->type == FLOAT32_TYPE) { - e.mov(e.eax, (uint32_t)src2->constant.i32); - e.movss(e.xmm0, e.eax); - } else if (src2->type == FLOAT64_TYPE) { - e.mov(e.rax, (uint64_t)src2->constant.i64); - e.movsd(e.xmm0, e.rax); - } else { - LoadXmmConstant(e, e.xmm0, src2->constant.v128); - } - vv_fn(e, *i, dest, e.xmm0); - } - e.EndOp(dest, src1); -} -void XmmBinaryOpCV(X64Emitter& e, Instr*& i, xmm_vv_fn vv_fn, - Xmm& dest, Value* src1, Xmm& src2) { - e.BeginOp(i->dest, dest, REG_DEST, - i->src2.value, src2, 0); - if (i->opcode->flags & OPCODE_FLAG_COMMUNATIVE) { - if (src1->type == FLOAT32_TYPE) { - e.mov(e.eax, (uint32_t)src1->constant.i32); - e.movss(dest, e.eax); - } else if (src1->type == FLOAT64_TYPE) { - e.mov(e.rax, (uint64_t)src1->constant.i64); - e.movsd(dest, e.rax); - } else { - LoadXmmConstant(e, dest, src1->constant.v128); - } - vv_fn(e, *i, dest, src2); - } else { - auto real_src2 = src2; - if (dest.getIdx() == src2.getIdx()) { - e.movaps(e.xmm0, src2); - real_src2 = e.xmm0; - } - if (src1->type == FLOAT32_TYPE) { - e.mov(e.eax, (uint32_t)src1->constant.i32); - e.movss(dest, e.eax); - } else if (src1->type == FLOAT64_TYPE) { - e.mov(e.rax, (uint64_t)src1->constant.i64); - e.movsd(dest, e.rax); - } else { - LoadXmmConstant(e, dest, src1->constant.v128); - } - vv_fn(e, *i, dest, real_src2); - } - e.EndOp(dest, src2); -} -void XmmBinaryOp(X64Emitter& e, Instr*& i, uint32_t flags, xmm_vv_fn vv_fn) { - // TODO(benvanik): table lookup. This linear scan is slow. - if (!i->src1.value->IsConstant() && !i->src2.value->IsConstant()) { - Xmm dest, src1, src2; - XmmBinaryOpVV(e, i, vv_fn, dest, src1, src2); - } else if (!i->src1.value->IsConstant() && i->src2.value->IsConstant()) { - Xmm dest, src1; - XmmBinaryOpVC(e, i, vv_fn, dest, src1, i->src2.value); - } else if (i->src1.value->IsConstant() && !i->src2.value->IsConstant()) { - Xmm dest, src2; - XmmBinaryOpCV(e, i, vv_fn, dest, i->src1.value, src2); - } else { - ASSERT_INVALID_TYPE(); - } - if (flags & ARITHMETIC_SET_CARRY) { - StoreEflags(e); - } -}; - -typedef void(xmm_vvv_fn)(X64Emitter& e, Instr& i, const Xmm& dest_src, const Xmm& src2, const Xmm& src3); -void XmmTernaryOpVVV(X64Emitter& e, Instr*& i, xmm_vvv_fn vvv_fn, - Xmm& dest, Xmm& src1, Xmm& src2, Xmm& src3) { - e.BeginOp(i->dest, dest, REG_DEST, - i->src1.value, src1, 0, - i->src2.value, src2, 0, - i->src3.value, src3, 0); - if (dest.getIdx() == src1.getIdx()) { - vvv_fn(e, *i, dest, src2, src3); - } else if (dest.getIdx() == src2.getIdx()) { - if (i->opcode->flags & OPCODE_FLAG_COMMUNATIVE) { - vvv_fn(e, *i, dest, src1, src3); - } else { - // Eww. 
- e.movaps(e.xmm0, src1); - vvv_fn(e, *i, e.xmm0, src2, src3); - e.movaps(dest, e.xmm0); - } - } else if (dest.getIdx() == src3.getIdx()) { - if (i->opcode->flags & OPCODE_FLAG_COMMUNATIVE) { - vvv_fn(e, *i, dest, src1, src2); - } else { - e.movaps(e.xmm0, src3); - e.movaps(dest, src1); - vvv_fn(e, *i, dest, src2, e.xmm0); - } - } else { - e.movaps(dest, src1); - vvv_fn(e, *i, dest, src2, src3); - } - e.EndOp(dest, src1, src2, src3); -} -void XmmTernaryOp(X64Emitter& e, Instr*& i, uint32_t flags, xmm_vvv_fn vvv_fn) { - // TODO(benvanik): table lookup. This linear scan is slow. - if (!i->src1.value->IsConstant() && !i->src2.value->IsConstant() && - !i->src3.value->IsConstant()) { - Xmm dest, src1, src2, src3; - XmmTernaryOpVVV(e, i, vvv_fn, dest, src1, src2, src3); - } else { - ASSERT_INVALID_TYPE(); - } - if (flags & ARITHMETIC_SET_CARRY) { - StoreEflags(e); - } -}; - -} // namespace - -#endif // ALLOY_BACKEND_X64_X64_LOWERING_OP_UTILS_INL_ diff --git a/src/alloy/backend/x64/lowering/sources.gypi b/src/alloy/backend/x64/lowering/sources.gypi deleted file mode 100644 index d6cdeb1bb..000000000 --- a/src/alloy/backend/x64/lowering/sources.gypi +++ /dev/null @@ -1,12 +0,0 @@ -# Copyright 2013 Ben Vanik. All Rights Reserved. -{ - 'sources': [ - 'lowering_sequences.cc', - 'lowering_sequences.h', - 'lowering_table.cc', - 'lowering_table.h', - 'op_utils.inl', - 'tracers.cc', - 'tracers.h', - ], -} diff --git a/src/alloy/backend/x64/sources.gypi b/src/alloy/backend/x64/sources.gypi index 7ca63e25d..38167e3f1 100644 --- a/src/alloy/backend/x64/sources.gypi +++ b/src/alloy/backend/x64/sources.gypi @@ -12,11 +12,12 @@ 'x64_emitter.h', 'x64_function.cc', 'x64_function.h', + 'x64_sequence.inl', + 'x64_sequences.cc', + 'x64_sequences.h', 'x64_thunk_emitter.cc', 'x64_thunk_emitter.h', - ], - - 'includes': [ - 'lowering/sources.gypi', + 'x64_tracers.cc', + 'x64_tracers.h', ], } diff --git a/src/alloy/backend/x64/x64_backend.cc b/src/alloy/backend/x64/x64_backend.cc index 076ab1cbb..40283f6d2 100644 --- a/src/alloy/backend/x64/x64_backend.cc +++ b/src/alloy/backend/x64/x64_backend.cc @@ -12,26 +12,23 @@ #include #include #include +#include #include -#include -#include using namespace alloy; using namespace alloy::backend; using namespace alloy::backend::x64; -using namespace alloy::backend::x64::lowering; using namespace alloy::runtime; X64Backend::X64Backend(Runtime* runtime) : - code_cache_(0), lowering_table_(0), + code_cache_(0), Backend(runtime) { } X64Backend::~X64Backend() { alloy::tracing::WriteEvent(EventType::Deinit({ })); - delete lowering_table_; delete code_cache_; } @@ -41,6 +38,8 @@ int X64Backend::Initialize() { return result; } + RegisterSequences(); + machine_info_.register_sets[0] = { 0, "gpr", @@ -68,9 +67,6 @@ int X64Backend::Initialize() { delete thunk_emitter; delete allocator; - lowering_table_ = new LoweringTable(this); - RegisterSequences(lowering_table_); - alloy::tracing::WriteEvent(EventType::Init({ })); diff --git a/src/alloy/backend/x64/x64_backend.h b/src/alloy/backend/x64/x64_backend.h index dd12c0347..0ff3018cd 100644 --- a/src/alloy/backend/x64/x64_backend.h +++ b/src/alloy/backend/x64/x64_backend.h @@ -20,7 +20,6 @@ namespace backend { namespace x64 { class X64CodeCache; -namespace lowering { class LoweringTable; } #define ALLOY_HAS_X64_BACKEND 1 @@ -38,8 +37,6 @@ public: HostToGuestThunk host_to_guest_thunk() const { return host_to_guest_thunk_; } GuestToHostThunk guest_to_host_thunk() const { return guest_to_host_thunk_; } - lowering::LoweringTable* lowering_table() 
const { return lowering_table_; } - virtual int Initialize(); virtual Assembler* CreateAssembler(); @@ -48,8 +45,6 @@ private: X64CodeCache* code_cache_; HostToGuestThunk host_to_guest_thunk_; GuestToHostThunk guest_to_host_thunk_; - - lowering::LoweringTable* lowering_table_; }; diff --git a/src/alloy/backend/x64/x64_emitter.cc b/src/alloy/backend/x64/x64_emitter.cc index 80ed2cbca..ce1e4e70a 100644 --- a/src/alloy/backend/x64/x64_emitter.cc +++ b/src/alloy/backend/x64/x64_emitter.cc @@ -11,10 +11,14 @@ #include #include +#include +#include #include -#include #include #include +#include +#include +#include using namespace alloy; using namespace alloy::backend; @@ -31,6 +35,13 @@ namespace x64 { static const size_t MAX_CODE_SIZE = 1 * 1024 * 1024; +static const size_t STASH_OFFSET = 32; + +// If we are running with tracing on we have to store the EFLAGS in the stack, +// otherwise our calls out to C to print will clear it before DID_CARRY/etc +// can get the value. +#define STORE_EFLAGS 1 + } // namespace x64 } // namespace backend } // namespace alloy @@ -145,12 +156,9 @@ int X64Emitter::Emit(HIRBuilder* builder, size_t& out_stack_size) { mov(qword[rsp + StackLayout::GUEST_RCX_HOME], rcx); mov(qword[rsp + StackLayout::GUEST_RET_ADDR], rdx); mov(qword[rsp + StackLayout::GUEST_CALL_RET_ADDR], 0); - // ReloadRDX: mov(rdx, qword[rcx + 8]); // membase } - auto lowering_table = backend_->lowering_table(); - // Body. auto block = builder->first_block(); while (block) { @@ -161,12 +169,17 @@ int X64Emitter::Emit(HIRBuilder* builder, size_t& out_stack_size) { label = label->next; } - // Add instructions. - // The table will process sequences of instructions to (try to) - // generate optimal code. - current_instr_ = block->instr_head; - if (lowering_table->ProcessBlock(*this, block)) { - return 1; + // Process instructions. + const Instr* instr = block->instr_head; + while (instr) { + const Instr* new_tail = instr; + if (!SelectSequence(*this, instr, &new_tail)) { + // No sequence found! + XEASSERTALWAYS(); + XELOGE("Unable to process HIR opcode %s", instr->opcode->name); + break; + } + instr = new_tail; } block = block->next; @@ -191,16 +204,320 @@ int X64Emitter::Emit(HIRBuilder* builder, size_t& out_stack_size) { return 0; } -Instr* X64Emitter::Advance(Instr* i) { - auto next = i->next; - current_instr_ = next; - return next; -} - -void X64Emitter::MarkSourceOffset(Instr* i) { +void X64Emitter::MarkSourceOffset(const Instr* i) { auto entry = source_map_arena_.Alloc(); entry->source_offset = i->src1.offset; entry->hir_offset = uint32_t(i->block->ordinal << 16) | i->ordinal; entry->code_offset = getSize(); source_map_count_++; } + +void X64Emitter::DebugBreak() { + // TODO(benvanik): notify debugger. + db(0xCC); +} + +void X64Emitter::Trap() { + // TODO(benvanik): notify debugger. + db(0xCC); +} + +void X64Emitter::UnimplementedInstr(const hir::Instr* i) { + // TODO(benvanik): notify debugger. + db(0xCC); + XEASSERTALWAYS(); +} + +uint64_t ResolveFunctionSymbol(void* raw_context, uint64_t symbol_info_ptr) { + // TODO(benvanik): generate this thunk at runtime? or a shim? 
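  // Note (not part of the original patch): helpers like this one are reached
  // through CallNative(), which relies on the Win64 convention used throughout
  // this emitter - rcx carries the guest context pointer, rdx the single
  // integer argument, and the result comes back in rax. Because the callee may
  // clobber rcx/rdx, each CallNative() below is followed by
  // ReloadECX()/ReloadEDX(). A minimal sketch of a compatible helper
  // (the name and logging are hypothetical):
  //
  //   uint64_t MyDebugHelper(void* raw_context, uint64_t arg0) {
  //     auto thread_state = *reinterpret_cast<ThreadState**>(raw_context);
  //     XELOGW("helper hit: %.16llX", arg0);
  //     return 0;  // handed back to the generated code in rax
  //   }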
+ auto thread_state = *reinterpret_cast(raw_context); + auto symbol_info = reinterpret_cast(symbol_info_ptr); + + Function* fn = NULL; + thread_state->runtime()->ResolveFunction(symbol_info->address(), &fn); + XEASSERTNOTNULL(fn); + auto x64_fn = static_cast(fn); + return reinterpret_cast(x64_fn->machine_code()); +} + +void X64Emitter::Call(const hir::Instr* instr, runtime::FunctionInfo* symbol_info) { + auto fn = reinterpret_cast(symbol_info->function()); + // Resolve address to the function to call and store in rax. + // TODO(benvanik): caching/etc. For now this makes debugging easier. + if (fn) { + mov(rax, reinterpret_cast(fn->machine_code())); + } else { + CallNative(ResolveFunctionSymbol, reinterpret_cast(symbol_info)); + } + + // Actually jump/call to rax. + if (instr->flags & CALL_TAIL) { + // Pass the callers return address over. + mov(rdx, qword[rsp + StackLayout::GUEST_RET_ADDR]); + + add(rsp, static_cast(stack_size())); + jmp(rax); + } else { + // Return address is from the previous SET_RETURN_ADDRESS. + mov(rdx, qword[rsp + StackLayout::GUEST_CALL_RET_ADDR]); + call(rax); + } +} + +uint64_t ResolveFunctionAddress(void* raw_context, uint64_t target_address) { + // TODO(benvanik): generate this thunk at runtime? or a shim? + auto thread_state = *reinterpret_cast(raw_context); + + // TODO(benvanik): required? + target_address &= 0xFFFFFFFF; + + Function* fn = NULL; + thread_state->runtime()->ResolveFunction(target_address, &fn); + XEASSERTNOTNULL(fn); + auto x64_fn = static_cast(fn); + return reinterpret_cast(x64_fn->machine_code()); +} + +void X64Emitter::CallIndirect(const hir::Instr* instr, const Reg64& reg) { + // Check if return. + if (instr->flags & CALL_POSSIBLE_RETURN) { + cmp(reg.cvt32(), dword[rsp + StackLayout::GUEST_RET_ADDR]); + je("epilog", CodeGenerator::T_NEAR); + } + + // Resolve address to the function to call and store in rax. + // TODO(benvanik): caching/etc. For now this makes debugging easier. + if (reg.getIdx() != rdx.getIdx()) { + mov(rdx, reg); + } + CallNative(ResolveFunctionAddress); + + // Actually jump/call to rax. + if (instr->flags & CALL_TAIL) { + // Pass the callers return address over. + mov(rdx, qword[rsp + StackLayout::GUEST_RET_ADDR]); + + add(rsp, static_cast(stack_size())); + jmp(rax); + } else { + // Return address is from the previous SET_RETURN_ADDRESS. 
+ mov(rdx, qword[rsp + StackLayout::GUEST_CALL_RET_ADDR]); + call(rax); + } +} + +uint64_t UndefinedCallExtern(void* raw_context, uint64_t symbol_info_ptr) { + auto symbol_info = reinterpret_cast(symbol_info_ptr); + XELOGW("undefined extern call to %.8X %s", + symbol_info->address(), + symbol_info->name()); + return 0; +} +void X64Emitter::CallExtern(const hir::Instr* instr, const FunctionInfo* symbol_info) { + XEASSERT(symbol_info->behavior() == FunctionInfo::BEHAVIOR_EXTERN); + if (!symbol_info->extern_handler()) { + CallNative(UndefinedCallExtern, reinterpret_cast(symbol_info)); + } else { + // rcx = context + // rdx = target host function + // r8 = arg0 + // r9 = arg1 + mov(rdx, reinterpret_cast(symbol_info->extern_handler())); + mov(r8, reinterpret_cast(symbol_info->extern_arg0())); + mov(r9, reinterpret_cast(symbol_info->extern_arg1())); + auto thunk = backend()->guest_to_host_thunk(); + mov(rax, reinterpret_cast(thunk)); + call(rax); + ReloadECX(); + ReloadEDX(); + // rax = host return + } +} + +void X64Emitter::CallNative(void* fn) { + mov(rax, reinterpret_cast(fn)); + call(rax); + ReloadECX(); + ReloadEDX(); +} + +void X64Emitter::CallNative(uint64_t(*fn)(void* raw_context)) { + mov(rax, reinterpret_cast(fn)); + call(rax); + ReloadECX(); + ReloadEDX(); +} + +void X64Emitter::CallNative(uint64_t(*fn)(void* raw_context, uint64_t arg0)) { + mov(rax, reinterpret_cast(fn)); + call(rax); + ReloadECX(); + ReloadEDX(); +} + +void X64Emitter::CallNative(uint64_t(*fn)(void* raw_context, uint64_t arg0), uint64_t arg0) { + mov(rdx, arg0); + mov(rax, reinterpret_cast(fn)); + call(rax); + ReloadECX(); + ReloadEDX(); +} + +void X64Emitter::SetReturnAddress(uint64_t value) { + mov(qword[rsp + StackLayout::GUEST_CALL_RET_ADDR], value); +} + +void X64Emitter::ReloadECX() { + mov(rcx, qword[rsp + StackLayout::GUEST_RCX_HOME]); +} + +void X64Emitter::ReloadEDX() { + mov(rdx, qword[rcx + 8]); // membase +} + +void X64Emitter::LoadEflags() { +#if STORE_EFLAGS + mov(eax, dword[rsp + STASH_OFFSET]); + push(rax); + popf(); +#else + // EFLAGS already present. +#endif // STORE_EFLAGS +} + +void X64Emitter::StoreEflags() { +#if STORE_EFLAGS + pushf(); + pop(qword[rsp + STASH_OFFSET]); +#else + // EFLAGS should have CA set? + // (so long as we don't fuck with it) +#endif // STORE_EFLAGS +} + +bool X64Emitter::ConstantFitsIn32Reg(uint64_t v) { + if ((v & ~0x7FFFFFFF) == 0) { + // Fits under 31 bits, so just load using normal mov. + return true; + } else if ((v & ~0x7FFFFFFF) == ~0x7FFFFFFF) { + // Negative number that fits in 32bits. + return true; + } + return false; +} + +void X64Emitter::MovMem64(const RegExp& addr, uint64_t v) { + if ((v & ~0x7FFFFFFF) == 0) { + // Fits under 31 bits, so just load using normal mov. + mov(qword[addr], v); + } else if ((v & ~0x7FFFFFFF) == ~0x7FFFFFFF) { + // Negative number that fits in 32bits. + mov(qword[addr], v); + } else if (!(v >> 32)) { + // All high bits are zero. It'd be nice if we had a way to load a 32bit + // immediate without sign extending! + // TODO(benvanik): this is super common, find a better way. + mov(dword[addr], static_cast(v)); + mov(dword[addr + 4], 0); + } else { + // 64bit number that needs double movs. 
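  // Note (not part of the original patch): x86-64 has no "mov m64, imm64"
  // encoding; a memory-destination mov only takes a 32-bit immediate that is
  // sign-extended. A general 64-bit immediate store therefore has to either
  // bounce through a register or be split into two 32-bit stores, as below.
  // The register round-trip alternative, for comparison, would be:
  //
  //   mov(rax, v);             // movabs rax, imm64
  //   mov(qword[addr], rax);
  //
  // The split-store form trades one extra store for leaving rax untouched.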
+ mov(dword[addr], static_cast(v)); + mov(dword[addr + 4], static_cast(v >> 32)); + } +} + +Address X64Emitter::GetXmmConstPtr(XmmConst id) { + static const vec128_t xmm_consts[] = { + /* XMMZero */ vec128f(0.0f, 0.0f, 0.0f, 0.0f), + /* XMMOne */ vec128f(1.0f, 1.0f, 1.0f, 1.0f), + /* XMMNegativeOne */ vec128f(-1.0f, -1.0f, -1.0f, -1.0f), + /* XMMMaskX16Y16 */ vec128i(0x0000FFFF, 0xFFFF0000, 0x00000000, 0x00000000), + /* XMMFlipX16Y16 */ vec128i(0x00008000, 0x00000000, 0x00000000, 0x00000000), + /* XMMFixX16Y16 */ vec128f(-32768.0f, 0.0f, 0.0f, 0.0f), + /* XMMNormalizeX16Y16 */ vec128f(1.0f / 32767.0f, 1.0f / (32767.0f * 65536.0f), 0.0f, 0.0f), + /* XMM3301 */ vec128f(3.0f, 3.0f, 0.0f, 1.0f), + /* XMMSignMaskPS */ vec128i(0x80000000u, 0x80000000u, 0x80000000u, 0x80000000u), + /* XMMSignMaskPD */ vec128i(0x00000000u, 0x80000000u, 0x00000000u, 0x80000000u), + /* XMMByteSwapMask */ vec128i(0x00010203u, 0x04050607u, 0x08090A0Bu, 0x0C0D0E0Fu), + /* XMMPermuteControl15 */ vec128b(15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15), + /* XMMUnpackD3DCOLOR */ vec128i(0xFFFFFF02, 0xFFFFFF01, 0xFFFFFF00, 0xFFFFFF02), + /* XMMOneOver255 */ vec128f(1.0f / 255.0f, 1.0f / 255.0f, 1.0f / 255.0f, 1.0f / 255.0f), + /* XMMShiftMaskPS */ vec128i(0x0000001Fu, 0x0000001Fu, 0x0000001Fu, 0x0000001Fu), + /* XMMOneMask */ vec128i(0xFFFFFFFFu, 0xFFFFFFFFu, 0xFFFFFFFFu, 0xFFFFFFFFu), + }; + // TODO(benvanik): cache base pointer somewhere? stack? It'd be nice to + // prevent this move. + // TODO(benvanik): move to predictable location in PPCContext? could then + // just do rcx relative addression with no rax overwriting. + mov(rax, (uint64_t)&xmm_consts[id]); + return ptr[rax]; +} + +void X64Emitter::LoadConstantXmm(Xbyak::Xmm dest, const vec128_t& v) { + // http://www.agner.org/optimize/optimizing_assembly.pdf + // 13.4 Generating constants + if (!v.low && !v.high) { + // 0000... + vpxor(dest, dest); + } else if (v.low == ~0ull && v.high == ~0ull) { + // 1111... + vmovaps(dest, GetXmmConstPtr(XMMOneMask)); + } else { + // TODO(benvanik): see what other common values are. + // TODO(benvanik): build constant table - 99% are reused. + MovMem64(rsp + STASH_OFFSET, v.low); + MovMem64(rsp + STASH_OFFSET + 8, v.high); + vmovdqa(dest, ptr[rsp + STASH_OFFSET]); + } +} + +void X64Emitter::LoadConstantXmm(Xbyak::Xmm dest, float v) { + union { + float f; + uint32_t i; + } x = { v }; + if (!v) { + // 0 + vpxor(dest, dest); + } else if (x.i == ~0UL) { + // 1111... + vmovaps(dest, GetXmmConstPtr(XMMOneMask)); + } else { + // TODO(benvanik): see what other common values are. + // TODO(benvanik): build constant table - 99% are reused. + mov(eax, x.i); + vmovd(dest, eax); + } +} + +void X64Emitter::LoadConstantXmm(Xbyak::Xmm dest, double v) { + union { + double d; + uint64_t i; + } x = { v }; + if (!v) { + // 0 + vpxor(dest, dest); + } else if (x.i == ~0ULL) { + // 1111... + vmovaps(dest, GetXmmConstPtr(XMMOneMask)); + } else { + // TODO(benvanik): see what other common values are. + // TODO(benvanik): build constant table - 99% are reused. 
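  // Note (not part of the original patch): all three LoadConstantXmm()
  // overloads follow the same strategy from the Agner Fog reference cited
  // above - all-zero constants become a register xor, all-ones constants load
  // from the shared XMMOneMask table entry, and anything else is materialized
  // through a GPR or, for full vec128 values, written to the STASH_OFFSET
  // scratch slot on the stack and reloaded:
  //
  //   zeros : vpxor   dest, dest             (no memory access)
  //   ones  : vmovaps dest, [XMMOneMask]     (one load from the const table)
  //   other : MovMem64 x2 into the stash slot, then vmovdqa dest, [rsp+stash]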
+ mov(rax, x.i); + vmovq(dest, rax); + } +} + +Address X64Emitter::StashXmm(const Xmm& r) { + auto addr = ptr[rsp + STASH_OFFSET]; + vmovups(addr, r); + return addr; +} + +Address X64Emitter::StashXmm(const vec128_t& v) { + auto addr = ptr[rsp + STASH_OFFSET]; + LoadConstantXmm(xmm0, v); + vmovups(addr, xmm0); + return addr; +} diff --git a/src/alloy/backend/x64/x64_emitter.h b/src/alloy/backend/x64/x64_emitter.h index e006bf3f9..93f859616 100644 --- a/src/alloy/backend/x64/x64_emitter.h +++ b/src/alloy/backend/x64/x64_emitter.h @@ -19,7 +19,9 @@ XEDECLARECLASS2(alloy, hir, HIRBuilder); XEDECLARECLASS2(alloy, hir, Instr); XEDECLARECLASS2(alloy, runtime, DebugInfo); +XEDECLARECLASS2(alloy, runtime, FunctionInfo); XEDECLARECLASS2(alloy, runtime, Runtime); +XEDECLARECLASS2(alloy, runtime, SymbolInfo); namespace alloy { namespace backend { @@ -33,6 +35,25 @@ enum RegisterFlags { REG_ABCD = (1 << 1), }; +enum XmmConst { + XMMZero = 0, + XMMOne = 1, + XMMNegativeOne = 2, + XMMMaskX16Y16 = 3, + XMMFlipX16Y16 = 4, + XMMFixX16Y16 = 5, + XMMNormalizeX16Y16 = 6, + XMM3301 = 7, + XMMSignMaskPS = 8, + XMMSignMaskPD = 9, + XMMByteSwapMask = 10, + XMMPermuteControl15 = 11, + XMMUnpackD3DCOLOR = 12, + XMMOneOver255 = 13, + XMMShiftMaskPS = 14, + XMMOneMask = 15, +}; + // Unfortunately due to the design of xbyak we have to pass this to the ctor. class XbyakAllocator : public Xbyak::Allocator { public: @@ -54,79 +75,68 @@ public: void*& out_code_address, size_t& out_code_size); public: - template - void BeginOp(hir::Value* v0, V0& r0, uint32_t r0_flags) { - SetupReg(v0, r0); - } - template - void BeginOp(hir::Value* v0, V0& r0, uint32_t r0_flags, - hir::Value* v1, V1& r1, uint32_t r1_flags) { - SetupReg(v0, r0); - SetupReg(v1, r1); - } - template - void BeginOp(hir::Value* v0, V0& r0, uint32_t r0_flags, - hir::Value* v1, V1& r1, uint32_t r1_flags, - hir::Value* v2, V2& r2, uint32_t r2_flags) { - SetupReg(v0, r0); - SetupReg(v1, r1); - SetupReg(v2, r2); - } - template - void BeginOp(hir::Value* v0, V0& r0, uint32_t r0_flags, - hir::Value* v1, V1& r1, uint32_t r1_flags, - hir::Value* v2, V2& r2, uint32_t r2_flags, - hir::Value* v3, V3& r3, uint32_t r3_flags) { - SetupReg(v0, r0); - SetupReg(v1, r1); - SetupReg(v2, r2); - SetupReg(v3, r3); - } - template - void EndOp(V0& r0) { - } - template - void EndOp(V0& r0, V1& r1) { - } - template - void EndOp(V0& r0, V1& r1, V2& r2) { - } - template - void EndOp(V0& r0, V1& r1, V2& r2, V3& r3) { - } - // Reserved: rsp // Scratch: rax/rcx/rdx - // xmm0-1 + // xmm0-2 (could be only xmm0 with some trickery) // Available: rbx, r12-r15 (save to get r8-r11, rbp, rsi, rdi?) 
- // xmm6-xmm15 (save to get xmm2-xmm5) + // xmm6-xmm15 (save to get xmm3-xmm5) static const int GPR_COUNT = 5; static const int XMM_COUNT = 10; - static void SetupReg(hir::Value* v, Xbyak::Reg8& r) { + static void SetupReg(const hir::Value* v, Xbyak::Reg8& r) { auto idx = gpr_reg_map_[v->reg.index]; r = Xbyak::Reg8(idx); } - static void SetupReg(hir::Value* v, Xbyak::Reg16& r) { + static void SetupReg(const hir::Value* v, Xbyak::Reg16& r) { auto idx = gpr_reg_map_[v->reg.index]; r = Xbyak::Reg16(idx); } - static void SetupReg(hir::Value* v, Xbyak::Reg32& r) { + static void SetupReg(const hir::Value* v, Xbyak::Reg32& r) { auto idx = gpr_reg_map_[v->reg.index]; r = Xbyak::Reg32(idx); } - static void SetupReg(hir::Value* v, Xbyak::Reg64& r) { + static void SetupReg(const hir::Value* v, Xbyak::Reg64& r) { auto idx = gpr_reg_map_[v->reg.index]; r = Xbyak::Reg64(idx); } - static void SetupReg(hir::Value* v, Xbyak::Xmm& r) { + static void SetupReg(const hir::Value* v, Xbyak::Xmm& r) { auto idx = xmm_reg_map_[v->reg.index]; r = Xbyak::Xmm(idx); } - hir::Instr* Advance(hir::Instr* i); + void MarkSourceOffset(const hir::Instr* i); - void MarkSourceOffset(hir::Instr* i); + void DebugBreak(); + void Trap(); + void UnimplementedInstr(const hir::Instr* i); + void UnimplementedExtern(const hir::Instr* i); + + void Call(const hir::Instr* instr, runtime::FunctionInfo* symbol_info); + void CallIndirect(const hir::Instr* instr, const Xbyak::Reg64& reg); + void CallExtern(const hir::Instr* instr, const runtime::FunctionInfo* symbol_info); + void CallNative(void* fn); + void CallNative(uint64_t(*fn)(void* raw_context)); + void CallNative(uint64_t(*fn)(void* raw_context, uint64_t arg0)); + void CallNative(uint64_t(*fn)(void* raw_context, uint64_t arg0), uint64_t arg0); + void SetReturnAddress(uint64_t value); + void ReloadECX(); + void ReloadEDX(); + + // TODO(benvanik): Label for epilog (don't use strings). + + void LoadEflags(); + void StoreEflags(); + + // Moves a 64bit immediate into memory. + bool ConstantFitsIn32Reg(uint64_t v); + void MovMem64(const Xbyak::RegExp& addr, uint64_t v); + + Xbyak::Address GetXmmConstPtr(XmmConst id); + void LoadConstantXmm(Xbyak::Xmm dest, float v); + void LoadConstantXmm(Xbyak::Xmm dest, double v); + void LoadConstantXmm(Xbyak::Xmm dest, const vec128_t& v); + Xbyak::Address StashXmm(const Xbyak::Xmm& r); + Xbyak::Address StashXmm(const vec128_t& v); size_t stack_size() const { return stack_size_; } diff --git a/src/alloy/backend/x64/x64_sequence.inl b/src/alloy/backend/x64/x64_sequence.inl new file mode 100644 index 000000000..ce2b8e36e --- /dev/null +++ b/src/alloy/backend/x64/x64_sequence.inl @@ -0,0 +1,714 @@ +/** + ****************************************************************************** + * Xenia : Xbox 360 Emulator Research Project * + ****************************************************************************** + * Copyright 2014 Ben Vanik. All rights reserved. * + * Released under the BSD license - see LICENSE in the root for more details. 
* + ****************************************************************************** + */ + + +namespace { + +enum KeyType { + KEY_TYPE_X = OPCODE_SIG_TYPE_X, + KEY_TYPE_L = OPCODE_SIG_TYPE_L, + KEY_TYPE_O = OPCODE_SIG_TYPE_O, + KEY_TYPE_S = OPCODE_SIG_TYPE_S, + KEY_TYPE_V_I8 = OPCODE_SIG_TYPE_V + INT8_TYPE, + KEY_TYPE_V_I16 = OPCODE_SIG_TYPE_V + INT16_TYPE, + KEY_TYPE_V_I32 = OPCODE_SIG_TYPE_V + INT32_TYPE, + KEY_TYPE_V_I64 = OPCODE_SIG_TYPE_V + INT64_TYPE, + KEY_TYPE_V_F32 = OPCODE_SIG_TYPE_V + FLOAT32_TYPE, + KEY_TYPE_V_F64 = OPCODE_SIG_TYPE_V + FLOAT64_TYPE, + KEY_TYPE_V_V128 = OPCODE_SIG_TYPE_V + VEC128_TYPE, +}; + +#pragma pack(push, 1) +union InstrKey { + struct { + uint32_t opcode : 8; + uint32_t dest : 5; + uint32_t src1 : 5; + uint32_t src2 : 5; + uint32_t src3 : 5; + uint32_t reserved : 4; + }; + uint32_t value; + + operator uint32_t() const { + return value; + } + + InstrKey() : value(0) {} + InstrKey(uint32_t v) : value(v) {} + InstrKey(const Instr* i) : value(0) { + opcode = i->opcode->num; + uint32_t sig = i->opcode->signature; + dest = GET_OPCODE_SIG_TYPE_DEST(sig) ? OPCODE_SIG_TYPE_V + i->dest->type : 0; + src1 = GET_OPCODE_SIG_TYPE_SRC1(sig); + if (src1 == OPCODE_SIG_TYPE_V) { + src1 += i->src1.value->type; + } + src2 = GET_OPCODE_SIG_TYPE_SRC2(sig); + if (src2 == OPCODE_SIG_TYPE_V) { + src2 += i->src2.value->type; + } + src3 = GET_OPCODE_SIG_TYPE_SRC3(sig); + if (src3 == OPCODE_SIG_TYPE_V) { + src3 += i->src3.value->type; + } + } + + template + struct Construct { + static const uint32_t value = + (OPCODE) | (DEST << 8) | (SRC1 << 13) | (SRC2 << 18) | (SRC3 << 23); + }; +}; +#pragma pack(pop) +static_assert(sizeof(InstrKey) <= 4, "Key must be 4 bytes"); + +template +struct CombinedStruct; +template <> +struct CombinedStruct<> {}; +template +struct CombinedStruct : T, CombinedStruct {}; + +struct OpBase {}; + +template +struct Op : OpBase { + static const KeyType key_type = KEY_TYPE; +}; + +struct VoidOp : Op { +protected: + template friend struct Op; + template friend struct I; + void Load(const Instr::Op& op) {} +}; + +struct OffsetOp : Op { + uint64_t value; +protected: + template friend struct Op; + template friend struct I; + void Load(const Instr::Op& op) { + this->value = op.offset; + } +}; + +struct SymbolOp : Op { + FunctionInfo* value; +protected: + template friend struct Op; + template friend struct I; + bool Load(const Instr::Op& op) { + this->value = op.symbol_info; + return true; + } +}; + +struct LabelOp : Op { + hir::Label* value; +protected: + template friend struct Op; + template friend struct I; + void Load(const Instr::Op& op) { + this->value = op.label; + } +}; + +template +struct ValueOp : Op, KEY_TYPE> { + typedef REG_TYPE reg_type; + static const int tag = TAG; + const Value* value; + bool is_constant; + virtual bool ConstantFitsIn32Reg() const { return true; } + const REG_TYPE& reg() const { + XEASSERT(!is_constant); + return reg_; + } + operator const REG_TYPE&() const { + return reg(); + } + bool IsEqual(const T& b) const { + if (is_constant && b.is_constant) { + return reinterpret_cast(this)->constant() == b.constant(); + } else if (!is_constant && !b.is_constant) { + return reg_.getIdx() == b.reg_.getIdx(); + } else { + return false; + } + } + bool IsEqual(const Xbyak::Reg& b) const { + if (is_constant) { + return false; + } else if (!is_constant) { + return reg_.getIdx() == b.getIdx(); + } else { + return false; + } + } + bool operator== (const T& b) const { + return IsEqual(b); + } + bool operator!= (const T& b) const { + return !IsEqual(b); + 
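  // Note (not part of the original patch): matching hinges on the InstrKey
  // union defined earlier in this file - the opcode number and the four
  // operand kinds are packed into one 32-bit key (8 + 5 + 5 + 5 + 5 bits,
  // 4 reserved), so a candidate instruction can be compared against a pattern
  // with a single integer compare. At compile time the same packing comes from
  // InstrKey::Construct; an illustrative key for an i32 = i32 + i32 pattern
  // (using OPCODE_ADD purely as an example) would be:
  //
  //   InstrKey::Construct<OPCODE_ADD, KEY_TYPE_V_I32,
  //                       KEY_TYPE_V_I32, KEY_TYPE_V_I32, KEY_TYPE_X>::value
  //     == OPCODE_ADD | (KEY_TYPE_V_I32 << 8) | (KEY_TYPE_V_I32 << 13)
  //                   | (KEY_TYPE_V_I32 << 18) | (KEY_TYPE_X << 23);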
} + bool operator== (const Xbyak::Reg& b) const { + return IsEqual(b); + } + bool operator!= (const Xbyak::Reg& b) const { + return !IsEqual(b); + } + void Load(const Instr::Op& op) { + const Value* value = op.value; + this->value = value; + is_constant = value->IsConstant(); + if (!is_constant) { + X64Emitter::SetupReg(value, reg_); + } + } +protected: + REG_TYPE reg_; +}; + +template +struct I8 : ValueOp, KEY_TYPE_V_I8, Reg8, int8_t, TAG> { + const int8_t constant() const { + XEASSERT(is_constant); + return value->constant.i8; + } +}; +template +struct I16 : ValueOp, KEY_TYPE_V_I16, Reg16, int16_t, TAG> { + const int16_t constant() const { + XEASSERT(is_constant); + return value->constant.i16; + } +}; +template +struct I32 : ValueOp, KEY_TYPE_V_I32, Reg32, int32_t, TAG> { + const int32_t constant() const { + XEASSERT(is_constant); + return value->constant.i32; + } +}; +template +struct I64 : ValueOp, KEY_TYPE_V_I64, Reg64, int64_t, TAG> { + const int64_t constant() const { + XEASSERT(is_constant); + return value->constant.i64; + } + bool ConstantFitsIn32Reg() const override { + int64_t v = value->constant.i64; + if ((v & ~0x7FFFFFFF) == 0) { + // Fits under 31 bits, so just load using normal mov. + return true; + } else if ((v & ~0x7FFFFFFF) == ~0x7FFFFFFF) { + // Negative number that fits in 32bits. + return true; + } + return false; + } +}; +template +struct F32 : ValueOp, KEY_TYPE_V_F32, Xmm, float, TAG> { + const float constant() const { + XEASSERT(is_constant); + return value->constant.f32; + } +}; +template +struct F64 : ValueOp, KEY_TYPE_V_F64, Xmm, double, TAG> { + const double constant() const { + XEASSERT(is_constant); + return value->constant.f64; + } +}; +template +struct V128 : ValueOp, KEY_TYPE_V_V128, Xmm, vec128_t, TAG> { + const vec128_t& constant() const { + XEASSERT(is_constant); + return value->constant.v128; + } +}; + +struct TagTable { + struct { + bool valid; + Instr::Op op; + } table[16]; + + template ::type* = nullptr> + bool CheckTag(const Instr::Op& op) { + return true; + } + template ::type* = nullptr> + bool CheckTag(const Instr::Op& op) { + return true; + } + template ::type* = nullptr> + bool CheckTag(const Instr::Op& op) { + return true; + } + template ::type* = nullptr> + bool CheckTag(const Instr::Op& op) { + return true; + } + template = KEY_TYPE_V_I8>::type* = nullptr> + bool CheckTag(const Instr::Op& op) { + const Value* value = op.value; + if (T::tag == -1) { + return true; + } + if (table[T::tag].valid && + table[T::tag].op.value != value) { + return false; + } + table[T::tag].valid = true; + table[T::tag].op.value = (Value*)value; + return true; + } +}; + +template +struct DestField; +template +struct DestField { + DEST dest; +protected: + bool LoadDest(const Instr* i, TagTable& tag_table) { + Instr::Op op; + op.value = i->dest; + if (tag_table.CheckTag(op)) { + dest.Load(op); + return true; + } + return false; + } +}; +template <> +struct DestField { +protected: + bool LoadDest(const Instr* i, TagTable& tag_table) { + return true; + } +}; + +template +struct I; +template +struct I : DestField { + static const hir::Opcode opcode = OPCODE; + static const uint32_t key = InstrKey::Construct::value; + static const KeyType dest_type = DEST::key_type; + const Instr* instr; +protected: + template friend struct SequenceFields; + bool Load(const Instr* i, TagTable& tag_table) { + if (InstrKey(i).value == key && + LoadDest(i, tag_table)) { + instr = i; + return true; + } + return false; + } +}; +template +struct I : DestField { + static const hir::Opcode 
opcode = OPCODE; + static const uint32_t key = InstrKey::Construct::value; + static const KeyType dest_type = DEST::key_type; + static const KeyType src1_type = SRC1::key_type; + const Instr* instr; + SRC1 src1; +protected: + template friend struct SequenceFields; + bool Load(const Instr* i, TagTable& tag_table) { + if (InstrKey(i).value == key && + LoadDest(i, tag_table) && + tag_table.CheckTag(i->src1)) { + instr = i; + src1.Load(i->src1); + return true; + } + return false; + } +}; +template +struct I : DestField { + static const hir::Opcode opcode = OPCODE; + static const uint32_t key = InstrKey::Construct::value; + static const KeyType dest_type = DEST::key_type; + static const KeyType src1_type = SRC1::key_type; + static const KeyType src2_type = SRC2::key_type; + const Instr* instr; + SRC1 src1; + SRC2 src2; +protected: + template friend struct SequenceFields; + bool Load(const Instr* i, TagTable& tag_table) { + if (InstrKey(i).value == key && + LoadDest(i, tag_table) && + tag_table.CheckTag(i->src1) && + tag_table.CheckTag(i->src2)) { + instr = i; + src1.Load(i->src1); + src2.Load(i->src2); + return true; + } + return false; + } +}; +template +struct I : DestField { + static const hir::Opcode opcode = OPCODE; + static const uint32_t key = InstrKey::Construct::value; + static const KeyType dest_type = DEST::key_type; + static const KeyType src1_type = SRC1::key_type; + static const KeyType src2_type = SRC2::key_type; + static const KeyType src3_type = SRC3::key_type; + const Instr* instr; + SRC1 src1; + SRC2 src2; + SRC3 src3; +protected: + template friend struct SequenceFields; + bool Load(const Instr* i, TagTable& tag_table) { + if (InstrKey(i).value == key && + LoadDest(i, tag_table) && + tag_table.CheckTag(i->src1) && + tag_table.CheckTag(i->src2) && + tag_table.CheckTag(i->src3)) { + instr = i; + src1.Load(i->src1); + src2.Load(i->src2); + src3.Load(i->src3); + return true; + } + return false; + } +}; + +template +struct SequenceFields; +template +struct SequenceFields { + I1 i1; + typedef typename I1 I1Type; +protected: + template friend struct Sequence; + bool Check(const Instr* i, TagTable& tag_table, const Instr** new_tail) { + if (i1.Load(i, tag_table)) { + *new_tail = i->next; + return true; + } + return false; + } +}; +template +struct SequenceFields : SequenceFields { + I2 i2; +protected: + template friend struct Sequence; + bool Check(const Instr* i, TagTable& tag_table, const Instr** new_tail) { + if (SequenceFields::Check(i, tag_table, new_tail)) { + auto ni = i->next; + if (ni && i2.Load(ni, tag_table)) { + *new_tail = ni; + return i; + } + } + return false; + } +}; +template +struct SequenceFields : SequenceFields { + I3 i3; +protected: + template friend struct Sequence; + bool Check(const Instr* i, TagTable& tag_table, const Instr** new_tail) { + if (SequenceFields::Check(i, tag_table, new_tail)) { + auto ni = i->next; + if (ni && i3.Load(ni, tag_table)) { + *new_tail = ni; + return i; + } + } + return false; + } +}; +template +struct SequenceFields : SequenceFields { + I4 i4; +protected: + template friend struct Sequence; + bool Check(const Instr* i, TagTable& tag_table, const Instr** new_tail) { + if (SequenceFields::Check(i, tag_table, new_tail)) { + auto ni = i->next; + if (ni && i4.Load(ni, tag_table)) { + *new_tail = ni; + return i; + } + } + return false; + } +}; +template +struct SequenceFields : SequenceFields { + I5 i5; +protected: + template friend struct Sequence; + bool Check(const Instr* i, TagTable& tag_table, const Instr** new_tail) { + if 
(SequenceFields::Check(i, tag_table, new_tail)) { + auto ni = i->next; + if (ni && i5.Load(ni, tag_table)) { + *new_tail = ni; + return i; + } + } + return false; + } +}; + +template +struct Sequence { + struct EmitArgs : SequenceFields {}; + + static bool Select(X64Emitter& e, const Instr* i, const Instr** new_tail) { + EmitArgs args; + TagTable tag_table; + if (!args.Check(i, tag_table, new_tail)) { + return false; + } + SEQ::Emit(e, args); + return true; + } +}; + +template +const T GetTempReg(X64Emitter& e); +template <> +const Reg8 GetTempReg(X64Emitter& e) { + return e.al; +} +template <> +const Reg16 GetTempReg(X64Emitter& e) { + return e.ax; +} +template <> +const Reg32 GetTempReg(X64Emitter& e) { + return e.eax; +} +template <> +const Reg64 GetTempReg(X64Emitter& e) { + return e.rax; +} + +template +struct SingleSequence : public Sequence, T> { + typedef T EmitArgType; + static const uint32_t head_key = T::key; + static void Emit(X64Emitter& e, const EmitArgs& _) { + SEQ::Emit(e, _.i1); + } + + template + static void EmitUnaryOp( + X64Emitter& e, const EmitArgType& i, + const REG_FN& reg_fn) { + if (i.src1.is_constant) { + e.mov(i.dest, i.src1.constant()); + reg_fn(e, i.dest); + } else { + if (i.dest != i.src1) { + e.mov(i.dest, i.src1); + } + reg_fn(e, i.dest); + } + } + + template + static void EmitCommutativeBinaryOp( + X64Emitter& e, const EmitArgType& i, + const REG_REG_FN& reg_reg_fn, const REG_CONST_FN& reg_const_fn) { + if (i.src1.is_constant) { + XEASSERT(!i.src2.is_constant); + if (i.dest == i.src2) { + if (i.src1.ConstantFitsIn32Reg()) { + reg_const_fn(e, i.dest, static_cast(i.src1.constant())); + } else { + auto temp = GetTempReg(e); + e.mov(temp, i.src1.constant()); + reg_reg_fn(e, i.dest, temp); + } + } else { + e.mov(i.dest, i.src1.constant()); + reg_reg_fn(e, i.dest, i.src2); + } + } else if (i.src2.is_constant) { + if (i.dest == i.src1) { + if (i.src2.ConstantFitsIn32Reg()) { + reg_const_fn(e, i.dest, static_cast(i.src2.constant())); + } else { + auto temp = GetTempReg(e); + e.mov(temp, i.src2.constant()); + reg_reg_fn(e, i.dest, temp); + } + } else { + e.mov(i.dest, i.src2.constant()); + reg_reg_fn(e, i.dest, i.src1); + } + } else { + if (i.dest == i.src1) { + reg_reg_fn(e, i.dest, i.src2); + } else if (i.dest == i.src2) { + reg_reg_fn(e, i.dest, i.src1); + } else { + e.mov(i.dest, i.src1); + reg_reg_fn(e, i.dest, i.src2); + } + } + } + template + static void EmitAssociativeBinaryOp( + X64Emitter& e, const EmitArgType& i, + const REG_REG_FN& reg_reg_fn, const REG_CONST_FN& reg_const_fn) { + if (i.src1.is_constant) { + XEASSERT(!i.src2.is_constant); + if (i.dest == i.src2) { + auto temp = GetTempReg(e); + e.mov(temp, i.src2); + e.mov(i.dest, i.src1.constant()); + reg_reg_fn(e, i.dest, temp); + } else { + e.mov(i.dest, i.src1.constant()); + reg_reg_fn(e, i.dest, i.src2); + } + } else if (i.src2.is_constant) { + if (i.dest == i.src1) { + if (i.src2.ConstantFitsIn32Reg()) { + reg_const_fn(e, i.dest, static_cast(i.src2.constant())); + } else { + auto temp = GetTempReg(e); + e.mov(temp, i.src2.constant()); + reg_reg_fn(e, i.dest, temp); + } + } else { + e.mov(i.dest, i.src1); + if (i.src2.ConstantFitsIn32Reg()) { + reg_const_fn(e, i.dest, static_cast(i.src2.constant())); + } else { + auto temp = GetTempReg(e); + e.mov(temp, i.src2.constant()); + reg_reg_fn(e, i.dest, temp); + } + } + } else { + if (i.dest == i.src1) { + reg_reg_fn(e, i.dest, i.src2); + } else if (i.dest == i.src2) { + auto temp = GetTempReg(e); + e.mov(temp, i.src2); + e.mov(i.dest, i.src1); + 
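        // Note (not part of the original patch): opcode emitters built on
        // these helpers only supply the reg,reg and reg,imm forms; the helper
        // picks the variant based on which operands are constant and whether
        // dest aliases a source. A sketch of how a commutative integer op
        // might be wired up (the EMITTER body and lambda signatures here are
        // illustrative only):
        //
        //   EMITTER(ADD_I32, MATCH(I<OPCODE_ADD, I32<>, I32<>, I32<>>)) {
        //     static void Emit(X64Emitter& e, const EmitArgType& i) {
        //       EmitCommutativeBinaryOp(e, i,
        //           [](X64Emitter& e, const Reg32& dest_src, const Reg32& src)
        //               { e.add(dest_src, src); },
        //           [](X64Emitter& e, const Reg32& dest_src, int32_t constant)
        //               { e.add(dest_src, constant); });
        //     }
        //   };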
reg_reg_fn(e, i.dest, temp); + } else { + e.mov(i.dest, i.src1); + reg_reg_fn(e, i.dest, i.src2); + } + } + } + + template + static void EmitCommutativeCompareOp( + X64Emitter& e, const EmitArgType& i, + const REG_REG_FN& reg_reg_fn, const REG_CONST_FN& reg_const_fn) { + if (i.src1.is_constant) { + XEASSERT(!i.src2.is_constant); + if (i.src1.ConstantFitsIn32Reg()) { + reg_const_fn(e, i.src2, static_cast(i.src1.constant())); + } else { + auto temp = GetTempReg(e); + e.mov(temp, i.src1.constant()); + reg_reg_fn(e, i.src2, temp); + } + } else if (i.src2.is_constant) { + if (i.src2.ConstantFitsIn32Reg()) { + reg_const_fn(e, i.src1, static_cast(i.src2.constant())); + } else { + auto temp = GetTempReg(e); + e.mov(temp, i.src2.constant()); + reg_reg_fn(e, i.src1, temp); + } + } else { + reg_reg_fn(e, i.src1, i.src2); + } + } + template + static void EmitAssociativeCompareOp( + X64Emitter& e, const EmitArgType& i, + const REG_REG_FN& reg_reg_fn, const REG_CONST_FN& reg_const_fn) { + if (i.src1.is_constant) { + XEASSERT(!i.src2.is_constant); + if (i.src1.ConstantFitsIn32Reg()) { + reg_const_fn(e, i.dest, i.src2, static_cast(i.src1.constant()), true); + } else { + auto temp = GetTempReg(e); + e.mov(temp, i.src1.constant()); + reg_reg_fn(e, i.dest, i.src2, temp, true); + } + } else if (i.src2.is_constant) { + if (i.src2.ConstantFitsIn32Reg()) { + reg_const_fn(e, i.dest, i.src1, static_cast(i.src2.constant()), false); + } else { + auto temp = GetTempReg(e); + e.mov(temp, i.src2.constant()); + reg_reg_fn(e, i.dest, i.src1, temp, false); + } + } else { + reg_reg_fn(e, i.dest, i.src1, i.src2, false); + } + } +}; + +static const int ANY = -1; +typedef int tag_t; +static const tag_t TAG0 = 0; +static const tag_t TAG1 = 1; +static const tag_t TAG2 = 2; +static const tag_t TAG3 = 3; +static const tag_t TAG4 = 4; +static const tag_t TAG5 = 5; +static const tag_t TAG6 = 6; +static const tag_t TAG7 = 7; + +typedef bool (*SequenceSelectFn)(X64Emitter&, const Instr*, const Instr**); + +template +void Register() { + sequence_table.insert({ T::head_key, T::Select }); +} +template +void Register() { + Register(); + Register(); +}; +#define EMITTER_OPCODE_TABLE(name, ...) \ + void Register_##name() { \ + Register<__VA_ARGS__>(); \ + } + +#define MATCH(...) __VA_ARGS__ +#define EMITTER(name, match) struct name : SingleSequence +#define SEQUENCE(name, match) struct name : Sequence + +} // namespace diff --git a/src/alloy/backend/x64/x64_sequences.cc b/src/alloy/backend/x64/x64_sequences.cc new file mode 100644 index 000000000..a48df3db5 --- /dev/null +++ b/src/alloy/backend/x64/x64_sequences.cc @@ -0,0 +1,4488 @@ +/** + ****************************************************************************** + * Xenia : Xbox 360 Emulator Research Project * + ****************************************************************************** + * Copyright 2014 Ben Vanik. All rights reserved. * + * Released under the BSD license - see LICENSE in the root for more details. * + ****************************************************************************** + */ + +// A note about vectors: +// Alloy represents vectors as xyzw pairs, with indices 0123. +// XMM registers are xyzw pairs with indices 3210, making them more like wzyx. +// This makes things somewhat confusing. It'd be nice to just shuffle the +// registers around on load/store, however certain operations require that +// data be in the right offset. 
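// ----------------------------------------------------------------------------
// Reviewer sketch (illustrative, not part of the patch): how the matching DSL
// from x64_sequence.inl is used in this file. I<OPCODE, DEST, SRC...> describes
// one HIR instruction shape, EMITTER(name, MATCH(...)) declares a
// SingleSequence whose static Emit(X64Emitter&, const EmitArgType&) does the
// lowering, and EMITTER_OPCODE_TABLE(opcode, ...) generates a Register_<opcode>()
// that inserts each sequence's Select callback into sequence_table, keyed by the
// head pattern's InstrKey. A minimal single-instruction emitter would look
// roughly like this (names here are only for illustration):
//
//   EMITTER(EXAMPLE_NEG_I32, MATCH(I<OPCODE_NEG, I32<>, I32<>>)) {
//     static void Emit(X64Emitter& e, const EmitArgType& i) {
//       EmitUnaryOp(e, i,
//                   [](X64Emitter& e, const Reg32& dest_src) { e.neg(dest_src); });
//     }
//   };
//   EMITTER_OPCODE_TABLE(OPCODE_NEG, EXAMPLE_NEG_I32);
//
// The patch's own arithmetic emitters (not shown in this excerpt) follow the
// same shape, typically going through the EmitCommutativeBinaryOp /
// EmitAssociativeBinaryOp helpers to handle constant operands.
// ----------------------------------------------------------------------------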
+// Basically, this identity must hold: +// shuffle(vec, b00011011) -> {x,y,z,w} => {x,y,z,w} +// All indices and operations must respect that. +// +// Memory (big endian): +// [00 01 02 03] [04 05 06 07] [08 09 0A 0B] [0C 0D 0E 0F] (x, y, z, w) +// load into xmm register: +// [0F 0E 0D 0C] [0B 0A 09 08] [07 06 05 04] [03 02 01 00] (w, z, y, x) + +#include + +#include +#include +#include +#include + +// TODO(benvanik): reimplement packing functions +#include + +using namespace alloy; +using namespace alloy::backend; +using namespace alloy::backend::x64; +using namespace alloy::hir; +using namespace alloy::runtime; + +using namespace Xbyak; + +// Utilities/types used only in this file: +#include + +namespace { +static std::unordered_multimap sequence_table; +} // namespace + + +// ============================================================================ +// OPCODE_COMMENT +// ============================================================================ +EMITTER(COMMENT, MATCH(I)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + if (IsTracingInstr()) { + auto str = reinterpret_cast(i.src1.value); + // TODO(benvanik): pass through. + // TODO(benvanik): don't just leak this memory. + auto str_copy = xestrdupa(str); + e.mov(e.rdx, reinterpret_cast(str_copy)); + e.CallNative(TraceString); + } + } +}; +EMITTER_OPCODE_TABLE( + OPCODE_COMMENT, + COMMENT); + + +// ============================================================================ +// OPCODE_NOP +// ============================================================================ +EMITTER(NOP, MATCH(I)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + e.nop(); + } +}; +EMITTER_OPCODE_TABLE( + OPCODE_NOP, + NOP); + + +// ============================================================================ +// OPCODE_SOURCE_OFFSET +// ============================================================================ +EMITTER(SOURCE_OFFSET, MATCH(I)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { +#if XE_DEBUG + e.nop(); + e.nop(); + e.mov(e.eax, (uint32_t)i.src1.value); + e.nop(); + e.nop(); +#endif // XE_DEBUG + e.MarkSourceOffset(i.instr); + } +}; +EMITTER_OPCODE_TABLE( + OPCODE_SOURCE_OFFSET, + SOURCE_OFFSET); + + +// ============================================================================ +// OPCODE_DEBUG_BREAK +// ============================================================================ +EMITTER(DEBUG_BREAK, MATCH(I)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + e.DebugBreak(); + } +}; +EMITTER_OPCODE_TABLE( + OPCODE_DEBUG_BREAK, + DEBUG_BREAK); + + +// ============================================================================ +// OPCODE_DEBUG_BREAK_TRUE +// ============================================================================ +EMITTER(DEBUG_BREAK_TRUE_I8, MATCH(I>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + e.test(i.src1, i.src1); + Xbyak::Label skip; + e.jz(skip); + e.DebugBreak(); + e.L(skip); + } +}; +EMITTER(DEBUG_BREAK_TRUE_I16, MATCH(I>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + e.test(i.src1, i.src1); + Xbyak::Label skip; + e.jz(skip); + e.DebugBreak(); + e.L(skip); + } +}; +EMITTER(DEBUG_BREAK_TRUE_I32, MATCH(I>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + e.test(i.src1, i.src1); + Xbyak::Label skip; + e.jz(skip); + e.DebugBreak(); + e.L(skip); + } +}; +EMITTER(DEBUG_BREAK_TRUE_I64, MATCH(I>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + e.test(i.src1, i.src1); + Xbyak::Label skip; + e.jz(skip); + 
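    // test sets ZF when src1 is zero, so the jz above skips the break and the
    // DebugBreak() below only fires for a non-zero (true) condition. The same
    // test/jz/L(skip) shape is reused by the TRAP_TRUE and CALL_TRUE emitters.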
e.DebugBreak(); + e.L(skip); + } +}; +EMITTER(DEBUG_BREAK_TRUE_F32, MATCH(I>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + e.vptest(i.src1, i.src1); + Xbyak::Label skip; + e.jz(skip); + e.DebugBreak(); + e.L(skip); + } +}; +EMITTER(DEBUG_BREAK_TRUE_F64, MATCH(I>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + e.vptest(i.src1, i.src1); + Xbyak::Label skip; + e.jz(skip); + e.DebugBreak(); + e.L(skip); + } +}; +EMITTER_OPCODE_TABLE( + OPCODE_DEBUG_BREAK_TRUE, + DEBUG_BREAK_TRUE_I8, + DEBUG_BREAK_TRUE_I16, + DEBUG_BREAK_TRUE_I32, + DEBUG_BREAK_TRUE_I64, + DEBUG_BREAK_TRUE_F32, + DEBUG_BREAK_TRUE_F64); + + +// ============================================================================ +// OPCODE_TRAP +// ============================================================================ +EMITTER(TRAP, MATCH(I)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + e.Trap(); + } +}; +EMITTER_OPCODE_TABLE( + OPCODE_TRAP, + TRAP); + + +// ============================================================================ +// OPCODE_TRAP_TRUE +// ============================================================================ +EMITTER(TRAP_TRUE_I8, MATCH(I>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + e.test(i.src1, i.src1); + Xbyak::Label skip; + e.jz(skip); + e.Trap(); + e.L(skip); + } +}; +EMITTER(TRAP_TRUE_I16, MATCH(I>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + e.test(i.src1, i.src1); + Xbyak::Label skip; + e.jz(skip); + e.Trap(); + e.L(skip); + } +}; +EMITTER(TRAP_TRUE_I32, MATCH(I>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + e.test(i.src1, i.src1); + Xbyak::Label skip; + e.jz(skip); + e.Trap(); + e.L(skip); + } +}; +EMITTER(TRAP_TRUE_I64, MATCH(I>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + e.test(i.src1, i.src1); + Xbyak::Label skip; + e.jz(skip); + e.Trap(); + e.L(skip); + } +}; +EMITTER(TRAP_TRUE_F32, MATCH(I>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + e.vptest(i.src1, i.src1); + Xbyak::Label skip; + e.jz(skip); + e.Trap(); + e.L(skip); + } +}; +EMITTER(TRAP_TRUE_F64, MATCH(I>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + e.vptest(i.src1, i.src1); + Xbyak::Label skip; + e.jz(skip); + e.Trap(); + e.L(skip); + } +}; +EMITTER_OPCODE_TABLE( + OPCODE_TRAP_TRUE, + TRAP_TRUE_I8, + TRAP_TRUE_I16, + TRAP_TRUE_I32, + TRAP_TRUE_I64, + TRAP_TRUE_F32, + TRAP_TRUE_F64); + + +// ============================================================================ +// OPCODE_CALL +// ============================================================================ +EMITTER(CALL, MATCH(I)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + e.Call(i.instr, i.src1.value); + } +}; +EMITTER_OPCODE_TABLE( + OPCODE_CALL, + CALL); + + +// ============================================================================ +// OPCODE_CALL_TRUE +// ============================================================================ +EMITTER(CALL_TRUE_I8, MATCH(I, SymbolOp>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + e.test(i.src1, i.src1); + Xbyak::Label skip; + e.jz(skip); + e.Call(i.instr, i.src2.value); + e.L(skip); + } +}; +EMITTER(CALL_TRUE_I16, MATCH(I, SymbolOp>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + e.test(i.src1, i.src1); + Xbyak::Label skip; + e.jz(skip); + e.Call(i.instr, i.src2.value); + e.L(skip); + } +}; +EMITTER(CALL_TRUE_I32, MATCH(I, SymbolOp>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + e.test(i.src1, i.src1); 
+ Xbyak::Label skip; + e.jz(skip); + e.Call(i.instr, i.src2.value); + e.L(skip); + } +}; +EMITTER(CALL_TRUE_I64, MATCH(I, SymbolOp>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + e.test(i.src1, i.src1); + Xbyak::Label skip; + e.jz(skip); + e.Call(i.instr, i.src2.value); + e.L(skip); + } +}; +EMITTER(CALL_TRUE_F32, MATCH(I, SymbolOp>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + e.vptest(i.src1, i.src1); + Xbyak::Label skip; + e.jz(skip); + e.Call(i.instr, i.src2.value); + e.L(skip); + } +}; +EMITTER(CALL_TRUE_F64, MATCH(I, SymbolOp>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + e.vptest(i.src1, i.src1); + Xbyak::Label skip; + e.jz(skip); + e.Call(i.instr, i.src2.value); + e.L(skip); + } +}; +EMITTER_OPCODE_TABLE( + OPCODE_CALL_TRUE, + CALL_TRUE_I8, + CALL_TRUE_I16, + CALL_TRUE_I32, + CALL_TRUE_I64, + CALL_TRUE_F32, + CALL_TRUE_F64); + + +// ============================================================================ +// OPCODE_CALL_INDIRECT +// ============================================================================ +EMITTER(CALL_INDIRECT, MATCH(I>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + e.CallIndirect(i.instr, i.src1); + } +}; +EMITTER_OPCODE_TABLE( + OPCODE_CALL_INDIRECT, + CALL_INDIRECT); + + +// ============================================================================ +// OPCODE_CALL_INDIRECT_TRUE +// ============================================================================ +EMITTER(CALL_INDIRECT_TRUE_I8, MATCH(I, I64<>>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + e.test(i.src1, i.src1); + Xbyak::Label skip; + e.jz(skip); + e.CallIndirect(i.instr, i.src2); + e.L(skip); + } +}; +EMITTER(CALL_INDIRECT_TRUE_I16, MATCH(I, I64<>>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + e.test(i.src1, i.src1); + Xbyak::Label skip; + e.jz(skip); + e.CallIndirect(i.instr, i.src2); + e.L(skip); + } +}; +EMITTER(CALL_INDIRECT_TRUE_I32, MATCH(I, I64<>>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + e.test(i.src1, i.src1); + Xbyak::Label skip; + e.jz(skip); + e.CallIndirect(i.instr, i.src2); + e.L(skip); + } +}; +EMITTER(CALL_INDIRECT_TRUE_I64, MATCH(I, I64<>>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + e.test(i.src1, i.src1); + Xbyak::Label skip; + e.jz(skip); + e.CallIndirect(i.instr, i.src2); + e.L(skip); + } +}; +EMITTER(CALL_INDIRECT_TRUE_F32, MATCH(I, I64<>>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + e.vptest(i.src1, i.src1); + Xbyak::Label skip; + e.jz(skip); + e.CallIndirect(i.instr, i.src2); + e.L(skip); + } +}; +EMITTER(CALL_INDIRECT_TRUE_F64, MATCH(I, I64<>>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + e.vptest(i.src1, i.src1); + Xbyak::Label skip; + e.jz(skip); + e.CallIndirect(i.instr, i.src2); + e.L(skip); + } +}; +EMITTER_OPCODE_TABLE( + OPCODE_CALL_INDIRECT_TRUE, + CALL_INDIRECT_TRUE_I8, + CALL_INDIRECT_TRUE_I16, + CALL_INDIRECT_TRUE_I32, + CALL_INDIRECT_TRUE_I64, + CALL_INDIRECT_TRUE_F32, + CALL_INDIRECT_TRUE_F64); + + +// ============================================================================ +// OPCODE_CALL_EXTERN +// ============================================================================ +EMITTER(CALL_EXTERN, MATCH(I)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + e.CallExtern(i.instr, i.src1.value); + } +}; +EMITTER_OPCODE_TABLE( + OPCODE_CALL_EXTERN, + CALL_EXTERN); + + +// ============================================================================ +// 
OPCODE_RETURN +// ============================================================================ +EMITTER(RETURN, MATCH(I)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + // If this is the last instruction in the last block, just let us + // fall through. + if (i.instr->next || i.instr->block->next) { + e.jmp("epilog", CodeGenerator::T_NEAR); + } + } +}; +EMITTER_OPCODE_TABLE( + OPCODE_RETURN, + RETURN); + + +// ============================================================================ +// OPCODE_RETURN_TRUE +// ============================================================================ +EMITTER(RETURN_TRUE_I8, MATCH(I>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + e.test(i.src1, i.src1); + e.jnz("epilog", CodeGenerator::T_NEAR); + } +}; +EMITTER(RETURN_TRUE_I16, MATCH(I>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + e.test(i.src1, i.src1); + e.jnz("epilog", CodeGenerator::T_NEAR); + } +}; +EMITTER(RETURN_TRUE_I32, MATCH(I>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + e.test(i.src1, i.src1); + e.jnz("epilog", CodeGenerator::T_NEAR); + } +}; +EMITTER(RETURN_TRUE_I64, MATCH(I>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + e.test(i.src1, i.src1); + e.jnz("epilog", CodeGenerator::T_NEAR); + } +}; +EMITTER(RETURN_TRUE_F32, MATCH(I>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + e.vptest(i.src1, i.src1); + e.jnz("epilog", CodeGenerator::T_NEAR); + } +}; +EMITTER(RETURN_TRUE_F64, MATCH(I>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + e.vptest(i.src1, i.src1); + e.jnz("epilog", CodeGenerator::T_NEAR); + } +}; +EMITTER_OPCODE_TABLE( + OPCODE_RETURN_TRUE, + RETURN_TRUE_I8, + RETURN_TRUE_I16, + RETURN_TRUE_I32, + RETURN_TRUE_I64, + RETURN_TRUE_F32, + RETURN_TRUE_F64); + + +// ============================================================================ +// OPCODE_SET_RETURN_ADDRESS +// ============================================================================ +EMITTER(SET_RETURN_ADDRESS, MATCH(I>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + e.SetReturnAddress(i.src1.constant()); + } +}; +EMITTER_OPCODE_TABLE( + OPCODE_SET_RETURN_ADDRESS, + SET_RETURN_ADDRESS); + + +// ============================================================================ +// OPCODE_BRANCH +// ============================================================================ +EMITTER(BRANCH, MATCH(I)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + e.jmp(i.src1.value->name, e.T_NEAR); + } +}; +EMITTER_OPCODE_TABLE( + OPCODE_BRANCH, + BRANCH); + + +// ============================================================================ +// OPCODE_BRANCH_TRUE +// ============================================================================ +EMITTER(BRANCH_TRUE_I8, MATCH(I, LabelOp>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + e.test(i.src1, i.src1); + e.jnz(i.src2.value->name, e.T_NEAR); + } +}; +EMITTER(BRANCH_TRUE_I16, MATCH(I, LabelOp>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + e.test(i.src1, i.src1); + e.jnz(i.src2.value->name, e.T_NEAR); + } +}; +EMITTER(BRANCH_TRUE_I32, MATCH(I, LabelOp>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + e.test(i.src1, i.src1); + e.jnz(i.src2.value->name, e.T_NEAR); + } +}; +EMITTER(BRANCH_TRUE_I64, MATCH(I, LabelOp>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + e.test(i.src1, i.src1); + e.jnz(i.src2.value->name, e.T_NEAR); + } +}; +EMITTER(BRANCH_TRUE_F32, MATCH(I, LabelOp>)) { + 
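  // Float conditions live in XMM registers, so the integer test instruction
  // cannot be used: vptest ANDs the value with itself and sets ZF only when all
  // bits are zero, giving the same "any set bit means taken" behavior as the
  // integer forms. Note this is a raw bit test, not a floating-point compare.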
static void Emit(X64Emitter& e, const EmitArgType& i) { + e.vptest(i.src1, i.src1); + e.jnz(i.src2.value->name, e.T_NEAR); + } +}; +EMITTER(BRANCH_TRUE_F64, MATCH(I, LabelOp>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + e.vptest(i.src1, i.src1); + e.jnz(i.src2.value->name, e.T_NEAR); + } +}; +EMITTER_OPCODE_TABLE( + OPCODE_BRANCH_TRUE, + BRANCH_TRUE_I8, + BRANCH_TRUE_I16, + BRANCH_TRUE_I32, + BRANCH_TRUE_I64, + BRANCH_TRUE_F32, + BRANCH_TRUE_F64); + + +// ============================================================================ +// OPCODE_BRANCH_FALSE +// ============================================================================ +EMITTER(BRANCH_FALSE_I8, MATCH(I, LabelOp>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + e.test(i.src1, i.src1); + e.jz(i.src2.value->name, e.T_NEAR); + } +}; +EMITTER(BRANCH_FALSE_I16, MATCH(I, LabelOp>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + e.test(i.src1, i.src1); + e.jz(i.src2.value->name, e.T_NEAR); + } +}; +EMITTER(BRANCH_FALSE_I32, MATCH(I, LabelOp>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + e.test(i.src1, i.src1); + e.jz(i.src2.value->name, e.T_NEAR); + } +}; +EMITTER(BRANCH_FALSE_I64, MATCH(I, LabelOp>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + e.test(i.src1, i.src1); + e.jz(i.src2.value->name, e.T_NEAR); + } +}; +EMITTER(BRANCH_FALSE_F32, MATCH(I, LabelOp>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + e.vptest(i.src1, i.src1); + e.jz(i.src2.value->name, e.T_NEAR); + } +}; +EMITTER(BRANCH_FALSE_F64, MATCH(I, LabelOp>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + e.vptest(i.src1, i.src1); + e.jz(i.src2.value->name, e.T_NEAR); + } +}; +EMITTER_OPCODE_TABLE( + OPCODE_BRANCH_FALSE, + BRANCH_FALSE_I8, + BRANCH_FALSE_I16, + BRANCH_FALSE_I32, + BRANCH_FALSE_I64, + BRANCH_FALSE_F32, + BRANCH_FALSE_F64); + + +// ============================================================================ +// OPCODE_ASSIGN +// ============================================================================ +EMITTER(ASSIGN_I8, MATCH(I, I8<>>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + e.mov(i.dest, i.src1); + } +}; +EMITTER(ASSIGN_I16, MATCH(I, I16<>>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + e.mov(i.dest, i.src1); + } +}; +EMITTER(ASSIGN_I32, MATCH(I, I32<>>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + e.mov(i.dest, i.src1); + } +}; +EMITTER(ASSIGN_I64, MATCH(I, I64<>>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + e.mov(i.dest, i.src1); + } +}; +EMITTER(ASSIGN_F32, MATCH(I, F32<>>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + e.vmovaps(i.dest, i.src1); + } +}; +EMITTER(ASSIGN_F64, MATCH(I, F64<>>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + e.vmovaps(i.dest, i.src1); + } +}; +EMITTER(ASSIGN_V128, MATCH(I, V128<>>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + e.vmovaps(i.dest, i.src1); + } +}; +EMITTER_OPCODE_TABLE( + OPCODE_ASSIGN, + ASSIGN_I8, + ASSIGN_I16, + ASSIGN_I32, + ASSIGN_I64, + ASSIGN_F32, + ASSIGN_F64, + ASSIGN_V128); + + +// ============================================================================ +// OPCODE_CAST +// ============================================================================ +EMITTER(CAST_I32_F32, MATCH(I, F32<>>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + e.vmovd(i.dest, i.src1); + } +}; +EMITTER(CAST_I64_F64, MATCH(I, F64<>>)) { + static void Emit(X64Emitter& 
e, const EmitArgType& i) { + e.vmovq(i.dest, i.src1); + } +}; +EMITTER(CAST_F32_I32, MATCH(I, I32<>>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + e.vmovd(i.dest, i.src1); + } +}; +EMITTER(CAST_F64_I64, MATCH(I, I64<>>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + e.vmovq(i.dest, i.src1); + } +}; +EMITTER_OPCODE_TABLE( + OPCODE_CAST, + CAST_I32_F32, + CAST_I64_F64, + CAST_F32_I32, + CAST_F64_I64); + + +// ============================================================================ +// OPCODE_ZERO_EXTEND +// ============================================================================ +EMITTER(ZERO_EXTEND_I16_I8, MATCH(I, I8<>>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + e.movzx(i.dest, i.src1); + } +}; +EMITTER(ZERO_EXTEND_I32_I8, MATCH(I, I8<>>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + e.movzx(i.dest, i.src1); + } +}; +EMITTER(ZERO_EXTEND_I64_I8, MATCH(I, I8<>>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + e.movzx(i.dest, i.src1); + } +}; +EMITTER(ZERO_EXTEND_I32_I16, MATCH(I, I16<>>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + e.movzx(i.dest, i.src1); + } +}; +EMITTER(ZERO_EXTEND_I64_I16, MATCH(I, I16<>>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + e.movzx(i.dest, i.src1); + } +}; +EMITTER(ZERO_EXTEND_I64_I32, MATCH(I, I32<>>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + e.mov(i.dest.reg().cvt32(), i.src1); + } +}; +EMITTER_OPCODE_TABLE( + OPCODE_ZERO_EXTEND, + ZERO_EXTEND_I16_I8, + ZERO_EXTEND_I32_I8, + ZERO_EXTEND_I64_I8, + ZERO_EXTEND_I32_I16, + ZERO_EXTEND_I64_I16, + ZERO_EXTEND_I64_I32); + + +// ============================================================================ +// OPCODE_SIGN_EXTEND +// ============================================================================ +EMITTER(SIGN_EXTEND_I16_I8, MATCH(I, I8<>>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + e.movsx(i.dest, i.src1); + } +}; +EMITTER(SIGN_EXTEND_I32_I8, MATCH(I, I8<>>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + e.movsx(i.dest, i.src1); + } +}; +EMITTER(SIGN_EXTEND_I64_I8, MATCH(I, I8<>>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + e.movsx(i.dest, i.src1); + } +}; +EMITTER(SIGN_EXTEND_I32_I16, MATCH(I, I16<>>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + e.movsx(i.dest, i.src1); + } +}; +EMITTER(SIGN_EXTEND_I64_I16, MATCH(I, I16<>>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + e.movsx(i.dest, i.src1); + } +}; +EMITTER(SIGN_EXTEND_I64_I32, MATCH(I, I32<>>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + e.movsxd(i.dest, i.src1); + } +}; +EMITTER_OPCODE_TABLE( + OPCODE_SIGN_EXTEND, + SIGN_EXTEND_I16_I8, + SIGN_EXTEND_I32_I8, + SIGN_EXTEND_I64_I8, + SIGN_EXTEND_I32_I16, + SIGN_EXTEND_I64_I16, + SIGN_EXTEND_I64_I32); + + +// ============================================================================ +// OPCODE_TRUNCATE +// ============================================================================ +EMITTER(TRUNCATE_I8_I16, MATCH(I, I16<>>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + e.movzx(i.dest.reg().cvt32(), i.src1.reg().cvt8()); + } +}; +EMITTER(TRUNCATE_I8_I32, MATCH(I, I32<>>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + e.movzx(i.dest.reg().cvt32(), i.src1.reg().cvt8()); + } +}; +EMITTER(TRUNCATE_I8_I64, MATCH(I, I64<>>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + e.movzx(i.dest.reg().cvt32(), 
i.src1.reg().cvt8()); + } +}; +EMITTER(TRUNCATE_I16_I32, MATCH(I, I32<>>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + e.movzx(i.dest.reg().cvt32(), i.src1.reg().cvt16()); + } +}; +EMITTER(TRUNCATE_I16_I64, MATCH(I, I64<>>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + e.movzx(i.dest.reg().cvt32(), i.src1.reg().cvt16()); + } +}; +EMITTER(TRUNCATE_I32_I64, MATCH(I, I64<>>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + e.mov(i.dest, i.src1.reg().cvt32()); + } +}; +EMITTER_OPCODE_TABLE( + OPCODE_TRUNCATE, + TRUNCATE_I8_I16, + TRUNCATE_I8_I32, + TRUNCATE_I8_I64, + TRUNCATE_I16_I32, + TRUNCATE_I16_I64, + TRUNCATE_I32_I64); + + +// ============================================================================ +// OPCODE_CONVERT +// ============================================================================ +EMITTER(CONVERT_I32_F32, MATCH(I, F32<>>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + // TODO(benvanik): saturation check? cvtt* (trunc?) + e.vcvtss2si(i.dest, i.src1); + } +}; +EMITTER(CONVERT_I32_F64, MATCH(I, F64<>>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + // TODO(benvanik): saturation check? cvtt* (trunc?) + e.vcvttsd2si(i.dest, i.src1); + } +}; +EMITTER(CONVERT_I64_F64, MATCH(I, F64<>>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + // TODO(benvanik): saturation check? cvtt* (trunc?) + e.vcvttsd2si(i.dest, i.src1); + } +}; +EMITTER(CONVERT_F32_I32, MATCH(I, I32<>>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + // TODO(benvanik): saturation check? cvtt* (trunc?) + e.vcvtsi2ss(i.dest, i.src1); + } +}; +EMITTER(CONVERT_F32_F64, MATCH(I, F64<>>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + // TODO(benvanik): saturation check? cvtt* (trunc?) + e.vcvtsd2ss(i.dest, i.src1); + } +}; +EMITTER(CONVERT_F64_I64, MATCH(I, I64<>>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + // TODO(benvanik): saturation check? cvtt* (trunc?) 
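    // This direction (i64 -> f64) cannot overflow, so the saturation question
    // above does not apply here; vcvtsi2sd is exact for magnitudes up to 2^53
    // and rounds per MXCSR beyond that. The cvtt*/truncation concern only
    // affects the float -> int conversions handled earlier in this table.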
+ e.vcvtsi2sd(i.dest, i.src1); + } +}; +EMITTER(CONVERT_F64_F32, MATCH(I, F32<>>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + e.vcvtss2sd(i.dest, i.src1); + } +}; +EMITTER_OPCODE_TABLE( + OPCODE_CONVERT, + CONVERT_I32_F32, + CONVERT_I32_F64, + CONVERT_I64_F64, + CONVERT_F32_I32, + CONVERT_F32_F64, + CONVERT_F64_I64, + CONVERT_F64_F32); + + +// ============================================================================ +// OPCODE_ROUND +// ============================================================================ +EMITTER(ROUND_F32, MATCH(I, F32<>>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + switch (i.instr->flags) { + case ROUND_TO_ZERO: + e.vroundss(i.dest, i.src1, B00000011); + break; + case ROUND_TO_NEAREST: + e.vroundss(i.dest, i.src1, B00000000); + break; + case ROUND_TO_MINUS_INFINITY: + e.vroundss(i.dest, i.src1, B00000001); + break; + case ROUND_TO_POSITIVE_INFINITY: + e.vroundss(i.dest, i.src1, B00000010); + break; + } + } +}; +EMITTER(ROUND_F64, MATCH(I, F64<>>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + switch (i.instr->flags) { + case ROUND_TO_ZERO: + e.vroundsd(i.dest, i.src1, B00000011); + break; + case ROUND_TO_NEAREST: + e.vroundsd(i.dest, i.src1, B00000000); + break; + case ROUND_TO_MINUS_INFINITY: + e.vroundsd(i.dest, i.src1, B00000001); + break; + case ROUND_TO_POSITIVE_INFINITY: + e.vroundsd(i.dest, i.src1, B00000010); + break; + } + } +}; +EMITTER(ROUND_V128, MATCH(I, V128<>>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + switch (i.instr->flags) { + case ROUND_TO_ZERO: + e.vroundps(i.dest, i.src1, B00000011); + break; + case ROUND_TO_NEAREST: + e.vroundps(i.dest, i.src1, B00000000); + break; + case ROUND_TO_MINUS_INFINITY: + e.vroundps(i.dest, i.src1, B00000001); + break; + case ROUND_TO_POSITIVE_INFINITY: + e.vroundps(i.dest, i.src1, B00000010); + break; + } + } +}; +EMITTER_OPCODE_TABLE( + OPCODE_ROUND, + ROUND_F32, + ROUND_F64, + ROUND_V128); + + +// ============================================================================ +// OPCODE_VECTOR_CONVERT_I2F +// ============================================================================ +EMITTER(VECTOR_CONVERT_I2F, MATCH(I, V128<>>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + // flags = ARITHMETIC_UNSIGNED + // TODO(benvanik): are these really the same? VC++ thinks so. + e.vcvtdq2ps(i.dest, i.src1); + } +}; +EMITTER_OPCODE_TABLE( + OPCODE_VECTOR_CONVERT_I2F, + VECTOR_CONVERT_I2F); + + +// ============================================================================ +// OPCODE_VECTOR_CONVERT_F2I +// ============================================================================ +EMITTER(VECTOR_CONVERT_F2I, MATCH(I, V128<>>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + // flags = ARITHMETIC_UNSIGNED | ARITHMETIC_UNSIGNED + // TODO(benvanik): are these really the same? VC++ thinks so. + e.vcvttps2dq(i.dest, i.src1); + if (i.instr->flags & ARITHMETIC_SATURATE) { + // TODO(benvanik): check saturation. 
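      // vcvttps2dq does not clamp: NaN and out-of-range lanes come back as the
      // "integer indefinite" value 0x80000000. A real ARITHMETIC_SATURATE path
      // would need to detect those lanes and substitute INT32_MAX/INT32_MIN (or
      // the unsigned limits for the unsigned form), so for now the case is
      // reported as unimplemented.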
+ e.UnimplementedInstr(i.instr); + } + } +}; +EMITTER_OPCODE_TABLE( + OPCODE_VECTOR_CONVERT_F2I, + VECTOR_CONVERT_F2I); + + +// ============================================================================ +// OPCODE_LOAD_VECTOR_SHL +// ============================================================================ +static vec128_t lvsl_table[17] = { + vec128b( 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15), + vec128b( 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16), + vec128b( 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17), + vec128b( 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18), + vec128b( 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19), + vec128b( 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20), + vec128b( 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21), + vec128b( 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22), + vec128b( 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23), + vec128b( 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24), + vec128b(10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25), + vec128b(11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26), + vec128b(12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27), + vec128b(13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28), + vec128b(14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29), + vec128b(15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30), + vec128b(16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31), +}; +EMITTER(LOAD_VECTOR_SHL_I8, MATCH(I, I8<>>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + if (i.src1.is_constant) { + auto sh = i.src1.constant(); + XEASSERT(sh < XECOUNT(lvsl_table)); + e.mov(e.rax, (uintptr_t)&lvsl_table[sh]); + e.vmovaps(i.dest, e.ptr[e.rax]); + } else { +#if XE_DEBUG + // We should only ever be getting values in [0,16]. Assert that. + Xbyak::Label skip; + e.cmp(i.src1, 17); + e.jb(skip); + e.Trap(); + e.L(skip); +#endif // XE_DEBUG + // TODO(benvanik): find a cheaper way of doing this. 
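      // Each lvsl_table entry is 16 bytes, so the shift amount is scaled by 16
      // (shl 4) to form a byte offset into the table for the vmovaps load.
      // rdx is borrowed as the index register and restored afterwards via
      // ReloadEDX(), since the backend normally keeps the guest memory base
      // there (see ComputeMemoryAddress further down).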
+ e.movzx(e.rdx, i.src1); + e.shl(e.rdx, 4); + e.mov(e.rax, (uintptr_t)lvsl_table); + e.vmovaps(i.dest, e.ptr[e.rax + e.rdx]); + e.ReloadEDX(); + } + } +}; +EMITTER_OPCODE_TABLE( + OPCODE_LOAD_VECTOR_SHL, + LOAD_VECTOR_SHL_I8); + + +// ============================================================================ +// OPCODE_LOAD_VECTOR_SHR +// ============================================================================ +static vec128_t lvsr_table[17] = { + vec128b(16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31), + vec128b(15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30), + vec128b(14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29), + vec128b(13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28), + vec128b(12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27), + vec128b(11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26), + vec128b(10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25), + vec128b( 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24), + vec128b( 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23), + vec128b( 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22), + vec128b( 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21), + vec128b( 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20), + vec128b( 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19), + vec128b( 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18), + vec128b( 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17), + vec128b( 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16), + vec128b( 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15), +}; +EMITTER(LOAD_VECTOR_SHR_I8, MATCH(I, I8<>>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + if (i.src1.is_constant) { + auto sh = i.src1.constant(); + XEASSERT(sh < XECOUNT(lvsr_table)); + e.mov(e.rax, (uintptr_t)&lvsr_table[sh]); + e.vmovaps(i.dest, e.ptr[e.rax]); + } else { +#if XE_DEBUG + // We should only ever be getting values in [0,16]. Assert that. + Xbyak::Label skip; + e.cmp(i.src1, 17); + e.jb(skip); + e.Trap(); + e.L(skip); +#endif // XE_DEBUG + // TODO(benvanik): find a cheaper way of doing this. + e.movzx(e.rdx, i.src1); + e.shl(e.rdx, 4); + e.mov(e.rax, (uintptr_t)lvsr_table); + e.vmovaps(i.dest, e.ptr[e.rax + e.rdx]); + e.ReloadEDX(); + } + } +}; +EMITTER_OPCODE_TABLE( + OPCODE_LOAD_VECTOR_SHR, + LOAD_VECTOR_SHR_I8); + + +// ============================================================================ +// OPCODE_LOAD_CLOCK +// ============================================================================ +EMITTER(LOAD_CLOCK, MATCH(I>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + // It'd be cool to call QueryPerformanceCounter directly, but w/e. + e.CallNative(LoadClock); + e.mov(i.dest, e.rax); + } + static uint64_t LoadClock(void* raw_context) { + LARGE_INTEGER counter; + uint64_t time = 0; + if (QueryPerformanceCounter(&counter)) { + time = counter.QuadPart; + } + return time; + } +}; +EMITTER_OPCODE_TABLE( + OPCODE_LOAD_CLOCK, + LOAD_CLOCK); + + +// ============================================================================ +// OPCODE_LOAD_LOCAL +// ============================================================================ +// Note: all types are always aligned on the stack. 
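// The src1 operand of LOAD_LOCAL is expected to be a constant byte offset into
// the function's stack frame (every form below calls i.src1.constant()
// unconditionally), so each load is a single rsp-relative move; e.g. an i64
// local at offset 0x20 becomes roughly "mov <dest>, qword [rsp+0x20]". The
// commented-out TraceLoad* calls are hooks for the x64_tracers path.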
+EMITTER(LOAD_LOCAL_I8, MATCH(I, I32<>>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + e.mov(i.dest, e.byte[e.rsp + i.src1.constant()]); + //e.TraceLoadI8(DATA_LOCAL, i.src1.constant, i.dest); + } +}; +EMITTER(LOAD_LOCAL_I16, MATCH(I, I32<>>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + e.mov(i.dest, e.word[e.rsp + i.src1.constant()]); + //e.TraceLoadI16(DATA_LOCAL, i.src1.constant, i.dest); + } +}; +EMITTER(LOAD_LOCAL_I32, MATCH(I, I32<>>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + e.mov(i.dest, e.dword[e.rsp + i.src1.constant()]); + //e.TraceLoadI32(DATA_LOCAL, i.src1.constant, i.dest); + } +}; +EMITTER(LOAD_LOCAL_I64, MATCH(I, I32<>>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + e.mov(i.dest, e.qword[e.rsp + i.src1.constant()]); + //e.TraceLoadI64(DATA_LOCAL, i.src1.constant, i.dest); + } +}; +EMITTER(LOAD_LOCAL_F32, MATCH(I, I32<>>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + e.vmovss(i.dest, e.dword[e.rsp + i.src1.constant()]); + //e.TraceLoadF32(DATA_LOCAL, i.src1.constant, i.dest); + } +}; +EMITTER(LOAD_LOCAL_F64, MATCH(I, I32<>>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + e.vmovsd(i.dest, e.qword[e.rsp + i.src1.constant()]); + //e.TraceLoadF64(DATA_LOCAL, i.src1.constant, i.dest); + } +}; +EMITTER(LOAD_LOCAL_V128, MATCH(I, I32<>>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + e.vmovaps(i.dest, e.ptr[e.rsp + i.src1.constant()]); + //e.TraceLoadV128(DATA_LOCAL, i.src1.constant, i.dest); + } +}; +EMITTER_OPCODE_TABLE( + OPCODE_LOAD_LOCAL, + LOAD_LOCAL_I8, + LOAD_LOCAL_I16, + LOAD_LOCAL_I32, + LOAD_LOCAL_I64, + LOAD_LOCAL_F32, + LOAD_LOCAL_F64, + LOAD_LOCAL_V128); + + +// ============================================================================ +// OPCODE_STORE_LOCAL +// ============================================================================ +// Note: all types are always aligned on the stack. 
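// Mirror image of LOAD_LOCAL above. Note there is no is_constant fast path
// here (unlike STORE_CONTEXT/STORE below), so a constant source value appears
// to be expected to already live in a register by the time lowering gets here.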
+EMITTER(STORE_LOCAL_I8, MATCH(I, I8<>>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + //e.TraceStoreI8(DATA_LOCAL, i.src1.constant, i.src2); + e.mov(e.byte[e.rsp + i.src1.constant()], i.src2); + } +}; +EMITTER(STORE_LOCAL_I16, MATCH(I, I16<>>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + //e.TraceStoreI16(DATA_LOCAL, i.src1.constant, i.src2); + e.mov(e.word[e.rsp + i.src1.constant()], i.src2); + } +}; +EMITTER(STORE_LOCAL_I32, MATCH(I, I32<>>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + //e.TraceStoreI32(DATA_LOCAL, i.src1.constant, i.src2); + e.mov(e.dword[e.rsp + i.src1.constant()], i.src2); + } +}; +EMITTER(STORE_LOCAL_I64, MATCH(I, I64<>>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + //e.TraceStoreI64(DATA_LOCAL, i.src1.constant, i.src2); + e.mov(e.qword[e.rsp + i.src1.constant()], i.src2); + } +}; +EMITTER(STORE_LOCAL_F32, MATCH(I, F32<>>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + //e.TraceStoreF32(DATA_LOCAL, i.src1.constant, i.src2); + e.vmovss(e.dword[e.rsp + i.src1.constant()], i.src2); + } +}; +EMITTER(STORE_LOCAL_F64, MATCH(I, F64<>>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + //e.TraceStoreF64(DATA_LOCAL, i.src1.constant, i.src2); + e.vmovsd(e.qword[e.rsp + i.src1.constant()], i.src2); + } +}; +EMITTER(STORE_LOCAL_V128, MATCH(I, V128<>>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + //e.TraceStoreV128(DATA_LOCAL, i.src1.constant, i.src2); + e.vmovaps(e.ptr[e.rsp + i.src1.constant()], i.src2); + } +}; +EMITTER_OPCODE_TABLE( + OPCODE_STORE_LOCAL, + STORE_LOCAL_I8, + STORE_LOCAL_I16, + STORE_LOCAL_I32, + STORE_LOCAL_I64, + STORE_LOCAL_F32, + STORE_LOCAL_F64, + STORE_LOCAL_V128); + + +// ============================================================================ +// OPCODE_LOAD_CONTEXT +// ============================================================================ +// Note: all types are always aligned in the context. 
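// Illustrative note (not from the patch): rcx is assumed to hold the guest
// context pointer for the lifetime of the generated function, which is why
// ComputeContextAddress below can simply return rcx + offset. OffsetOp values
// are byte offsets into that context structure, typically produced by the
// frontend with offsetof; a hypothetical example (field names illustrative):
//
//   builder.LoadContext(offsetof(PPCContext, r[5]), INT64_TYPE);
//
// would reach LOAD_CONTEXT_I64 and lower to roughly:
//
//   mov <dest>, qword [rcx + offsetof(PPCContext, r[5])]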
+RegExp ComputeContextAddress(X64Emitter& e, const OffsetOp& offset) { + return e.rcx + offset.value; +} +EMITTER(LOAD_CONTEXT_I8, MATCH(I, OffsetOp>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + auto addr = ComputeContextAddress(e, i.src1); + e.mov(i.dest, e.byte[addr]); + if (IsTracingData()) { + e.mov(e.r8, e.byte[addr]); + e.mov(e.rdx, i.src1.value); + e.CallNative(TraceContextLoadI8); + } + } +}; +EMITTER(LOAD_CONTEXT_I16, MATCH(I, OffsetOp>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + auto addr = ComputeContextAddress(e, i.src1); + e.mov(i.dest, e.word[addr]); + if (IsTracingData()) { + e.mov(e.r8, e.word[addr]); + e.mov(e.rdx, i.src1.value); + e.CallNative(TraceContextLoadI16); + } + } +}; +EMITTER(LOAD_CONTEXT_I32, MATCH(I, OffsetOp>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + auto addr = ComputeContextAddress(e, i.src1); + e.mov(i.dest, e.dword[addr]); + if (IsTracingData()) { + e.mov(e.r8, e.dword[addr]); + e.mov(e.rdx, i.src1.value); + e.CallNative(TraceContextLoadI32); + } + } +}; +EMITTER(LOAD_CONTEXT_I64, MATCH(I, OffsetOp>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + auto addr = ComputeContextAddress(e, i.src1); + e.mov(i.dest, e.qword[addr]); + if (IsTracingData()) { + e.mov(e.r8, e.qword[addr]); + e.mov(e.rdx, i.src1.value); + e.CallNative(TraceContextLoadI64); + } + } +}; +EMITTER(LOAD_CONTEXT_F32, MATCH(I, OffsetOp>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + auto addr = ComputeContextAddress(e, i.src1); + e.vmovss(i.dest, e.dword[addr]); + if (IsTracingData()) { + e.lea(e.r8, e.dword[addr]); + e.mov(e.rdx, i.src1.value); + e.CallNative(TraceContextLoadF32); + } + } +}; +EMITTER(LOAD_CONTEXT_F64, MATCH(I, OffsetOp>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + auto addr = ComputeContextAddress(e, i.src1); + e.vmovsd(i.dest, e.qword[addr]); + if (IsTracingData()) { + e.lea(e.r8, e.qword[addr]); + e.mov(e.rdx, i.src1.value); + e.CallNative(TraceContextLoadF64); + } + } +}; +EMITTER(LOAD_CONTEXT_V128, MATCH(I, OffsetOp>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + auto addr = ComputeContextAddress(e, i.src1); + e.vmovaps(i.dest, e.ptr[addr]); + if (IsTracingData()) { + e.lea(e.r8, e.ptr[addr]); + e.mov(e.rdx, i.src1.value); + e.CallNative(TraceContextLoadV128); + } + } +}; +EMITTER_OPCODE_TABLE( + OPCODE_LOAD_CONTEXT, + LOAD_CONTEXT_I8, + LOAD_CONTEXT_I16, + LOAD_CONTEXT_I32, + LOAD_CONTEXT_I64, + LOAD_CONTEXT_F32, + LOAD_CONTEXT_F64, + LOAD_CONTEXT_V128); + + +// ============================================================================ +// OPCODE_STORE_CONTEXT +// ============================================================================ +// Note: all types are always aligned on the stack. 
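// x64 has no "mov m64, imm64" encoding (stores to memory take at most a
// sign-extended imm32), so the 64-bit constant paths below go through the
// MovMem64 helper while 8/16/32-bit constants can be stored directly.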
+EMITTER(STORE_CONTEXT_I8, MATCH(I>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + auto addr = ComputeContextAddress(e, i.src1); + if (i.src2.is_constant) { + e.mov(e.byte[addr], i.src2.constant()); + } else { + e.mov(e.byte[addr], i.src2); + } + if (IsTracingData()) { + e.mov(e.r8, e.byte[addr]); + e.mov(e.rdx, i.src1.value); + e.CallNative(TraceContextStoreI8); + } + } +}; +EMITTER(STORE_CONTEXT_I16, MATCH(I>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + auto addr = ComputeContextAddress(e, i.src1); + if (i.src2.is_constant) { + e.mov(e.word[addr], i.src2.constant()); + } else { + e.mov(e.word[addr], i.src2); + } + if (IsTracingData()) { + e.mov(e.r8, e.word[addr]); + e.mov(e.rdx, i.src1.value); + e.CallNative(TraceContextStoreI16); + } + } +}; +EMITTER(STORE_CONTEXT_I32, MATCH(I>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + auto addr = ComputeContextAddress(e, i.src1); + if (i.src2.is_constant) { + e.mov(e.dword[addr], i.src2.constant()); + } else { + e.mov(e.dword[addr], i.src2); + } + if (IsTracingData()) { + e.mov(e.r8, e.dword[addr]); + e.mov(e.rdx, i.src1.value); + e.CallNative(TraceContextStoreI32); + } + } +}; +EMITTER(STORE_CONTEXT_I64, MATCH(I>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + auto addr = ComputeContextAddress(e, i.src1); + if (i.src2.is_constant) { + e.MovMem64(addr, i.src2.constant()); + } else { + e.mov(e.qword[addr], i.src2); + } + if (IsTracingData()) { + e.mov(e.r8, e.qword[addr]); + e.mov(e.rdx, i.src1.value); + e.CallNative(TraceContextStoreI64); + } + } +}; +EMITTER(STORE_CONTEXT_F32, MATCH(I>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + auto addr = ComputeContextAddress(e, i.src1); + if (i.src2.is_constant) { + e.mov(e.dword[addr], i.src2.value->constant.i32); + } else { + e.vmovss(e.dword[addr], i.src2); + } + if (IsTracingData()) { + e.lea(e.r8, e.dword[addr]); + e.mov(e.rdx, i.src1.value); + e.CallNative(TraceContextStoreF32); + } + } +}; +EMITTER(STORE_CONTEXT_F64, MATCH(I>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + auto addr = ComputeContextAddress(e, i.src1); + if (i.src2.is_constant) { + e.MovMem64(addr, i.src2.value->constant.i64); + } else { + e.vmovsd(e.qword[addr], i.src2); + } + if (IsTracingData()) { + e.lea(e.r8, e.qword[addr]); + e.mov(e.rdx, i.src1.value); + e.CallNative(TraceContextStoreF64); + } + } +}; +EMITTER(STORE_CONTEXT_V128, MATCH(I>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + auto addr = ComputeContextAddress(e, i.src1); + if (i.src2.is_constant) { + e.LoadConstantXmm(e.xmm0, i.src2.constant()); + e.vmovaps(e.ptr[addr], e.xmm0); + } else { + e.vmovaps(e.ptr[addr], i.src2); + } + if (IsTracingData()) { + e.lea(e.r8, e.ptr[addr]); + e.mov(e.rdx, i.src1.value); + e.CallNative(TraceContextStoreV128); + } + } +}; +EMITTER_OPCODE_TABLE( + OPCODE_STORE_CONTEXT, + STORE_CONTEXT_I8, + STORE_CONTEXT_I16, + STORE_CONTEXT_I32, + STORE_CONTEXT_I64, + STORE_CONTEXT_F32, + STORE_CONTEXT_F64, + STORE_CONTEXT_V128); + + +// ============================================================================ +// OPCODE_LOAD +// ============================================================================ +// Note: most *should* be aligned, but needs to be checked! +template +bool CheckLoadAccessCallback(X64Emitter& e, const T& i) { + // If this is a constant address load, check to see if it's in a + // register range. We'll also probably want a dynamic check for + // unverified stores. So far, most games use constants. 
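  // access_callbacks() is a linked list of handler records registered by the
  // runtime for special address ranges (MMIO-style); each entry supplies a
  // handles() predicate, read/write thunks, and a context pointer. When a
  // constant guest address matches, the load below is replaced by a native
  // call (rcx = callback context, rdx = address, result in rax, per the Win64
  // convention used by CallNative), and the result is byte-swapped so i.dest
  // ends up with the same bit pattern a raw load from big-endian guest memory
  // would have produced.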
+ if (!i.src1.is_constant) { + return false; + } + uint64_t address = i.src1.constant() & 0xFFFFFFFF; + auto cbs = e.runtime()->access_callbacks(); + while (cbs) { + if (cbs->handles(cbs->context, address)) { + e.mov(e.rcx, reinterpret_cast(cbs->context)); + e.mov(e.rdx, address); + e.CallNative(cbs->read); + if (T::dest_type == KEY_TYPE_V_I8) { + // No swap required. + e.mov(i.dest, e.al); + } else if (T::dest_type == KEY_TYPE_V_I16) { + e.ror(e.ax, 8); + e.mov(i.dest, e.ax); + } else if (T::dest_type == KEY_TYPE_V_I32) { + e.bswap(e.eax); + e.mov(i.dest, e.eax); + } else if (T::dest_type == KEY_TYPE_V_I64) { + e.bswap(e.rax); + e.mov(i.dest, e.rax); + } else { + XEASSERTALWAYS(); + } + return true; + } + cbs = cbs->next; + } + return false; +} +template +RegExp ComputeMemoryAddress(X64Emitter& e, const T& guest) { + if (guest.is_constant) { + // TODO(benvanik): figure out how to do this without a temp. + // Since the constant is often 0x8... if we tried to use that as a + // displacement it would be sign extended and mess things up. + e.mov(e.eax, static_cast(guest.constant())); + return e.rdx + e.rax; + } else { + // Clear the top 32 bits, as they are likely garbage. + // TODO(benvanik): find a way to avoid doing this. + e.mov(e.eax, guest.reg().cvt32()); + return e.rdx + e.rax; + } +} +EMITTER(LOAD_I8, MATCH(I, I64<>>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + if (CheckLoadAccessCallback(e, i)) { + return; + } + auto addr = ComputeMemoryAddress(e, i.src1); + e.mov(i.dest, e.byte[addr]); + if (IsTracingData()) { + e.mov(e.r8, i.dest); + e.lea(e.rdx, e.ptr[addr]); + e.CallNative(TraceMemoryLoadI8); + } + } +}; +EMITTER(LOAD_I16, MATCH(I, I64<>>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + if (CheckLoadAccessCallback(e, i)) { + return; + } + auto addr = ComputeMemoryAddress(e, i.src1); + e.mov(i.dest, e.word[addr]); + if (IsTracingData()) { + e.mov(e.r8, i.dest); + e.lea(e.rdx, e.ptr[addr]); + e.CallNative(TraceMemoryLoadI16); + } + } +}; +EMITTER(LOAD_I32, MATCH(I, I64<>>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + if (CheckLoadAccessCallback(e, i)) { + return; + } + auto addr = ComputeMemoryAddress(e, i.src1); + e.mov(i.dest, e.dword[addr]); + if (IsTracingData()) { + e.mov(e.r8, i.dest); + e.lea(e.rdx, e.ptr[addr]); + e.CallNative(TraceMemoryLoadI32); + } + } +}; +EMITTER(LOAD_I64, MATCH(I, I64<>>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + if (CheckLoadAccessCallback(e, i)) { + return; + } + auto addr = ComputeMemoryAddress(e, i.src1); + e.mov(i.dest, e.qword[addr]); + if (IsTracingData()) { + e.mov(e.r8, i.dest); + e.lea(e.rdx, e.ptr[addr]); + e.CallNative(TraceMemoryLoadI64); + } + } +}; +EMITTER(LOAD_F32, MATCH(I, I64<>>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + auto addr = ComputeMemoryAddress(e, i.src1); + e.vmovss(i.dest, e.dword[addr]); + if (IsTracingData()) { + e.lea(e.r8, e.dword[addr]); + e.lea(e.rdx, e.ptr[addr]); + e.CallNative(TraceMemoryLoadF32); + } + } +}; +EMITTER(LOAD_F64, MATCH(I, I64<>>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + auto addr = ComputeMemoryAddress(e, i.src1); + e.vmovsd(i.dest, e.qword[addr]); + if (IsTracingData()) { + e.lea(e.r8, e.qword[addr]); + e.lea(e.rdx, e.ptr[addr]); + e.CallNative(TraceMemoryLoadF64); + } + } +}; +EMITTER(LOAD_V128, MATCH(I, I64<>>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + auto addr = ComputeMemoryAddress(e, i.src1); + // TODO(benvanik): we should try to stick to movaps if 
possible. + e.vmovups(i.dest, e.ptr[addr]); + if (IsTracingData()) { + e.lea(e.r8, e.ptr[addr]); + e.lea(e.rdx, e.ptr[addr]); + e.CallNative(TraceMemoryLoadV128); + } + } +}; +EMITTER_OPCODE_TABLE( + OPCODE_LOAD, + LOAD_I8, + LOAD_I16, + LOAD_I32, + LOAD_I64, + LOAD_F32, + LOAD_F64, + LOAD_V128); + + +// ============================================================================ +// OPCODE_STORE +// ============================================================================ +// Note: most *should* be aligned, but needs to be checked! +template +bool CheckStoreAccessCallback(X64Emitter& e, const T& i) { + // If this is a constant address store, check to see if it's in a + // register range. We'll also probably want a dynamic check for + // unverified stores. So far, most games use constants. + if (!i.src1.is_constant) { + return false; + } + uint64_t address = i.src1.constant() & 0xFFFFFFFF; + auto cbs = e.runtime()->access_callbacks(); + while (cbs) { + if (cbs->handles(cbs->context, address)) { + e.mov(e.rcx, reinterpret_cast(cbs->context)); + e.mov(e.rdx, address); + if (i.src2.is_constant) { + e.mov(e.r8, i.src2.constant()); + } else { + if (T::src2_type == KEY_TYPE_V_I8) { + // No swap required. + e.movzx(e.r8, i.src2.reg().cvt8()); + } else if (T::src2_type == KEY_TYPE_V_I16) { + e.movzx(e.r8, i.src2.reg().cvt16()); + e.ror(e.r8w, 8); + } else if (T::src2_type == KEY_TYPE_V_I32) { + e.mov(e.r8d, i.src2.reg().cvt32()); + e.bswap(e.r8d); + } else if (T::src2_type == KEY_TYPE_V_I64) { + e.mov(e.r8, i.src2); + e.bswap(e.r8); + } else { + XEASSERTALWAYS(); + } + } + e.CallNative(cbs->write); + return true; + } + cbs = cbs->next; + } + return false; +} +EMITTER(STORE_I8, MATCH(I, I8<>>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + if (CheckStoreAccessCallback(e, i)) { + return; + } + auto addr = ComputeMemoryAddress(e, i.src1); + if (i.src2.is_constant) { + e.mov(e.byte[addr], i.src2.constant()); + } else { + e.mov(e.byte[addr], i.src2); + } + if (IsTracingData()) { + e.mov(e.r8, e.byte[addr]); + e.lea(e.rdx, e.ptr[addr]); + e.CallNative(TraceMemoryStoreI8); + } + } +}; +EMITTER(STORE_I16, MATCH(I, I16<>>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + if (CheckStoreAccessCallback(e, i)) { + return; + } + auto addr = ComputeMemoryAddress(e, i.src1); + if (i.src2.is_constant) { + e.mov(e.word[addr], i.src2.constant()); + } else { + e.mov(e.word[addr], i.src2); + } + if (IsTracingData()) { + e.mov(e.r8, e.word[addr]); + e.lea(e.rdx, e.ptr[addr]); + e.CallNative(TraceMemoryStoreI16); + } + } +}; +EMITTER(STORE_I32, MATCH(I, I32<>>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + if (CheckStoreAccessCallback(e, i)) { + return; + } + auto addr = ComputeMemoryAddress(e, i.src1); + if (i.src2.is_constant) { + e.mov(e.dword[addr], i.src2.constant()); + } else { + e.mov(e.dword[addr], i.src2); + } + if (IsTracingData()) { + e.mov(e.r8, e.dword[addr]); + e.lea(e.rdx, e.ptr[addr]); + e.CallNative(TraceMemoryStoreI32); + } + } +}; +EMITTER(STORE_I64, MATCH(I, I64<>>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + if (CheckStoreAccessCallback(e, i)) { + return; + } + auto addr = ComputeMemoryAddress(e, i.src1); + if (i.src2.is_constant) { + e.MovMem64(addr, i.src2.constant()); + } else { + e.mov(e.qword[addr], i.src2); + } + if (IsTracingData()) { + e.mov(e.r8, e.qword[addr]); + e.lea(e.rdx, e.ptr[addr]); + e.CallNative(TraceMemoryStoreI64); + } + } +}; +EMITTER(STORE_F32, MATCH(I, F32<>>)) { + static void Emit(X64Emitter& e, const 
EmitArgType& i) { + auto addr = ComputeMemoryAddress(e, i.src1); + if (i.src2.is_constant) { + e.mov(e.dword[addr], i.src2.value->constant.i32); + } else { + e.vmovss(e.dword[addr], i.src2); + } + if (IsTracingData()) { + e.lea(e.r8, e.ptr[addr]); + e.lea(e.rdx, e.ptr[addr]); + e.CallNative(TraceMemoryStoreF32); + } + } +}; +EMITTER(STORE_F64, MATCH(I, F64<>>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + auto addr = ComputeMemoryAddress(e, i.src1); + if (i.src2.is_constant) { + e.MovMem64(addr, i.src2.value->constant.i64); + } else { + e.vmovsd(e.qword[addr], i.src2); + } + if (IsTracingData()) { + e.lea(e.r8, e.ptr[addr]); + e.lea(e.rdx, e.ptr[addr]); + e.CallNative(TraceMemoryStoreF64); + } + } +}; +EMITTER(STORE_V128, MATCH(I, V128<>>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + auto addr = ComputeMemoryAddress(e, i.src1); + if (i.src2.is_constant) { + e.LoadConstantXmm(e.xmm0, i.src2.constant()); + e.vmovaps(e.ptr[addr], e.xmm0); + } else { + e.vmovaps(e.ptr[addr], i.src2); + } + if (IsTracingData()) { + e.lea(e.r8, e.ptr[addr]); + e.lea(e.rdx, e.ptr[addr]); + e.CallNative(TraceMemoryStoreV128); + } + } +}; +EMITTER_OPCODE_TABLE( + OPCODE_STORE, + STORE_I8, + STORE_I16, + STORE_I32, + STORE_I64, + STORE_F32, + STORE_F64, + STORE_V128); + + +// ============================================================================ +// OPCODE_PREFETCH +// ============================================================================ +EMITTER(PREFETCH, MATCH(I, OffsetOp>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + // TODO(benvanik): prefetch addr -> length. + } +}; +EMITTER_OPCODE_TABLE( + OPCODE_PREFETCH, + PREFETCH); + + +// ============================================================================ +// OPCODE_MAX +// ============================================================================ +EMITTER(MAX_F32, MATCH(I, F32<>, F32<>>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + e.vmaxss(i.dest, i.src1, i.src2); + } +}; +EMITTER(MAX_F64, MATCH(I, F64<>, F64<>>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + e.vmaxsd(i.dest, i.src1, i.src2); + } +}; +EMITTER(MAX_V128, MATCH(I, V128<>, V128<>>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + e.vmaxps(i.dest, i.src1, i.src2); + } +}; +EMITTER_OPCODE_TABLE( + OPCODE_MAX, + MAX_F32, + MAX_F64, + MAX_V128); + + +// ============================================================================ +// OPCODE_MIN +// ============================================================================ +EMITTER(MIN_F32, MATCH(I, F32<>, F32<>>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + e.vminss(i.dest, i.src1, i.src2); + } +}; +EMITTER(MIN_F64, MATCH(I, F64<>, F64<>>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + e.vminsd(i.dest, i.src1, i.src2); + } +}; +EMITTER(MIN_V128, MATCH(I, V128<>, V128<>>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + e.vminps(i.dest, i.src1, i.src2); + } +}; +EMITTER_OPCODE_TABLE( + OPCODE_MIN, + MIN_F32, + MIN_F64, + MIN_V128); + + +// ============================================================================ +// OPCODE_SELECT +// ============================================================================ +EMITTER(SELECT_I8, MATCH(I, I8<>, I8<>, I8<>>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + e.test(i.src1, i.src1); + e.cmovnz(i.dest.reg().cvt32(), i.src2.reg().cvt32()); + e.cmovz(i.dest.reg().cvt32(), i.src3.reg().cvt32()); + } +}; +EMITTER(SELECT_I16, MATCH(I, 
I8<>, I16<>, I16<>>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + e.test(i.src1, i.src1); + e.cmovnz(i.dest.reg().cvt32(), i.src2.reg().cvt32()); + e.cmovz(i.dest.reg().cvt32(), i.src3.reg().cvt32()); + } +}; +EMITTER(SELECT_I32, MATCH(I, I8<>, I32<>, I32<>>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + e.test(i.src1, i.src1); + e.cmovnz(i.dest, i.src2); + e.cmovz(i.dest, i.src3); + } +}; +EMITTER(SELECT_I64, MATCH(I, I8<>, I64<>, I64<>>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + e.test(i.src1, i.src1); + e.cmovnz(i.dest, i.src2); + e.cmovz(i.dest, i.src3); + } +}; +EMITTER(SELECT_F32, MATCH(I, I8<>, F32<>, F32<>>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + e.test(i.src1, i.src1); + // TODO(benvanik): find a way to do this without branches. + Xbyak::Label skip; + e.vmovaps(i.dest, i.src3); + e.jz(skip); + e.vmovaps(i.dest, i.src2); + e.L(skip); + } +}; +EMITTER(SELECT_F64, MATCH(I, I8<>, F64<>, F64<>>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + e.test(i.src1, i.src1); + // TODO(benvanik): find a way to do this without branches. + Xbyak::Label skip; + e.vmovaps(i.dest, i.src3); + e.jz(skip); + e.vmovaps(i.dest, i.src2); + e.L(skip); + } +}; +EMITTER(SELECT_V128, MATCH(I, I8<>, V128<>, V128<>>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + e.test(i.src1, i.src1); + // TODO(benvanik): find a way to do this without branches. + Xbyak::Label skip; + e.vmovaps(i.dest, i.src3); + e.jz(skip); + e.vmovaps(i.dest, i.src2); + e.L(skip); + } +}; +EMITTER_OPCODE_TABLE( + OPCODE_SELECT, + SELECT_I8, + SELECT_I16, + SELECT_I32, + SELECT_I64, + SELECT_F32, + SELECT_F64, + SELECT_V128); + + +// ============================================================================ +// OPCODE_IS_TRUE +// ============================================================================ +EMITTER(IS_TRUE_I8, MATCH(I, I8<>>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + e.test(i.src1, i.src1); + e.setnz(i.dest); + } +}; +EMITTER(IS_TRUE_I16, MATCH(I, I16<>>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + e.test(i.src1, i.src1); + e.setnz(i.dest); + } +}; +EMITTER(IS_TRUE_I32, MATCH(I, I32<>>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + e.test(i.src1, i.src1); + e.setnz(i.dest); + } +}; +EMITTER(IS_TRUE_I64, MATCH(I, I64<>>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + e.test(i.src1, i.src1); + e.setnz(i.dest); + } +}; +EMITTER(IS_TRUE_F32, MATCH(I, F32<>>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + e.vptest(i.src1, i.src1); + e.setnz(i.dest); + } +}; +EMITTER(IS_TRUE_F64, MATCH(I, F64<>>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + e.vptest(i.src1, i.src1); + e.setnz(i.dest); + } +}; +EMITTER(IS_TRUE_V128, MATCH(I, V128<>>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + e.vptest(i.src1, i.src1); + e.setnz(i.dest); + } +}; +EMITTER_OPCODE_TABLE( + OPCODE_IS_TRUE, + IS_TRUE_I8, + IS_TRUE_I16, + IS_TRUE_I32, + IS_TRUE_I64, + IS_TRUE_F32, + IS_TRUE_F64, + IS_TRUE_V128); + + +// ============================================================================ +// OPCODE_IS_FALSE +// ============================================================================ +EMITTER(IS_FALSE_I8, MATCH(I, I8<>>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + e.test(i.src1, i.src1); + e.setz(i.dest); + } +}; +EMITTER(IS_FALSE_I16, MATCH(I, I16<>>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) 
{ + e.test(i.src1, i.src1); + e.setz(i.dest); + } +}; +EMITTER(IS_FALSE_I32, MATCH(I, I32<>>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + e.test(i.src1, i.src1); + e.setz(i.dest); + } +}; +EMITTER(IS_FALSE_I64, MATCH(I, I64<>>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + e.test(i.src1, i.src1); + e.setz(i.dest); + } +}; +EMITTER(IS_FALSE_F32, MATCH(I, F32<>>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + e.vptest(i.src1, i.src1); + e.setz(i.dest); + } +}; +EMITTER(IS_FALSE_F64, MATCH(I, F64<>>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + e.vptest(i.src1, i.src1); + e.setz(i.dest); + } +}; +EMITTER(IS_FALSE_V128, MATCH(I, V128<>>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + e.vptest(i.src1, i.src1); + e.setz(i.dest); + } +}; +EMITTER_OPCODE_TABLE( + OPCODE_IS_FALSE, + IS_FALSE_I8, + IS_FALSE_I16, + IS_FALSE_I32, + IS_FALSE_I64, + IS_FALSE_F32, + IS_FALSE_F64, + IS_FALSE_V128); + + +// ============================================================================ +// OPCODE_COMPARE_EQ +// ============================================================================ +EMITTER(COMPARE_EQ_I8, MATCH(I, I8<>, I8<>>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + EmitCommutativeCompareOp( + e, i, + [](X64Emitter& e, const Reg8& src1, const Reg8& src2) { e.cmp(src1, src2); }, + [](X64Emitter& e, const Reg8& src1, int32_t constant) { e.cmp(src1, constant); }); + e.sete(i.dest); + } +}; +EMITTER(COMPARE_EQ_I16, MATCH(I, I16<>, I16<>>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + EmitCommutativeCompareOp( + e, i, + [](X64Emitter& e, const Reg16& src1, const Reg16& src2) { e.cmp(src1, src2); }, + [](X64Emitter& e, const Reg16& src1, int32_t constant) { e.cmp(src1, constant); }); + e.sete(i.dest); + } +}; +EMITTER(COMPARE_EQ_I32, MATCH(I, I32<>, I32<>>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + EmitCommutativeCompareOp( + e, i, + [](X64Emitter& e, const Reg32& src1, const Reg32& src2) { e.cmp(src1, src2); }, + [](X64Emitter& e, const Reg32& src1, int32_t constant) { e.cmp(src1, constant); }); + e.sete(i.dest); + } +}; +EMITTER(COMPARE_EQ_I64, MATCH(I, I64<>, I64<>>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + EmitCommutativeCompareOp( + e, i, + [](X64Emitter& e, const Reg64& src1, const Reg64& src2) { e.cmp(src1, src2); }, + [](X64Emitter& e, const Reg64& src1, int32_t constant) { e.cmp(src1, constant); }); + e.sete(i.dest); + } +}; +EMITTER(COMPARE_EQ_F32, MATCH(I, F32<>, F32<>>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + e.vcomiss(i.src1, i.src2); + e.sete(i.dest); + } +}; +EMITTER(COMPARE_EQ_F64, MATCH(I, F64<>, F64<>>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + e.vcomisd(i.src1, i.src2); + e.sete(i.dest); + } +}; +EMITTER_OPCODE_TABLE( + OPCODE_COMPARE_EQ, + COMPARE_EQ_I8, + COMPARE_EQ_I16, + COMPARE_EQ_I32, + COMPARE_EQ_I64, + COMPARE_EQ_F32, + COMPARE_EQ_F64); + + +// ============================================================================ +// OPCODE_COMPARE_NE +// ============================================================================ +EMITTER(COMPARE_NE_I8, MATCH(I, I8<>, I8<>>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + EmitCommutativeCompareOp( + e, i, + [](X64Emitter& e, const Reg8& src1, const Reg8& src2) { e.cmp(src1, src2); }, + [](X64Emitter& e, const Reg8& src1, int32_t constant) { e.cmp(src1, constant); }); + e.setne(i.dest); + } +}; +EMITTER(COMPARE_NE_I16, 
MATCH(I, I16<>, I16<>>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + EmitCommutativeCompareOp( + e, i, + [](X64Emitter& e, const Reg16& src1, const Reg16& src2) { e.cmp(src1, src2); }, + [](X64Emitter& e, const Reg16& src1, int32_t constant) { e.cmp(src1, constant); }); + e.setne(i.dest); + } +}; +EMITTER(COMPARE_NE_I32, MATCH(I, I32<>, I32<>>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + EmitCommutativeCompareOp( + e, i, + [](X64Emitter& e, const Reg32& src1, const Reg32& src2) { e.cmp(src1, src2); }, + [](X64Emitter& e, const Reg32& src1, int32_t constant) { e.cmp(src1, constant); }); + e.setne(i.dest); + } +}; +EMITTER(COMPARE_NE_I64, MATCH(I, I64<>, I64<>>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + EmitCommutativeCompareOp( + e, i, + [](X64Emitter& e, const Reg64& src1, const Reg64& src2) { e.cmp(src1, src2); }, + [](X64Emitter& e, const Reg64& src1, int32_t constant) { e.cmp(src1, constant); }); + e.setne(i.dest); + } +}; +EMITTER(COMPARE_NE_F32, MATCH(I, F32<>, F32<>>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + e.vcomiss(i.src1, i.src2); + e.setne(i.dest); + } +}; +EMITTER(COMPARE_NE_F64, MATCH(I, F64<>, F64<>>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + e.vcomisd(i.src1, i.src2); + e.setne(i.dest); + } +}; +EMITTER_OPCODE_TABLE( + OPCODE_COMPARE_NE, + COMPARE_NE_I8, + COMPARE_NE_I16, + COMPARE_NE_I32, + COMPARE_NE_I64, + COMPARE_NE_F32, + COMPARE_NE_F64); + + +// ============================================================================ +// OPCODE_COMPARE_* +// ============================================================================ +#define EMITTER_ASSOCIATIVE_COMPARE_INT(op, instr, inverse_instr, type, reg_type) \ + EMITTER(COMPARE_##op##_##type, MATCH(I, type<>, type<>>)) { \ + static void Emit(X64Emitter& e, const EmitArgType& i) { \ + EmitAssociativeCompareOp( \ + e, i, \ + [](X64Emitter& e, const Reg8& dest, const reg_type& src1, const reg_type& src2, bool inverse) { \ + e.cmp(src1, src2); \ + if (!inverse) { e.instr(dest); } else { e.inverse_instr(dest); } \ + }, \ + [](X64Emitter& e, const Reg8& dest, const reg_type& src1, int32_t constant, bool inverse) { \ + e.cmp(src1, constant); \ + if (!inverse) { e.instr(dest); } else { e.inverse_instr(dest); } \ + }); \ + } \ + }; +#define EMITTER_ASSOCIATIVE_COMPARE_XX(op, instr, inverse_instr) \ + EMITTER_ASSOCIATIVE_COMPARE_INT(op, instr, inverse_instr, I8, Reg8); \ + EMITTER_ASSOCIATIVE_COMPARE_INT(op, instr, inverse_instr, I16, Reg16); \ + EMITTER_ASSOCIATIVE_COMPARE_INT(op, instr, inverse_instr, I32, Reg32); \ + EMITTER_ASSOCIATIVE_COMPARE_INT(op, instr, inverse_instr, I64, Reg64); \ + EMITTER(COMPARE_##op##_F32, MATCH(I, F32<>, F32<>>)) { \ + static void Emit(X64Emitter& e, const EmitArgType& i) { \ + e.vcomiss(i.src1, i.src2); \ + e.instr(i.dest); \ + } \ + }; \ + EMITTER(COMPARE_##op##_F64, MATCH(I, F64<>, F64<>>)) { \ + static void Emit(X64Emitter& e, const EmitArgType& i) { \ + if (i.src1.is_constant) { \ + e.LoadConstantXmm(e.xmm0, i.src1.constant()); \ + e.vcomisd(e.xmm0, i.src2); \ + } else if (i.src2.is_constant) { \ + e.LoadConstantXmm(e.xmm0, i.src2.constant()); \ + e.vcomisd(i.src1, e.xmm0); \ + } else { \ + e.vcomisd(i.src1, i.src2); \ + } \ + e.instr(i.dest); \ + } \ + }; \ + EMITTER_OPCODE_TABLE( \ + OPCODE_COMPARE_##op##, \ + COMPARE_##op##_I8, \ + COMPARE_##op##_I16, \ + COMPARE_##op##_I32, \ + COMPARE_##op##_I64, \ + COMPARE_##op##_F32, \ + COMPARE_##op##_F64); +EMITTER_ASSOCIATIVE_COMPARE_XX(SLT, setl, 
setge); +EMITTER_ASSOCIATIVE_COMPARE_XX(SLE, setle, setg); +EMITTER_ASSOCIATIVE_COMPARE_XX(SGT, setg, setle); +EMITTER_ASSOCIATIVE_COMPARE_XX(SGE, setge, setl); +EMITTER_ASSOCIATIVE_COMPARE_XX(ULT, setb, setae); +EMITTER_ASSOCIATIVE_COMPARE_XX(ULE, setbe, seta); +EMITTER_ASSOCIATIVE_COMPARE_XX(UGT, seta, setbe); +EMITTER_ASSOCIATIVE_COMPARE_XX(UGE, setae, setb); + + +// ============================================================================ +// OPCODE_DID_CARRY +// ============================================================================ +// TODO(benvanik): salc/setalc +// https://code.google.com/p/corkami/wiki/x86oddities +EMITTER(DID_CARRY_I8, MATCH(I, I8<>>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + e.LoadEflags(); + e.setc(i.dest); + } +}; +EMITTER(DID_CARRY_I16, MATCH(I, I16<>>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + e.LoadEflags(); + e.setc(i.dest); + } +}; +EMITTER(DID_CARRY_I32, MATCH(I, I32<>>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + e.LoadEflags(); + e.setc(i.dest); + } +}; +EMITTER(DID_CARRY_I64, MATCH(I, I64<>>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + e.LoadEflags(); + e.setc(i.dest); + } +}; +EMITTER_OPCODE_TABLE( + OPCODE_DID_CARRY, + DID_CARRY_I8, + DID_CARRY_I16, + DID_CARRY_I32, + DID_CARRY_I64); + + +// ============================================================================ +// OPCODE_DID_OVERFLOW +// ============================================================================ +EMITTER(DID_OVERFLOW, MATCH(I>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + e.LoadEflags(); + e.seto(i.dest); + } +}; +EMITTER_OPCODE_TABLE( + OPCODE_DID_OVERFLOW, + DID_OVERFLOW); + + +// ============================================================================ +// OPCODE_DID_SATURATE +// ============================================================================ +//EMITTER(DID_SATURATE, MATCH(I>)) { +// static void Emit(X64Emitter& e, const EmitArgType& i) { +// } +//}; +//EMITTER_OPCODE_TABLE(OPCODE_DID_SATURATE, +// DID_SATURATE); + + +// ============================================================================ +// OPCODE_VECTOR_COMPARE_EQ +// ============================================================================ +EMITTER(VECTOR_COMPARE_EQ_V128, MATCH(I, V128<>, V128<>>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + switch (i.instr->flags) { + case INT8_TYPE: + e.vpcmpeqb(i.dest, i.src1, i.src2); + break; + case INT16_TYPE: + e.vpcmpeqw(i.dest, i.src1, i.src2); + break; + case INT32_TYPE: + e.vpcmpeqd(i.dest, i.src1, i.src2); + break; + case FLOAT32_TYPE: + e.vcmpeqps(i.dest, i.src1, i.src2); + break; + } + } +}; +EMITTER_OPCODE_TABLE( + OPCODE_VECTOR_COMPARE_EQ, + VECTOR_COMPARE_EQ_V128); + + +// ============================================================================ +// OPCODE_VECTOR_COMPARE_SGT +// ============================================================================ +EMITTER(VECTOR_COMPARE_SGT_V128, MATCH(I, V128<>, V128<>>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + switch (i.instr->flags) { + case INT8_TYPE: + e.vpcmpgtb(i.dest, i.src1, i.src2); + break; + case INT16_TYPE: + e.vpcmpgtw(i.dest, i.src1, i.src2); + break; + case INT32_TYPE: + e.vpcmpgtd(i.dest, i.src1, i.src2); + break; + case FLOAT32_TYPE: + e.vcmpgtps(i.dest, i.src1, i.src2); + break; + } + } +}; +EMITTER_OPCODE_TABLE( + OPCODE_VECTOR_COMPARE_SGT, + VECTOR_COMPARE_SGT_V128); + + +// 
============================================================================ +// OPCODE_VECTOR_COMPARE_SGE +// ============================================================================ +EMITTER(VECTOR_COMPARE_SGE_V128, MATCH(I, V128<>, V128<>>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + switch (i.instr->flags) { + case INT8_TYPE: + e.vpcmpgtb(i.dest, i.src1, i.src2); + e.vpcmpeqb(e.xmm0, i.src1, i.src2); + e.vpor(i.dest, e.xmm0); + break; + case INT16_TYPE: + e.vpcmpgtw(i.dest, i.src1, i.src2); + e.vpcmpeqw(e.xmm0, i.src1, i.src2); + e.vpor(i.dest, e.xmm0); + break; + case INT32_TYPE: + e.vpcmpgtd(i.dest, i.src1, i.src2); + e.vpcmpeqd(e.xmm0, i.src1, i.src2); + e.vpor(i.dest, e.xmm0); + break; + case FLOAT32_TYPE: + e.vcmpgeps(i.dest, i.src1, i.src2); + break; + } + } +}; +EMITTER_OPCODE_TABLE( + OPCODE_VECTOR_COMPARE_SGE, + VECTOR_COMPARE_SGE_V128); + + +// ============================================================================ +// OPCODE_VECTOR_COMPARE_UGT +// ============================================================================ +//EMITTER(VECTOR_COMPARE_UGT_V128, MATCH(I, V128<>, V128<>>)) { +// static void Emit(X64Emitter& e, const EmitArgType& i) { +// } +//}; +//EMITTER_OPCODE_TABLE( +// OPCODE_VECTOR_COMPARE_UGT, +// VECTOR_COMPARE_UGT_V128); + + +// ============================================================================ +// OPCODE_VECTOR_COMPARE_UGE +// ============================================================================ +//EMITTER(VECTOR_COMPARE_UGE_V128, MATCH(I, V128<>, V128<>>)) { +// static void Emit(X64Emitter& e, const EmitArgType& i) { +// } +//}; +//EMITTER_OPCODE_TABLE( +// OPCODE_VECTOR_COMPARE_UGE, +// VECTOR_COMPARE_UGE_V128); + + +// ============================================================================ +// OPCODE_ADD +// ============================================================================ +// TODO(benvanik): put dest/src1|2 together. +template +void EmitAddXX(X64Emitter& e, const ARGS& i) { + SEQ::EmitCommutativeBinaryOp( + e, i, + [](X64Emitter& e, const REG& dest_src, const REG& src) { e.add(dest_src, src); }, + [](X64Emitter& e, const REG& dest_src, int32_t constant) { e.add(dest_src, constant); }); + if (i.instr->flags & ARITHMETIC_SET_CARRY) { + // CF is set if carried. 
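+    // (Illustrative note, assuming StoreEflags/LoadEflags spill and reload
+    // EFLAGS through the emitter's context: the add above leaves CF set on
+    // carry-out, StoreEflags captures it here, and a later OPCODE_DID_CARRY
+    // re-materializes it with LoadEflags + setc.)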
+ e.StoreEflags(); + } +} +EMITTER(ADD_I8, MATCH(I, I8<>, I8<>>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + EmitAddXX(e, i); + } +}; +EMITTER(ADD_I16, MATCH(I, I16<>, I16<>>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + EmitAddXX(e, i); + } +}; +EMITTER(ADD_I32, MATCH(I, I32<>, I32<>>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + EmitAddXX(e, i); + } +}; +EMITTER(ADD_I64, MATCH(I, I64<>, I64<>>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + EmitAddXX(e, i); + } +}; +EMITTER(ADD_F32, MATCH(I, F32<>, F32<>>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + e.vaddss(i.dest, i.src1, i.src2); + } +}; +EMITTER(ADD_F64, MATCH(I, F64<>, F64<>>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + e.vaddsd(i.dest, i.src1, i.src2); + } +}; +EMITTER(ADD_V128, MATCH(I, V128<>, V128<>>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + e.vaddps(i.dest, i.src1, i.src2); + } +}; +EMITTER_OPCODE_TABLE( + OPCODE_ADD, + ADD_I8, + ADD_I16, + ADD_I32, + ADD_I64, + ADD_F32, + ADD_F64, + ADD_V128); + + +// ============================================================================ +// OPCODE_ADD_CARRY +// ============================================================================ +// TODO(benvanik): put dest/src1|2 together. +template +void EmitAddCarryXX(X64Emitter& e, const ARGS& i) { + // TODO(benvanik): faster setting? we could probably do some fun math tricks + // here to get the carry flag set. + if (i.src3.is_constant) { + if (i.src3.constant()) { + e.stc(); + } else { + e.clc(); + } + } else { + if (i.src3.reg().getIdx() <= 4) { + // Can move from A/B/C/DX to AH. + e.mov(e.ah, i.src3.reg().cvt8()); + } else { + e.mov(e.al, i.src3); + e.mov(e.ah, e.al); + } + e.sahf(); + } + if (i.src1.is_constant && i.src2.is_constant) { + auto ab = i.src1.constant() + i.src2.constant(); + if (!ab) { + e.xor(i.dest, i.dest); + } else { + e.mov(i.dest, ab); + } + e.adc(i.dest, 0); + } else { + SEQ::EmitCommutativeBinaryOp( + e, i, [](X64Emitter& e, const REG& dest_src, const REG& src) { + e.adc(dest_src, src); + }, [](X64Emitter& e, const REG& dest_src, int32_t constant) { + e.adc(dest_src, constant); + }); + } + if (i.instr->flags & ARITHMETIC_SET_CARRY) { + // CF is set if carried. + e.StoreEflags(); + } +} +EMITTER(ADD_CARRY_I8, MATCH(I, I8<>, I8<>, I8<>>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + EmitAddCarryXX(e, i); + } +}; +EMITTER(ADD_CARRY_I16, MATCH(I, I16<>, I16<>, I8<>>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + EmitAddCarryXX(e, i); + } +}; +EMITTER(ADD_CARRY_I32, MATCH(I, I32<>, I32<>, I8<>>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + EmitAddCarryXX(e, i); + } +}; +EMITTER(ADD_CARRY_I64, MATCH(I, I64<>, I64<>, I8<>>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + EmitAddCarryXX(e, i); + } +}; +EMITTER_OPCODE_TABLE( + OPCODE_ADD_CARRY, + ADD_CARRY_I8, + ADD_CARRY_I16, + ADD_CARRY_I32, + ADD_CARRY_I64); + + +// ============================================================================ +// OPCODE_VECTOR_ADD +// ============================================================================ + + +// ============================================================================ +// OPCODE_SUB +// ============================================================================ +// TODO(benvanik): put dest/src1|2 together. 
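+// Note on the carry path below: when ARITHMETIC_SET_CARRY is set, EmitSubXX
+// computes dest - src as dest + ~src + 1 (stc followed by adc of the
+// complement) rather than a plain sub, presumably because x86 sub reports CF
+// as a borrow while the carry captured via StoreEflags/DID_CARRY wants the
+// carry-out of the complemented addition (the PowerPC CA sense).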
+template +void EmitSubXX(X64Emitter& e, const ARGS& i) { + if (i.instr->flags & ARITHMETIC_SET_CARRY) { + // TODO(benvanik): faster way of doing sub with CF set? + SEQ::EmitAssociativeBinaryOp( + e, i, + [](X64Emitter& e, const REG& dest_src, const REG& src) { + auto temp = GetTempReg(e); + e.mov(temp, src); + e.not(temp); + e.stc(); + e.adc(dest_src, temp); + }, + [](X64Emitter& e, const REG& dest_src, int32_t constant) { + auto temp = GetTempReg(e); + e.mov(temp, constant); + e.not(temp); + e.stc(); + e.adc(dest_src, temp); + }); + e.StoreEflags(); + } else { + SEQ::EmitAssociativeBinaryOp( + e, i, + [](X64Emitter& e, const REG& dest_src, const REG& src) { e.sub(dest_src, src); }, + [](X64Emitter& e, const REG& dest_src, int32_t constant) { e.sub(dest_src, constant); }); + } +} +EMITTER(SUB_I8, MATCH(I, I8<>, I8<>>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + EmitSubXX(e, i); + } +}; +EMITTER(SUB_I16, MATCH(I, I16<>, I16<>>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + EmitSubXX(e, i); + } +}; +EMITTER(SUB_I32, MATCH(I, I32<>, I32<>>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + EmitSubXX(e, i); + } +}; +EMITTER(SUB_I64, MATCH(I, I64<>, I64<>>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + EmitSubXX(e, i); + } +}; +EMITTER(SUB_F32, MATCH(I, F32<>, F32<>>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + XEASSERT(!i.instr->flags); + e.vsubss(i.dest, i.src1, i.src2); + } +}; +EMITTER(SUB_F64, MATCH(I, F64<>, F64<>>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + XEASSERT(!i.instr->flags); + e.vsubsd(i.dest, i.src1, i.src2); + } +}; +EMITTER(SUB_V128, MATCH(I, V128<>, V128<>>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + XEASSERT(!i.instr->flags); + e.vsubps(i.dest, i.src1, i.src2); + } +}; +EMITTER_OPCODE_TABLE( + OPCODE_SUB, + SUB_I8, + SUB_I16, + SUB_I32, + SUB_I64, + SUB_F32, + SUB_F64, + SUB_V128); + + +// ============================================================================ +// OPCODE_MUL +// ============================================================================ +// Sign doesn't matter here, as we don't use the high bits. +// We exploit mulx here to avoid creating too much register pressure. +EMITTER(MUL_I8, MATCH(I, I8<>, I8<>>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + // dest hi, dest low = src * edx + // TODO(benvanik): place src2 in edx? + if (i.src1.is_constant) { + XEASSERT(!i.src2.is_constant); + e.movzx(e.edx, i.src2); + e.mov(e.eax, static_cast(i.src1.constant())); + e.mulx(e.edx, i.dest.reg().cvt32(), e.eax); + } else if (i.src2.is_constant) { + e.movzx(e.edx, i.src1); + e.mov(e.eax, static_cast(i.src2.constant())); + e.mulx(e.edx, i.dest.reg().cvt32(), e.eax); + } else { + e.movzx(e.edx, i.src2); + e.mulx(e.edx, i.dest.reg().cvt32(), i.src1.reg().cvt32()); + } + } +}; +EMITTER(MUL_I16, MATCH(I, I16<>, I16<>>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + // dest hi, dest low = src * edx + // TODO(benvanik): place src2 in edx? 
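+    // mulx (BMI2) multiplies the explicit source by the implicit EDX and
+    // writes hi/lo to two destinations without touching EFLAGS, which is why
+    // one operand is staged in edx here and edx is restored afterwards via
+    // ReloadEDX().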
+ if (i.src1.is_constant) { + XEASSERT(!i.src2.is_constant); + e.movzx(e.edx, i.src2); + e.mov(e.ax, static_cast(i.src1.constant())); + e.mulx(e.edx, i.dest.reg().cvt32(), e.eax); + } else if (i.src2.is_constant) { + e.movzx(e.edx, i.src1); + e.mov(e.ax, static_cast(i.src2.constant())); + e.mulx(e.edx, i.dest.reg().cvt32(), e.eax); + } else { + e.movzx(e.edx, i.src2); + e.mulx(e.edx, i.dest.reg().cvt32(), i.src1.reg().cvt32()); + } + e.ReloadEDX(); + } +}; +EMITTER(MUL_I32, MATCH(I, I32<>, I32<>>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + // dest hi, dest low = src * edx + // TODO(benvanik): place src2 in edx? + if (i.src1.is_constant) { + XEASSERT(!i.src2.is_constant); + e.mov(e.edx, i.src2); + e.mov(e.eax, i.src1.constant()); + e.mulx(e.edx, i.dest, e.eax); + } else if (i.src2.is_constant) { + e.mov(e.edx, i.src1); + e.mov(e.eax, i.src2.constant()); + e.mulx(e.edx, i.dest, e.eax); + } else { + e.mov(e.edx, i.src2); + e.mulx(e.edx, i.dest, i.src1); + } + e.ReloadEDX(); + } +}; +EMITTER(MUL_I64, MATCH(I, I64<>, I64<>>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + // dest hi, dest low = src * rdx + // TODO(benvanik): place src2 in edx? + if (i.src1.is_constant) { + XEASSERT(!i.src2.is_constant); + e.mov(e.rdx, i.src2); + e.mov(e.rax, i.src1.constant()); + e.mulx(e.rdx, i.dest, e.rax); + } else if (i.src2.is_constant) { + e.mov(e.rdx, i.src1); + e.mov(e.rax, i.src2.constant()); + e.mulx(e.rdx, i.dest, e.rax); + } else { + e.mov(e.rdx, i.src2); + e.mulx(e.rdx, i.dest, i.src1); + } + e.ReloadEDX(); + } +}; +EMITTER(MUL_F32, MATCH(I, F32<>, F32<>>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + XEASSERT(!i.instr->flags); + e.vmulss(i.dest, i.src1, i.src2); + } +}; +EMITTER(MUL_F64, MATCH(I, F64<>, F64<>>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + XEASSERT(!i.instr->flags); + e.vmulsd(i.dest, i.src1, i.src2); + } +}; +EMITTER(MUL_V128, MATCH(I, V128<>, V128<>>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + XEASSERT(!i.instr->flags); + e.vmulps(i.dest, i.src1, i.src2); + } +}; +EMITTER_OPCODE_TABLE( + OPCODE_MUL, + MUL_I8, + MUL_I16, + MUL_I32, + MUL_I64, + MUL_F32, + MUL_F64, + MUL_V128); + + +// ============================================================================ +// OPCODE_MUL_HI +// ============================================================================ +EMITTER(MUL_HI_I8, MATCH(I, I8<>, I8<>>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + if (i.instr->flags & ARITHMETIC_UNSIGNED) { + // TODO(benvanik): place src1 in eax? still need to sign extend + e.movzx(e.eax, i.src1); + e.mulx(i.dest.reg().cvt32(), e.eax, i.src2.reg().cvt32()); + } else { + e.mov(e.al, i.src1); + e.imul(i.src2); + e.mov(i.dest, e.ah); + } + e.ReloadEDX(); + } +}; +EMITTER(MUL_HI_I16, MATCH(I, I16<>, I16<>>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + if (i.instr->flags & ARITHMETIC_UNSIGNED) { + // TODO(benvanik): place src1 in eax? still need to sign extend + e.movzx(e.eax, i.src1); + e.mulx(i.dest.reg().cvt32(), e.eax, i.src2.reg().cvt32()); + } else { + e.mov(e.ax, i.src1); + e.imul(i.src2); + e.mov(i.dest, e.dx); + } + e.ReloadEDX(); + } +}; +EMITTER(MUL_HI_I32, MATCH(I, I32<>, I32<>>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + if (i.instr->flags & ARITHMETIC_UNSIGNED) { + // TODO(benvanik): place src1 in eax? 
still need to sign extend + e.mov(e.eax, i.src1); + e.mulx(i.dest, e.eax, i.src2); + } else { + e.mov(e.eax, i.src1); + e.imul(i.src2); + e.mov(i.dest, e.edx); + } + e.ReloadEDX(); + } +}; +EMITTER(MUL_HI_I64, MATCH(I, I64<>, I64<>>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + if (i.instr->flags & ARITHMETIC_UNSIGNED) { + // TODO(benvanik): place src1 in eax? still need to sign extend + e.mov(e.rax, i.src1); + e.mulx(i.dest, e.rax, i.src2); + } else { + e.mov(e.rax, i.src1); + e.imul(i.src2); + e.mov(i.dest, e.rdx); + } + e.ReloadEDX(); + } +}; +EMITTER_OPCODE_TABLE( + OPCODE_MUL_HI, + MUL_HI_I8, + MUL_HI_I16, + MUL_HI_I32, + MUL_HI_I64); + + +// ============================================================================ +// OPCODE_DIV +// ============================================================================ +// TODO(benvanik): optimize common constant cases. +// TODO(benvanik): simplify code! +EMITTER(DIV_I8, MATCH(I, I8<>, I8<>>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + // NOTE: RDX clobbered. + bool clobbered_rcx = false; + if (i.src2.is_constant) { + XEASSERT(!i.src1.is_constant); + clobbered_rcx = true; + e.mov(e.cl, i.src2.constant()); + if (i.instr->flags & ARITHMETIC_UNSIGNED) { + e.movzx(e.ax, i.src1); + e.div(e.cl); + } else { + e.movsx(e.ax, i.src1); + e.idiv(e.cl); + } + } else { + if (i.instr->flags & ARITHMETIC_UNSIGNED) { + if (i.src1.is_constant) { + e.mov(e.ax, static_cast(i.src1.constant())); + } else { + e.movzx(e.ax, i.src1); + } + e.div(i.src2); + } else { + if (i.src1.is_constant) { + e.mov(e.ax, static_cast(i.src1.constant())); + } else { + e.movsx(e.ax, i.src1); + } + e.idiv(i.src2); + } + } + e.mov(i.dest, e.al); + if (clobbered_rcx) { + e.ReloadECX(); + } + e.ReloadEDX(); + } +}; +EMITTER(DIV_I16, MATCH(I, I16<>, I16<>>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + // NOTE: RDX clobbered. + bool clobbered_rcx = false; + if (i.src2.is_constant) { + XEASSERT(!i.src1.is_constant); + clobbered_rcx = true; + e.mov(e.cx, i.src2.constant()); + if (i.instr->flags & ARITHMETIC_UNSIGNED) { + e.mov(e.ax, i.src1); + // Zero upper bits. + e.xor(e.dx, e.dx); + e.div(e.cx); + } else { + e.mov(e.ax, i.src1); + // Set dx to sign bit of src1 (dx:ax = dx:ax / src). + e.mov(e.dx, e.ax); + e.sar(e.dx, 15); + e.idiv(e.cx); + } + } else { + if (i.instr->flags & ARITHMETIC_UNSIGNED) { + if (i.src1.is_constant) { + e.mov(e.ax, i.src1.constant()); + } else { + e.mov(e.ax, i.src1); + } + // Zero upper bits. + e.xor(e.dx, e.dx); + e.div(i.src2); + } else { + if (i.src1.is_constant) { + e.mov(e.ax, i.src1.constant()); + } else { + e.mov(e.ax, i.src1); + } + // Set dx to sign bit of src1 (dx:ax = dx:ax / src). + e.mov(e.dx, e.ax); + e.sar(e.dx, 15); + e.idiv(i.src2); + } + } + e.mov(i.dest, e.ax); + if (clobbered_rcx) { + e.ReloadECX(); + } + e.ReloadEDX(); + } +}; +EMITTER(DIV_I32, MATCH(I, I32<>, I32<>>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + // NOTE: RDX clobbered. + bool clobbered_rcx = false; + if (i.src2.is_constant) { + XEASSERT(!i.src1.is_constant); + clobbered_rcx = true; + e.mov(e.ecx, i.src2.constant()); + if (i.instr->flags & ARITHMETIC_UNSIGNED) { + e.mov(e.eax, i.src1); + // Zero upper bits. + e.xor(e.edx, e.edx); + e.div(e.ecx); + } else { + e.mov(e.eax, i.src1); + // Set dx to sign bit of src1 (dx:ax = dx:ax / src). 
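+        // (Equivalent to cdq: copying eax and arithmetic-shifting right by 31
+        // fills edx with the sign bit, so idiv sees a properly sign-extended
+        // edx:eax dividend.)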
+ e.mov(e.edx, e.eax); + e.sar(e.edx, 31); + e.idiv(e.ecx); + } + } else { + if (i.instr->flags & ARITHMETIC_UNSIGNED) { + if (i.src1.is_constant) { + e.mov(e.eax, i.src1.constant()); + } else { + e.mov(e.eax, i.src1); + } + // Zero upper bits. + e.xor(e.edx, e.edx); + e.div(i.src2); + } else { + if (i.src1.is_constant) { + e.mov(e.eax, i.src1.constant()); + } else { + e.mov(e.eax, i.src1); + } + // Set dx to sign bit of src1 (dx:ax = dx:ax / src). + e.mov(e.edx, e.eax); + e.sar(e.edx, 31); + e.idiv(i.src2); + } + } + e.mov(i.dest, e.eax); + if (clobbered_rcx) { + e.ReloadECX(); + } + e.ReloadEDX(); + } +}; +EMITTER(DIV_I64, MATCH(I, I64<>, I64<>>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + // NOTE: RDX clobbered. + bool clobbered_rcx = false; + if (i.src2.is_constant) { + XEASSERT(!i.src1.is_constant); + clobbered_rcx = true; + e.mov(e.rcx, i.src2.constant()); + if (i.instr->flags & ARITHMETIC_UNSIGNED) { + e.mov(e.rax, i.src1); + // Zero upper bits. + e.xor(e.rdx, e.rdx); + e.div(e.rcx); + } else { + e.mov(e.rax, i.src1); + // Set dx to sign bit of src1 (dx:ax = dx:ax / src). + e.mov(e.rdx, e.rax); + e.sar(e.rdx, 63); + e.idiv(e.rcx); + } + } else { + if (i.instr->flags & ARITHMETIC_UNSIGNED) { + if (i.src1.is_constant) { + e.mov(e.rax, i.src1.constant()); + } else { + e.mov(e.rax, i.src1); + } + // Zero upper bits. + e.xor(e.rdx, e.rdx); + e.div(i.src2); + } else { + if (i.src1.is_constant) { + e.mov(e.rax, i.src1.constant()); + } else { + e.mov(e.rax, i.src1); + } + // Set dx to sign bit of src1 (dx:ax = dx:ax / src). + e.mov(e.rdx, e.rax); + e.sar(e.rdx, 63); + e.idiv(i.src2); + } + } + e.mov(i.dest, e.rax); + if (clobbered_rcx) { + e.ReloadECX(); + } + e.ReloadEDX(); + } +}; +EMITTER(DIV_F32, MATCH(I, F32<>, F32<>>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + XEASSERT(!i.instr->flags); + e.vdivss(i.dest, i.src1, i.src2); + } +}; +EMITTER(DIV_F64, MATCH(I, F64<>, F64<>>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + XEASSERT(!i.instr->flags); + e.vdivsd(i.dest, i.src1, i.src2); + } +}; +EMITTER(DIV_V128, MATCH(I, V128<>, V128<>>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + XEASSERT(!i.instr->flags); + e.vdivps(i.dest, i.src1, i.src2); + } +}; +EMITTER_OPCODE_TABLE( + OPCODE_DIV, + DIV_I8, + DIV_I16, + DIV_I32, + DIV_I64, + DIV_F32, + DIV_F64, + DIV_V128); + + +// ============================================================================ +// OPCODE_MUL_ADD +// ============================================================================ +// d = 1 * 2 + 3 +// $0 = $1×$0 + $2 +// TODO(benvanik): use other forms (132/213/etc) to avoid register shuffling. 
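+// For reference, the FMA3 forms differ only in which operand is overwritten:
+//   vfmadd132ss a, b, c  ->  a = a*c + b
+//   vfmadd213ss a, b, c  ->  a = b*a + c
+//   vfmadd231ss a, b, c  ->  a = b*c + a
+// so selecting 132/231 when dest aliases src2/src3 could avoid the xmm0
+// staging done below.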
+// dest could be src2 or src3 - need to ensure it's not before overwriting dest +// perhaps use other 132/213/etc +EMITTER(MUL_ADD_F32, MATCH(I, F32<>, F32<>, F32<>>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + if (i.dest == i.src1) { + e.vfmadd213ss(i.dest, i.src2, i.src3); + } else { + if (i.dest != i.src2 && i.dest != i.src3) { + e.vmovss(i.dest, i.src1); + e.vfmadd213ss(i.dest, i.src2, i.src3); + } else { + e.vmovss(e.xmm0, i.src1); + e.vfmadd213ss(e.xmm0, i.src2, i.src3); + e.vmovss(i.dest, e.xmm0); + } + } + } +}; +EMITTER(MUL_ADD_F64, MATCH(I, F64<>, F64<>, F64<>>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + if (i.dest == i.src1) { + e.vfmadd213sd(i.dest, i.src2, i.src3); + } else { + if (i.dest != i.src2 && i.dest != i.src3) { + e.vmovsd(i.dest, i.src1); + e.vfmadd213sd(i.dest, i.src2, i.src3); + } else { + e.vmovsd(e.xmm0, i.src1); + e.vfmadd213sd(e.xmm0, i.src2, i.src3); + e.vmovsd(i.dest, e.xmm0); + } + } + } +}; +EMITTER(MUL_ADD_V128, MATCH(I, V128<>, V128<>, V128<>>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + if (i.dest == i.src1) { + e.vfmadd213ps(i.dest, i.src2, i.src3); + } else { + if (i.dest != i.src2 && i.dest != i.src3) { + e.vmovdqa(i.dest, i.src1); + e.vfmadd213ps(i.dest, i.src2, i.src3); + } else { + e.vmovdqa(e.xmm0, i.src1); + e.vfmadd213ps(e.xmm0, i.src2, i.src3); + e.vmovdqa(i.dest, e.xmm0); + } + } + } +}; +EMITTER_OPCODE_TABLE( + OPCODE_MUL_ADD, + MUL_ADD_F32, + MUL_ADD_F64, + MUL_ADD_V128); + + +// ============================================================================ +// OPCODE_MUL_SUB +// ============================================================================ +// d = 1 * 2 - 3 +// $0 = $2×$0 - $3 +// TODO(benvanik): use other forms (132/213/etc) to avoid register shuffling. +// dest could be src2 or src3 - need to ensure it's not before overwriting dest +// perhaps use other 132/213/etc +EMITTER(MUL_SUB_F32, MATCH(I, F32<>, F32<>, F32<>>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + if (i.dest == i.src1) { + e.vfmsub213ss(i.dest, i.src2, i.src3); + } else { + if (i.dest != i.src2 && i.dest != i.src3) { + e.vmovss(i.dest, i.src1); + e.vfmsub213ss(i.dest, i.src2, i.src3); + } else { + e.vmovss(e.xmm0, i.src1); + e.vfmsub213ss(e.xmm0, i.src2, i.src3); + e.vmovss(i.dest, e.xmm0); + } + } + } +}; +EMITTER(MUL_SUB_F64, MATCH(I, F64<>, F64<>, F64<>>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + if (i.dest == i.src1) { + e.vfmsub213sd(i.dest, i.src2, i.src3); + } else { + if (i.dest != i.src2 && i.dest != i.src3) { + e.vmovsd(i.dest, i.src1); + e.vfmsub213sd(i.dest, i.src2, i.src3); + } else { + e.vmovsd(e.xmm0, i.src1); + e.vfmsub213sd(e.xmm0, i.src2, i.src3); + e.vmovsd(i.dest, e.xmm0); + } + } + } +}; +EMITTER(MUL_SUB_V128, MATCH(I, V128<>, V128<>, V128<>>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + if (i.dest == i.src1) { + e.vfmsub213ps(i.dest, i.src2, i.src3); + } else { + if (i.dest != i.src2 && i.dest != i.src3) { + e.vmovdqa(i.dest, i.src1); + e.vfmsub213ps(i.dest, i.src2, i.src3); + } else { + e.vmovdqa(e.xmm0, i.src1); + e.vfmsub213ps(e.xmm0, i.src2, i.src3); + e.vmovdqa(i.dest, e.xmm0); + } + } + } +}; +EMITTER_OPCODE_TABLE( + OPCODE_MUL_SUB, + MUL_SUB_F32, + MUL_SUB_F64, + MUL_SUB_V128); + + +// ============================================================================ +// OPCODE_NEG +// ============================================================================ +// TODO(benvanik): put dest/src1 together. 
+template +void EmitNegXX(X64Emitter& e, const ARGS& i) { + SEQ::EmitUnaryOp( + e, i, + [](X64Emitter& e, const REG& dest_src) { e.neg(dest_src); }); +} +EMITTER(NEG_I8, MATCH(I, I8<>>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + EmitNegXX(e, i); + } +}; +EMITTER(NEG_I16, MATCH(I, I16<>>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + EmitNegXX(e, i); + } +}; +EMITTER(NEG_I32, MATCH(I, I32<>>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + EmitNegXX(e, i); + } +}; +EMITTER(NEG_I64, MATCH(I, I64<>>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + EmitNegXX(e, i); + } +}; +EMITTER(NEG_F32, MATCH(I, F32<>>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + e.vpxor(i.dest, i.src1, e.GetXmmConstPtr(XMMSignMaskPS)); + } +}; +EMITTER(NEG_F64, MATCH(I, F64<>>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + e.vpxor(i.dest, i.src1, e.GetXmmConstPtr(XMMSignMaskPD)); + } +}; +EMITTER(NEG_V128, MATCH(I, V128<>>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + XEASSERT(!i.instr->flags); + e.vpxor(i.dest, i.src1, e.GetXmmConstPtr(XMMSignMaskPS)); + } +}; +EMITTER_OPCODE_TABLE( + OPCODE_NEG, + NEG_I8, + NEG_I16, + NEG_I32, + NEG_I64, + NEG_F32, + NEG_F64, + NEG_V128); + + +// ============================================================================ +// OPCODE_ABS +// ============================================================================ +EMITTER(ABS_F32, MATCH(I, F32<>>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + e.vmovaps(e.xmm0, e.GetXmmConstPtr(XMMSignMaskPS)); + e.vpandn(i.dest, e.xmm0, i.src1); + } +}; +EMITTER(ABS_F64, MATCH(I, F64<>>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + e.vmovaps(e.xmm0, e.GetXmmConstPtr(XMMSignMaskPD)); + e.vpandn(i.dest, e.xmm0, i.src1); + } +}; +EMITTER(ABS_V128, MATCH(I, V128<>>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + e.vmovaps(e.xmm0, e.GetXmmConstPtr(XMMSignMaskPS)); + e.vpandn(i.dest, e.xmm0, i.src1); + } +}; +EMITTER_OPCODE_TABLE( + OPCODE_ABS, + ABS_F32, + ABS_F64, + ABS_V128); + + +// ============================================================================ +// OPCODE_SQRT +// ============================================================================ +EMITTER(SQRT_F32, MATCH(I, F32<>>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + e.vsqrtss(i.dest, i.src1); + } +}; +EMITTER(SQRT_F64, MATCH(I, F64<>>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + e.vsqrtsd(i.dest, i.src1); + } +}; +EMITTER(SQRT_V128, MATCH(I, V128<>>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + e.vsqrtps(i.dest, i.src1); + } +}; +EMITTER_OPCODE_TABLE( + OPCODE_SQRT, + SQRT_F32, + SQRT_F64, + SQRT_V128); + + +// ============================================================================ +// OPCODE_RSQRT +// ============================================================================ +EMITTER(RSQRT_F32, MATCH(I, F32<>>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + e.vrsqrtss(i.dest, i.src1); + } +}; +EMITTER(RSQRT_F64, MATCH(I, F64<>>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + e.vcvtsd2ss(i.dest, i.src1); + e.vrsqrtss(i.dest, i.dest); + e.vcvtss2sd(i.dest, i.dest); + } +}; +EMITTER(RSQRT_V128, MATCH(I, V128<>>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + e.vrsqrtps(i.dest, i.src1); + } +}; +EMITTER_OPCODE_TABLE( + OPCODE_RSQRT, + RSQRT_F32, + RSQRT_F64, + RSQRT_V128); + + +// 
============================================================================ +// OPCODE_POW2 +// ============================================================================ + + +// ============================================================================ +// OPCODE_LOG2 +// ============================================================================ + + +// ============================================================================ +// OPCODE_DOT_PRODUCT_3 +// ============================================================================ +EMITTER(DOT_PRODUCT_3_V128, MATCH(I, V128<>, V128<>>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + // http://msdn.microsoft.com/en-us/library/bb514054(v=vs.90).aspx + // TODO(benvanik): verify ordering + // TODO(benvanik): apparently this is very slow - find alternative? + e.vdpps(i.dest, i.src1, i.src2, B01110001); + } +}; +EMITTER_OPCODE_TABLE( + OPCODE_DOT_PRODUCT_3, + DOT_PRODUCT_3_V128); + + +// ============================================================================ +// OPCODE_DOT_PRODUCT_4 +// ============================================================================ +EMITTER(DOT_PRODUCT_4_V128, MATCH(I, V128<>, V128<>>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + // http://msdn.microsoft.com/en-us/library/bb514054(v=vs.90).aspx + // TODO(benvanik): verify ordering + // TODO(benvanik): apparently this is very slow - find alternative? + e.vdpps(i.dest, i.src1, i.src2, B11110001); + } +}; +EMITTER_OPCODE_TABLE( + OPCODE_DOT_PRODUCT_4, + DOT_PRODUCT_4_V128); + + +// ============================================================================ +// OPCODE_AND +// ============================================================================ +// TODO(benvanik): put dest/src1|2 together. +template +void EmitAndXX(X64Emitter& e, const ARGS& i) { + SEQ::EmitCommutativeBinaryOp( + e, i, + [](X64Emitter& e, const REG& dest_src, const REG& src) { e.and(dest_src, src); }, + [](X64Emitter& e, const REG& dest_src, int32_t constant) { e.and(dest_src, constant); }); +} +EMITTER(AND_I8, MATCH(I, I8<>, I8<>>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + EmitAndXX(e, i); + } +}; +EMITTER(AND_I16, MATCH(I, I16<>, I16<>>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + EmitAndXX(e, i); + } +}; +EMITTER(AND_I32, MATCH(I, I32<>, I32<>>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + EmitAndXX(e, i); + } +}; +EMITTER(AND_I64, MATCH(I, I64<>, I64<>>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + EmitAndXX(e, i); + } +}; +EMITTER(AND_V128, MATCH(I, V128<>, V128<>>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + e.vpand(i.dest, i.src1, i.src2); + } +}; +EMITTER_OPCODE_TABLE( + OPCODE_AND, + AND_I8, + AND_I16, + AND_I32, + AND_I64, + AND_V128); + + +// ============================================================================ +// OPCODE_OR +// ============================================================================ +// TODO(benvanik): put dest/src1|2 together. 
+template +void EmitOrXX(X64Emitter& e, const ARGS& i) { + SEQ::EmitCommutativeBinaryOp( + e, i, + [](X64Emitter& e, const REG& dest_src, const REG& src) { e.or(dest_src, src); }, + [](X64Emitter& e, const REG& dest_src, int32_t constant) { e.or(dest_src, constant); }); +} +EMITTER(OR_I8, MATCH(I, I8<>, I8<>>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + EmitOrXX(e, i); + } +}; +EMITTER(OR_I16, MATCH(I, I16<>, I16<>>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + EmitOrXX(e, i); + } +}; +EMITTER(OR_I32, MATCH(I, I32<>, I32<>>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + EmitOrXX(e, i); + } +}; +EMITTER(OR_I64, MATCH(I, I64<>, I64<>>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + EmitOrXX(e, i); + } +}; +EMITTER(OR_V128, MATCH(I, V128<>, V128<>>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + e.vpor(i.dest, i.src1, i.src2); + } +}; +EMITTER_OPCODE_TABLE( + OPCODE_OR, + OR_I8, + OR_I16, + OR_I32, + OR_I64, + OR_V128); + + +// ============================================================================ +// OPCODE_XOR +// ============================================================================ +// TODO(benvanik): put dest/src1|2 together. +template +void EmitXorXX(X64Emitter& e, const ARGS& i) { + SEQ::EmitCommutativeBinaryOp( + e, i, + [](X64Emitter& e, const REG& dest_src, const REG& src) { e.xor(dest_src, src); }, + [](X64Emitter& e, const REG& dest_src, int32_t constant) { e.xor(dest_src, constant); }); +} +EMITTER(XOR_I8, MATCH(I, I8<>, I8<>>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + EmitXorXX(e, i); + } +}; +EMITTER(XOR_I16, MATCH(I, I16<>, I16<>>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + EmitXorXX(e, i); + } +}; +EMITTER(XOR_I32, MATCH(I, I32<>, I32<>>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + EmitXorXX(e, i); + } +}; +EMITTER(XOR_I64, MATCH(I, I64<>, I64<>>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + EmitXorXX(e, i); + } +}; +EMITTER(XOR_V128, MATCH(I, V128<>, V128<>>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + e.vpxor(i.dest, i.src1, i.src2); + } +}; +EMITTER_OPCODE_TABLE( + OPCODE_XOR, + XOR_I8, + XOR_I16, + XOR_I32, + XOR_I64, + XOR_V128); + + +// ============================================================================ +// OPCODE_NOT +// ============================================================================ +// TODO(benvanik): put dest/src1 together. +template +void EmitNotXX(X64Emitter& e, const ARGS& i) { + SEQ::EmitUnaryOp( + e, i, + [](X64Emitter& e, const REG& dest_src) { e.not(dest_src); }); +} +EMITTER(NOT_I8, MATCH(I, I8<>>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + EmitNotXX(e, i); + } +}; +EMITTER(NOT_I16, MATCH(I, I16<>>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + EmitNotXX(e, i); + } +}; +EMITTER(NOT_I32, MATCH(I, I32<>>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + EmitNotXX(e, i); + } +}; +EMITTER(NOT_I64, MATCH(I, I64<>>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + EmitNotXX(e, i); + } +}; +EMITTER(NOT_V128, MATCH(I, V128<>>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + // dest = src ^ 0xFFFF... 
+ e.vpxor(i.dest, i.src1, e.GetXmmConstPtr(XMMOne)); + } +}; +EMITTER_OPCODE_TABLE( + OPCODE_NOT, + NOT_I8, + NOT_I16, + NOT_I32, + NOT_I64, + NOT_V128); + + +// ============================================================================ +// OPCODE_SHL +// ============================================================================ +// TODO(benvanik): optimize common shifts. +template +void EmitShlXX(X64Emitter& e, const ARGS& i) { + SEQ::EmitAssociativeBinaryOp( + e, i, + [](X64Emitter& e, const REG& dest_src, const Reg8& src) { + if (dest_src.getBit() == 64) { + e.shlx(dest_src.cvt64(), dest_src.cvt64(), src.cvt64()); + } else { + e.shlx(dest_src.cvt32(), dest_src.cvt32(), src.cvt32()); + } + }, [](X64Emitter& e, const REG& dest_src, int8_t constant) { + e.shl(dest_src, constant); + }); +} +EMITTER(SHL_I8, MATCH(I, I8<>, I8<>>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + EmitShlXX(e, i); + } +}; +EMITTER(SHL_I16, MATCH(I, I16<>, I8<>>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + EmitShlXX(e, i); + } +}; +EMITTER(SHL_I32, MATCH(I, I32<>, I8<>>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + EmitShlXX(e, i); + } +}; +EMITTER(SHL_I64, MATCH(I, I64<>, I8<>>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + EmitShlXX(e, i); + } +}; +EMITTER_OPCODE_TABLE( + OPCODE_SHL, + SHL_I8, + SHL_I16, + SHL_I32, + SHL_I64); + + +// ============================================================================ +// OPCODE_SHR +// ============================================================================ +// TODO(benvanik): optimize common shifts. +template +void EmitShrXX(X64Emitter& e, const ARGS& i) { + SEQ::EmitAssociativeBinaryOp( + e, i, + [](X64Emitter& e, const REG& dest_src, const Reg8& src) { + if (dest_src.getBit() == 64) { + e.shrx(dest_src.cvt64(), dest_src.cvt64(), src.cvt64()); + } else if (dest_src.getBit() == 32) { + e.shrx(dest_src.cvt32(), dest_src.cvt32(), src.cvt32()); + } else { + e.movzx(dest_src.cvt32(), dest_src); + e.shrx(dest_src.cvt32(), dest_src.cvt32(), src.cvt32()); + } + }, [](X64Emitter& e, const REG& dest_src, int8_t constant) { + e.shr(dest_src, constant); + }); +} +EMITTER(SHR_I8, MATCH(I, I8<>, I8<>>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + EmitShrXX(e, i); + } +}; +EMITTER(SHR_I16, MATCH(I, I16<>, I8<>>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + EmitShrXX(e, i); + } +}; +EMITTER(SHR_I32, MATCH(I, I32<>, I8<>>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + EmitShrXX(e, i); + } +}; +EMITTER(SHR_I64, MATCH(I, I64<>, I8<>>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + EmitShrXX(e, i); + } +}; +EMITTER_OPCODE_TABLE( + OPCODE_SHR, + SHR_I8, + SHR_I16, + SHR_I32, + SHR_I64); + + +// ============================================================================ +// OPCODE_SHA +// ============================================================================ +// TODO(benvanik): optimize common shifts. 
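+// Note: the BMI2 shift forms used here (shlx/shrx/sarx) take the count in an
+// ordinary register and leave EFLAGS untouched, so the variable-shift path
+// avoids the classic cl-based shifts and the ReloadECX they would require;
+// only the constant path falls back to shl/shr/sar.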
+template +void EmitSarXX(X64Emitter& e, const ARGS& i) { + SEQ::EmitAssociativeBinaryOp( + e, i, + [](X64Emitter& e, const REG& dest_src, const Reg8& src) { + if (dest_src.getBit() == 64) { + e.sarx(dest_src.cvt64(), dest_src.cvt64(), src.cvt64()); + } else if (dest_src.getBit() == 32) { + e.sarx(dest_src.cvt32(), dest_src.cvt32(), src.cvt32()); + } else { + e.movsx(dest_src.cvt32(), dest_src); + e.sarx(dest_src.cvt32(), dest_src.cvt32(), src.cvt32()); + } + }, [](X64Emitter& e, const REG& dest_src, int8_t constant) { + e.sar(dest_src, constant); + }); +} +EMITTER(SHA_I8, MATCH(I, I8<>, I8<>>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + EmitSarXX(e, i); + } +}; +EMITTER(SHA_I16, MATCH(I, I16<>, I8<>>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + EmitSarXX(e, i); + } +}; +EMITTER(SHA_I32, MATCH(I, I32<>, I8<>>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + EmitSarXX(e, i); + } +}; +EMITTER(SHA_I64, MATCH(I, I64<>, I8<>>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + EmitSarXX(e, i); + } +}; +EMITTER_OPCODE_TABLE( + OPCODE_SHA, + SHA_I8, + SHA_I16, + SHA_I32, + SHA_I64); + + +// ============================================================================ +// OPCODE_VECTOR_SHL +// ============================================================================ +EMITTER(VECTOR_SHL_V128, MATCH(I, V128<>, V128<>>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + switch (i.instr->flags) { + case INT32_TYPE: + // src shift mask may have values >31, and x86 sets to zero when + // that happens so we mask. + e.vandps(e.xmm0, i.src2, e.GetXmmConstPtr(XMMShiftMaskPS)); + e.vpsllvd(i.dest, i.src1, e.xmm0); + break; + default: + XEASSERTALWAYS(); + break; + } + } +}; +EMITTER_OPCODE_TABLE( + OPCODE_VECTOR_SHL, + VECTOR_SHL_V128); + + +// ============================================================================ +// OPCODE_VECTOR_SHR +// ============================================================================ +EMITTER(VECTOR_SHR_V128, MATCH(I, V128<>, V128<>>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + switch (i.instr->flags) { + case INT32_TYPE: + // src shift mask may have values >31, and x86 sets to zero when + // that happens so we mask. + e.vandps(e.xmm0, i.src2, e.GetXmmConstPtr(XMMShiftMaskPS)); + e.vpsrlvd(i.dest, i.src1, e.xmm0); + break; + default: + XEASSERTALWAYS(); + break; + } + } +}; +EMITTER_OPCODE_TABLE( + OPCODE_VECTOR_SHR, + VECTOR_SHR_V128); + + +// ============================================================================ +// OPCODE_VECTOR_SHA +// ============================================================================ +EMITTER(VECTOR_SHA_V128, MATCH(I, V128<>, V128<>>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + switch (i.instr->flags) { + case INT32_TYPE: + // src shift mask may have values >31, and x86 sets to zero when + // that happens so we mask. + e.vandps(e.xmm0, i.src2, e.GetXmmConstPtr(XMMShiftMaskPS)); + e.vpsravd(i.dest, i.src1, e.xmm0); + break; + default: + XEASSERTALWAYS(); + break; + } + } +}; +EMITTER_OPCODE_TABLE( + OPCODE_VECTOR_SHA, + VECTOR_SHA_V128); + + +// ============================================================================ +// OPCODE_ROTATE_LEFT +// ============================================================================ +// TODO(benvanik): put dest/src1 together, src2 in cl. +template +void EmitRotateLeftXX(X64Emitter& e, const ARGS& i) { + if (i.src2.is_constant) { + // Constant rotate. 
+ if (i.dest != i.src1) { + if (i.src1.is_constant) { + e.mov(i.dest, i.src1.constant()); + } else { + e.mov(i.dest, i.src1); + } + } + e.rol(i.dest, i.src2.constant()); + } else { + // Variable rotate. + if (i.src2.reg().getIdx() != e.cl.getIdx()) { + e.mov(e.cl, i.src2); + } + if (i.dest != i.src1) { + if (i.src1.is_constant) { + e.mov(i.dest, i.src1.constant()); + } else { + e.mov(i.dest, i.src1); + } + } + e.rol(i.dest, e.cl); + e.ReloadECX(); + } +} +EMITTER(ROTATE_LEFT_I8, MATCH(I, I8<>, I8<>>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + EmitRotateLeftXX(e, i); + } +}; +EMITTER(ROTATE_LEFT_I16, MATCH(I, I16<>, I8<>>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + EmitRotateLeftXX(e, i); + } +}; +EMITTER(ROTATE_LEFT_I32, MATCH(I, I32<>, I8<>>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + EmitRotateLeftXX(e, i); + } +}; +EMITTER(ROTATE_LEFT_I64, MATCH(I, I64<>, I8<>>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + EmitRotateLeftXX(e, i); + } +}; +EMITTER_OPCODE_TABLE( + OPCODE_ROTATE_LEFT, + ROTATE_LEFT_I8, + ROTATE_LEFT_I16, + ROTATE_LEFT_I32, + ROTATE_LEFT_I64); + + +// ============================================================================ +// OPCODE_BYTE_SWAP +// ============================================================================ +// TODO(benvanik): put dest/src1 together. +EMITTER(BYTE_SWAP_I16, MATCH(I, I16<>>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + EmitUnaryOp( + e, i, + [](X64Emitter& e, const Reg16& dest_src) { e.ror(dest_src, 8); }); + } +}; +EMITTER(BYTE_SWAP_I32, MATCH(I, I32<>>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + EmitUnaryOp( + e, i, + [](X64Emitter& e, const Reg32& dest_src) { e.bswap(dest_src); }); + } +}; +EMITTER(BYTE_SWAP_I64, MATCH(I, I64<>>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + EmitUnaryOp( + e, i, + [](X64Emitter& e, const Reg64& dest_src) { e.bswap(dest_src); }); + } +}; +EMITTER(BYTE_SWAP_V128, MATCH(I, V128<>>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + // TODO(benvanik): find a way to do this without the memory load. + e.vpshufb(i.dest, i.src1, e.GetXmmConstPtr(XMMByteSwapMask)); + } +}; +EMITTER_OPCODE_TABLE( + OPCODE_BYTE_SWAP, + BYTE_SWAP_I16, + BYTE_SWAP_I32, + BYTE_SWAP_I64, + BYTE_SWAP_V128); + + +// ============================================================================ +// OPCODE_CNTLZ +// ============================================================================ +EMITTER(CNTLZ_I8, MATCH(I, I8<>>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + // No 8bit lzcnt, so do 16 and sub 8. 
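+    // Worked example: src1 = 0x01 zero-extends to 0x0001, lzcnt16 gives 15,
+    // and subtracting 8 yields 7, the correct 8-bit leading-zero count
+    // (src1 = 0 gives 16 - 8 = 8, as expected).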
+ e.movzx(i.dest.reg().cvt16(), i.src1); + e.lzcnt(i.dest.reg().cvt16(), i.dest.reg().cvt16()); + e.sub(i.dest, 8); + } +}; +EMITTER(CNTLZ_I16, MATCH(I, I16<>>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + e.lzcnt(i.dest.reg().cvt32(), i.src1); + } +}; +EMITTER(CNTLZ_I32, MATCH(I, I32<>>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + e.lzcnt(i.dest.reg().cvt32(), i.src1); + } +}; +EMITTER(CNTLZ_I64, MATCH(I, I64<>>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + e.lzcnt(i.dest.reg().cvt64(), i.src1); + } +}; +EMITTER_OPCODE_TABLE( + OPCODE_CNTLZ, + CNTLZ_I8, + CNTLZ_I16, + CNTLZ_I32, + CNTLZ_I64); + + +// ============================================================================ +// OPCODE_INSERT +// ============================================================================ + + +// ============================================================================ +// OPCODE_EXTRACT +// ============================================================================ +// TODO(benvanik): sequence extract/splat: +// v0.i32 = extract v0.v128, 0 +// v0.v128 = splat v0.i32 +// This can be a single broadcast. +EMITTER(EXTRACT_I8, MATCH(I, V128<>, I8<>>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + if (i.src2.is_constant) { + e.vpextrb(i.dest, i.src1, i.src2.constant()); + } else { + XEASSERTALWAYS(); + } + } +}; +EMITTER(EXTRACT_I16, MATCH(I, V128<>, I8<>>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + if (i.src2.is_constant) { + e.vpextrw(i.dest, i.src1, i.src2.constant()); + } else { + XEASSERTALWAYS(); + } + } +}; +EMITTER(EXTRACT_I32, MATCH(I, V128<>, I8<>>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + static vec128_t extract_table_32[4] = { + vec128b( 3, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), + vec128b( 7, 6, 5, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), + vec128b(11, 10, 9, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), + vec128b(15, 14, 13, 12, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), + }; + if (i.src2.is_constant) { + e.vpextrd(i.dest, i.src1, i.src2.constant()); + } else { + // Get teh desired word in xmm0, then extract that. + // TODO(benvanik): find a better way, this sequence is terrible. + e.xor(e.rax, e.rax); + e.mov(e.al, i.src2); + e.and(e.al, 0x03); + e.shl(e.al, 4); + e.mov(e.rdx, reinterpret_cast(extract_table_32)); + e.vmovaps(e.xmm0, e.ptr[e.rdx + e.rax]); + e.vpshufb(e.xmm0, i.src1, e.xmm0); + e.vpextrd(i.dest, e.xmm0, 0); + e.ReloadEDX(); + } + } +}; +EMITTER(EXTRACT_F32, MATCH(I, V128<>, I8<>>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + if (i.src2.is_constant) { + e.vextractps(i.dest, i.src1, i.src2.constant()); + } else { + XEASSERTALWAYS(); + } + } +}; +EMITTER_OPCODE_TABLE( + OPCODE_EXTRACT, + EXTRACT_I8, + EXTRACT_I16, + EXTRACT_I32, + EXTRACT_F32); + + +// ============================================================================ +// OPCODE_SPLAT +// ============================================================================ +EMITTER(SPLAT_I8, MATCH(I, I8<>>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + if (i.src1.is_constant) { + // TODO(benvanik): faster constant splats. + e.mov(e.al, i.src1.constant()); + e.vmovd(e.xmm0, e.eax); + e.vpbroadcastb(i.dest, e.xmm0); + } else { + e.vmovd(e.xmm0, i.src1.reg().cvt32()); + e.vpbroadcastb(i.dest, e.xmm0); + } + } +}; +EMITTER(SPLAT_I16, MATCH(I, I16<>>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + if (i.src1.is_constant) { + // TODO(benvanik): faster constant splats. 
+ e.mov(e.ax, i.src1.constant()); + e.vmovd(e.xmm0, e.eax); + e.vpbroadcastw(i.dest, e.xmm0); + } else { + e.vmovd(e.xmm0, i.src1.reg().cvt32()); + e.vpbroadcastw(i.dest, e.xmm0); + } + } +}; +EMITTER(SPLAT_I32, MATCH(I, I32<>>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + if (i.src1.is_constant) { + // TODO(benvanik): faster constant splats. + e.mov(e.eax, i.src1.constant()); + e.vmovd(e.xmm0, e.eax); + e.vpbroadcastd(i.dest, e.xmm0); + } else { + e.vmovd(e.xmm0, i.src1); + e.vpbroadcastd(i.dest, e.xmm0); + } + } +}; +EMITTER(SPLAT_F32, MATCH(I, F32<>>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + if (i.src1.is_constant) { + // TODO(benvanik): faster constant splats. + e.mov(e.eax, i.src1.value->constant.i32); + e.vmovd(e.xmm0, e.eax); + e.vbroadcastss(i.dest, e.xmm0); + } else { + e.vbroadcastss(i.dest, i.src1); + } + } +}; +EMITTER_OPCODE_TABLE( + OPCODE_SPLAT, + SPLAT_I8, + SPLAT_I16, + SPLAT_I32, + SPLAT_F32); + + +// ============================================================================ +// OPCODE_PERMUTE +// ============================================================================ +EMITTER(PERMUTE_I32, MATCH(I, I32<>, V128<>, V128<>>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + // Permute words between src2 and src3. + // TODO(benvanik): check src3 for zero. if 0, we can use pshufb. + if (i.src1.is_constant) { + uint32_t control = i.src1.constant(); + // Shuffle things into the right places in dest & xmm0, + // then we blend them together. + uint32_t src_control = + (((control >> 24) & 0x3) << 0) | + (((control >> 16) & 0x3) << 2) | + (((control >> 8) & 0x3) << 4) | + (((control >> 0) & 0x3) << 6); + uint32_t blend_control = + (((control >> 26) & 0x1) << 0) | + (((control >> 18) & 0x1) << 1) | + (((control >> 10) & 0x1) << 2) | + (((control >> 2) & 0x1) << 3); + if (i.dest != i.src3) { + e.vpshufd(i.dest, i.src2, src_control); + e.vpshufd(e.xmm0, i.src3, src_control); + e.vpblendd(i.dest, e.xmm0, blend_control); + } else { + e.vmovaps(e.xmm0, i.src3); + e.vpshufd(i.dest, i.src2, src_control); + e.vpshufd(e.xmm0, e.xmm0, src_control); + e.vpblendd(i.dest, e.xmm0, blend_control); + } + } else { + // Permute by non-constant. + XEASSERTALWAYS(); + } + } +}; +EMITTER(PERMUTE_V128, MATCH(I, V128<>, V128<>, V128<>>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + // TODO(benvanik): find out how to do this with only one temp register! + // Permute bytes between src2 and src3. + if (i.src3.value->IsConstantZero()) { + // Permuting with src2/zero, so just shuffle/mask. + if (i.src2.value->IsConstantZero()) { + // src2 & src3 are zero, so result will always be zero. + e.vpxor(i.dest, i.dest); + } else { + // Control mask needs to be shuffled. + e.vpshufb(e.xmm0, i.src1, e.GetXmmConstPtr(XMMByteSwapMask)); + if (i.src2.is_constant) { + e.LoadConstantXmm(i.dest, i.src2.constant()); + e.vpshufb(i.dest, i.dest, e.xmm0); + } else { + e.vpshufb(i.dest, i.src2, e.xmm0); + } + // Build a mask with values in src2 having 0 and values in src3 having 1. + e.vpcmpgtb(e.xmm0, e.xmm0, e.GetXmmConstPtr(XMMPermuteControl15)); + e.vpandn(i.dest, e.xmm0, i.dest); + } + } else { + // General permute. + // Control mask needs to be shuffled. + if (i.src1.is_constant) { + e.LoadConstantXmm(e.xmm2, i.src1.constant()); + e.vpshufb(e.xmm2, e.xmm2, e.GetXmmConstPtr(XMMByteSwapMask)); + } else { + e.vpshufb(e.xmm2, i.src1, e.GetXmmConstPtr(XMMByteSwapMask)); + } + // Build a mask with values in src2 having 0 and values in src3 having 1. 
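+      // (Control bytes 0..15 select from src2 and 16..31 select from src3;
+      // comparing each byte against 15, assuming XMMPermuteControl15 holds
+      // sixteen 15s, gives 0xFF exactly in the lanes where src3 should win,
+      // and that mask is what drives the vpblendvb at the end.)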
+ e.vpcmpgtb(i.dest, e.xmm2, e.GetXmmConstPtr(XMMPermuteControl15)); + Xmm src2_shuf = e.xmm0; + if (i.src2.value->IsConstantZero()) { + e.vpxor(src2_shuf, src2_shuf); + } else if (i.src2.is_constant) { + e.LoadConstantXmm(src2_shuf, i.src2.constant()); + e.vpshufb(src2_shuf, src2_shuf, e.xmm2); + } else { + e.vpshufb(src2_shuf, i.src2, e.xmm2); + } + Xmm src3_shuf = e.xmm1; + if (i.src3.value->IsConstantZero()) { + e.vpxor(src3_shuf, src3_shuf); + } else if (i.src3.is_constant) { + e.LoadConstantXmm(src3_shuf, i.src3.constant()); + e.vpshufb(src3_shuf, src3_shuf, e.xmm2); + } else { + e.vpshufb(src3_shuf, i.src3, e.xmm2); + } + e.vpblendvb(i.dest, src2_shuf, src3_shuf, i.dest); + } + } +}; +EMITTER_OPCODE_TABLE( + OPCODE_PERMUTE, + PERMUTE_I32, + PERMUTE_V128); + + +// ============================================================================ +// OPCODE_SWIZZLE +// ============================================================================ +EMITTER(SWIZZLE, MATCH(I, V128<>, OffsetOp>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + auto element_type = i.instr->flags; + if (element_type == INT8_TYPE) { + XEASSERTALWAYS(); + } else if (element_type == INT16_TYPE) { + XEASSERTALWAYS(); + } else if (element_type == INT32_TYPE || element_type == FLOAT32_TYPE) { + uint8_t swizzle_mask = static_cast(i.src2.value); + swizzle_mask = + (((swizzle_mask >> 6) & 0x3) << 0) | + (((swizzle_mask >> 4) & 0x3) << 2) | + (((swizzle_mask >> 2) & 0x3) << 4) | + (((swizzle_mask >> 0) & 0x3) << 6); + e.vpshufd(i.dest, i.src1, swizzle_mask); + } else if (element_type == INT64_TYPE || element_type == FLOAT64_TYPE) { + XEASSERTALWAYS(); + } else { + XEASSERTALWAYS(); + } + } +}; +EMITTER_OPCODE_TABLE( + OPCODE_SWIZZLE, + SWIZZLE); + + +// ============================================================================ +// OPCODE_PACK +// ============================================================================ +EMITTER(PACK, MATCH(I, V128<>>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + switch (i.instr->flags) { + case PACK_TYPE_D3DCOLOR: + EmitD3DCOLOR(e, i); + break; + case PACK_TYPE_FLOAT16_2: + EmitFLOAT16_2(e, i); + break; + case PACK_TYPE_FLOAT16_4: + EmitFLOAT16_4(e, i); + break; + case PACK_TYPE_SHORT_2: + EmitSHORT_2(e, i); + break; + case PACK_TYPE_S8_IN_16_LO: + EmitS8_IN_16_LO(e, i); + break; + case PACK_TYPE_S8_IN_16_HI: + EmitS8_IN_16_HI(e, i); + break; + case PACK_TYPE_S16_IN_32_LO: + EmitS16_IN_32_LO(e, i); + break; + case PACK_TYPE_S16_IN_32_HI: + EmitS16_IN_32_HI(e, i); + break; + default: XEASSERTALWAYS(); break; + } + } + static void EmitD3DCOLOR(X64Emitter& e, const EmitArgType& i) { + XEASSERTALWAYS(); + } + static void EmitFLOAT16_2(X64Emitter& e, const EmitArgType& i) { + XEASSERTALWAYS(); + } + static void EmitFLOAT16_4(X64Emitter& e, const EmitArgType& i) { + XEASSERTALWAYS(); + } + static void EmitSHORT_2(X64Emitter& e, const EmitArgType& i) { + XEASSERTALWAYS(); + } + static void EmitS8_IN_16_LO(X64Emitter& e, const EmitArgType& i) { + XEASSERTALWAYS(); + } + static void EmitS8_IN_16_HI(X64Emitter& e, const EmitArgType& i) { + XEASSERTALWAYS(); + } + static void EmitS16_IN_32_LO(X64Emitter& e, const EmitArgType& i) { + XEASSERTALWAYS(); + } + static void EmitS16_IN_32_HI(X64Emitter& e, const EmitArgType& i) { + XEASSERTALWAYS(); + } +}; +EMITTER_OPCODE_TABLE( + OPCODE_PACK, + PACK); + + +// ============================================================================ +// OPCODE_UNPACK +// 
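The general PERMUTE_V128 path above implements a byte-wise permute over the 32-byte concatenation of src2 and src3: vpshufb pulls the selected bytes out of each source, the vpcmpgtb against 15 builds a per-byte src2/src3 selector, and vpblendvb merges the two. Ignoring the control byte-swap (a guest-endianness detail handled via XMMByteSwapMask), the intended result is equivalent to this scalar sketch; the helper name is invented:

#include <cstdint>

// dest[i] takes byte (control[i] & 0x1F) of the 32-byte value src2:src3,
// with bytes 0-15 coming from src2 and bytes 16-31 from src3.
inline void PermuteBytes(const uint8_t control[16], const uint8_t src2[16],
                         const uint8_t src3[16], uint8_t dest[16]) {
  for (int i = 0; i < 16; ++i) {
    const uint8_t c = control[i] & 0x1F;
    dest[i] = (c < 16) ? src2[c] : src3[c - 16];
  }
}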
============================================================================ +EMITTER(UNPACK, MATCH(I, V128<>>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + switch (i.instr->flags) { + case PACK_TYPE_D3DCOLOR: + EmitD3DCOLOR(e, i); + break; + case PACK_TYPE_FLOAT16_2: + EmitFLOAT16_2(e, i); + break; + case PACK_TYPE_FLOAT16_4: + EmitFLOAT16_4(e, i); + break; + case PACK_TYPE_SHORT_2: + EmitSHORT_2(e, i); + break; + case PACK_TYPE_S8_IN_16_LO: + EmitS8_IN_16_LO(e, i); + break; + case PACK_TYPE_S8_IN_16_HI: + EmitS8_IN_16_HI(e, i); + break; + case PACK_TYPE_S16_IN_32_LO: + EmitS16_IN_32_LO(e, i); + break; + case PACK_TYPE_S16_IN_32_HI: + EmitS16_IN_32_HI(e, i); + break; + default: XEASSERTALWAYS(); break; + } + } + static void EmitD3DCOLOR(X64Emitter& e, const EmitArgType& i) { + // ARGB (WXYZ) -> RGBA (XYZW) + // XMLoadColor + // int32_t src = (int32_t)src1.iw; + // dest.f4[0] = (float)((src >> 16) & 0xFF) * (1.0f / 255.0f); + // dest.f4[1] = (float)((src >> 8) & 0xFF) * (1.0f / 255.0f); + // dest.f4[2] = (float)(src & 0xFF) * (1.0f / 255.0f); + // dest.f4[3] = (float)((src >> 24) & 0xFF) * (1.0f / 255.0f); + + // src = ZZYYXXWW + // unpack to 000000ZZ,000000YY,000000XX,000000WW + e.vpshufb(i.dest, i.src1, e.GetXmmConstPtr(XMMUnpackD3DCOLOR)); + // mult by 1/255 + e.vmulps(i.dest, e.GetXmmConstPtr(XMMOneOver255)); + } + static void Unpack_FLOAT16_2(void* raw_context, __m128& v) { + uint32_t src = v.m128_i32[3]; + v.m128_f32[0] = DirectX::PackedVector::XMConvertHalfToFloat((uint16_t)src); + v.m128_f32[1] = DirectX::PackedVector::XMConvertHalfToFloat((uint16_t)(src >> 16)); + v.m128_f32[2] = 0.0f; + v.m128_f32[3] = 1.0f; + } + static void EmitFLOAT16_2(X64Emitter& e, const EmitArgType& i) { + // 1 bit sign, 5 bit exponent, 10 bit mantissa + // D3D10 half float format + // TODO(benvanik): http://blogs.msdn.com/b/chuckw/archive/2012/09/11/directxmath-f16c-and-fma.aspx + // Use _mm_cvtph_ps -- requires very modern processors (SSE5+) + // Unpacking half floats: http://fgiesen.wordpress.com/2012/03/28/half-to-float-done-quic/ + // Packing half floats: https://gist.github.com/rygorous/2156668 + // Load source, move from tight pack of X16Y16.... to X16...Y16... + // Also zero out the high end. + // TODO(benvanik): special case constant unpacks that just get 0/1/etc. + + // sx = src.iw >> 16; + // sy = src.iw & 0xFFFF; + // dest = { XMConvertHalfToFloat(sx), + // XMConvertHalfToFloat(sy), + // 0.0, + // 1.0 }; + auto addr = e.StashXmm(i.src1); + e.lea(e.rdx, addr); + e.CallNative(Unpack_FLOAT16_2); + e.vmovaps(i.dest, addr); + } + static void EmitFLOAT16_4(X64Emitter& e, const EmitArgType& i) { + // Could be shared with FLOAT16_2. + XEASSERTALWAYS(); + } + static void EmitSHORT_2(X64Emitter& e, const EmitArgType& i) { + // (VD.x) = 3.0 + (VB.x>>16)*2^-22 + // (VD.y) = 3.0 + (VB.x)*2^-22 + // (VD.z) = 0.0 + // (VD.w) = 1.0 + + // XMLoadShortN2 plus 3,3,0,3 (for some reason) + // src is (xx,xx,xx,VALUE) + // (VALUE,VALUE,VALUE,VALUE) + if (i.src1.is_constant) { + if (i.src1.value->IsConstantZero()) { + e.vpxor(i.dest, i.dest); + } else { + // TODO(benvanik): check other common constants. + e.LoadConstantXmm(i.dest, i.src1.constant()); + e.vbroadcastss(i.dest, i.src1); + } + } else { + e.vbroadcastss(i.dest, i.src1); + } + // (VALUE&0xFFFF,VALUE&0xFFFF0000,0,0) + e.vandps(i.dest, e.GetXmmConstPtr(XMMMaskX16Y16)); + // Sign extend. + e.vxorps(i.dest, e.GetXmmConstPtr(XMMFlipX16Y16)); + // Convert int->float. + e.cvtpi2ps(i.dest, e.StashXmm(i.dest)); + // 0x8000 to undo sign. 
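EmitFLOAT16_2 above defers to XMConvertHalfToFloat through a CallNative helper. For readers without DirectXMath at hand, a self-contained software conversion of the 1-5-10 half format looks roughly like this; it is a reference only, not what the emitter calls:

#include <cstdint>
#include <cstring>

// Converts an IEEE 754 binary16 value to binary32. Handles zeros, denormals,
// normals, and inf/NaN.
inline float HalfToFloat(uint16_t h) {
  const uint32_t sign = static_cast<uint32_t>(h & 0x8000u) << 16;
  const uint32_t exp = (h >> 10) & 0x1Fu;
  uint32_t mant = h & 0x3FFu;
  uint32_t bits;
  if (exp == 0) {
    if (mant == 0) {
      bits = sign;  // signed zero
    } else {
      // Denormal: renormalize the mantissa into 1.xxxxx form.
      uint32_t e = 113;  // 127 - 14
      while (!(mant & 0x400u)) {
        mant <<= 1;
        --e;
      }
      bits = sign | (e << 23) | ((mant & 0x3FFu) << 13);
    }
  } else if (exp == 31) {
    bits = sign | 0x7F800000u | (mant << 13);  // infinity or NaN
  } else {
    bits = sign | ((exp + (127 - 15)) << 23) | (mant << 13);
  }
  float f;
  std::memcpy(&f, &bits, sizeof(f));
  return f;
}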
+ e.vaddps(i.dest, e.GetXmmConstPtr(XMMFixX16Y16)); + // Normalize. + e.vmulps(i.dest, e.GetXmmConstPtr(XMMNormalizeX16Y16)); + // Clamp. + e.vmaxps(i.dest, e.GetXmmConstPtr(XMMNegativeOne)); + // Add 3,3,0,1. + e.vaddps(i.dest, e.GetXmmConstPtr(XMM3301)); + } + static void EmitS8_IN_16_LO(X64Emitter& e, const EmitArgType& i) { + XEASSERTALWAYS(); + } + static void EmitS8_IN_16_HI(X64Emitter& e, const EmitArgType& i) { + XEASSERTALWAYS(); + } + static void EmitS16_IN_32_LO(X64Emitter& e, const EmitArgType& i) { + XEASSERTALWAYS(); + } + static void EmitS16_IN_32_HI(X64Emitter& e, const EmitArgType& i) { + XEASSERTALWAYS(); + } +}; +EMITTER_OPCODE_TABLE( + OPCODE_UNPACK, + UNPACK); + + +// ============================================================================ +// OPCODE_COMPARE_EXCHANGE +// ============================================================================ + + +// ============================================================================ +// OPCODE_ATOMIC_EXCHANGE +// ============================================================================ +// Note that the address we use here is a real, host address! +// This is weird, and should be fixed. +template +void EmitAtomicExchangeXX(X64Emitter& e, const ARGS& i) { + if (i.dest == i.src1) { + e.mov(e.rax, i.src1); + if (i.dest != i.src2) { + if (i.src2.is_constant) { + e.mov(i.dest, i.src2.constant()); + } else { + e.mov(i.dest, i.src2); + } + } + e.lock(); + e.xchg(e.dword[e.rax], i.dest); + } else { + if (i.dest != i.src2) { + if (i.src2.is_constant) { + e.mov(i.dest, i.src2.constant()); + } else { + e.mov(i.dest, i.src2); + } + } + e.lock(); + e.xchg(e.dword[i.src1.reg()], i.dest); + } +} +EMITTER(ATOMIC_EXCHANGE_I8, MATCH(I, I64<>, I8<>>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + EmitAtomicExchangeXX(e, i); + } +}; +EMITTER(ATOMIC_EXCHANGE_I16, MATCH(I, I64<>, I16<>>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + EmitAtomicExchangeXX(e, i); + } +}; +EMITTER(ATOMIC_EXCHANGE_I32, MATCH(I, I64<>, I32<>>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + EmitAtomicExchangeXX(e, i); + } +}; +EMITTER(ATOMIC_EXCHANGE_I64, MATCH(I, I64<>, I64<>>)) { + static void Emit(X64Emitter& e, const EmitArgType& i) { + EmitAtomicExchangeXX(e, i); + } +}; +EMITTER_OPCODE_TABLE( + OPCODE_ATOMIC_EXCHANGE, + ATOMIC_EXCHANGE_I8, + ATOMIC_EXCHANGE_I16, + ATOMIC_EXCHANGE_I32, + ATOMIC_EXCHANGE_I64); + + +// ============================================================================ +// OPCODE_ATOMIC_ADD +// ============================================================================ + + +// ============================================================================ +// OPCODE_ATOMIC_SUB +// ============================================================================ + + + + +//SEQUENCE(ADD_ADD_BRANCH, MATCH( +// I, I32<>, I32C<>>, +// I, I32, I32C<>>, +// I)) { +// static void Emit(X64Emitter& e, const EmitArgs& _) { +// } +//}; + + + +void alloy::backend::x64::RegisterSequences() { + #define REGISTER_EMITTER_OPCODE_TABLE(opcode) Register_##opcode() + REGISTER_EMITTER_OPCODE_TABLE(OPCODE_COMMENT); + REGISTER_EMITTER_OPCODE_TABLE(OPCODE_NOP); + REGISTER_EMITTER_OPCODE_TABLE(OPCODE_SOURCE_OFFSET); + REGISTER_EMITTER_OPCODE_TABLE(OPCODE_DEBUG_BREAK); + REGISTER_EMITTER_OPCODE_TABLE(OPCODE_DEBUG_BREAK_TRUE); + REGISTER_EMITTER_OPCODE_TABLE(OPCODE_TRAP); + REGISTER_EMITTER_OPCODE_TABLE(OPCODE_TRAP_TRUE); + REGISTER_EMITTER_OPCODE_TABLE(OPCODE_CALL); + REGISTER_EMITTER_OPCODE_TABLE(OPCODE_CALL_TRUE); 
+ REGISTER_EMITTER_OPCODE_TABLE(OPCODE_CALL_INDIRECT); + REGISTER_EMITTER_OPCODE_TABLE(OPCODE_CALL_INDIRECT_TRUE); + REGISTER_EMITTER_OPCODE_TABLE(OPCODE_CALL_EXTERN); + REGISTER_EMITTER_OPCODE_TABLE(OPCODE_RETURN); + REGISTER_EMITTER_OPCODE_TABLE(OPCODE_RETURN_TRUE); + REGISTER_EMITTER_OPCODE_TABLE(OPCODE_SET_RETURN_ADDRESS); + REGISTER_EMITTER_OPCODE_TABLE(OPCODE_BRANCH); + REGISTER_EMITTER_OPCODE_TABLE(OPCODE_BRANCH_TRUE); + REGISTER_EMITTER_OPCODE_TABLE(OPCODE_BRANCH_FALSE); + REGISTER_EMITTER_OPCODE_TABLE(OPCODE_ASSIGN); + REGISTER_EMITTER_OPCODE_TABLE(OPCODE_CAST); + REGISTER_EMITTER_OPCODE_TABLE(OPCODE_ZERO_EXTEND); + REGISTER_EMITTER_OPCODE_TABLE(OPCODE_SIGN_EXTEND); + REGISTER_EMITTER_OPCODE_TABLE(OPCODE_TRUNCATE); + REGISTER_EMITTER_OPCODE_TABLE(OPCODE_CONVERT); + REGISTER_EMITTER_OPCODE_TABLE(OPCODE_ROUND); + REGISTER_EMITTER_OPCODE_TABLE(OPCODE_VECTOR_CONVERT_I2F); + REGISTER_EMITTER_OPCODE_TABLE(OPCODE_VECTOR_CONVERT_F2I); + REGISTER_EMITTER_OPCODE_TABLE(OPCODE_LOAD_VECTOR_SHL); + REGISTER_EMITTER_OPCODE_TABLE(OPCODE_LOAD_VECTOR_SHR); + REGISTER_EMITTER_OPCODE_TABLE(OPCODE_LOAD_CLOCK); + REGISTER_EMITTER_OPCODE_TABLE(OPCODE_LOAD_LOCAL); + REGISTER_EMITTER_OPCODE_TABLE(OPCODE_STORE_LOCAL); + REGISTER_EMITTER_OPCODE_TABLE(OPCODE_LOAD_CONTEXT); + REGISTER_EMITTER_OPCODE_TABLE(OPCODE_STORE_CONTEXT); + REGISTER_EMITTER_OPCODE_TABLE(OPCODE_LOAD); + REGISTER_EMITTER_OPCODE_TABLE(OPCODE_STORE); + REGISTER_EMITTER_OPCODE_TABLE(OPCODE_PREFETCH); + REGISTER_EMITTER_OPCODE_TABLE(OPCODE_MAX); + REGISTER_EMITTER_OPCODE_TABLE(OPCODE_MIN); + REGISTER_EMITTER_OPCODE_TABLE(OPCODE_SELECT); + REGISTER_EMITTER_OPCODE_TABLE(OPCODE_IS_TRUE); + REGISTER_EMITTER_OPCODE_TABLE(OPCODE_IS_FALSE); + REGISTER_EMITTER_OPCODE_TABLE(OPCODE_COMPARE_EQ); + REGISTER_EMITTER_OPCODE_TABLE(OPCODE_COMPARE_NE); + REGISTER_EMITTER_OPCODE_TABLE(OPCODE_COMPARE_SLT); + REGISTER_EMITTER_OPCODE_TABLE(OPCODE_COMPARE_SLE); + REGISTER_EMITTER_OPCODE_TABLE(OPCODE_COMPARE_SGT); + REGISTER_EMITTER_OPCODE_TABLE(OPCODE_COMPARE_SGE); + REGISTER_EMITTER_OPCODE_TABLE(OPCODE_COMPARE_ULT); + REGISTER_EMITTER_OPCODE_TABLE(OPCODE_COMPARE_ULE); + REGISTER_EMITTER_OPCODE_TABLE(OPCODE_COMPARE_UGT); + REGISTER_EMITTER_OPCODE_TABLE(OPCODE_COMPARE_UGE); + REGISTER_EMITTER_OPCODE_TABLE(OPCODE_DID_CARRY); + REGISTER_EMITTER_OPCODE_TABLE(OPCODE_DID_OVERFLOW); + //REGISTER_EMITTER_OPCODE_TABLE(OPCODE_DID_SATURATE); + REGISTER_EMITTER_OPCODE_TABLE(OPCODE_VECTOR_COMPARE_EQ); + REGISTER_EMITTER_OPCODE_TABLE(OPCODE_VECTOR_COMPARE_SGT); + REGISTER_EMITTER_OPCODE_TABLE(OPCODE_VECTOR_COMPARE_SGE); + //REGISTER_EMITTER_OPCODE_TABLE(OPCODE_VECTOR_COMPARE_UGT); + //REGISTER_EMITTER_OPCODE_TABLE(OPCODE_VECTOR_COMPARE_UGE); + REGISTER_EMITTER_OPCODE_TABLE(OPCODE_ADD); + REGISTER_EMITTER_OPCODE_TABLE(OPCODE_ADD_CARRY); + //REGISTER_EMITTER_OPCODE_TABLE(OPCODE_VECTOR_ADD); + REGISTER_EMITTER_OPCODE_TABLE(OPCODE_SUB); + REGISTER_EMITTER_OPCODE_TABLE(OPCODE_MUL); + REGISTER_EMITTER_OPCODE_TABLE(OPCODE_MUL_HI); + REGISTER_EMITTER_OPCODE_TABLE(OPCODE_DIV); + REGISTER_EMITTER_OPCODE_TABLE(OPCODE_MUL_ADD); + REGISTER_EMITTER_OPCODE_TABLE(OPCODE_MUL_SUB); + REGISTER_EMITTER_OPCODE_TABLE(OPCODE_NEG); + REGISTER_EMITTER_OPCODE_TABLE(OPCODE_ABS); + REGISTER_EMITTER_OPCODE_TABLE(OPCODE_SQRT); + REGISTER_EMITTER_OPCODE_TABLE(OPCODE_RSQRT); + //REGISTER_EMITTER_OPCODE_TABLE(OPCODE_POW2); + //REGISTER_EMITTER_OPCODE_TABLE(OPCODE_LOG2); + REGISTER_EMITTER_OPCODE_TABLE(OPCODE_DOT_PRODUCT_3); + REGISTER_EMITTER_OPCODE_TABLE(OPCODE_DOT_PRODUCT_4); + 
REGISTER_EMITTER_OPCODE_TABLE(OPCODE_AND); + REGISTER_EMITTER_OPCODE_TABLE(OPCODE_OR); + REGISTER_EMITTER_OPCODE_TABLE(OPCODE_XOR); + REGISTER_EMITTER_OPCODE_TABLE(OPCODE_NOT); + REGISTER_EMITTER_OPCODE_TABLE(OPCODE_SHL); + REGISTER_EMITTER_OPCODE_TABLE(OPCODE_SHR); + REGISTER_EMITTER_OPCODE_TABLE(OPCODE_SHA); + REGISTER_EMITTER_OPCODE_TABLE(OPCODE_VECTOR_SHL); + REGISTER_EMITTER_OPCODE_TABLE(OPCODE_VECTOR_SHR); + REGISTER_EMITTER_OPCODE_TABLE(OPCODE_VECTOR_SHA); + REGISTER_EMITTER_OPCODE_TABLE(OPCODE_ROTATE_LEFT); + REGISTER_EMITTER_OPCODE_TABLE(OPCODE_BYTE_SWAP); + REGISTER_EMITTER_OPCODE_TABLE(OPCODE_CNTLZ); + //REGISTER_EMITTER_OPCODE_TABLE(OPCODE_INSERT); + REGISTER_EMITTER_OPCODE_TABLE(OPCODE_EXTRACT); + REGISTER_EMITTER_OPCODE_TABLE(OPCODE_SPLAT); + REGISTER_EMITTER_OPCODE_TABLE(OPCODE_PERMUTE); + REGISTER_EMITTER_OPCODE_TABLE(OPCODE_SWIZZLE); + REGISTER_EMITTER_OPCODE_TABLE(OPCODE_PACK); + REGISTER_EMITTER_OPCODE_TABLE(OPCODE_UNPACK); + //REGISTER_EMITTER_OPCODE_TABLE(OPCODE_COMPARE_EXCHANGE); + REGISTER_EMITTER_OPCODE_TABLE(OPCODE_ATOMIC_EXCHANGE); + //REGISTER_EMITTER_OPCODE_TABLE(OPCODE_ATOMIC_ADD); + //REGISTER_EMITTER_OPCODE_TABLE(OPCODE_ATOMIC_SUB); +} + +bool alloy::backend::x64::SelectSequence(X64Emitter& e, const Instr* i, const Instr** new_tail) { + const InstrKey key(i); + const auto its = sequence_table.equal_range(key); + for (auto it = its.first; it != its.second; ++it) { + if (it->second(e, i, new_tail)) { + return true; + } + } + XELOGE("No sequence match for variant %s", i->opcode->name); + return false; +} diff --git a/src/alloy/backend/x64/lowering/lowering_sequences.h b/src/alloy/backend/x64/x64_sequences.h similarity index 59% rename from src/alloy/backend/x64/lowering/lowering_sequences.h rename to src/alloy/backend/x64/x64_sequences.h index 634d52f47..5a77e9987 100644 --- a/src/alloy/backend/x64/lowering/lowering_sequences.h +++ b/src/alloy/backend/x64/x64_sequences.h @@ -2,32 +2,32 @@ ****************************************************************************** * Xenia : Xbox 360 Emulator Research Project * ****************************************************************************** - * Copyright 2013 Ben Vanik. All rights reserved. * + * Copyright 2014 Ben Vanik. All rights reserved. * * Released under the BSD license - see LICENSE in the root for more details. 
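For readers following the new structure: each EMITTER_OPCODE_TABLE expands into a Register_OPCODE_* function, and SelectSequence looks the instruction up by an InstrKey that encodes the opcode plus operand types. The real key and table types live in x64_sequence.inl (not shown in this hunk); the sketch below uses stand-in types to show the shape of the registration and lookup, and is not the actual implementation:

#include <cstdint>
#include <unordered_map>

// Stand-ins: the real code keys on InstrKey and passes X64Emitter&/hir::Instr*.
typedef bool (*SequenceSelectFn)(void* emitter, const void* instr,
                                 const void** new_tail);

static std::unordered_multimap<uint32_t, SequenceSelectFn> sequence_table;

// Called from each generated Register_OPCODE_* function, once per variant.
inline void RegisterVariant(uint32_t key, SequenceSelectFn fn) {
  sequence_table.emplace(key, fn);
}

// Mirrors SelectSequence: try every variant registered under this key and
// take the first matcher that accepts the instruction.
inline bool Select(uint32_t key, void* emitter, const void* instr,
                   const void** new_tail) {
  auto range = sequence_table.equal_range(key);
  for (auto it = range.first; it != range.second; ++it) {
    if (it->second(emitter, instr, new_tail)) {
      return true;
    }
  }
  return false;  // the caller logs "No sequence match" and fails
}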
* ****************************************************************************** */ -#ifndef ALLOY_BACKEND_X64_X64_LOWERING_LOWERING_SEQUENCES_H_ -#define ALLOY_BACKEND_X64_X64_LOWERING_LOWERING_SEQUENCES_H_ +#ifndef ALLOY_BACKEND_X64_X64_SEQUENCES_H_ +#define ALLOY_BACKEND_X64_X64_SEQUENCES_H_ #include -#include +XEDECLARECLASS2(alloy, hir, Instr); namespace alloy { namespace backend { namespace x64 { -namespace lowering { -class LoweringTable; - -void RegisterSequences(LoweringTable* table); +class X64Emitter; + + +void RegisterSequences(); +bool SelectSequence(X64Emitter& e, const hir::Instr* i, const hir::Instr** new_tail); -} // namespace lowering } // namespace x64 } // namespace backend } // namespace alloy -#endif // ALLOY_BACKEND_X64_X64_LOWERING_LOWERING_SEQUENCES_H_ +#endif // ALLOY_BACKEND_X64_X64_SEQUENCES_H_ diff --git a/src/alloy/backend/x64/lowering/tracers.cc b/src/alloy/backend/x64/x64_tracers.cc similarity index 96% rename from src/alloy/backend/x64/lowering/tracers.cc rename to src/alloy/backend/x64/x64_tracers.cc index f1c18f882..0ebb699cb 100644 --- a/src/alloy/backend/x64/lowering/tracers.cc +++ b/src/alloy/backend/x64/x64_tracers.cc @@ -7,7 +7,7 @@ ****************************************************************************** */ -#include +#include #include #include @@ -15,19 +15,14 @@ using namespace alloy; using namespace alloy::backend::x64; -using namespace alloy::backend::x64::lowering; using namespace alloy::runtime; namespace alloy { namespace backend { namespace x64 { -namespace lowering { - -#define IFLUSH() -#define IPRINT -#define DFLUSH() -#define DPRINT +#define ITRACE 0 +#define DTRACE 0 #define TARGET_THREAD 1 @@ -36,6 +31,16 @@ namespace lowering { #define DFLUSH() fflush(stdout) #define DPRINT DFLUSH(); if (thread_state->thread_id() == TARGET_THREAD) printf +uint32_t GetTracingMode() { + uint32_t mode = 0; +#if ITRACE + mode |= TRACING_INSTR; +#endif // ITRACE +#if DTRACE + mode |= TRACING_DATA; +#endif // DTRACE + return mode; +} void TraceString(void* raw_context, const char* str) { auto thread_state = *((ThreadState**)raw_context); @@ -190,7 +195,6 @@ void TraceMemoryStoreV128(void* raw_context, uint64_t address, __m128 value) { } -} // namespace lowering } // namespace x64 } // namespace backend } // namespace alloy diff --git a/src/alloy/backend/x64/lowering/tracers.h b/src/alloy/backend/x64/x64_tracers.h similarity index 89% rename from src/alloy/backend/x64/lowering/tracers.h rename to src/alloy/backend/x64/x64_tracers.h index 7201b4f25..64c788ff3 100644 --- a/src/alloy/backend/x64/lowering/tracers.h +++ b/src/alloy/backend/x64/x64_tracers.h @@ -7,8 +7,8 @@ ****************************************************************************** */ -#ifndef ALLOY_BACKEND_X64_X64_LOWERING_TRACERS_H_ -#define ALLOY_BACKEND_X64_X64_LOWERING_TRACERS_H_ +#ifndef ALLOY_BACKEND_X64_X64_TRACERS_H_ +#define ALLOY_BACKEND_X64_X64_TRACERS_H_ #include @@ -33,7 +33,15 @@ namespace alloy { namespace backend { namespace x64 { class X64Emitter; -namespace lowering { + +enum TracingMode { + TRACING_INSTR = (1 << 1), + TRACING_DATA = (1 << 2), +}; + +uint32_t GetTracingMode(); +inline bool IsTracingInstr() { return (GetTracingMode() & TRACING_INSTR) != 0; } +inline bool IsTracingData() { return (GetTracingMode() & TRACING_DATA) != 0; } void TraceString(void* raw_context, const char* str); @@ -69,10 +77,9 @@ void TraceMemoryStoreF32(void* raw_context, uint64_t address, __m128 value); void TraceMemoryStoreF64(void* raw_context, uint64_t address, __m128 value); void 
TraceMemoryStoreV128(void* raw_context, uint64_t address, __m128 value); -} // namespace lowering } // namespace x64 } // namespace backend } // namespace alloy -#endif // ALLOY_BACKEND_X64_X64_LOWERING_TRACERS_H_ +#endif // ALLOY_BACKEND_X64_X64_TRACERS_H_ diff --git a/src/alloy/compiler/passes/constant_propagation_pass.cc b/src/alloy/compiler/passes/constant_propagation_pass.cc index 03a514a94..a481d18af 100644 --- a/src/alloy/compiler/passes/constant_propagation_pass.cc +++ b/src/alloy/compiler/passes/constant_propagation_pass.cc @@ -368,6 +368,13 @@ int ConstantPropagationPass::Run(HIRBuilder* builder) { i->Remove(); } break; + case OPCODE_CNTLZ: + if (i->src1.value->IsConstant()) { + v->set_zero(v->type); + v->CountLeadingZeros(i->src1.value->constant); + i->Remove(); + } + break; // TODO(benvanik): INSERT/EXTRACT // TODO(benvanik): SPLAT/PERMUTE/SWIZZLE case OPCODE_SPLAT: diff --git a/src/alloy/compiler/passes/context_promotion_pass.cc b/src/alloy/compiler/passes/context_promotion_pass.cc index a5123486b..c880c4f0e 100644 --- a/src/alloy/compiler/passes/context_promotion_pass.cc +++ b/src/alloy/compiler/passes/context_promotion_pass.cc @@ -9,6 +9,8 @@ #include +#include + #include #include @@ -20,6 +22,10 @@ using namespace alloy::hir; using namespace alloy::runtime; +DEFINE_bool(store_all_context_values, false, + "Don't strip dead context stores to aid in debugging."); + + ContextPromotionPass::ContextPromotionPass() : context_values_size_(0), context_values_(0), CompilerPass() { @@ -69,10 +75,12 @@ int ContextPromotionPass::Run(HIRBuilder* builder) { } // Remove all dead stores. - block = builder->first_block(); - while (block) { - RemoveDeadStoresBlock(block); - block = block->next; + if (!FLAGS_store_all_context_values) { + block = builder->first_block(); + while (block) { + RemoveDeadStoresBlock(block); + block = block->next; + } } return 0; diff --git a/src/alloy/compiler/passes/control_flow_analysis_pass.cc b/src/alloy/compiler/passes/control_flow_analysis_pass.cc index bff651fe2..9c1abf118 100644 --- a/src/alloy/compiler/passes/control_flow_analysis_pass.cc +++ b/src/alloy/compiler/passes/control_flow_analysis_pass.cc @@ -13,12 +13,6 @@ #include #include -#pragma warning(push) -#pragma warning(disable : 4244) -#pragma warning(disable : 4267) -#include -#pragma warning(pop) - using namespace alloy; using namespace alloy::backend; using namespace alloy::compiler; diff --git a/src/alloy/compiler/passes/data_flow_analysis_pass.cc b/src/alloy/compiler/passes/data_flow_analysis_pass.cc index b4e1ea644..2a44f076d 100644 --- a/src/alloy/compiler/passes/data_flow_analysis_pass.cc +++ b/src/alloy/compiler/passes/data_flow_analysis_pass.cc @@ -36,8 +36,6 @@ DataFlowAnalysisPass::~DataFlowAnalysisPass() { } int DataFlowAnalysisPass::Run(HIRBuilder* builder) { - auto arena = builder->arena(); - // Linearize blocks so that we can detect cycles and propagate dependencies. 
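The new OPCODE_CNTLZ case in the constant propagation hunk above folds count-leading-zeros when the operand is constant (via Value::CountLeadingZeros). As a plain reference for what that fold computes on a 32-bit operand, a standalone helper, not the Value method itself:

#include <cstdint>

// Returns the number of leading zero bits in a 32-bit value; 32 for zero.
inline uint8_t CountLeadingZeros32(uint32_t v) {
  uint8_t n = 0;
  while (n < 32 && !(v & 0x80000000u)) {
    v <<= 1;
    ++n;
  }
  return n;
}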
uint32_t block_count = LinearizeBlocks(builder); diff --git a/src/alloy/compiler/passes/register_allocation_pass.cc b/src/alloy/compiler/passes/register_allocation_pass.cc index 20b4b021f..a89e1415c 100644 --- a/src/alloy/compiler/passes/register_allocation_pass.cc +++ b/src/alloy/compiler/passes/register_allocation_pass.cc @@ -9,6 +9,8 @@ #include +#include + using namespace alloy; using namespace alloy::backend; using namespace alloy::compiler; @@ -16,180 +18,135 @@ using namespace alloy::compiler::passes; using namespace alloy::hir; -struct RegisterAllocationPass::Interval { - uint32_t start_ordinal; - uint32_t end_ordinal; - Value* value; - RegisterFreeUntilSet* free_until_set; - // TODO(benvanik): reduce to offsets in arena? - struct Interval* next; - struct Interval* prev; +#define ASSERT_NO_CYCLES 0 - void AddToList(Interval** list_head) { - auto list_next = *list_head; - this->next = list_next; - if (list_next) { - list_next->prev = this; - } - *list_head = this; - } - - void InsertIntoList(Interval** list_head) { - auto it = *list_head; - while (it) { - if (it->start_ordinal > this->start_ordinal) { - // Went too far. Insert before this interval. - this->prev = it->prev; - this->next = it; - if (it->prev) { - it->prev->next = this; - } else { - *list_head = this; - } - it->prev = this; - return; - } - if (!it->next) { - // None found, add at tail. - it->next = this; - this->prev = it; - return; - } - it = it->next; - } - } - - void RemoveFromList(Interval** list_head) { - if (this->next) { - this->next->prev = this->prev; - } - if (this->prev) { - this->prev->next = this->next; - } else { - *list_head = this->next; - } - this->next = this->prev = NULL; - } -}; - -struct RegisterAllocationPass::Intervals { - Interval* unhandled; - Interval* active; - Interval* handled; -}; RegisterAllocationPass::RegisterAllocationPass( const MachineInfo* machine_info) : machine_info_(machine_info), CompilerPass() { - // Initialize register sets. The values of these will be - // cleared before use, so just the structure is required. + // Initialize register sets. + // TODO(benvanik): rewrite in a way that makes sense - this is terrible. 
auto mi_sets = machine_info->register_sets; - xe_zero_struct(&free_until_sets_, sizeof(free_until_sets_)); + xe_zero_struct(&usage_sets_, sizeof(usage_sets_)); uint32_t n = 0; while (mi_sets[n].count) { auto& mi_set = mi_sets[n]; - auto free_until_set = new RegisterFreeUntilSet(); - free_until_sets_.all_sets[n] = free_until_set; - free_until_set->count = mi_set.count; - free_until_set->set = &mi_set; + auto usage_set = new RegisterSetUsage(); + usage_sets_.all_sets[n] = usage_set; + usage_set->count = mi_set.count; + usage_set->set = &mi_set; if (mi_set.types & MachineInfo::RegisterSet::INT_TYPES) { - free_until_sets_.int_set = free_until_set; + usage_sets_.int_set = usage_set; } if (mi_set.types & MachineInfo::RegisterSet::FLOAT_TYPES) { - free_until_sets_.float_set = free_until_set; + usage_sets_.float_set = usage_set; } if (mi_set.types & MachineInfo::RegisterSet::VEC_TYPES) { - free_until_sets_.vec_set = free_until_set; + usage_sets_.vec_set = usage_set; } n++; } } RegisterAllocationPass::~RegisterAllocationPass() { - for (size_t n = 0; n < XECOUNT(free_until_sets_.all_sets); n++) { - if (!free_until_sets_.all_sets[n]) { + for (size_t n = 0; n < XECOUNT(usage_sets_.all_sets); n++) { + if (!usage_sets_.all_sets[n]) { break; } - delete free_until_sets_.all_sets[n]; + delete usage_sets_.all_sets[n]; } } int RegisterAllocationPass::Run(HIRBuilder* builder) { - // A (probably broken) implementation of a linear scan register allocator - // that operates directly on SSA form: - // http://www.christianwimmer.at/Publications/Wimmer10a/Wimmer10a.pdf - // - // Requirements: - // - SSA form (single definition for variables) - // - block should be in linear order: - // - dominators *should* come before (a->b->c) - // - loop block sequences *should not* have intervening non-loop blocks + // Simple per-block allocator that operates on SSA form. + // Registers do not move across blocks, though this could be + // optimized with some intra-block analysis (dominators/etc). + // Really, it'd just be nice to have someone who knew what they + // were doing lower SSA and do this right. - auto arena = scratch_arena(); - - // Renumber everything. uint32_t block_ordinal = 0; uint32_t instr_ordinal = 0; auto block = builder->first_block(); while (block) { // Sequential block ordinals. block->ordinal = block_ordinal++; + + // Reset all state. + PrepareBlockState(); + + // Renumber all instructions in the block. This is required so that + // we can sort the usage pointers below. auto instr = block->instr_head; while (instr) { // Sequential global instruction ordinals. instr->ordinal = instr_ordinal++; instr = instr->next; } - block = block->next; - } - // Compute all liveness ranges by walking forward through all - // blocks/instructions and checking the last use of each value. This lets - // us know the exact order in (block#,instr#) form, which is then used to - // setup the range. - // TODO(benvanik): ideally we would have a list of all values and not have - // to keep walking instructions over and over. - Interval* prev_interval = NULL; - Interval* head_interval = NULL; - block = builder->first_block(); - while (block) { - auto instr = block->instr_head; + instr = block->instr_head; while (instr) { - // Compute last-use for the dest value. - // Since we know all values of importance must be defined, we can avoid - // having to check every value and just look at dest. 
const OpcodeInfo* info = instr->opcode; - if (GET_OPCODE_SIG_TYPE_DEST(info->signature) == OPCODE_SIG_TYPE_V) { - auto v = instr->dest; - if (!v->last_use) { - ComputeLastUse(v); - } + uint32_t signature = info->signature; - // Add interval. - auto interval = arena->Alloc(); - interval->start_ordinal = instr->ordinal; - interval->end_ordinal = v->last_use ? - v->last_use->ordinal : v->def->ordinal; - interval->value = v; - interval->next = NULL; - interval->prev = prev_interval; - if (prev_interval) { - prev_interval->next = interval; - } else { - head_interval = interval; - } - prev_interval = interval; + // Update the register use heaps. + AdvanceUses(instr); - // Grab register set to use. - // We do this now so it's only once per interval, and it makes it easy - // to only compare intervals that overlap their sets. - if (v->type <= INT64_TYPE) { - interval->free_until_set = free_until_sets_.int_set; - } else if (v->type <= FLOAT64_TYPE) { - interval->free_until_set = free_until_sets_.float_set; + // Check sources for retirement. If any are unused after this instruction + // we can eagerly evict them to speed up register allocation. + // Since X64 (and other platforms) can often take advantage of dest==src1 + // register mappings we track retired src1 so that we can attempt to + // reuse it. + // NOTE: these checks require that the usage list be sorted! + bool has_preferred_reg = false; + RegAssignment preferred_reg = { 0 }; + if (GET_OPCODE_SIG_TYPE_SRC1(signature) == OPCODE_SIG_TYPE_V && + !instr->src1.value->IsConstant()) { + if (!instr->src1_use->next) { + // Pull off preferred register. We will try to reuse this for the + // dest. + has_preferred_reg = true; + preferred_reg = instr->src1.value->reg; + XEASSERTNOTNULL(preferred_reg.set); + } + } + + if (GET_OPCODE_SIG_TYPE_DEST(signature) == OPCODE_SIG_TYPE_V) { + // Must not have been set already. + XEASSERTNULL(instr->dest->reg.set); + + // Sort the usage list. We depend on this in future uses of this variable. + SortUsageList(instr->dest); + + // If we have a preferred register, use that. + // This way we can help along the stupid X86 two opcode instructions. + bool allocated; + if (has_preferred_reg) { + // Allocate with the given preferred register. If the register is in + // the wrong set it will not be reused. + allocated = TryAllocateRegister(instr->dest, preferred_reg); } else { - interval->free_until_set = free_until_sets_.vec_set; + // Allocate a register. This will either reserve a free one or + // spill and reuse an active one. + allocated = TryAllocateRegister(instr->dest); + } + if (!allocated) { + // Failed to allocate register -- need to spill and try again. + // We spill only those registers we aren't using. + if (!SpillOneRegister(builder, instr->dest->type)) { + // Unable to spill anything - this shouldn't happen. + XELOGE("Unable to spill any registers"); + XEASSERTALWAYS(); + return 1; + } + + // Demand allocation. + if (!TryAllocateRegister(instr->dest)) { + // Boned. + XELOGE("Register allocation failed"); + XEASSERTALWAYS(); + return 1; + } } } @@ -198,228 +155,266 @@ int RegisterAllocationPass::Run(HIRBuilder* builder) { block = block->next; } - // Now have a sorted list of intervals, minus their ending ordinals. - Intervals intervals; - intervals.unhandled = head_interval; - intervals.active = intervals.handled = NULL; - while (intervals.unhandled) { - // Get next unhandled interval. 
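The allocation loop above boils down to a three-step decision per dest value, condensed here with stand-in callables; the real code calls TryAllocateRegister and SpillOneRegister on hir::Value:

// Sketch only: try_preferred/try_any/spill_one stand in for the pass methods.
template <typename TryPreferred, typename TryAny, typename SpillOne>
bool AllocateDest(bool src1_dies_here, TryPreferred try_preferred,
                  TryAny try_any, SpillOne spill_one) {
  // 1) If src1 dies at this instruction, try to reuse its register so the
  //    emitter can use x64 two-operand (dest == src1) forms without a mov.
  if (src1_dies_here && try_preferred()) {
    return true;
  }
  // 2) Otherwise take any free register in the value's set.
  if (try_any()) {
    return true;
  }
  // 3) Nothing free: spill a live value, after which the retry must succeed.
  return spill_one() && try_any();
}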
- auto current = intervals.unhandled; - intervals.unhandled = intervals.unhandled->next; - current->RemoveFromList(&intervals.unhandled); - - // Check for intervals in active that are handled or inactive. - auto it = intervals.active; - while (it) { - auto next = it->next; - if (it->end_ordinal <= current->start_ordinal) { - // Move from active to handled. - it->RemoveFromList(&intervals.active); - it->AddToList(&intervals.handled); - } - it = next; - } - - // Find a register for current. - if (!TryAllocateFreeReg(current, intervals)) { - // Failed, spill. - AllocateBlockedReg(builder, current, intervals); - } - - if (current->value->reg.index!= -1) { - // Add current to active. - current->AddToList(&intervals.active); - } - } - return 0; } -void RegisterAllocationPass::ComputeLastUse(Value* value) { - // TODO(benvanik): compute during construction? - // Note that this list isn't sorted (unfortunately), so we have to scan - // them all. - uint32_t max_ordinal = 0; - Value::Use* last_use = NULL; - auto use = value->use_head; - while (use) { - if (!last_use || use->instr->ordinal >= max_ordinal) { - last_use = use; - max_ordinal = use->instr->ordinal; - } - use = use->next; - } - value->last_use = last_use ? last_use->instr : NULL; -} - -bool RegisterAllocationPass::TryAllocateFreeReg( - Interval* current, Intervals& intervals) { - // Reset all registers in the set to unused. - auto free_until_set = current->free_until_set; - for (uint32_t n = 0; n < free_until_set->count; n++) { - free_until_set->pos[n] = -1; - } - - // Mark all active registers as used. - // TODO(benvanik): keep some kind of bitvector so that this is instant? - auto it = intervals.active; - while (it) { - if (it->free_until_set == free_until_set) { - free_until_set->pos[it->value->reg.index] = 0; - } - it = it->next; - } - - uint32_t max_pos = 0; - for (uint32_t n = 0; n < free_until_set->count; n++) { - if (max_pos == -1) { - max_pos = n; - } else { - if (free_until_set->pos[n] > free_until_set->pos[max_pos]) { - max_pos = n; +void RegisterAllocationPass::DumpUsage(const char* name) { +#if 0 + fprintf(stdout, "\n%s:\n", name); + for (size_t i = 0; i < XECOUNT(usage_sets_.all_sets); ++i) { + auto usage_set = usage_sets_.all_sets[i]; + if (usage_set) { + fprintf(stdout, "set %s:\n", usage_set->set->name); + fprintf(stdout, " avail: %s\n", usage_set->availability.to_string().c_str()); + fprintf(stdout, " upcoming uses:\n"); + for (auto it = usage_set->upcoming_uses.begin(); + it != usage_set->upcoming_uses.end(); ++it) { + fprintf(stdout, " v%d, used at %d\n", + it->value->ordinal, + it->use->instr->ordinal); } } } - if (!free_until_set->pos[max_pos]) { - // No register available without spilling. - return false; - } - if (current->end_ordinal < free_until_set->pos[max_pos]) { - // Register available for the whole interval. - current->value->reg.set = free_until_set->set; - current->value->reg.index = max_pos; - } else { - // Register available for the first part of the interval. - // Split the interval at where it hits the next one. - //current->value->reg = max_pos; - //SplitRange(current, free_until_set->pos[max_pos]); - // TODO(benvanik): actually split -- for now we just spill. - return false; - } - - return true; + fflush(stdout); +#endif } -void RegisterAllocationPass::AllocateBlockedReg( - HIRBuilder* builder, Interval* current, Intervals& intervals) { - auto free_until_set = current->free_until_set; - // TODO(benvanik): smart heuristics. 
- // wimmer AllocateBlockedReg has some stuff for deciding whether to - // spill current or some other active interval - which we ignore. - - // Pick a random interval. Maybe the first. Sure. - auto spill_interval = intervals.active; - Value* spill_value = NULL; - Instr* prev_use = NULL; - Instr* next_use = NULL; - while (spill_interval) { - if (spill_interval->free_until_set != free_until_set || - spill_interval->start_ordinal == current->start_ordinal) { - // Only interested in ones of the same register set. - // We also ensure that ones at the same ordinal as us are ignored, - // which can happen with multiple local inserts/etc. - spill_interval = spill_interval->next; - continue; +void RegisterAllocationPass::PrepareBlockState() { + for (size_t i = 0; i < XECOUNT(usage_sets_.all_sets); ++i) { + auto usage_set = usage_sets_.all_sets[i]; + if (usage_set) { + usage_set->availability.set(); + usage_set->upcoming_uses.clear(); } - spill_value = spill_interval->value; + } + DumpUsage("PrepareBlockState"); +} - // Find the uses right before/after current. - auto use = spill_value->use_head; - while (use) { - if (use->instr->ordinal != -1) { - if (use->instr->ordinal < current->start_ordinal) { - if (!prev_use || prev_use->ordinal < use->instr->ordinal) { - prev_use = use->instr; - } - } else if (use->instr->ordinal > current->start_ordinal) { - if (!next_use || next_use->ordinal > use->instr->ordinal) { - next_use = use->instr; - } +void RegisterAllocationPass::AdvanceUses(Instr* instr) { + for (size_t i = 0; i < XECOUNT(usage_sets_.all_sets); ++i) { + auto usage_set = usage_sets_.all_sets[i]; + if (!usage_set) { + break; + } + auto& upcoming_uses = usage_set->upcoming_uses; + for (auto it = upcoming_uses.begin(); it != upcoming_uses.end();) { + if (!it->use) { + // No uses at all - we can remove right away. + // This comes up from instructions where the dest is never used, + // like the ATOMIC ops. + MarkRegAvailable(it->value->reg); + it = upcoming_uses.erase(it); + continue; + } + if (it->use->instr != instr) { + // Not yet at this instruction. + ++it; + continue; + } + // The use is from this instruction. + if (!it->use->next) { + // Last use of the value. We can retire it now. + MarkRegAvailable(it->value->reg); + it = upcoming_uses.erase(it); + } else { + // Used again. Push back the next use. + // Note that we may be used multiple times this instruction, so + // eat those. + auto next_use = it->use->next; + while (next_use->next && next_use->instr == instr) { + next_use = next_use->next; } + // Remove the iterator. + auto value = it->value; + it = upcoming_uses.erase(it); + upcoming_uses.emplace_back(value, next_use); } - use = use->next; } - if (!prev_use) { - prev_use = spill_value->def; - } - if (prev_use->next == next_use) { - // Uh, this interval is way too short. - spill_interval = spill_interval->next; - continue; - } - XEASSERT(prev_use->ordinal != -1); - XEASSERTNOTNULL(next_use); - break; } - XEASSERT(spill_interval->free_until_set == free_until_set); + DumpUsage("AdvanceUses"); +} - // Find the real last use -- paired ops may require sequences to stay - // intact. This is a bad design. 
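AdvanceUses above leans on the per-value use lists being sorted by ordinal (see SortUsageList further down): when the use recorded for a value is the current instruction and has no successor, that was the value's last use and its register can be retired immediately. A condensed stand-in with simplified types follows; the real code also advances to the value's next use when it lives past this instruction:

#include <vector>

// Simplified stand-ins for Value::Use and the RegisterUsage tracking entries.
struct UseSketch { int instr_ordinal; UseSketch* next; };
struct UpcomingUse { int reg_index; UseSketch* use; };

template <typename MarkAvailableFn>
void RetireAtLastUse(std::vector<UpcomingUse>& upcoming, int current_ordinal,
                     MarkAvailableFn mark_available) {
  for (auto it = upcoming.begin(); it != upcoming.end();) {
    UseSketch* use = it->use;
    if (!use || (use->instr_ordinal == current_ordinal && !use->next)) {
      // Dead (never used at all) or at its final use: free the register now.
      mark_available(it->reg_index);
      it = upcoming.erase(it);
    } else {
      ++it;
    }
  }
}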
- auto prev_def_tail = prev_use; - while (prev_def_tail && - prev_def_tail->opcode->flags & OPCODE_FLAG_PAIRED_PREV) { - prev_def_tail = prev_def_tail->prev; +bool RegisterAllocationPass::IsRegInUse(const RegAssignment& reg) { + RegisterSetUsage* usage_set; + if (reg.set == usage_sets_.int_set->set) { + usage_set = usage_sets_.int_set; + } else if (reg.set == usage_sets_.float_set->set) { + usage_set = usage_sets_.float_set; + } else { + usage_set = usage_sets_.vec_set; + } + return !usage_set->availability.test(reg.index); +} + +RegisterAllocationPass::RegisterSetUsage* +RegisterAllocationPass::MarkRegUsed(const RegAssignment& reg, + Value* value, Value::Use* use) { + auto usage_set = RegisterSetForValue(value); + usage_set->availability.set(reg.index, false); + usage_set->upcoming_uses.emplace_back(value, use); + DumpUsage("MarkRegUsed"); + return usage_set; +} + +RegisterAllocationPass::RegisterSetUsage* +RegisterAllocationPass::MarkRegAvailable(const hir::RegAssignment& reg) { + RegisterSetUsage* usage_set; + if (reg.set == usage_sets_.int_set->set) { + usage_set = usage_sets_.int_set; + } else if (reg.set == usage_sets_.float_set->set) { + usage_set = usage_sets_.float_set; + } else { + usage_set = usage_sets_.vec_set; + } + usage_set->availability.set(reg.index, true); + return usage_set; +} + +bool RegisterAllocationPass::TryAllocateRegister( + Value* value, const RegAssignment& preferred_reg) { + // If the preferred register matches type and is available, use it. + auto usage_set = RegisterSetForValue(value); + if (usage_set->set == preferred_reg.set) { + // Check if available. + if (!IsRegInUse(preferred_reg)) { + // Mark as in-use and return. Best case. + MarkRegUsed(preferred_reg, value, value->use_head); + value->reg = preferred_reg; + return true; + } } - Value* new_value; - uint32_t end_ordinal; + // Otherwise, fallback to allocating like normal. + return TryAllocateRegister(value); +} + +bool RegisterAllocationPass::TryAllocateRegister(Value* value) { + // Get the set this register is in. + RegisterSetUsage* usage_set = RegisterSetForValue(value); + + // Find the first free register, if any. + // We have to ensure it's a valid one (in our count). + unsigned long first_unused = 0; + bool all_used = _BitScanForward(&first_unused, usage_set->availability.to_ulong()) == 0; + if (!all_used && first_unused < usage_set->count) { + // Available! Use it!. + value->reg.set = usage_set->set; + value->reg.index = first_unused; + MarkRegUsed(value->reg, value, value->use_head); + return true; + } + + // None available! Spill required. + return false; +} + +bool RegisterAllocationPass::SpillOneRegister( + HIRBuilder* builder, TypeName required_type) { + // Get the set that we will be picking from. + RegisterSetUsage* usage_set; + if (required_type <= INT64_TYPE) { + usage_set = usage_sets_.int_set; + } else if (required_type <= FLOAT64_TYPE) { + usage_set = usage_sets_.float_set; + } else { + usage_set = usage_sets_.vec_set; + } + + DumpUsage("SpillOneRegister (pre)"); + // Pick the one with the furthest next use. 
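TryAllocateRegister above uses MSVC's _BitScanForward over the availability bitset to find the lowest free register. A portable equivalent of that scan, folding in the "index must be within the set's count" check, for reference only and not part of the patch:

#include <bitset>
#include <cstdint>

// Returns the index of the lowest set (available) bit among the first `count`
// bits, or -1 if every tracked register is in use.
inline int FindFirstAvailable(const std::bitset<32>& availability,
                              uint32_t count) {
  for (uint32_t i = 0; i < count; ++i) {
    if (availability.test(i)) {
      return static_cast<int>(i);
    }
  }
  return -1;
}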
+ XEASSERT(!usage_set->upcoming_uses.empty()); + auto furthest_usage = std::max_element( + usage_set->upcoming_uses.begin(), usage_set->upcoming_uses.end(), + RegisterUsage::Comparer()); + Value* spill_value = furthest_usage->value; + Value::Use* prev_use = furthest_usage->use->prev; + Value::Use* next_use = furthest_usage->use; + XEASSERTNOTNULL(next_use); + usage_set->upcoming_uses.erase(furthest_usage); + DumpUsage("SpillOneRegister (post)"); + const auto reg = spill_value->reg; + + // We know the spill_value use list is sorted, so we can cut it right now. + // This makes it easier down below. + auto new_head_use = next_use; + + // Allocate local. if (spill_value->local_slot) { - // Value is already assigned a slot, so load from that. - // We can then split the interval right after the previous use to - // before the next use. - - // Update the last use of the spilled interval/value. - end_ordinal = spill_interval->end_ordinal; - spill_interval->end_ordinal = current->start_ordinal;//prev_def_tail->ordinal; - XEASSERT(end_ordinal != -1); - XEASSERT(spill_interval->end_ordinal != -1); - - // Insert a load right before the next use. - new_value = builder->LoadLocal(spill_value->local_slot); - builder->last_instr()->MoveBefore(next_use); - - // Update last use info. - new_value->last_use = spill_value->last_use; - spill_value->last_use = prev_use; + // Value is already assigned a slot. Since we allocate in order and this is + // all SSA we know the stored value will be exactly what we want. Yay, + // we can prevent the redundant store! + // In fact, we may even want to pin this spilled value so that we always + // use the spilled value and prevent the need for more locals. } else { // Allocate a local slot. spill_value->local_slot = builder->AllocLocal(spill_value->type); - // Insert a spill right after the def. + // Add store. builder->StoreLocal(spill_value->local_slot, spill_value); auto spill_store = builder->last_instr(); - spill_store->MoveBefore(prev_def_tail->next); + auto spill_store_use = spill_store->src2_use; + XEASSERTNULL(spill_store_use->prev); + if (prev_use && prev_use->instr->opcode->flags & OPCODE_FLAG_PAIRED_PREV) { + // Instruction is paired. This is bad. We will insert the spill after the + // paired instruction. + XEASSERTNOTNULL(prev_use->instr->next); + spill_store->MoveBefore(prev_use->instr->next); - // Update last use of spilled interval/value. - end_ordinal = spill_interval->end_ordinal; - spill_interval->end_ordinal = current->start_ordinal;//prev_def_tail->ordinal; - XEASSERT(end_ordinal != -1); - XEASSERT(spill_interval->end_ordinal != -1); + // Update last use. + spill_value->last_use = spill_store; + } else if (prev_use) { + // We insert the store immediately before the previous use. + // If we were smarter we could then re-run allocation and reuse the register + // once dropped. + spill_store->MoveBefore(prev_use->instr); - // Insert a load right before the next use. - new_value = builder->LoadLocal(spill_value->local_slot); - builder->last_instr()->MoveBefore(next_use); + // Update last use. + spill_value->last_use = prev_use->instr; + } else { + // This is the first use, so the only thing we have is the define. + // Move the store to right after that. + spill_store->MoveBefore(spill_value->def->next); - // Update last use info. - new_value->last_use = spill_value->last_use; - spill_value->last_use = spill_store; + // Update last use. + spill_value->last_use = spill_store; + } } - // Reuse the same local slot. Hooray SSA. 
+#if ASSERT_NO_CYCLES + builder->AssertNoCycles(); + spill_value->def->block->AssertNoCycles(); +#endif // ASSERT_NO_CYCLES + + // Add load. + // Inserted immediately before the next use. Since by definition the next + // use is after the instruction requesting the spill we know we haven't + // done allocation for that code yet and can let that be handled + // automatically when we get to it. + auto new_value = builder->LoadLocal(spill_value->local_slot); + auto spill_load = builder->last_instr(); + spill_load->MoveBefore(next_use->instr); + // Note: implicit first use added. + +#if ASSERT_NO_CYCLES + builder->AssertNoCycles(); + spill_value->def->block->AssertNoCycles(); +#endif // ASSERT_NO_CYCLES + + // Set the local slot of the new value to our existing one. This way we will + // reuse that same memory if needed. new_value->local_slot = spill_value->local_slot; - // Rename all future uses to that loaded value. - auto use = spill_value->use_head; - while (use) { - // TODO(benvanik): keep use list sorted so we don't have to do this. - if (use->instr->ordinal <= spill_interval->end_ordinal || - use->instr->ordinal == -1) { - use = use->next; - continue; - } - auto next = use->next; - auto instr = use->instr; + // Rename all future uses of the SSA value to the new value as loaded + // from the local. + // We can quickly do this by walking the use list. Because the list is + // already sorted we know we are going to end up with a sorted list. + auto walk_use = new_head_use; + auto new_use_tail = walk_use; + while (walk_use) { + auto next_walk_use = walk_use->next; + auto instr = walk_use->instr; + uint32_t signature = instr->opcode->signature; if (GET_OPCODE_SIG_TYPE_SRC1(signature) == OPCODE_SIG_TYPE_V) { if (instr->src1.value == spill_value) { @@ -436,36 +431,107 @@ void RegisterAllocationPass::AllocateBlockedReg( instr->set_src3(new_value); } } - use = next; + + walk_use = next_walk_use; + if (walk_use) { + new_use_tail = walk_use; + } } + new_value->last_use = new_use_tail->instr; - // Create new interval. - auto arena = scratch_arena(); - auto new_interval = arena->Alloc(); - new_interval->start_ordinal = new_value->def->ordinal; - new_interval->end_ordinal = end_ordinal; - new_interval->value = new_value; - new_interval->next = NULL; - new_interval->prev = NULL; - if (new_value->type <= INT64_TYPE) { - new_interval->free_until_set = free_until_sets_.int_set; - } else if (new_value->type <= FLOAT64_TYPE) { - new_interval->free_until_set = free_until_sets_.float_set; - } else { - new_interval->free_until_set = free_until_sets_.vec_set; - } + // Update tracking. + MarkRegAvailable(reg); - // Remove the old interval from the active list, as it's been spilled. - spill_interval->RemoveFromList(&intervals.active); - spill_interval->AddToList(&intervals.handled); - - // Insert interval into the right place in the list. - // We know it's ahead of us. - new_interval->InsertIntoList(&intervals.unhandled); - - // TODO(benvanik): use the register we just freed? 
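Net effect of SpillOneRegister on the instruction stream, sketched in HIR form with invented value numbers and slot names (the actual ordinals come from the per-block renumbering):

// Before (v10's register is needed by another value):
//   v10 = load_context +0x80          ; def of v10
//   ...                               ; earlier uses keep v10 in a register
// After spilling v10:
//   v10 = load_context +0x80
//   store_local slot3, v10            ; inserted after the def / previous use
//   ...                               ; v10's register is free in between
//   v27 = load_local slot3            ; inserted right before the next use
//   v29 = add v27, v5                 ; this and all later uses renamed to v27
// v27 keeps slot3 as its local_slot, so spilling it again reuses that memory.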
- //current->value->reg.set = free_until_set->set; - //current->value->reg.index = spill_interval->value->reg.index; - bool allocated = TryAllocateFreeReg(current, intervals); - XEASSERTTRUE(allocated); + return true; +} + +RegisterAllocationPass::RegisterSetUsage* +RegisterAllocationPass::RegisterSetForValue( + const Value* value) { + if (value->type <= INT64_TYPE) { + return usage_sets_.int_set; + } else if (value->type <= FLOAT64_TYPE) { + return usage_sets_.float_set; + } else { + return usage_sets_.vec_set; + } +} + +namespace { +int CompareValueUse(const Value::Use* a, const Value::Use* b) { + return a->instr->ordinal - b->instr->ordinal; +} +} // namespace +void RegisterAllocationPass::SortUsageList(Value* value) { + // Modified in-place linked list sort from: + // http://www.chiark.greenend.org.uk/~sgtatham/algorithms/listsort.c + if (!value->use_head) { + return; + } + Value::Use* head = value->use_head; + Value::Use* tail = nullptr; + int insize = 1; + while (true) { + auto p = head; + head = nullptr; + tail = nullptr; + // count number of merges we do in this pass + int nmerges = 0; + while (p) { + // there exists a merge to be done + nmerges++; + // step 'insize' places along from p + auto q = p; + int psize = 0; + for (int i = 0; i < insize; i++) { + psize++; + q = q->next; + if (!q) break; + } + // if q hasn't fallen off end, we have two lists to merge + int qsize = insize; + // now we have two lists; merge them + while (psize > 0 || (qsize > 0 && q)) { + // decide whether next element of merge comes from p or q + Value::Use* e = nullptr; + if (psize == 0) { + // p is empty; e must come from q + e = q; q = q->next; qsize--; + } else if (qsize == 0 || !q) { + // q is empty; e must come from p + e = p; p = p->next; psize--; + } else if (CompareValueUse(p, q) <= 0) { + // First element of p is lower (or same); e must come from p + e = p; p = p->next; psize--; + } else { + // First element of q is lower; e must come from q + e = q; q = q->next; qsize--; + } + // add the next element to the merged list + if (tail) { + tail->next = e; + } else { + head = e; + } + // Maintain reverse pointers in a doubly linked list. + e->prev = tail; + tail = e; + } + // now p has stepped 'insize' places along, and q has too + p = q; + } + if (tail) { + tail->next = nullptr; + } + // If we have done only one merge, we're finished + if (nmerges <= 1) { + // allow for nmerges==0, the empty list case + break; + } + // Otherwise repeat, merging lists twice the size + insize *= 2; + } + + value->use_head = head; + value->last_use = tail->instr; } diff --git a/src/alloy/compiler/passes/register_allocation_pass.h b/src/alloy/compiler/passes/register_allocation_pass.h index 3167000ec..aa5943aea 100644 --- a/src/alloy/compiler/passes/register_allocation_pass.h +++ b/src/alloy/compiler/passes/register_allocation_pass.h @@ -10,6 +10,10 @@ #ifndef ALLOY_COMPILER_PASSES_REGISTER_ALLOCATION_PASS_H_ #define ALLOY_COMPILER_PASSES_REGISTER_ALLOCATION_PASS_H_ +#include +#include +#include + #include #include @@ -27,28 +31,53 @@ public: virtual int Run(hir::HIRBuilder* builder); private: - struct Interval; - struct Intervals; - void ComputeLastUse(hir::Value* value); - bool TryAllocateFreeReg(Interval* current, Intervals& intervals); - void AllocateBlockedReg(hir::HIRBuilder* builder, - Interval* current, Intervals& intervals); + // TODO(benvanik): rewrite all this set shit -- too much indirection, the + // complexity is not needed. 
+ struct RegisterUsage { + hir::Value* value; + hir::Value::Use* use; + RegisterUsage() : value(nullptr), use(nullptr) {} + RegisterUsage(hir::Value* value_, hir::Value::Use* use_) + : value(value_), use(use_) {} + struct Comparer : std::binary_function { + bool operator()(const RegisterUsage& a, const RegisterUsage& b) const { + return a.use->instr->ordinal < b.use->instr->ordinal; + } + }; + }; + struct RegisterSetUsage { + const backend::MachineInfo::RegisterSet* set = nullptr; + uint32_t count = 0; + std::bitset<32> availability = 0; + // TODO(benvanik): another data type. + std::vector upcoming_uses; + }; + + void DumpUsage(const char* name); + void PrepareBlockState(); + void AdvanceUses(hir::Instr* instr); + bool IsRegInUse(const hir::RegAssignment& reg); + RegisterSetUsage* MarkRegUsed(const hir::RegAssignment& reg, + hir::Value* value, hir::Value::Use* use); + RegisterSetUsage* MarkRegAvailable(const hir::RegAssignment& reg); + + bool TryAllocateRegister(hir::Value* value, + const hir::RegAssignment& preferred_reg); + bool TryAllocateRegister(hir::Value* value); + bool SpillOneRegister(hir::HIRBuilder* builder, hir::TypeName required_type); + + RegisterSetUsage* RegisterSetForValue(const hir::Value* value); + + void SortUsageList(hir::Value* value); private: const backend::MachineInfo* machine_info_; - - struct RegisterFreeUntilSet { - uint32_t count; - uint32_t pos[32]; - const backend::MachineInfo::RegisterSet* set; - }; - struct RegisterFreeUntilSets { - RegisterFreeUntilSet* int_set; - RegisterFreeUntilSet* float_set; - RegisterFreeUntilSet* vec_set; - RegisterFreeUntilSet* all_sets[3]; - }; - RegisterFreeUntilSets free_until_sets_; + struct { + RegisterSetUsage* int_set = nullptr; + RegisterSetUsage* float_set = nullptr; + RegisterSetUsage* vec_set = nullptr; + RegisterSetUsage* all_sets[3]; + } usage_sets_; }; diff --git a/src/alloy/compiler/passes/validation_pass.cc b/src/alloy/compiler/passes/validation_pass.cc index 15e89bd67..bc77ab482 100644 --- a/src/alloy/compiler/passes/validation_pass.cc +++ b/src/alloy/compiler/passes/validation_pass.cc @@ -88,12 +88,12 @@ int ValidationPass::ValidateInstruction(Block* block, Instr* instr) { } int ValidationPass::ValidateValue(Block* block, Instr* instr, Value* value) { - if (value->def) { - /*auto def = value->def; - XEASSERT(def->block == block); - if (def->block != block) { - return 1; - }*/ - } + //if (value->def) { + // auto def = value->def; + // XEASSERT(def->block == block); + // if (def->block != block) { + // return 1; + // } + //} return 0; } diff --git a/src/alloy/core.h b/src/alloy/core.h index aef7e57c2..3beb11ba4 100644 --- a/src/alloy/core.h +++ b/src/alloy/core.h @@ -44,6 +44,10 @@ typedef struct XECACHEALIGN vec128_s { uint64_t high; }; }; + + bool operator== (const vec128_s& b) const { + return low == b.low && high == b.high; + } } vec128_t; XEFORCEINLINE vec128_t vec128i(uint32_t x, uint32_t y, uint32_t z, uint32_t w) { vec128_t v; diff --git a/src/alloy/frontend/ppc/ppc_emit_alu.cc b/src/alloy/frontend/ppc/ppc_emit_alu.cc index 9b25e824c..ce023eb85 100644 --- a/src/alloy/frontend/ppc/ppc_emit_alu.cc +++ b/src/alloy/frontend/ppc/ppc_emit_alu.cc @@ -643,20 +643,20 @@ XEEMITTER(cmpli, 0x28000000, D )(PPCHIRBuilder& f, InstrData& i) { XEEMITTER(andx, 0x7C000038, X )(PPCHIRBuilder& f, InstrData& i) { // RA <- (RS) & (RB) Value* ra = f.And(f.LoadGPR(i.X.RT), f.LoadGPR(i.X.RB)); + f.StoreGPR(i.X.RA, ra); if (i.X.Rc) { f.UpdateCR(0, ra); } - f.StoreGPR(i.X.RA, ra); return 0; } XEEMITTER(andcx, 0x7C000078, X 
)(PPCHIRBuilder& f, InstrData& i) { // RA <- (RS) & ¬(RB) Value* ra = f.And(f.LoadGPR(i.X.RT), f.Not(f.LoadGPR(i.X.RB))); + f.StoreGPR(i.X.RA, ra); if (i.X.Rc) { f.UpdateCR(0, ra); } - f.StoreGPR(i.X.RA, ra); return 0; } @@ -665,8 +665,8 @@ XEEMITTER(andix, 0x70000000, D )(PPCHIRBuilder& f, InstrData& i) { Value* ra = f.And( f.LoadGPR(i.D.RT), f.LoadConstant((uint64_t)i.D.DS)); - f.UpdateCR(0, ra); f.StoreGPR(i.D.RA, ra); + f.UpdateCR(0, ra); return 0; } @@ -675,8 +675,8 @@ XEEMITTER(andisx, 0x74000000, D )(PPCHIRBuilder& f, InstrData& i) { Value* ra = f.And( f.LoadGPR(i.D.RT), f.LoadConstant((uint64_t(i.D.DS) << 16))); - f.UpdateCR(0, ra); f.StoreGPR(i.D.RA, ra); + f.UpdateCR(0, ra); return 0; } @@ -688,10 +688,10 @@ XEEMITTER(cntlzdx, 0x7C000074, X )(PPCHIRBuilder& f, InstrData& i) { // RA <- n Value* v = f.CountLeadingZeros(f.LoadGPR(i.X.RT)); v = f.ZeroExtend(v, INT64_TYPE); + f.StoreGPR(i.X.RA, v); if (i.X.Rc) { f.UpdateCR(0, v); } - f.StoreGPR(i.X.RA, v); return 0; } @@ -704,10 +704,10 @@ XEEMITTER(cntlzwx, 0x7C000034, X )(PPCHIRBuilder& f, InstrData& i) { Value* v = f.CountLeadingZeros( f.Truncate(f.LoadGPR(i.X.RT), INT32_TYPE)); v = f.ZeroExtend(v, INT64_TYPE); + f.StoreGPR(i.X.RA, v); if (i.X.Rc) { f.UpdateCR(0, v); } - f.StoreGPR(i.X.RA, v); return 0; } @@ -715,10 +715,10 @@ XEEMITTER(eqvx, 0x7C000238, X )(PPCHIRBuilder& f, InstrData& i) { // RA <- (RS) == (RB) Value* ra = f.Xor(f.LoadGPR(i.X.RT), f.LoadGPR(i.X.RB)); ra = f.Not(ra); + f.StoreGPR(i.X.RA, ra); if (i.X.Rc) { f.UpdateCR(0, ra); } - f.StoreGPR(i.X.RA, ra); return 0; } @@ -728,10 +728,10 @@ XEEMITTER(extsbx, 0x7C000774, X )(PPCHIRBuilder& f, InstrData& i) { // RA[0:55] <- i56.s Value* rt = f.LoadGPR(i.X.RT); rt = f.SignExtend(f.Truncate(rt, INT8_TYPE), INT64_TYPE); + f.StoreGPR(i.X.RA, rt); if (i.X.Rc) { f.UpdateCR(0, rt); } - f.StoreGPR(i.X.RA, rt); return 0; } @@ -741,10 +741,10 @@ XEEMITTER(extshx, 0x7C000734, X )(PPCHIRBuilder& f, InstrData& i) { // RA[0:47] <- 48.s Value* rt = f.LoadGPR(i.X.RT); rt = f.SignExtend(f.Truncate(rt, INT16_TYPE), INT64_TYPE); + f.StoreGPR(i.X.RA, rt); if (i.X.Rc) { f.UpdateCR(0, rt); } - f.StoreGPR(i.X.RA, rt); return 0; } @@ -754,10 +754,10 @@ XEEMITTER(extswx, 0x7C0007B4, X )(PPCHIRBuilder& f, InstrData& i) { // RA[0:31] <- i32.s Value* rt = f.LoadGPR(i.X.RT); rt = f.SignExtend(f.Truncate(rt, INT32_TYPE), INT64_TYPE); + f.StoreGPR(i.X.RA, rt); if (i.X.Rc) { f.UpdateCR(0, rt); } - f.StoreGPR(i.X.RA, rt); return 0; } @@ -767,10 +767,10 @@ XEEMITTER(nandx, 0x7C0003B8, X )(PPCHIRBuilder& f, InstrData& i) { f.LoadGPR(i.X.RT), f.LoadGPR(i.X.RB)); ra = f.Not(ra); + f.StoreGPR(i.X.RA, ra); if (i.X.Rc) { f.UpdateCR(0, ra); } - f.StoreGPR(i.X.RA, ra); return 0; } @@ -780,10 +780,10 @@ XEEMITTER(norx, 0x7C0000F8, X )(PPCHIRBuilder& f, InstrData& i) { f.LoadGPR(i.X.RT), f.LoadGPR(i.X.RB)); ra = f.Not(ra); + f.StoreGPR(i.X.RA, ra); if (i.X.Rc) { f.UpdateCR(0, ra); } - f.StoreGPR(i.X.RA, ra); return 0; } @@ -803,10 +803,10 @@ XEEMITTER(orx, 0x7C000378, X )(PPCHIRBuilder& f, InstrData& i) { f.LoadGPR(i.X.RT), f.LoadGPR(i.X.RB)); } + f.StoreGPR(i.X.RA, ra); if (i.X.Rc) { f.UpdateCR(0, ra); } - f.StoreGPR(i.X.RA, ra); return 0; } @@ -815,10 +815,10 @@ XEEMITTER(orcx, 0x7C000338, X )(PPCHIRBuilder& f, InstrData& i) { Value* ra = f.Or( f.LoadGPR(i.X.RT), f.Not(f.LoadGPR(i.X.RB))); + f.StoreGPR(i.X.RA, ra); if (i.X.Rc) { f.UpdateCR(0, ra); } - f.StoreGPR(i.X.RA, ra); return 0; } @@ -849,10 +849,10 @@ XEEMITTER(xorx, 0x7C000278, X )(PPCHIRBuilder& f, InstrData& i) { Value* ra = f.Xor( f.LoadGPR(i.X.RT), 
f.LoadGPR(i.X.RB)); + f.StoreGPR(i.X.RA, ra); if (i.X.Rc) { f.UpdateCR(0, ra); } - f.StoreGPR(i.X.RA, ra); return 0; } @@ -895,10 +895,10 @@ XEEMITTER(rld, 0x78000000, MDS)(PPCHIRBuilder& f, InstrData& i) { if (m != 0xFFFFFFFFFFFFFFFF) { v = f.And(v, f.LoadConstant(m)); } + f.StoreGPR(i.MD.RA, v); if (i.MD.Rc) { f.UpdateCR(0, v); } - f.StoreGPR(i.MD.RA, v); return 0; } else if (i.MD.idx == 1) { // XEEMITTER(rldicrx, 0x78000004, MD ) @@ -922,10 +922,10 @@ XEEMITTER(rld, 0x78000000, MDS)(PPCHIRBuilder& f, InstrData& i) { v = f.And(v, f.LoadConstant(m)); } } + f.StoreGPR(i.MD.RA, v); if (i.MD.Rc) { f.UpdateCR(0, v); } - f.StoreGPR(i.MD.RA, v); return 0; } else if (i.MD.idx == 2) { // XEEMITTER(rldicx, 0x78000008, MD ) @@ -959,10 +959,10 @@ XEEMITTER(rld, 0x78000000, MDS)(PPCHIRBuilder& f, InstrData& i) { f.And(v, f.LoadConstant(m)), f.And(ra, f.LoadConstant(~m))); } + f.StoreGPR(i.MD.RA, v); if (i.MD.Rc) { f.UpdateCR(0, v); } - f.StoreGPR(i.MD.RA, v); return 0; } else { XEINSTRNOTIMPLEMENTED(); @@ -987,10 +987,10 @@ XEEMITTER(rlwimix, 0x50000000, M )(PPCHIRBuilder& f, InstrData& i) { } v = f.ZeroExtend(v, INT64_TYPE); v = f.Or(v, f.And(f.LoadGPR(i.M.RA), f.LoadConstant((~(uint64_t)m)))); + f.StoreGPR(i.M.RA, v); if (i.M.Rc) { f.UpdateCR(0, v); } - f.StoreGPR(i.M.RA, v); return 0; } @@ -1014,10 +1014,10 @@ XEEMITTER(rlwinmx, 0x54000000, M )(PPCHIRBuilder& f, InstrData& i) { v = f.And(v, f.LoadConstant((uint32_t)XEMASK(i.M.MB + 32, i.M.ME + 32))); } v = f.ZeroExtend(v, INT64_TYPE); + f.StoreGPR(i.M.RA, v); if (i.M.Rc) { f.UpdateCR(0, v); } - f.StoreGPR(i.M.RA, v); return 0; } @@ -1036,10 +1036,10 @@ XEEMITTER(rlwnmx, 0x5C000000, M )(PPCHIRBuilder& f, InstrData& i) { v = f.And(v, f.LoadConstant((uint32_t)XEMASK(i.M.MB + 32, i.M.ME + 32))); } v = f.ZeroExtend(v, INT64_TYPE); + f.StoreGPR(i.M.RA, v); if (i.M.Rc) { f.UpdateCR(0, v); } - f.StoreGPR(i.M.RA, v); return 0; } @@ -1146,7 +1146,7 @@ XEEMITTER(sradx, 0x7C000634, X )(PPCHIRBuilder& f, InstrData& i) { // CA is set to 1 if the low-order 32 bits of (RS) contain a negative number // and any 1-bits are shifted out of position 63; otherwise CA is set to 0. // We already have ca set to indicate the pos 63 bit, now just and in sign. - ca = f.And(ca, f.Shr(v, 63)); + ca = f.And(ca, f.Truncate(f.Shr(v, 63), INT8_TYPE)); f.StoreCA(ca); f.StoreGPR(i.X.RA, v); @@ -1174,15 +1174,15 @@ XEEMITTER(sradix, 0x7C000674, XS )(PPCHIRBuilder& f, InstrData& i) { XEASSERT(sh); uint64_t mask = XEMASK(64 - sh, 63); Value* ca = f.And( - f.Shr(v, 63), + f.Truncate(f.Shr(v, 63), INT8_TYPE), f.IsTrue(f.And(v, f.LoadConstant(mask)))); f.StoreCA(ca); v = f.Sha(v, sh); + f.StoreGPR(i.XS.RA, v); if (i.XS.Rc) { f.UpdateCR(0, v); } - f.StoreGPR(i.XS.RA, v); return 0; } @@ -1203,7 +1203,7 @@ XEEMITTER(srawx, 0x7C000630, X )(PPCHIRBuilder& f, InstrData& i) { // is negative. Value* mask = f.Not(f.Shl(f.LoadConstant(-1), sh)); Value* ca = f.And( - f.Shr(v, 31), + f.Truncate(f.Shr(v, 31), INT8_TYPE), f.IsTrue(f.And(v, mask))); f.StoreCA(ca); v = f.Sha(v, sh), @@ -1235,8 +1235,8 @@ XEEMITTER(srawix, 0x7C000670, X )(PPCHIRBuilder& f, InstrData& i) { // is negative. 
uint32_t mask = (uint32_t)XEMASK(64 - i.X.RB, 63); ca = f.And( - f.Shr(v, 31), - f.ZeroExtend(f.IsTrue(f.And(v, f.LoadConstant(mask))), INT32_TYPE)); + f.Truncate(f.Shr(v, 31), INT8_TYPE), + f.IsTrue(f.And(v, f.LoadConstant(mask)))); v = f.Sha(v, (int8_t)i.X.RB), v = f.SignExtend(v, INT64_TYPE); diff --git a/src/alloy/frontend/ppc/ppc_hir_builder.cc b/src/alloy/frontend/ppc/ppc_hir_builder.cc index dd25c4f8a..1b254ea4e 100644 --- a/src/alloy/frontend/ppc/ppc_hir_builder.cc +++ b/src/alloy/frontend/ppc/ppc_hir_builder.cc @@ -240,18 +240,18 @@ void PPCHIRBuilder::UpdateCR( void PPCHIRBuilder::UpdateCR( uint32_t n, Value* lhs, Value* rhs, bool is_signed) { - Value* lt; - Value* gt; if (is_signed) { - lt = CompareSLT(lhs, rhs); - gt = CompareSGT(lhs, rhs); + Value* lt = CompareSLT(lhs, rhs); + StoreContext(offsetof(PPCContext, cr0) + (4 * n) + 0, lt); + Value* gt = CompareSGT(lhs, rhs); + StoreContext(offsetof(PPCContext, cr0) + (4 * n) + 1, gt); } else { - lt = CompareULT(lhs, rhs); - gt = CompareUGT(lhs, rhs); + Value* lt = CompareULT(lhs, rhs); + StoreContext(offsetof(PPCContext, cr0) + (4 * n) + 0, lt); + Value* gt = CompareUGT(lhs, rhs); + StoreContext(offsetof(PPCContext, cr0) + (4 * n) + 1, gt); } Value* eq = CompareEQ(lhs, rhs); - StoreContext(offsetof(PPCContext, cr0) + (4 * n) + 0, lt); - StoreContext(offsetof(PPCContext, cr0) + (4 * n) + 1, gt); StoreContext(offsetof(PPCContext, cr0) + (4 * n) + 2, eq); // Value* so = AllocValue(UINT8_TYPE); @@ -280,7 +280,7 @@ Value* PPCHIRBuilder::LoadCA() { } void PPCHIRBuilder::StoreCA(Value* value) { - value = Truncate(value, INT8_TYPE); + XEASSERT(value->type == INT8_TYPE); StoreContext(offsetof(PPCContext, xer_ca), value); } diff --git a/src/alloy/hir/block.cc b/src/alloy/hir/block.cc new file mode 100644 index 000000000..ebace67fa --- /dev/null +++ b/src/alloy/hir/block.cc @@ -0,0 +1,39 @@ +/** + ****************************************************************************** + * Xenia : Xbox 360 Emulator Research Project * + ****************************************************************************** + * Copyright 2014 Ben Vanik. All rights reserved. * + * Released under the BSD license - see LICENSE in the root for more details. * + ****************************************************************************** + */ + +#include + +#include + +using namespace alloy; +using namespace alloy::hir; + + +void Block::AssertNoCycles() { + Instr* hare = instr_head; + Instr* tortoise = instr_head; + if (!hare) { + return; + } + while (hare = hare->next) { + if (hare == tortoise) { + // Cycle! + XEASSERTALWAYS(); + } + hare = hare->next; + if (hare == tortoise) { + // Cycle! 
+ XEASSERTALWAYS(); + } + tortoise = tortoise->next; + if (!hare || !tortoise) { + return; + } + } +} diff --git a/src/alloy/hir/block.h b/src/alloy/hir/block.h index 1683b333c..f60dd83c5 100644 --- a/src/alloy/hir/block.h +++ b/src/alloy/hir/block.h @@ -61,6 +61,8 @@ public: Instr* instr_tail; uint16_t ordinal; + + void AssertNoCycles(); }; diff --git a/src/alloy/hir/hir_builder.cc b/src/alloy/hir/hir_builder.cc index cad24c32c..f93a310e8 100644 --- a/src/alloy/hir/hir_builder.cc +++ b/src/alloy/hir/hir_builder.cc @@ -92,7 +92,7 @@ void HIRBuilder::DumpValue(StringBuffer* str, Value* value) { case INT8_TYPE: str->Append("%X", value->constant.i8); break; case INT16_TYPE: str->Append("%X", value->constant.i16); break; case INT32_TYPE: str->Append("%X", value->constant.i32); break; - case INT64_TYPE: str->Append("%X", value->constant.i64); break; + case INT64_TYPE: str->Append("%llX", value->constant.i64); break; case FLOAT32_TYPE: str->Append("%F", value->constant.f32); break; case FLOAT64_TYPE: str->Append("%F", value->constant.f64); break; case VEC128_TYPE: str->Append("(%F,%F,%F,%F)", @@ -252,6 +252,29 @@ void HIRBuilder::Dump(StringBuffer* str) { } } +void HIRBuilder::AssertNoCycles() { + Block* hare = block_head_; + Block* tortoise = block_head_; + if (!hare) { + return; + } + while (hare = hare->next) { + if (hare == tortoise) { + // Cycle! + XEASSERTALWAYS(); + } + hare = hare->next; + if (hare == tortoise) { + // Cycle! + XEASSERTALWAYS(); + } + tortoise = tortoise->next; + if (!hare || !tortoise) { + return; + } + } +} + Block* HIRBuilder::current_block() const { return current_block_; } @@ -1729,16 +1752,19 @@ Value* HIRBuilder::Extract(Value* value, Value* index, TypeName target_type) { // TODO(benvanik): could do some of this as constants. + Value* trunc_index = index->type != INT8_TYPE ? + Truncate(index, INT8_TYPE) : index; + Instr* i = AppendInstr( OPCODE_EXTRACT_info, 0, AllocValue(target_type)); i->set_src1(value); - i->set_src2(ZeroExtend(index, INT64_TYPE)); + i->set_src2(trunc_index); i->src3.value = NULL; return i->dest; } -Value* HIRBuilder::Extract(Value* value, uint64_t index, +Value* HIRBuilder::Extract(Value* value, uint8_t index, TypeName target_type) { return Extract(value, LoadConstant(index), target_type); } diff --git a/src/alloy/hir/hir_builder.h b/src/alloy/hir/hir_builder.h index 1ebdb01a1..6568a5a49 100644 --- a/src/alloy/hir/hir_builder.h +++ b/src/alloy/hir/hir_builder.h @@ -35,6 +35,7 @@ public: virtual int Finalize(); void Dump(StringBuffer* str); + void AssertNoCycles(); Arena* arena() const { return arena_; } @@ -196,7 +197,7 @@ public: Value* Insert(Value* value, Value* index, Value* part); Value* Insert(Value* value, uint64_t index, Value* part); Value* Extract(Value* value, Value* index, TypeName target_type); - Value* Extract(Value* value, uint64_t index, TypeName target_type); + Value* Extract(Value* value, uint8_t index, TypeName target_type); // i8->i16/i32/... (i8|i8 / i8|i8|i8|i8 / ...) // i8/i16/i32 -> vec128 Value* Splat(Value* value, TypeName target_type); diff --git a/src/alloy/hir/instr.cc b/src/alloy/hir/instr.cc index 51de2da2c..dc489ef4b 100644 --- a/src/alloy/hir/instr.cc +++ b/src/alloy/hir/instr.cc @@ -48,19 +48,6 @@ void Instr::set_src3(Value* value) { src3_use = value ? value->AddUse(block->arena, this) : NULL; } -bool Instr::Match(SignatureType dest_req, - SignatureType src1_req, - SignatureType src2_req, - SignatureType src3_req) const { - #define TO_SIG_TYPE(v) \ - (v ? (v->IsConstant() ? 
SignatureType((v->type + 1) | SIG_TYPE_C) : SignatureType(v->type + 1)) : SIG_TYPE_X) - return - ((dest_req == SIG_TYPE_IGNORE) || (dest_req == TO_SIG_TYPE(dest))) && - ((src1_req == SIG_TYPE_IGNORE) || (src1_req == TO_SIG_TYPE(src1.value))) && - ((src2_req == SIG_TYPE_IGNORE) || (src2_req == TO_SIG_TYPE(src2.value))) && - ((src3_req == SIG_TYPE_IGNORE) || (src3_req == TO_SIG_TYPE(src3.value))); -} - void Instr::MoveBefore(Instr* other) { if (next == other) { return; diff --git a/src/alloy/hir/instr.h b/src/alloy/hir/instr.h index 62983401d..b128c534a 100644 --- a/src/alloy/hir/instr.h +++ b/src/alloy/hir/instr.h @@ -24,26 +24,6 @@ namespace hir { class Block; class Label; -enum SignatureType { - SIG_TYPE_X = 0, - SIG_TYPE_I8 = 1, - SIG_TYPE_I16 = 2, - SIG_TYPE_I32 = 3, - SIG_TYPE_I64 = 4, - SIG_TYPE_F32 = 5, - SIG_TYPE_F64 = 6, - SIG_TYPE_V128 = 7, - SIG_TYPE_C = (1 << 3), - SIG_TYPE_I8C = SIG_TYPE_C | SIG_TYPE_I8, - SIG_TYPE_I16C = SIG_TYPE_C | SIG_TYPE_I16, - SIG_TYPE_I32C = SIG_TYPE_C | SIG_TYPE_I32, - SIG_TYPE_I64C = SIG_TYPE_C | SIG_TYPE_I64, - SIG_TYPE_F32C = SIG_TYPE_C | SIG_TYPE_F32, - SIG_TYPE_F64C = SIG_TYPE_C | SIG_TYPE_F64, - SIG_TYPE_V128C = SIG_TYPE_C | SIG_TYPE_V128, - SIG_TYPE_IGNORE = 0xFF, -}; - class Instr { public: Block* block; @@ -74,11 +54,6 @@ public: void set_src2(Value* value); void set_src3(Value* value); - bool Match(SignatureType dest = SIG_TYPE_X, - SignatureType src1 = SIG_TYPE_X, - SignatureType src2 = SIG_TYPE_X, - SignatureType src3 = SIG_TYPE_X) const; - void MoveBefore(Instr* other); void Replace(const OpcodeInfo* opcode, uint16_t flags); void Remove(); diff --git a/src/alloy/hir/opcodes.inl b/src/alloy/hir/opcodes.inl index baf214f25..deb789675 100644 --- a/src/alloy/hir/opcodes.inl +++ b/src/alloy/hir/opcodes.inl @@ -11,590 +11,590 @@ DEFINE_OPCODE( OPCODE_COMMENT, "comment", - OPCODE_SIG_X, - OPCODE_FLAG_IGNORE); + OPCODE_SIG_X_O, + OPCODE_FLAG_IGNORE) DEFINE_OPCODE( OPCODE_NOP, "nop", OPCODE_SIG_X, - OPCODE_FLAG_IGNORE); + OPCODE_FLAG_IGNORE) DEFINE_OPCODE( OPCODE_SOURCE_OFFSET, "source_offset", OPCODE_SIG_X_O, - OPCODE_FLAG_IGNORE | OPCODE_FLAG_HIDE); + OPCODE_FLAG_IGNORE | OPCODE_FLAG_HIDE) DEFINE_OPCODE( OPCODE_DEBUG_BREAK, "debug_break", OPCODE_SIG_X, - OPCODE_FLAG_VOLATILE); + OPCODE_FLAG_VOLATILE) DEFINE_OPCODE( OPCODE_DEBUG_BREAK_TRUE, "debug_break_true", OPCODE_SIG_X_V, - OPCODE_FLAG_VOLATILE); + OPCODE_FLAG_VOLATILE) DEFINE_OPCODE( OPCODE_TRAP, "trap", OPCODE_SIG_X, - OPCODE_FLAG_VOLATILE); + OPCODE_FLAG_VOLATILE) DEFINE_OPCODE( OPCODE_TRAP_TRUE, "trap_true", OPCODE_SIG_X_V, - OPCODE_FLAG_VOLATILE); + OPCODE_FLAG_VOLATILE) DEFINE_OPCODE( OPCODE_CALL, "call", OPCODE_SIG_X_S, - OPCODE_FLAG_BRANCH); + OPCODE_FLAG_BRANCH) DEFINE_OPCODE( OPCODE_CALL_TRUE, "call_true", OPCODE_SIG_X_V_S, - OPCODE_FLAG_BRANCH); + OPCODE_FLAG_BRANCH) DEFINE_OPCODE( OPCODE_CALL_INDIRECT, "call_indirect", OPCODE_SIG_X_V, - OPCODE_FLAG_BRANCH); + OPCODE_FLAG_BRANCH) DEFINE_OPCODE( OPCODE_CALL_INDIRECT_TRUE, "call_indirect_true", OPCODE_SIG_X_V_V, - OPCODE_FLAG_BRANCH); + OPCODE_FLAG_BRANCH) DEFINE_OPCODE( OPCODE_CALL_EXTERN, "call_extern", OPCODE_SIG_X_S, - OPCODE_FLAG_BRANCH); + OPCODE_FLAG_BRANCH) DEFINE_OPCODE( OPCODE_RETURN, "return", OPCODE_SIG_X, - OPCODE_FLAG_BRANCH); + OPCODE_FLAG_BRANCH) DEFINE_OPCODE( OPCODE_RETURN_TRUE, "return_true", OPCODE_SIG_X_V, - OPCODE_FLAG_BRANCH); + OPCODE_FLAG_BRANCH) DEFINE_OPCODE( OPCODE_SET_RETURN_ADDRESS, "set_return_address", OPCODE_SIG_X_V, - 0); + 0) DEFINE_OPCODE( OPCODE_BRANCH, "branch", OPCODE_SIG_X_L, - 
OPCODE_FLAG_BRANCH); + OPCODE_FLAG_BRANCH) DEFINE_OPCODE( OPCODE_BRANCH_TRUE, "branch_true", OPCODE_SIG_X_V_L, - OPCODE_FLAG_BRANCH); + OPCODE_FLAG_BRANCH) DEFINE_OPCODE( OPCODE_BRANCH_FALSE, "branch_false", OPCODE_SIG_X_V_L, - OPCODE_FLAG_BRANCH); + OPCODE_FLAG_BRANCH) DEFINE_OPCODE( OPCODE_ASSIGN, "assign", OPCODE_SIG_V_V, - 0); + 0) DEFINE_OPCODE( OPCODE_CAST, "cast", OPCODE_SIG_V_V, - 0); + 0) DEFINE_OPCODE( OPCODE_ZERO_EXTEND, "zero_extend", OPCODE_SIG_V_V, - 0); + 0) DEFINE_OPCODE( OPCODE_SIGN_EXTEND, "sign_extend", OPCODE_SIG_V_V, - 0); + 0) DEFINE_OPCODE( OPCODE_TRUNCATE, "truncate", OPCODE_SIG_V_V, - 0); + 0) DEFINE_OPCODE( OPCODE_CONVERT, "convert", OPCODE_SIG_V_V, - 0); + 0) DEFINE_OPCODE( OPCODE_ROUND, "round", OPCODE_SIG_V_V, - 0); + 0) DEFINE_OPCODE( OPCODE_VECTOR_CONVERT_I2F, "vector_convert_i2f", OPCODE_SIG_V_V, - 0); + 0) DEFINE_OPCODE( OPCODE_VECTOR_CONVERT_F2I, "vector_convert_f2i", OPCODE_SIG_V_V, - 0); + 0) DEFINE_OPCODE( OPCODE_LOAD_VECTOR_SHL, "load_vector_shl", OPCODE_SIG_V_V, - 0); + 0) DEFINE_OPCODE( OPCODE_LOAD_VECTOR_SHR, "load_vector_shr", OPCODE_SIG_V_V, - 0); + 0) DEFINE_OPCODE( OPCODE_LOAD_CLOCK, "load_clock", OPCODE_SIG_V, - 0); + 0) DEFINE_OPCODE( OPCODE_LOAD_LOCAL, "load_local", OPCODE_SIG_V_V, - 0); + 0) DEFINE_OPCODE( OPCODE_STORE_LOCAL, "store_local", OPCODE_SIG_X_V_V, - 0); + 0) DEFINE_OPCODE( OPCODE_LOAD_CONTEXT, "load_context", OPCODE_SIG_V_O, - 0); + 0) DEFINE_OPCODE( OPCODE_STORE_CONTEXT, "store_context", OPCODE_SIG_X_O_V, - 0); + 0) DEFINE_OPCODE( OPCODE_LOAD, "load", OPCODE_SIG_V_V, - OPCODE_FLAG_MEMORY); + OPCODE_FLAG_MEMORY) DEFINE_OPCODE( OPCODE_STORE, "store", OPCODE_SIG_X_V_V, - OPCODE_FLAG_MEMORY); + OPCODE_FLAG_MEMORY) DEFINE_OPCODE( OPCODE_PREFETCH, "prefetch", OPCODE_SIG_X_V_O, - 0); + 0) DEFINE_OPCODE( OPCODE_MAX, "max", OPCODE_SIG_V_V_V, - 0); + 0) DEFINE_OPCODE( OPCODE_MIN, "min", OPCODE_SIG_V_V_V, - 0); + 0) DEFINE_OPCODE( OPCODE_SELECT, "select", OPCODE_SIG_V_V_V_V, - 0); + 0) DEFINE_OPCODE( OPCODE_IS_TRUE, "is_true", OPCODE_SIG_V_V, - 0); + 0) DEFINE_OPCODE( OPCODE_IS_FALSE, "is_false", OPCODE_SIG_V_V, - 0); + 0) DEFINE_OPCODE( OPCODE_COMPARE_EQ, "compare_eq", OPCODE_SIG_V_V_V, - OPCODE_FLAG_COMMUNATIVE); + OPCODE_FLAG_COMMUNATIVE) DEFINE_OPCODE( OPCODE_COMPARE_NE, "compare_ne", OPCODE_SIG_V_V_V, - OPCODE_FLAG_COMMUNATIVE); + OPCODE_FLAG_COMMUNATIVE) DEFINE_OPCODE( OPCODE_COMPARE_SLT, "compare_slt", OPCODE_SIG_V_V_V, - 0); + 0) DEFINE_OPCODE( OPCODE_COMPARE_SLE, "compare_sle", OPCODE_SIG_V_V_V, - 0); + 0) DEFINE_OPCODE( OPCODE_COMPARE_SGT, "compare_sgt", OPCODE_SIG_V_V_V, - 0); + 0) DEFINE_OPCODE( OPCODE_COMPARE_SGE, "compare_sge", OPCODE_SIG_V_V_V, - 0); + 0) DEFINE_OPCODE( OPCODE_COMPARE_ULT, "compare_ult", OPCODE_SIG_V_V_V, - 0); + 0) DEFINE_OPCODE( OPCODE_COMPARE_ULE, "compare_ule", OPCODE_SIG_V_V_V, - 0); + 0) DEFINE_OPCODE( OPCODE_COMPARE_UGT, "compare_ugt", OPCODE_SIG_V_V_V, - 0); + 0) DEFINE_OPCODE( OPCODE_COMPARE_UGE, "compare_uge", OPCODE_SIG_V_V_V, - 0); + 0) DEFINE_OPCODE( OPCODE_DID_CARRY, "did_carry", OPCODE_SIG_V_V, - OPCODE_FLAG_PAIRED_PREV); + OPCODE_FLAG_PAIRED_PREV) DEFINE_OPCODE( OPCODE_DID_OVERFLOW, "did_overflow", OPCODE_SIG_V_V, - OPCODE_FLAG_PAIRED_PREV); + OPCODE_FLAG_PAIRED_PREV) DEFINE_OPCODE( OPCODE_DID_SATURATE, "did_saturate", OPCODE_SIG_V_V, - OPCODE_FLAG_PAIRED_PREV); + OPCODE_FLAG_PAIRED_PREV) DEFINE_OPCODE( OPCODE_VECTOR_COMPARE_EQ, "vector_compare_eq", OPCODE_SIG_V_V_V, - OPCODE_FLAG_COMMUNATIVE); + OPCODE_FLAG_COMMUNATIVE) DEFINE_OPCODE( OPCODE_VECTOR_COMPARE_SGT, "vector_compare_sgt", 
OPCODE_SIG_V_V_V, - 0); + 0) DEFINE_OPCODE( OPCODE_VECTOR_COMPARE_SGE, "vector_compare_sge", OPCODE_SIG_V_V_V, - 0); + 0) DEFINE_OPCODE( OPCODE_VECTOR_COMPARE_UGT, "vector_compare_ugt", OPCODE_SIG_V_V_V, - 0); + 0) DEFINE_OPCODE( OPCODE_VECTOR_COMPARE_UGE, "vector_compare_uge", OPCODE_SIG_V_V_V, - 0); + 0) DEFINE_OPCODE( OPCODE_ADD, "add", OPCODE_SIG_V_V_V, - OPCODE_FLAG_COMMUNATIVE); + OPCODE_FLAG_COMMUNATIVE) DEFINE_OPCODE( OPCODE_ADD_CARRY, "add_carry", OPCODE_SIG_V_V_V_V, - 0); + 0) DEFINE_OPCODE( OPCODE_VECTOR_ADD, "vector_add", OPCODE_SIG_V_V_V, - OPCODE_FLAG_COMMUNATIVE); + OPCODE_FLAG_COMMUNATIVE) DEFINE_OPCODE( OPCODE_SUB, "sub", OPCODE_SIG_V_V_V, - 0); + 0) DEFINE_OPCODE( OPCODE_MUL, "mul", OPCODE_SIG_V_V_V, - OPCODE_FLAG_COMMUNATIVE); + OPCODE_FLAG_COMMUNATIVE) DEFINE_OPCODE( OPCODE_MUL_HI, "mul_hi", OPCODE_SIG_V_V_V, - OPCODE_FLAG_COMMUNATIVE); + OPCODE_FLAG_COMMUNATIVE) DEFINE_OPCODE( OPCODE_DIV, "div", OPCODE_SIG_V_V_V, - 0); + 0) DEFINE_OPCODE( OPCODE_MUL_ADD, "mul_add", OPCODE_SIG_V_V_V_V, - 0); + 0) DEFINE_OPCODE( OPCODE_MUL_SUB, "mul_sub", OPCODE_SIG_V_V_V_V, - 0); + 0) DEFINE_OPCODE( OPCODE_NEG, "neg", OPCODE_SIG_V_V, - 0); + 0) DEFINE_OPCODE( OPCODE_ABS, "abs", OPCODE_SIG_V_V, - 0); + 0) DEFINE_OPCODE( OPCODE_SQRT, "sqrt", OPCODE_SIG_V_V, - 0); + 0) DEFINE_OPCODE( OPCODE_RSQRT, "rsqrt", OPCODE_SIG_V_V, - 0); + 0) DEFINE_OPCODE( OPCODE_POW2, "pow2", OPCODE_SIG_V_V, - 0); + 0) DEFINE_OPCODE( OPCODE_LOG2, "log2", OPCODE_SIG_V_V, - 0); + 0) DEFINE_OPCODE( OPCODE_DOT_PRODUCT_3, "dot_product_3", OPCODE_SIG_V_V_V, - 0); + 0) DEFINE_OPCODE( OPCODE_DOT_PRODUCT_4, "dot_product_4", OPCODE_SIG_V_V_V, - 0); + 0) DEFINE_OPCODE( OPCODE_AND, "and", OPCODE_SIG_V_V_V, - OPCODE_FLAG_COMMUNATIVE); + OPCODE_FLAG_COMMUNATIVE) DEFINE_OPCODE( OPCODE_OR, "or", OPCODE_SIG_V_V_V, - OPCODE_FLAG_COMMUNATIVE); + OPCODE_FLAG_COMMUNATIVE) DEFINE_OPCODE( OPCODE_XOR, "xor", OPCODE_SIG_V_V_V, - OPCODE_FLAG_COMMUNATIVE); + OPCODE_FLAG_COMMUNATIVE) DEFINE_OPCODE( OPCODE_NOT, "not", OPCODE_SIG_V_V, - 0); + 0) DEFINE_OPCODE( OPCODE_SHL, "shl", OPCODE_SIG_V_V_V, - 0); + 0) DEFINE_OPCODE( OPCODE_VECTOR_SHL, "vector_shl", OPCODE_SIG_V_V_V, - 0); + 0) DEFINE_OPCODE( OPCODE_SHR, "shr", OPCODE_SIG_V_V_V, - 0); + 0) DEFINE_OPCODE( OPCODE_VECTOR_SHR, "vector_shr", OPCODE_SIG_V_V_V, - 0); + 0) DEFINE_OPCODE( OPCODE_SHA, "sha", OPCODE_SIG_V_V_V, - 0); + 0) DEFINE_OPCODE( OPCODE_VECTOR_SHA, "vector_sha", OPCODE_SIG_V_V_V, - 0); + 0) DEFINE_OPCODE( OPCODE_ROTATE_LEFT, "rotate_left", OPCODE_SIG_V_V_V, - 0); + 0) DEFINE_OPCODE( OPCODE_BYTE_SWAP, "byte_swap", OPCODE_SIG_V_V, - 0); + 0) DEFINE_OPCODE( OPCODE_CNTLZ, "cntlz", OPCODE_SIG_V_V, - 0); + 0) DEFINE_OPCODE( OPCODE_INSERT, "insert", OPCODE_SIG_V_V_V_V, - 0); + 0) DEFINE_OPCODE( OPCODE_EXTRACT, "extract", OPCODE_SIG_V_V_V, - 0); + 0) DEFINE_OPCODE( OPCODE_SPLAT, "splat", OPCODE_SIG_V_V, - 0); + 0) DEFINE_OPCODE( OPCODE_PERMUTE, "permute", OPCODE_SIG_V_V_V_V, - 0); + 0) DEFINE_OPCODE( OPCODE_SWIZZLE, "swizzle", OPCODE_SIG_V_V_O, - 0); + 0) DEFINE_OPCODE( OPCODE_PACK, "pack", OPCODE_SIG_V_V, - 0); + 0) DEFINE_OPCODE( OPCODE_UNPACK, "unpack", OPCODE_SIG_V_V, - 0); + 0) DEFINE_OPCODE( OPCODE_COMPARE_EXCHANGE, "compare_exchange", OPCODE_SIG_V_V_V_V, - OPCODE_FLAG_VOLATILE); + OPCODE_FLAG_VOLATILE) DEFINE_OPCODE( OPCODE_ATOMIC_EXCHANGE, "atomic_exchange", OPCODE_SIG_V_V_V, - OPCODE_FLAG_VOLATILE); + OPCODE_FLAG_VOLATILE) DEFINE_OPCODE( OPCODE_ATOMIC_ADD, "atomic_add", OPCODE_SIG_V_V_V, - 0); + 0) DEFINE_OPCODE( OPCODE_ATOMIC_SUB, "atomic_sub", OPCODE_SIG_V_V_V, - 0); + 0) 
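Aside on the opcodes.inl hunks above: every DEFINE_OPCODE entry loses its trailing ';', so the file that includes the table and defines the DEFINE_OPCODE macro now controls the separator between entries. That is the usual X-macro arrangement, where one list is expanded several times (enum values, name/flag tables, sequence or dispatch tables). A minimal, self-contained sketch of the pattern follows; the names (EXAMPLE_OPCODES, AS_ENUM, AS_NAME) are made up for illustration and are not the actual alloy definitions.

// Hypothetical illustration of the X-macro pattern; example names only.
#include <cstdio>

#define EXAMPLE_OPCODES(X)     \
  X(EXAMPLE_OPCODE_NOP, "nop") \
  X(EXAMPLE_OPCODE_ADD, "add")

// First expansion: declare the enum values.
#define AS_ENUM(sym, name) sym,
enum ExampleOpcode { EXAMPLE_OPCODES(AS_ENUM) EXAMPLE_OPCODE_MAX };
#undef AS_ENUM

// Second expansion: build a parallel name table. The consumer supplies the
// trailing comma for each entry, which is why the table entries themselves
// must not end in ';'.
#define AS_NAME(sym, name) name,
static const char* example_names[] = { EXAMPLE_OPCODES(AS_NAME) };
#undef AS_NAME

int main() {
  std::printf("%s\n", example_names[EXAMPLE_OPCODE_ADD]);  // prints "add"
  return 0;
}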
diff --git a/src/alloy/hir/sources.gypi b/src/alloy/hir/sources.gypi index 948b43dd8..1ea2d7783 100644 --- a/src/alloy/hir/sources.gypi +++ b/src/alloy/hir/sources.gypi @@ -1,6 +1,7 @@ # Copyright 2013 Ben Vanik. All Rights Reserved. { 'sources': [ + 'block.cc', 'block.h', 'hir_builder.cc', 'hir_builder.h', diff --git a/src/alloy/hir/value.cc b/src/alloy/hir/value.cc index a684c6f2b..f70d6ceb2 100644 --- a/src/alloy/hir/value.cc +++ b/src/alloy/hir/value.cc @@ -560,6 +560,26 @@ void Value::ByteSwap() { } } +void Value::CountLeadingZeros(const ConstantValue& src) { + switch (type) { + case INT8_TYPE: + constant.i8 = __lzcnt16(src.i8) - 8; + break; + case INT16_TYPE: + constant.i8 = __lzcnt16(src.i16); + break; + case INT32_TYPE: + constant.i8 = __lzcnt(src.i32); + break; + case INT64_TYPE: + constant.i8 = __lzcnt64(src.i64); + break; + default: + XEASSERTALWAYS(); + break; + } +} + bool Value::Compare(Opcode opcode, Value* other) { // TODO(benvanik): big matrix. XEASSERTALWAYS(); diff --git a/src/alloy/hir/value.h b/src/alloy/hir/value.h index 4587efb19..e3af4906f 100644 --- a/src/alloy/hir/value.h +++ b/src/alloy/hir/value.h @@ -68,6 +68,10 @@ enum ValueFlags { VALUE_IS_ALLOCATED = (1 << 2), // Used by backends. Do not set. }; +struct RegAssignment { + const backend::MachineInfo::RegisterSet* set; + int32_t index; +}; class Value { public: @@ -91,10 +95,7 @@ public: TypeName type; uint32_t flags; - struct { - const backend::MachineInfo::RegisterSet* set; - int32_t index; - } reg; + RegAssignment reg; ConstantValue constant; Instr* def; @@ -392,6 +393,7 @@ public: void Shr(Value* other); void Sha(Value* other); void ByteSwap(); + void CountLeadingZeros(const ConstantValue& src); bool Compare(Opcode opcode, Value* other); }; diff --git a/third_party/xbyak b/third_party/xbyak index 702d6e668..2d599b3bd 160000 --- a/third_party/xbyak +++ b/third_party/xbyak @@ -1 +1 @@ -Subproject commit 702d6e6683c322f08a36ea059f6d6f8263b1bd0d +Subproject commit 2d599b3bd64a6d13c8b47a5f7410c67837bfff5d diff --git a/xenia.gyp b/xenia.gyp index e59823058..a765e5c00 100644 --- a/xenia.gyp +++ b/xenia.gyp @@ -24,6 +24,18 @@ 'target_arch%': 'x64', }, + 'conditions': [ + ['OS=="win"', { + 'variables': { + 'move_command%': 'move' + }, + }, { + 'variables': { + 'move_command%': 'mv' + }, + }] + ], + 'target_defaults': { 'include_dirs': [ 'include/', @@ -255,6 +267,7 @@ 'include_dirs': [ '.', 'src/', + '<(INTERMEDIATE_DIR)', ], 'includes': [