diff --git a/projectfiles/qtcreator/fba_qt.pro b/projectfiles/qtcreator/fba_qt.pro
index 0af02a059..a257f2da3 100644
--- a/projectfiles/qtcreator/fba_qt.pro
+++ b/projectfiles/qtcreator/fba_qt.pro
@@ -45,6 +45,11 @@ GEN = $$SRC/dep/generated
 FBA_LD = ld
 #DEFINES += FBA_DEBUG
+#-------------------------------------------------------------------------------
+# Dynamic recompilers
+#-------------------------------------------------------------------------------
+DRC_MIPS3_X64 = true
+
 #-------------------------------------------------------------------------------
 # Additional include paths
 #-------------------------------------------------------------------------------
@@ -717,7 +722,8 @@ SOURCES += \
     ../../src/cpu/mips3/cop1.cpp \
     ../../src/cpu/mips3/dasm.cpp \
     ../../src/cpu/mips3/mips3.cpp \
-    ../../src/cpu/mips3_intf.cpp
+    ../../src/cpu/mips3_intf.cpp \
+
 HEADERS += \
@@ -900,7 +906,38 @@ HEADERS += \
     ../../src/cpu/mips3/mips3_rw.h \
     ../../src/cpu/mips3/mips3_shift.h \
     ../../src/cpu/mips3/mipsdef.h \
-    ../../src/cpu/mips3_intf.h
+    ../../src/cpu/mips3_intf.h \
+
+
+#-------------------------------------------------------------------------------
+# MIPS3 x64 recompiler
+#-------------------------------------------------------------------------------
+
+$$DRC_MIPS3_X64 {
+    message("MIPS3 x64 dynarec enabled")
+    DEFINES += \
+        XBYAK_NO_OP_NAMES \
+        MIPS3_X64_DRC
+
+    HEADERS += \
+        ../../src/cpu/mips3/x64/mips3_x64.h \
+        ../../src/cpu/mips3/x64/mips3_x64_arithm.h \
+        ../../src/cpu/mips3/x64/mips3_x64_bitops.h \
+        ../../src/cpu/mips3/x64/mips3_x64_branch.h \
+        ../../src/cpu/mips3/x64/mips3_x64_cop0.h \
+        ../../src/cpu/mips3/x64/mips3_x64_cop1.h \
+        ../../src/cpu/mips3/x64/mips3_x64_defs.h \
+        ../../src/cpu/mips3/x64/mips3_x64_misc.h \
+        ../../src/cpu/mips3/x64/mips3_x64_rw.h \
+        ../../src/cpu/mips3/x64/mips3_x64_shift.h \
+        ../../src/cpu/mips3/x64/xbyak/xbyak.h \
+        ../../src/cpu/mips3/x64/xbyak/xbyak_bin2hex.h \
+        ../../src/cpu/mips3/x64/xbyak/xbyak_mnemonic.h \
+        ../../src/cpu/mips3/x64/xbyak/xbyak_util.h
+
+    SOURCES += \
+        ../../src/cpu/mips3/x64/mips3_x64.cpp
+}
 #-------------------------------------------------------------------------------
diff --git a/src/cpu/mips3/common.h b/src/cpu/mips3/common.h
index 818a69c1e..9fd4100df 100644
--- a/src/cpu/mips3/common.h
+++ b/src/cpu/mips3/common.h
@@ -3,6 +3,12 @@
 #include
+// ALIGN_DECL(n): alignment specifier. MSVC identifies itself with _MSC_VER; other compilers take the GNU form.
+#ifdef _MSC_VER
+#define ALIGN_DECL(n) __declspec(align(n))
+#else
+#define ALIGN_DECL(n) __attribute__ ((aligned (n)))
+#endif
 namespace mips
 {
 using namespace std;
diff --git a/src/cpu/mips3/cop0.cpp b/src/cpu/mips3/cop0.cpp
index aee6a033e..3f5fc5fbe 100644
--- a/src/cpu/mips3/cop0.cpp
+++ b/src/cpu/mips3/cop0.cpp
@@ -19,41 +19,7 @@ const char *mips3::cop0_reg_names[32] = {
     "ECC", "CacheErr", "TagLo", "TagHi", "ErrorEPC", "--"
 };
-enum {
-    INDEX = 0,
-    RANDOM,
-    ENTRYLO0,
-    ENTRYLO1,
-    CONTEXT,
-    PAGEMASK,
-    WIRED,
-    __COP0_UNUSED0,
-    BADVADDR,
-    COUNT,
-    ENTRYHI,
-    COMPARE,
-    SR,
-    CAUSE,
-    EPC,
-    PRId,
-    CONFIG,
-    LLADDR,
-    WATCHLO,
-    WATCHHI,
-    XCONTEXT,
-    __COP0_UNUSED1,
-    __COP0_UNUSED2,
-    __COP0_UNUSED3,
-    __COP0_UNUSED4,
-    __COP0_UNUSED5,
-    ECC,
-    CACHEERR,
-    TAGLO,
-    TAGHI,
-    ERROREPC
-} ;
-
-#define COP0_R(x) m_state.cpr[0][x]
+#define CR(x) m_state.cpr[0][x]
 void mips3::tlb_init()
 {
@@ -74,7 +40,9 @@ void mips3::cop0_execute(uint32_t opcode)
     switch (RSNUM) {
     // MFC
     case 0x00:
-        RT = COP0_R(RDNUM & 0x1F);
+        if (RTNUM) {
+            RT = CR(RDNUM);
+        }
         break;
     // MTC
@@ -84,16 +52,16 @@ void mips3::cop0_execute(uint32_t opcode)
     // TLBWI
     case 0x10:
     {
-        unsigned char idx = COP0_R(INDEX);
+        unsigned char idx = CR(COP0_Index);
         if (idx >= 48) {
             cout << "TLBWI index > 48" << endl;
             return;
         }
         tlb_entry *e = &m_tlb[idx];
-        e->b.even_lo = COP0_R(ENTRYLO0);
-        e->b.odd_lo = COP0_R(ENTRYLO1);
-        e->b.hi = COP0_R(ENTRYHI);
-        e->b.pagemask = COP0_R(PAGEMASK);
+        e->b.even_lo = CR(COP0_EntryLo0);
+        e->b.odd_lo = CR(COP0_EntryLo1);
+        e->b.hi = CR(COP0_EntryHi);
+        e->b.pagemask = CR(COP0_PageMask);
         break;
     }
diff
--git a/src/cpu/mips3/mips3.cpp b/src/cpu/mips3/mips3.cpp
index e691992d4..d535e9b04 100644
--- a/src/cpu/mips3/mips3.cpp
+++ b/src/cpu/mips3/mips3.cpp
@@ -28,6 +28,7 @@ const char *mips3::reg_names[32] = {
 mips3::mips3() : m_tlb_entries(48)
 {
     tlb_init();
+    m_state.total_cycles = 0; // NOTE(review): reset_cycle is only seen being written in reset(); confirm it is set before first use
 }
 mips3::~mips3()
@@ -48,6 +49,7 @@ void mips3::reset()
     cop1_reset();
     tlb_flush();
     m_counter = 0;
+    m_state.reset_cycle = m_state.total_cycles; // snapshot of the running cycle total at the moment of reset
 }
@@ -336,7 +338,7 @@ bool mips3::run(int cycles, bool skip_bps)
         // COP1 (FPU) > TODO
         case 0x11:
-            if (m_state.cpr[0][12] & (1 << 26))
+            if (m_state.cpr[0][COP0_SR] & (1 << 26)) // COP0_SR replaces the literal index 12
                 cop1_execute_32(opcode);
             else
                 cop1_execute_16(opcode);
@@ -414,11 +416,12 @@ bool mips3::run(int cycles, bool skip_bps)
         }
         // Increment COP0 Count
-        m_state.cpr[0][9] += 20;
+        m_state.cpr[0][COP0_Count] += 20; // COP0_Count replaces the literal index 9
         m_counter++;
         if (!skip_bps && check_breakpoint())
             return true;
     }
+    m_state.total_cycles += m_counter; // NOTE(review): not reached when the breakpoint check above returns early
     return false;
 }
diff --git a/src/cpu/mips3/mips3.h b/src/cpu/mips3/mips3.h
index fde179972..767cd42e2 100644
--- a/src/cpu/mips3/mips3.h
+++ b/src/cpu/mips3/mips3.h
@@ -13,6 +13,40 @@
 namespace mips
 {
+enum COP0_Registers { // indices into m_state.cpr[0][...]
+    COP0_Index = 0,
+    COP0_Random,
+    COP0_EntryLo0,
+    COP0_EntryLo1,
+    COP0_Context,
+    COP0_PageMask,
+    COP0_Wired,
+    COP0_Unused0,
+    COP0_BadVAddr, // 8
+    COP0_Count, // 9
+    COP0_EntryHi,
+    COP0_Compare,
+    COP0_SR, // 12
+    COP0_Cause,
+    COP0_EPC,
+    COP0_PRId,
+    COP0_Config, // 16
+    COP0_LLAddr,
+    COP0_WatchLo,
+    COP0_WatchHi,
+    COP0_XContext, // 20
+    COP0_Unused1,
+    COP0_Unused2,
+    COP0_Unused3,
+    COP0_Unused4,
+    COP0_Unused5,
+    COP0_ECC, // 26
+    COP0_CacheErr,
+    COP0_TagLo,
+    COP0_TagHi,
+    COP0_ErrorEPC // 30
+};
+
 #ifdef MIPS3_X64_DRC
 class mips3_x64;
 #endif
@@ -62,7 +96,10 @@ public:
         uint64_t cpr[3][32];
         // fpu control registers (FCR)
         uint64_t fcr[32];
-    } __attribute__ ((aligned (16))) m_state;
+        uint64_t reset_cycle; // value of total_cycles captured by reset()
+        uint64_t total_cycles; // running total, advanced by run() and by recompiled blocks
+    };
+    ALIGN_DECL(16) cpu_state m_state; // 16-byte aligned via the ALIGN_DECL macro
     addr_t m_prev_pc;
     static const char *reg_names[];
diff --git a/src/cpu/mips3/x64/mips3_x64.cpp
b/src/cpu/mips3/x64/mips3_x64.cpp
new file mode 100644
index 000000000..2bec6e059
--- /dev/null
+++ b/src/cpu/mips3/x64/mips3_x64.cpp
@@ -0,0 +1,531 @@
+/*
+ * Copyright (c) 2015, Marcos Medeiros
+ * Licensed under BSD 3-clause.
+ */
+#include
+#include "mips3_x64.h"
+#include "xbyak/xbyak.h"
+#include "../mips3.h"
+#include "../mipsdef.h"
+#include "../memory.h"
+#include "mips3_x64_defs.h"
+#include "mips3_x64_rw.h"
+#include "mips3_x64_branch.h"
+#include "mips3_x64_bitops.h"
+#include "mips3_x64_arithm.h"
+#include "mips3_x64_shift.h"
+#include "mips3_x64_misc.h"
+#include "mips3_x64_cop0.h"
+#include "mips3_x64_cop1.h"
+
+#ifdef HAS_UDIS86
+#include "udis86/udis86.h"
+#endif
+
+#define LOG_DYNAREC 0
+#define LOG_DYNAREC_DASM 0
+#define FULL_FALLBACK 0
+
+namespace mips
+{
+
+// Wraps an interpreter core and reserves a 16 MiB buffer for generated code.
+mips3_x64::mips3_x64(mips3 *interpreter) : CodeGenerator(1024 * 1024 * 16)
+{
+    m_core = interpreter;
+    m_blocks.clear();
+
+#ifdef HAS_UDIS86
+    ud_init(&m_udobj);
+    ud_set_mode(&m_udobj, 64);
+    ud_set_syntax(&m_udobj, UD_SYN_INTEL);
+#endif
+}
+
+// Returns the cached entry point for pc, or nullptr when no block is compiled for it.
+inline void *mips3_x64::get_block(addr_t pc)
+{
+    // One hash lookup instead of find() followed by operator[]
+    auto it = m_blocks.find(pc);
+    return (it == m_blocks.end()) ? nullptr : it->second;
+}
+
+
+// Runs translated blocks until the cycle budget is used up or a translation fails.
+void mips3_x64::run(int cycles)
+{
+    m_icounter = cycles;
+    m_stop_translation = false;
+    m_translate_failed = false;
+    m_is_delay_slot = false;
+
+    void *recompiled_code;
+    while (m_icounter > 0) {
+        recompiled_code = get_block(m_core->m_state.pc);
+
+        if (recompiled_code == nullptr) {
+            try {
+                auto ptr = compile_block(m_core->m_state.pc);
+                if (m_translate_failed)
+                    break;
+
+                m_blocks[m_core->m_state.pc] = ptr;
+                recompiled_code = ptr;
+            } catch(Xbyak::Error& e) {
+                // code flush
+                if (e == Xbyak::ERR_CODE_IS_TOO_BIG) {
+                    drc_log("Flushing recompiler cache...\n");
+                    m_blocks.clear();
+                    reset();
+                    recompiled_code = nullptr;
+                } else {
+                    drc_log("%s", e.what());
+                    exit(-1);
+                }
+            }
+        }
+        if (recompiled_code)
+            Xbyak::CastTo(recompiled_code)();
+    }
+
+    if (m_translate_failed)
+        drc_log_error("Translation failed at PC: %X", m_drc_pc);
+}
+
+
+
+// Emits the block entry: saves rbp/rbx/r15, reserves 16 bytes of stack, loads the
+// cpu_state base and the cycle counter, then emits the out-of-cycles check.
+void mips3_x64::prolog()
+{
+    // R15 = cycle counter
+    // RBX = cpu_state base
+    push(rbp);
+    push(rbx);
+    push(r15);
+    mov(rbp, rsp);
+    sub(rsp, 16);
+    mov(rbx, ADR(m_core->m_state));
+    mov(r15, ADR(m_icounter));
+    mov(r15, ptr[r15]);
+
+    check_icounter();
+}
+
+// Emits the block exit: writes r15 back to m_icounter and restores the saved registers.
+// With do_ret == false no ret is emitted, so the caller can chain into another block.
+void mips3_x64::epilog(bool do_ret)
+{
+    add(rsp, 16);
+    mov(rax, ADR(m_icounter));
+    mov(ptr[rax], r15);
+    pop(r15);
+    pop(rbx);
+    pop(rbp);
+    if (do_ret)
+        ret();
+}
+
+
+
+// Translates instructions starting at pc until compile_instruction() reports the end
+// of the block; returns the entry point of the generated code.
+void *mips3_x64::compile_block(addr_t pc)
+{
+    static int depth = 0;
+    m_drc_pc = pc;
+
+#if LOG_DYNAREC
+    drc_log("Recompile block at %X\n", m_drc_pc);
+#endif
+
+    uint32_t opcode;
+    addr_t eaddr = 0;
+    bool do_recompile = true;
+
+    void *block_ptr = Xbyak::CastTo(getCurr());
+
+    prolog();
+
+    m_block_icounter = 0;
+
+    while (do_recompile) {
+        m_core->translate(m_drc_pc, &eaddr);
+        opcode = mem::read_word(eaddr);
+        m_drc_pc += 4;
+        m_block_icounter++;
+        if (compile_instruction(opcode)) {
+            // Jump Instr
+            do_recompile = false;
+        }
+    }
+
+    ready();
+
+#if LOG_DYNAREC && defined(HAS_UDIS86)
+    if (!depth) {
+        drc_log("=======================================\n");
+        drc_log("Generated code for %x\n", pc);
+        drc_log("=======================================\n");
+        // show recompiled code
+        ud_set_input_buffer(&m_udobj, (unsigned char *) block_ptr,
+                            (Xbyak::CastTo(getCurr()) - Xbyak::CastTo(block_ptr)));
+        ud_set_pc(&m_udobj, (uint64_t) block_ptr);
+        uint64_t dasm_pc = (uint64_t) block_ptr;
+        unsigned k;
+        while ((k = ud_disassemble(&m_udobj))) {
+            drc_log("%08X %s\n", dasm_pc, ud_insn_asm(&m_udobj));
+            dasm_pc += k;
+        }
+    }
+#endif
+    return block_ptr;
+}
+
+
+// Emits code for one MIPS instruction; returns true when the block must end here.
+bool mips3_x64::compile_instruction(uint32_t opcode)
+{
+#if LOG_DYNAREC_DASM
+    drc_log("%s\n", m_core->dasm(opcode, m_drc_pc - 4).c_str());
+#endif
+
+    bool result = false;
+    switch (opcode >> 26) {
+    // SPECIAL
+    case 0x00:
+    {
+        switch (opcode & 0x3F) {
+
+
+#if !FULL_FALLBACK
+        case 0x00: result = SLL(opcode); break;
+        case 0x02: result = SRL(opcode); break;
+        case 0x03: result = SRA(opcode); break;
+        case 0x04: result = SLLV(opcode); break;
+        case 0x06: result = SRLV(opcode); break;
+        case 0x07: result = SRAV(opcode); break;
+        case 0x14: result = DSLLV(opcode); break;
+        case 0x16: result = DSLRV(opcode); break;
+        case 0x17: result = DSRAV(opcode); break;
+        case 0x38: result = DSLL(opcode); break;
+        case 0x3A: result = DSRL(opcode); break;
+        case 0x3B: result = DSRA(opcode); break;
+        case 0x3C: result = DSLL32(opcode); break;
+        case 0x3E: result = DSRL32(opcode); break;
+        case 0x3F: result = DSRA32(opcode); break;
+
+        case 0x10: result = MFHI(opcode); break;
+        case 0x11: result = MTHI(opcode); break;
+        case 0x12: result = MFLO(opcode); break;
+        case 0x13: result = MTLO(opcode); break;
+
+        case 0x18: result = MULT(opcode); break;
+        case 0x19: result = MULTU(opcode); break;
+        case 0x1A: result = DIV(opcode); break;
+        case 0x1B: result = DIVU(opcode); break;
+        case 0x1C: result = DMULT(opcode); break;
+        case 0x1D: result = DMULTU(opcode); break;
+        case 0x1E: result = DDIV(opcode); break;
+        case 0x1F: result = DDIVU(opcode); break;
+
+        case 0x20: result = ADD(opcode); break;
+        case 0x21: result = ADDU(opcode); break;
+        case 0x22: result = SUB(opcode); break;
+        case 0x23: result = SUBU(opcode); break;
+        case 0x2C: result = DADD(opcode); break;
+        case 0x2D: result = DADDU(opcode); break;
+        case 0x2E: result = DSUB(opcode); break;
+        case 0x2F: result = DSUBU(opcode); break;
+
+        case 0x24: result = AND(opcode); break;
+        case 0x25: result = OR(opcode); break;
+        case 0x26: result = XOR(opcode); break;
+        case 0x27: result = NOR(opcode); break;
+
+        case 0x2A: result = SLT(opcode); break;
+        case 0x2B: result = SLTU(opcode); break;
+
+#else
+        // NOTE(review): unlike the native list above, this list has no entries for
+        // 0x17 (DSRAV), 0x1D (DMULTU), 0x2C (DADD) or 0x2E (DSUB) - confirm the interpreter handlers exist.
+        case 0x00: fallback(opcode, &mips3::SLL); break;
+        case 0x02: fallback(opcode, &mips3::SRL); break;
+        case 0x03: fallback(opcode, &mips3::SRA); break;
+        case 0x04: fallback(opcode, &mips3::SLLV); break;
+        case 0x06: fallback(opcode, &mips3::SRLV); break;
+        case 0x07: fallback(opcode, &mips3::SRAV); break;
+        case 0x14: fallback(opcode, &mips3::DSLLV); break;
+        case 0x16: fallback(opcode, &mips3::DSLRV); break;
+        case 0x38: fallback(opcode, &mips3::DSLL); break;
+        case 0x3A: fallback(opcode, &mips3::DSRL); break;
+        case 0x3B: fallback(opcode, &mips3::DSRA); break;
+        case 0x3C: fallback(opcode, &mips3::DSLL32); break;
+        case 0x3E: fallback(opcode, &mips3::DSRL32); break;
+        case 0x3F: fallback(opcode, &mips3::DSRA32); break;
+
+        case 0x10: fallback(opcode, &mips3::MFHI); break;
+        case 0x11: fallback(opcode, &mips3::MTHI); break;
+        case 0x12: fallback(opcode, &mips3::MFLO); break;
+        case 0x13: fallback(opcode, &mips3::MTLO); break;
+
+        case 0x18: fallback(opcode, &mips3::MULT); break;
+        case 0x19: fallback(opcode, &mips3::MULTU); break;
+        case 0x1A: fallback(opcode, &mips3::DIV); break;
+        case 0x1B: fallback(opcode, &mips3::DIVU); break;
+        case 0x1C: fallback(opcode, &mips3::DMULT); break;
+        case 0x1E: fallback(opcode, &mips3::DDIV); break;
+        case 0x1F: fallback(opcode, &mips3::DDIVU); break;
+        case 0x20: fallback(opcode, &mips3::ADD); break;
+        case 0x21: fallback(opcode, &mips3::ADDU); break;
+        case 0x22: fallback(opcode, &mips3::SUB); break;
+        case 0x23: fallback(opcode, &mips3::SUBU); break;
+        case 0x2D: fallback(opcode, &mips3::DADDU); break;
+        case 0x2F: fallback(opcode, &mips3::DSUBU); break;
+
+        case 0x24: fallback(opcode, &mips3::AND); break;
+        case 0x25: fallback(opcode, &mips3::OR); break;
+        case 0x26: fallback(opcode, &mips3::XOR); break;
+        case 0x27: fallback(opcode, &mips3::NOR); break;
+
+        case 0x2A: fallback(opcode, &mips3::SLT); break;
+        case 0x2B: fallback(opcode, &mips3::SLTU); break;
+
+#endif
+
+        case 0x08: result = JR(opcode); break;
+        case 0x09: result = JALR(opcode); break;
+        case 0x0C: drc_log("syscall %08X\n", m_drc_pc); break;
+        case 0x0D: drc_log("break %08X\n", m_drc_pc); break;
+        default:
+            drc_log_error("%08X %X [special]\n", m_drc_pc, opcode & 0x3F);
+            translate_failed();
+            result = true;
+            exit(-3);
+            break;
+        }
+        break;
+    }
+
+    // REGIMM
+    case 0x01:
+    {
+        switch ((opcode >> 16) & 0x1F) {
+        case 0x00: result = BLTZ(opcode); break;
+        case 0x01: result = BGEZ(opcode); break;
+        case 0x10: result = BLTZAL(opcode); break;
+        case 0x11: result = BGEZAL(opcode); break;
+        default:
+            drc_log_error("%08X %X [regimm]\n", m_drc_pc, (opcode >> 16) & 0x1F);
+            translate_failed();
+            result = true;
+            exit(-3);
+            break;
+        }
+
+        break;
+    }
+
+    case 0x02: result = J(opcode); break;
+    case 0x03: result = JAL(opcode); break;
+    case 0x04: result = BEQ(opcode); break;
+    case 0x05: result = BNE(opcode); break;
+    case 0x06: result = BLEZ(opcode); break;
+    case 0x07: result = BGTZ(opcode); break;
+
+#if !FULL_FALLBACK
+    case 0x08: result = ADDI(opcode); break;
+    case 0x09: result = ADDIU(opcode); break;
+    case 0x0A: result = SLTI(opcode); break;
+    case 0x0B: result = SLTIU(opcode); break;
+    case 0x0C: result = ANDI(opcode); break;
+    case 0x0D: result = ORI(opcode); break;
+    case 0x0E: result = XORI(opcode); break;
+    case 0x0F: result = LUI(opcode); break;
+#else
+    case 0x08: fallback(opcode, &mips3::ADDI); break;
+    case 0x09: fallback(opcode, &mips3::ADDIU); break;
+    case 0x0A: fallback(opcode, &mips3::SLTI); break;
+    case 0x0B: fallback(opcode, &mips3::SLTIU); break;
+    case 0x0C: fallback(opcode, &mips3::ANDI); break;
+    case 0x0D: fallback(opcode, &mips3::ORI); break;
+    case 0x0E: fallback(opcode, &mips3::XORI); break;
+    case 0x0F: fallback(opcode, &mips3::LUI); break;
+#endif
+    case 0x10: result = compile_cop0(opcode); break;
+#if !FULL_FALLBACK
+    case 0x11: result = compile_cop1(opcode); break;
+    case 0x18: result = DADDI(opcode); break;
+    case 0x19: result = DADDIU(opcode); break;
+#else
+    case 0x11: fallback(opcode, &mips3::cop1_execute_32); break;
+    case 0x18: fallback(opcode, &mips3::DADDI); break;
+    case 0x19: fallback(opcode, &mips3::DADDIU); break;
+#endif
+
+    case 0x2F: break;
+
+    // Load & Store fallbacks
+#if 0 /* !FULL_FALLBACK */ /* Use fallbacks */
+    case 0x20: LB(opcode); break;
+    case 0x24: LBU(opcode); break;
+    case 0x21: LH(opcode); break;
+    case 0x25: LHU(opcode); break;
+    case 0x23: LW(opcode); break;
+    case 0x27: LWU(opcode); break;
+    case 0x37: LD(opcode); break;
+
+    case 0x28: SB(opcode); break;
+    case 0x29: SH(opcode); break;
+    case 0x2B: SW(opcode); break;
+    case 0x3F: SD(opcode); break;
+
+    case 0x22: LWL(opcode); break;
+    case 0x26: LWR(opcode); break;
+
+    case 0x1A: fallback(opcode, &mips3::LDL); break;
+    case 0x1B: fallback(opcode, &mips3::LDR); break;
+
+    case 0x2C: fallback(opcode, &mips3::SDL); break;
+    case 0x2D: fallback(opcode, &mips3::SDR); break;
+
+    case 0x31: fallback(opcode, &mips3::LWC1); break;
+    case 0x39: fallback(opcode, &mips3::SWC1); break;
+
+#else
+    case 0x20: fallback(opcode, &mips3::LB); break;
+    case 0x24: fallback(opcode, &mips3::LBU); break;
+    case 0x21: fallback(opcode, &mips3::LH); break;
+    case 0x25: fallback(opcode, &mips3::LHU); break;
+    case 0x23: fallback(opcode, &mips3::LW); break;
+    case 0x27: fallback(opcode, &mips3::LWU); break;
+    case 0x37: fallback(opcode, &mips3::LD); break;
+
+    case 0x28: fallback(opcode, &mips3::SB); break;
+    case 0x29: fallback(opcode, &mips3::SH); break;
+    case 0x2B: fallback(opcode, &mips3::SW); break;
+    case 0x3F: fallback(opcode, &mips3::SD); break;
+
+    case 0x22: fallback(opcode, &mips3::LWL); break;
+    case 0x26: fallback(opcode, &mips3::LWR); break;
+
+    case 0x1A: fallback(opcode, &mips3::LDL); break;
+    case 0x1B: fallback(opcode, &mips3::LDR); break;
+
+    case 0x2C: fallback(opcode, &mips3::SDL); break;
+    case 0x2D: fallback(opcode, &mips3::SDR); break;
+
+    case 0x31: fallback(opcode, &mips3::LWC1); break;
+    case 0x39: fallback(opcode, &mips3::SWC1); break;
+#endif
+
+    default:
+        drc_log_error("%08X Op %X\n", m_drc_pc, opcode >> 26);
+        translate_failed();
+        result = true;
+        exit(-3);
+        break;
+    }
+
+    return result;
+}
+
+// Emits: if the cycle counter in r15 is <= 0, store the current translation PC and leave the block.
+void mips3_x64::check_icounter()
+{
+    inLocalLabel();
+    cmp(r15, 0);
+    jg(".l");
+    set_next_pc(m_drc_pc);
+    epilog();
+    L(".l");
+    outLocalLabel();
+}
+
+// Emits a runtime test of SR bit 26 and a call to the matching interpreter COP1 handler,
+// mirroring the interpreter: bit set -> cop1_execute_32, bit clear -> cop1_execute_16.
+bool mips3_x64::cop1_fallback(uint32_t opcode)
+{
+    inLocalLabel();
+    mov(rax, COP0_x(COP0_SR));
+
+    mov(rcx, 1 << 26);
+    test(rax, rcx);
+    je(".fr0");     // ZF set means bit 26 is clear
+    fallback(opcode, &mips3::cop1_execute_32);
+    jmp(".end");
+    L(".fr0");
+    fallback(opcode, &mips3::cop1_execute_16);
+    L(".end");
+
+    outLocalLabel();
+    return false;
+}
+
+// Emits a move of addr into PC_q.
+void mips3_x64::set_next_pc(addr_t addr)
+{
+    mov(PC_q, addr);
+}
+
+// Emits a call to an interpreter handler with rdi = core object and esi = opcode.
+// Casting a pointer-to-member-function to void* is a GNU compiler extension.
+void mips3_x64::fallback(uint32_t opcode, void (mips3::*f)(uint32_t))
+{
+    // SysV AMD64 ABI - GNU
+    mov(rdi, (size_t) m_core);
+    mov(esi, opcode);
+    mov(rax, (size_t) (void*)f);
+    call(rax);
+}
+
+// Emits the accounting for the pending count (r15 -= n, total_cycles += n) and clears it.
+void mips3_x64::update_icounter()
+{
+    sub(r15, m_block_icounter);
+    mov(rax, ADR(m_core->m_state.total_cycles));
+    add(qword[rax], m_block_icounter);
+    m_block_icounter = 0;
+}
+
+// Emits the exit for a static target: jumps straight into the target block when it is
+// already compiled, otherwise stores the target PC and returns to the caller.
+void mips3_x64::jmp_to_block(uint64_t addr)
+{
+    // Simple block linking
+    void *next_ptr = get_block(addr);
+    if (next_ptr) {
+#if LOG_DYNAREC
+        drc_log("Block link: %08X to %08X\n", CORE_PC, addr);
+#endif
+        epilog(false);
+        mov(rax, (size_t) next_ptr);
+        jmp(rax);
+        return;
+    } else {
+        set_next_pc(addr);
+    }
+    epilog();
+}
+
+// Emits the exit for a register target: PC_x = zero-extended low 32 bits of GPR reg.
+void mips3_x64::jmp_to_register(int reg)
+{
+    mov(ecx, Rn_x(reg));
+    mov(PC_x, rcx);
+    epilog();
+}
+
+// Flags the translation as failed and emits a normal block exit.
+void mips3_x64::translate_failed()
+{
+    m_translate_failed = true;
+    update_icounter();
+    epilog();
+}
+
+
+
+}
+
+
diff --git a/src/cpu/mips3/x64/mips3_x64.h b/src/cpu/mips3/x64/mips3_x64.h
new file mode 100644
index 000000000..e4d2ef686
--- /dev/null
+++ b/src/cpu/mips3/x64/mips3_x64.h
@@ -0,0 +1,169 @@
+/*
+ * Copyright (c) 2015, Marcos Medeiros
+ * Licensed under BSD 3-clause.
+ */
+#ifndef MIPS3_X64
+#define MIPS3_X64
+
+#include
+#include "xbyak/xbyak.h"
+#include "../mips3.h"
+
+#ifdef HAS_UDIS86
+#include "udis86/udis86.h"
+#endif
+
+namespace mips
+{
+
+
+class mips3_x64 : public Xbyak::CodeGenerator // x86-64 code generator driven by a mips3 interpreter core
+{
+public:
+    mips3_x64(mips3 *interpreter);
+    void run(int cycles); // executes translated code until the cycle budget is used up
+
+private:
+    int64_t m_icounter; // remaining cycle budget; set by run(), loaded into r15 by prolog()
+    addr_t m_drc_pc; // guest address of the next instruction to translate
+    bool m_is_delay_slot; // true while a branch delay slot is being translated
+    void run_this(void *ptr); // NOTE(review): no definition visible in this chunk
+    void *compile_block(addr_t pc);
+    void *get_block(addr_t pc);
+    bool compile_special(uint32_t opcode); // NOTE(review): no definition visible in this chunk
+    bool compile_regimm(uint32_t opcode); // NOTE(review): no definition visible in this chunk
+    bool compile_instruction(uint32_t opcode); // returns true when the block ends at this instruction
+    bool compile_cop0(uint32_t opcode);
+    bool compile_cop1(uint32_t opcode);
+    void check_icounter();
+    bool cop1_fallback(uint32_t opcode);
+    void set_next_pc(addr_t addr);
+    void fallback(uint32_t opcode, void (mips3::*f)(uint32_t)); // emits a call to an interpreter handler
+    void update_icounter();
+    void jmp_to_block(uint64_t addr);
+    void jmp_to_register(int reg);
+    void translate_failed();
+    void update_cp0_count(); // NOTE(review): no definition visible in this chunk
+    void prolog();
+    void epilog(bool do_ret=true); // do_ret == false omits the final ret
+
+    uint8_t *m_cache; // NOTE(review): not referenced in the visible sources
+    mips3 *m_core; // interpreter whose m_state the generated code operates on
+    void *m_current_block; // NOTE(review): not referenced in the visible sources
+    uint64_t m_block_icounter; // cycles accumulated at translation time since the last update_icounter()
+    bool m_translate_failed; // set by translate_failed(); makes run() stop
+    bool m_stop_translation; // cleared by run(); NOTE(review): never read in the visible sources
+    unordered_map m_blocks; // compiled block entry points keyed by guest PC
+#ifdef HAS_UDIS86
+    ud_t m_udobj;
+#endif
+
+    // COP1 branch
+    bool BC1F(uint32_t opcode);
+    bool BC1FL(uint32_t opcode);
+    bool BC1T(uint32_t opcode);
+    bool BC1TL(uint32_t opcode);
+
+    // Arithmetic
+    bool ADD(uint32_t opcode);
+    bool SUB(uint32_t opcode);
+    bool MULT(uint32_t opcode);
+    bool DIV(uint32_t opcode);
+    bool ADDU(uint32_t opcode);
+    bool SUBU(uint32_t opcode);
+    bool MULTU(uint32_t opcode);
+    bool DIVU(uint32_t opcode);
+
+    bool ADDI(uint32_t opcode);
+    bool ADDIU(uint32_t opcode);
+    bool DADDI(uint32_t opcode);
+    bool DADDIU(uint32_t opcode);
+
+    bool DADD(uint32_t opcode);
+    bool DSUB(uint32_t opcode);
+    bool DMULT(uint32_t opcode);
+    bool DDIV(uint32_t opcode);
+    bool DADDU(uint32_t opcode);
+    bool DSUBU(uint32_t opcode);
+    bool DMULTU(uint32_t opcode);
+    bool DDIVU(uint32_t opcode);
+
+    // Bitwise
+    bool AND(uint32_t opcode);
+    bool XOR(uint32_t opcode);
+    bool OR(uint32_t opcode);
+    bool NOR(uint32_t opcode);
+    bool ANDI(uint32_t opcode);
+    bool XORI(uint32_t opcode);
+    bool ORI(uint32_t opcode);
+
+    // Shifts
+    bool SLL(uint32_t opcode);
+    bool SRL(uint32_t opcode);
+    bool SRA(uint32_t opcode);
+    bool SLLV(uint32_t opcode);
+    bool SRLV(uint32_t opcode);
+    bool SRAV(uint32_t opcode);
+    bool DSLLV(uint32_t opcode);
+    bool DSLRV(uint32_t opcode);
+    bool SLT(uint32_t opcode);
+    bool SLTU(uint32_t opcode);
+    bool DSLL(uint32_t opcode);
+    bool DSRL(uint32_t opcode);
+    bool DSRA(uint32_t opcode);
+    bool DSRAV(uint32_t opcode);
+    bool DSLL32(uint32_t opcode);
+    bool DSRL32(uint32_t opcode);
+    bool DSRA32(uint32_t opcode);
+
+    // Jump & Branchs
+    bool J(uint32_t opcode);
+    bool JR(uint32_t opcode);
+    bool JAL(uint32_t opcode);
+    bool JALR(uint32_t opcode);
+    bool BLTZ(uint32_t opcode);
+    bool BLTZAL(uint32_t opcode);
+    bool BGEZ(uint32_t opcode);
+    bool BGEZAL(uint32_t opcode);
+    bool BEQ(uint32_t opcode);
+    bool BNE(uint32_t opcode);
+    bool BLEZ(uint32_t opcode);
+    bool BGTZ(uint32_t opcode);
+
+    // Load & Store
+    bool LUI(uint32_t opcode);
+    bool SB(uint32_t opcode);
+    bool SH(uint32_t opcode);
+    bool SW(uint32_t opcode);
+    bool SD(uint32_t opcode);
+    bool SDL(uint32_t opcode);
+    bool SDR(uint32_t opcode);
+    bool LWL(uint32_t opcode);
+    bool LWR(uint32_t opcode);
+    bool LDL(uint32_t opcode);
+    bool LDR(uint32_t opcode);
+    bool LB(uint32_t opcode);
+    bool LBU(uint32_t opcode);
+    bool LH(uint32_t opcode);
+    bool LHU(uint32_t opcode);
+    bool LW(uint32_t opcode);
+    bool LWU(uint32_t opcode);
+    bool LD(uint32_t opcode);
+    bool LL(uint32_t opcode);
+    bool LWC1(uint32_t opcode);
+    bool SWC1(uint32_t opcode);
+
+    // Misc
+    bool SLTI(uint32_t opcode);
+    bool SLTIU(uint32_t opcode);
+    bool MFHI(uint32_t opcode);
+    bool MTHI(uint32_t opcode);
+    bool MFLO(uint32_t opcode);
+    bool MTLO(uint32_t opcode);
+
+};
+
+}
+
+#endif // MIPS3_X64
+
diff --git a/src/cpu/mips3/x64/mips3_x64_arithm.h b/src/cpu/mips3/x64/mips3_x64_arithm.h
new file mode 100644
index 000000000..5bbb5e206
--- /dev/null
+++ b/src/cpu/mips3/x64/mips3_x64_arithm.h
@@ -0,0 +1,338 @@
+/*
+ * Copyright (c) 2015, Marcos Medeiros
+ * Licensed under BSD 3-clause.
+ */
+#ifndef MIPS3_X64_ARITHM
+#define MIPS3_X64_ARITHM
+
+
+#include "../mips3.h"
+#include "mips3_x64_defs.h"
+#include "mips3_x64.h"
+#include "xbyak/xbyak.h"
+
+namespace mips
+{
+
+// ADD: rd = sign-extended 32-bit (rs + rt); skipped when rd is register 0. No overflow trap is emitted.
+inline bool mips3_x64::ADD(uint32_t opcode)
+{
+    if (RDNUM) {
+        mov(eax, RS_x);
+        add(eax, RT_x);
+        cdqe();
+        mov(RD_x, rax);
+    }
+    return false;
+}
+
+// ADDU: rd = sign-extended 32-bit (rs + rt); skipped when rd is register 0.
+inline bool mips3_x64::ADDU(uint32_t opcode)
+{
+    if (RDNUM) {
+        mov(eax, RS_x);
+        add(eax, RT_x);
+        cdqe();
+        mov(RD_x, rax);
+    }
+    return false;
+}
+
+// ADDI: rt = sign-extended 32-bit (rs + SIMM); skipped when rt is register 0.
+// TODO: Overflow exception
+inline bool mips3_x64::ADDI(uint32_t opcode)
+{
+    if (RTNUM) {
+        mov(eax, RS_x);
+        add(eax, (int32_t)SIMM);
+        cdqe();
+        mov(RT_x, rax);
+    }
+    return false;
+}
+
+// ADDIU: rt = sign-extended 32-bit (rs + SIMM); skipped when rt is register 0.
+inline bool mips3_x64::ADDIU(uint32_t opcode)
+{
+    if (RTNUM) {
+        mov(eax, RS_x);
+        add(eax, (int32_t)SIMM);
+        cdqe();
+        mov(RT_x, rax);
+    }
+    return false;
+}
+
+// DADDI: rt = rs + SIMM as a 64-bit add; skipped when rt is register 0.
+inline bool mips3_x64::DADDI(uint32_t opcode)
+{
+    if (RTNUM) {
+        mov(rax, RS_x);
+        add(rax, (size_t)(int32_t)SIMM);
+        mov(RT_x, rax);
+    }
+    return false;
+}
+
+// DADDIU: rt = rs + SIMM as a 64-bit add; skipped when rt is register 0.
+inline bool mips3_x64::DADDIU(uint32_t opcode)
+{
+    if (RTNUM) {
+        mov(rax, RS_x);
+        add(rax, (size_t)(int32_t)SIMM);
+        mov(RT_x, rax);
+    }
+    return false;
+}
+
+// DADD: rd = rs + rt as a 64-bit add; skipped when rd is register 0.
+// TODO: Overflow exception
+inline bool mips3_x64::DADD(uint32_t opcode)
+{
+    if (RDNUM) {
+        mov(rax, RS_x);
+        add(rax, RT_x);
+        mov(RD_x, rax);
+    }
+    return false;
+}
+
+// DADDU: rd = rs + rt as a 64-bit add; skipped when rd is register 0.
+inline bool mips3_x64::DADDU(uint32_t opcode)
+{
+    if (RDNUM) {
+        mov(rax, RS_x);
+        add(rax, RT_x);
+        mov(RD_x, rax);
+    }
+    return false;
+}
+
+
+// SUB: rd = sign-extended 32-bit (rs - rt); skipped when rd is register 0.
+// TODO: Overflow exception
+inline bool mips3_x64::SUB(uint32_t opcode)
+{
+    if (RDNUM) {
+        mov(eax, RS_x);
+        sub(eax, RT_x);
+        cdqe();
+        mov(RD_x, rax);
+    }
+    return false;
+}
+
+// SUBU: rd = sign-extended 32-bit (rs - rt); skipped when rd is register 0.
+inline bool mips3_x64::SUBU(uint32_t opcode)
+{
+    if (RDNUM) {
+        mov(eax, RS_x);
+        sub(eax, RT_x);
+        cdqe();
+        mov(RD_x, rax);
+    }
+    return false;
+}
+
+
+// DSUB: rd = rs - rt as a 64-bit subtract; skipped when rd is register 0.
+// TODO: Overflow exception
+inline bool mips3_x64::DSUB(uint32_t opcode)
+{
+    if (RDNUM) {
+        mov(rax, RS_x);
+        sub(rax, RT_x);
+        mov(RD_x, rax);
+    }
+    return false;
+}
+
+
+// DSUBU: rd = rs - rt as a 64-bit subtract; skipped when rd is register 0.
+inline bool mips3_x64::DSUBU(uint32_t opcode)
+{
+    if (RDNUM) {
+        mov(rax, RS_x);
+        sub(rax, RT_x);
+        mov(RD_x, rax);
+    }
+    return false;
+}
+
+// MULT: signed 32x32 multiply; HI/LO get the sign-extended high/low halves. Charges 2 extra cycles.
+inline bool mips3_x64::MULT(uint32_t opcode)
+{
+    mov(eax, RS_x);
+    mov(ecx, RT_x);
+    imul(ecx);
+    movsxd(rdx, edx);
+    movsxd(rax, eax);
+    mov(HI_x, rdx);
+    mov(LO_x, rax);
+
+    m_block_icounter += 2;
+    return false;
+}
+
+// MULTU: unsigned 32x32 multiply; HI/LO get the sign-extended high/low halves. Charges 2 extra cycles.
+inline bool mips3_x64::MULTU(uint32_t opcode)
+{
+    mov(eax, RS_x);
+    mov(ecx, RT_x);
+    mul(ecx);
+    movsxd(rdx, edx);
+    movsxd(rax, eax);
+    mov(HI_x, rdx);
+    mov(LO_x, rax);
+
+    m_block_icounter += 2;
+    return false;
+}
+
+
+// DIV: signed 32-bit divide; LO = quotient, HI = remainder, both sign-extended. Charges 34 extra cycles.
+inline bool mips3_x64::DIV(uint32_t opcode)
+{
+    if (RTNUM) {
+        inLocalLabel();
+        mov(ecx, RT_x);     // idiv below uses ecx, so test the same 32 bits
+        cmp(ecx, 0);
+        je(".end");
+
+        mov(eax, RS_x);
+        // INT32_MIN / -1 would raise #DE in idiv; store LO = INT32_MIN, HI = 0 instead
+        cmp(eax, 0x80000000);
+        jne(".divide");
+        cmp(ecx, 0xFFFFFFFF);
+        jne(".divide");
+        xor_(edx, edx);
+        jmp(".store");
+
+        L(".divide");
+        cdq();
+        idiv(ecx);
+
+        L(".store");
+        cdqe();
+        movsxd(rdx, edx);   // sign-extend the remainder that is actually stored
+        mov(LO_x, rax);
+        mov(HI_x, rdx);
+        L(".end");
+        outLocalLabel();
+
+    }
+    m_block_icounter += 34;
+    return false;
+}
+
+
+// DIVU: unsigned 32-bit divide; LO = quotient, HI = remainder, both sign-extended. Charges 34 extra cycles.
+inline bool mips3_x64::DIVU(uint32_t opcode)
+{
+    if (RTNUM) {
+        inLocalLabel();
+        mov(ecx, RT_x);
+        cmp(ecx, 0);
+        je(".end");
+
+        mov(eax, RS_x);
+        xor_(edx, edx);
+        div(ecx);
+        cdqe();
+
+        mov(LO_x, rax);
+        movsxd(rdx, edx);   // sign-extend the remainder that is actually stored
+        mov(HI_x, rdx);
+        L(".end");
+        outLocalLabel();
+
+    }
+    m_block_icounter += 34;
+    return false;
+}
+
+// DMULT: signed 64x64 multiply; HI:LO = rdx:rax. Charges 6 extra cycles.
+inline bool mips3_x64::DMULT(uint32_t opcode)
+{
+    mov(rax, RS_x);
+    mov(rcx, RT_x);
+    imul(rcx);
+    mov(HI_x, rdx);
+    mov(LO_x, rax);
+    m_block_icounter += 6;
+    return false;
+}
+
+// DDIV: signed 64-bit divide; LO = quotient, HI = remainder. Charges 66 extra cycles.
+inline bool mips3_x64::DDIV(uint32_t opcode)
+{
+    if (RTNUM) {
+        inLocalLabel();
+        mov(rcx, RT_x);
+        cmp(rcx, 0);
+        je(".end");
+
+        mov(rax, RS_x);
+        // INT64_MIN / -1 would raise #DE in idiv; store LO = INT64_MIN, HI = 0 instead
+        mov(rdx, (size_t)1 << 63);
+        cmp(rax, rdx);
+        jne(".divide");
+        cmp(rcx, 0xFFFFFFFF);   // imm32 is sign-extended for a 64-bit operand: compares with -1
+        jne(".divide");
+        xor_(edx, edx);
+        jmp(".store");
+
+        L(".divide");
+        cqo();
+        idiv(rcx);
+
+        L(".store");
+        mov(LO_x, rax);
+        mov(HI_x, rdx);
+        L(".end");
+        outLocalLabel();
+
+    }
+    m_block_icounter += 66;
+    return false;
+}
+
+// DMULTU: unsigned 64x64 multiply; HI:LO = rdx:rax. Charges 6 extra cycles.
+inline bool mips3_x64::DMULTU(uint32_t opcode)
+{
+    mov(rax, RS_x);
+    mov(rcx, RT_x);
+    mul(rcx);
+    mov(HI_x, rdx);
+    mov(LO_x, rax);
+    m_block_icounter += 6;
+    return false;
+}
+
+// DDIVU: unsigned 64-bit divide; LO = quotient, HI = remainder. Charges 66 extra cycles.
+inline bool mips3_x64::DDIVU(uint32_t opcode)
+{
+    if (RTNUM) {
+        inLocalLabel();
+        mov(rcx, RT_x);
+        cmp(rcx, 0);
+        je(".end");
+
+        mov(rax, RS_x);
+        xor_(edx, edx);     // unsigned divide: the high half of the dividend must be zero
+        div(rcx);
+
+        mov(LO_x, rax);
+        mov(HI_x, rdx);
+        L(".end");
+        outLocalLabel();
+
+    }
+    m_block_icounter += 66;
+    return false;
+}
+
+}
+
+#endif // MIPS3_X64_ARITHM
+
diff --git a/src/cpu/mips3/x64/mips3_x64_bitops.h b/src/cpu/mips3/x64/mips3_x64_bitops.h
new file mode 100644
index 000000000..34361fd6b
--- /dev/null
+++ b/src/cpu/mips3/x64/mips3_x64_bitops.h
@@ -0,0 +1,94 @@
+#ifndef MIPS3_X64_BITOPS
+#define MIPS3_X64_BITOPS
+
+#include "../mips3.h"
+#include "mips3_x64_defs.h"
+#include "mips3_x64.h"
+#include "xbyak/xbyak.h"
+
+namespace mips
+{
+
+
+// AND: rd = rs & rt (64-bit); skipped when rd is register 0.
+inline bool mips3_x64::AND(uint32_t opcode)
+{
+    if (RDNUM) {
+        mov(rax, RS_x);
+        and_(rax, RT_x);
+        mov(RD_x, rax);
+    }
+    return false;
+}
+
+// XOR: rd = rs ^ rt (64-bit); skipped when rd is register 0.
+inline bool mips3_x64::XOR(uint32_t opcode)
+{
+    if (RDNUM) {
+        mov(rax, RS_x);
+        xor_(rax, RT_x);
+        mov(RD_x, rax);
+    }
+    return false;
+}
+
+// OR: rd = rs | rt (64-bit); skipped when rd is register 0.
+inline bool mips3_x64::OR(uint32_t opcode)
+{
+    if (RDNUM) {
+        mov(rax, RS_x);
+        or_(rax, RT_x);
+        mov(RD_x, rax);
+    }
+    return false;
+}
+
+// NOR: rd = ~(rs | rt) (64-bit); skipped when rd is register 0.
+inline bool mips3_x64::NOR(uint32_t opcode)
+{
+    if (RDNUM) {
+        mov(rax, RS_x);
+        or_(rax, RT_x);
+        not_(rax);
+        mov(RD_x, rax);
+    }
+    return false;
+}
+
+// ANDI: rt = rs & IMM; skipped when rt is register 0. IMM is presumably the zero-extended immediate.
+inline bool mips3_x64::ANDI(uint32_t opcode)
+{
+    if (RTNUM) {
+        mov(rax, RS_x);
+        and_(rax, IMM);
+        mov(RT_x, rax);
+    }
+    return false;
+}
+
+// XORI: rt = rs ^ IMM; skipped when rt is register 0.
+inline bool mips3_x64::XORI(uint32_t opcode)
+{
+    if (RTNUM) {
+        mov(rax, RS_x);
+        xor_(rax, IMM);
+        mov(RT_x, rax);
+    }
+    return false;
+}
+
+// ORI: rt = rs | IMM; skipped when rt is register 0.
+inline bool mips3_x64::ORI(uint32_t opcode)
+{
+    if (RTNUM) {
+        mov(rax, RS_x);
+        or_(rax, IMM);
+        mov(RT_x, rax);
+    }
+    return false;
+}
+
+}
+
+#endif // MIPS3_X64_BITOPS
+
diff --git
a/src/cpu/mips3/x64/mips3_x64_branch.h b/src/cpu/mips3/x64/mips3_x64_branch.h
new file mode 100644
index 000000000..69d873ea9
--- /dev/null
+++ b/src/cpu/mips3/x64/mips3_x64_branch.h
@@ -0,0 +1,410 @@
+/*
+ * Copyright (c) 2015, Marcos Medeiros
+ * Licensed under BSD 3-clause.
+ */
+#ifndef MIPS3_X64_BRANCH
+#define MIPS3_X64_BRANCH
+
+#include "../mips3.h"
+#include "../memory.h"
+#include "mips3_x64_defs.h"
+#include "mips3_x64.h"
+#include "xbyak/xbyak.h"
+
+namespace mips
+{
+
+// J: translates the delay slot, then exits to the static target. Always ends the block.
+inline bool mips3_x64::J(uint32_t opcode)
+{
+    if (m_is_delay_slot)
+        return true;
+
+    addr_t eaddr = 0;
+    addr_t nextpc = (m_drc_pc & 0xF0000000) | (TARGET << 2);
+
+    m_core->translate(m_drc_pc, &eaddr);
+    uint32_t next_opcode = mem::read_word(eaddr);
+
+    m_drc_pc += 4;
+    m_is_delay_slot = true;
+    if (compile_instruction(next_opcode)) {
+        drc_log("Branch on delay slot!!! aborting...");
+        exit(-1);
+    }
+    m_is_delay_slot = false;
+
+    update_icounter();
+
+    jmp_to_block(nextpc);
+    return true;
+}
+
+// JR: translates the delay slot, then exits to the address held in rs. Always ends the block.
+// NOTE(review): rs is read after the delay-slot code runs, so a delay slot that writes rs changes the target.
+inline bool mips3_x64::JR(uint32_t opcode)
+{
+    if (m_is_delay_slot)
+        return true;
+
+    addr_t eaddr = 0;
+    m_core->translate(m_drc_pc, &eaddr);
+    uint32_t next_opcode = mem::read_word(eaddr);
+
+    m_drc_pc += 4;
+    m_is_delay_slot = true;
+    if (compile_instruction(next_opcode)) {
+        drc_log("Branch on delay slot!!! aborting...");
+        exit(-1);
+    }
+    m_is_delay_slot = false;
+
+    update_icounter();
+
+    jmp_to_register(RSNUM);
+    return true;
+}
+
+// JAL: like J, and stores the sign-extended return address (branch PC + 8) in r31.
+inline bool mips3_x64::JAL(uint32_t opcode)
+{
+    if (m_is_delay_slot)
+        return true;
+
+    addr_t eaddr = 0;
+    addr_t nextpc = (m_drc_pc & 0xF0000000) | (TARGET << 2);
+
+    m_core->translate(m_drc_pc, &eaddr);
+    uint32_t next_opcode = mem::read_word(eaddr);
+
+    m_drc_pc += 4;
+    m_is_delay_slot = true;
+    if (compile_instruction(next_opcode)) {
+        drc_log("Branch on delay slot!!! aborting...");
+        exit(-1);
+    }
+    m_is_delay_slot = false;
+
+    update_icounter();
+
+    mov(eax, (uint32_t)m_drc_pc);
+    cdqe();
+    mov(rcx, R_ref(31));
+    mov(ptr[rcx], rax);
+
+    jmp_to_block(nextpc);
+    return true;
+}
+
+// JALR: like JR, and stores the sign-extended return address (branch PC + 8) in r31.
+// NOTE(review): the link always goes to r31; the rd field of the opcode is not consulted.
+inline bool mips3_x64::JALR(uint32_t opcode)
+{
+    if (m_is_delay_slot)
+        return true;
+
+    addr_t eaddr = 0;
+    m_core->translate(m_drc_pc, &eaddr);
+    uint32_t next_opcode = mem::read_word(eaddr);
+
+    m_drc_pc += 4;
+    m_is_delay_slot = true;
+    if (compile_instruction(next_opcode)) {
+        drc_log("Branch on delay slot!!! aborting...");
+        exit(-1);
+    }
+    m_is_delay_slot = false;
+
+    update_icounter();
+
+    mov(eax, (uint32_t)m_drc_pc);
+    cdqe();
+    mov(rcx, R_ref(31));
+    mov(ptr[rcx], rax);
+
+    jmp_to_register(RSNUM);
+    return true;
+}
+
+// BLTZ: branch if rs < 0. The delay slot is emitted on both paths; the block continues when not taken.
+inline bool mips3_x64::BLTZ(uint32_t opcode)
+{
+    if (m_is_delay_slot)
+        return true;
+
+    addr_t nextpc = m_drc_pc + ((int32_t)(SIMM) << 2);
+    addr_t eaddr = 0;
+
+    m_core->translate(m_drc_pc, &eaddr);
+    uint32_t next_opcode = mem::read_word(eaddr);
+
+    m_drc_pc += 4;
+
+    update_icounter();
+
+    inLocalLabel();
+
+    cmp(RS_q, 0);
+    jge(".false", T_NEAR);  // near jump: the skipped code has no fixed size
+    {
+        m_is_delay_slot = true;
+        compile_instruction(next_opcode);
+        m_is_delay_slot = false;
+        jmp_to_block(nextpc);
+    }
+
+    L(".false");
+    compile_instruction(next_opcode);
+    outLocalLabel();
+    check_icounter();
+    return false;
+}
+
+// BLTZAL: branch if rs < 0; r31 receives the return address whether or not the branch is taken.
+inline bool mips3_x64::BLTZAL(uint32_t opcode)
+{
+    if (m_is_delay_slot)
+        return true;
+
+    addr_t nextpc = m_drc_pc + ((int32_t)(SIMM) << 2);
+    addr_t eaddr = 0;
+
+    m_core->translate(m_drc_pc, &eaddr);
+    uint32_t next_opcode = mem::read_word(eaddr);
+
+    m_drc_pc += 4;
+
+    update_icounter();
+
+    inLocalLabel();
+
+    cmp(RS_q, 0);
+    // Link unconditionally; mov and cdqe leave the flags set by cmp untouched
+    mov(eax, (uint32_t)m_drc_pc);
+    cdqe();
+    mov(Rn_x(31), rax);
+    jge(".false", T_NEAR);  // near jump: the skipped code has no fixed size
+    {
+        m_is_delay_slot = true;
+        compile_instruction(next_opcode);
+        m_is_delay_slot = false;
+        jmp_to_block(nextpc);
+    }
+
+    L(".false");
+    compile_instruction(next_opcode);
+    outLocalLabel();
+    check_icounter();
+    return false;
+}
+
+
+// BGEZ: branch if rs >= 0. The delay slot is emitted on both paths; the block continues when not taken.
+inline bool mips3_x64::BGEZ(uint32_t opcode)
+{
+    if (m_is_delay_slot)
+        return true;
+
+    addr_t nextpc = m_drc_pc + ((int32_t)(SIMM) << 2);
+    addr_t eaddr = 0;
+
+    m_core->translate(m_drc_pc, &eaddr);
+    uint32_t next_opcode = mem::read_word(eaddr);
+
+    m_drc_pc += 4;
+
+    update_icounter();
+
+    inLocalLabel();
+    cmp(RS_q, 0);
+    jl(".false", T_NEAR);   // near jump: the skipped code has no fixed size
+    {
+        m_is_delay_slot = true;
+        compile_instruction(next_opcode);
+        m_is_delay_slot = false;
+        jmp_to_block(nextpc);
+    }
+
+    L(".false");
+    compile_instruction(next_opcode);
+    outLocalLabel();
+    check_icounter();
+    return false;
+}
+
+// BGEZAL: branch if rs >= 0; r31 receives the return address whether or not the branch is taken.
+inline bool mips3_x64::BGEZAL(uint32_t opcode)
+{
+    if (m_is_delay_slot)
+        return true;
+
+    addr_t nextpc = m_drc_pc + ((int32_t)(SIMM) << 2);
+    addr_t eaddr = 0;
+
+    m_core->translate(m_drc_pc, &eaddr);
+    uint32_t next_opcode = mem::read_word(eaddr);
+
+    m_drc_pc += 4;
+
+    update_icounter();
+
+    inLocalLabel();
+    cmp(RS_q, 0);
+    // Link unconditionally; mov and cdqe leave the flags set by cmp untouched
+    mov(eax, (uint32_t)m_drc_pc);
+    cdqe();
+    mov(Rn_x(31), rax);
+    jl(".false", T_NEAR);   // near jump: the skipped code has no fixed size
+    {
+        m_is_delay_slot = true;
+        compile_instruction(next_opcode);
+        m_is_delay_slot = false;
+        jmp_to_block(nextpc);
+    }
+
+    L(".false");
+    compile_instruction(next_opcode);
+    outLocalLabel();
+    check_icounter();
+    return false;
+}
+
+// BEQ: branch if rs == rt (64-bit compare). The block continues when not taken.
+inline bool mips3_x64::BEQ(uint32_t opcode)
+{
+    if (m_is_delay_slot)
+        return true;
+
+    addr_t nextpc = m_drc_pc + ((int32_t)(SIMM) << 2);
+    addr_t eaddr = 0;
+
+    m_core->translate(m_drc_pc, &eaddr);
+    uint32_t next_opcode = mem::read_word(eaddr);
+
+    m_drc_pc += 4;
+
+    update_icounter();
+
+    inLocalLabel();
+    mov(rax, RS_x);
+    cmp(rax, RT_x);
+    jne(".false", T_NEAR);  // near jump: the skipped code has no fixed size
+    {
+        m_is_delay_slot = true;
+        compile_instruction(next_opcode);
+        m_is_delay_slot = false;
+        jmp_to_block(nextpc);
+    }
+
+    L(".false");
+    compile_instruction(next_opcode);
+    outLocalLabel();
+    check_icounter();
+    return false;
+}
+
+
+// BNE: branch if rs != rt (64-bit compare). The block continues when not taken.
+inline bool mips3_x64::BNE(uint32_t opcode)
+{
+    if (m_is_delay_slot)
+        return true;
+
+    addr_t nextpc = m_drc_pc + ((int32_t)(SIMM) << 2);
+    addr_t eaddr = 0;
+
+    m_core->translate(m_drc_pc, &eaddr);
+    uint32_t next_opcode = mem::read_word(eaddr);
+
+    m_drc_pc += 4;
+
+    update_icounter();
+
+    inLocalLabel();
+    mov(rax, RS_x);
+    cmp(rax, RT_x);
+    je(".false", T_NEAR);   // near jump: the skipped code has no fixed size
+    {
+        m_is_delay_slot = true;
+        compile_instruction(next_opcode);
+        m_is_delay_slot = false;
+        jmp_to_block(nextpc);
+    }
+
+    L(".false");
+    compile_instruction(next_opcode);
+    outLocalLabel();
+    check_icounter();
+    return false;
+}
+
+// BLEZ: branch if rs <= 0. The block continues when not taken.
+inline bool mips3_x64::BLEZ(uint32_t opcode)
+{
+    if (m_is_delay_slot)
+        return true;
+
+    addr_t nextpc = m_drc_pc + ((int32_t)(SIMM) << 2);
+    addr_t eaddr = 0;
+
+    m_core->translate(m_drc_pc, &eaddr);
+    uint32_t next_opcode = mem::read_word(eaddr);
+
+    m_drc_pc += 4;
+
+    update_icounter();
+
+    inLocalLabel();
+    cmp(RS_q, 0);
+    jg(".false", T_NEAR);   // near jump: the skipped code has no fixed size
+    {
+        m_is_delay_slot = true;
+        compile_instruction(next_opcode);
+        m_is_delay_slot = false;
+        jmp_to_block(nextpc);
+    }
+
+    L(".false");
+    compile_instruction(next_opcode);
+    outLocalLabel();
+    check_icounter();
+    return false;
+}
+
+// BGTZ: branch if rs > 0. The block continues when not taken.
+inline bool mips3_x64::BGTZ(uint32_t opcode)
+{
+    if (m_is_delay_slot)
+        return true;
+
+    addr_t nextpc = m_drc_pc + ((int32_t)(SIMM) << 2);
+    addr_t eaddr = 0;
+
+    m_core->translate(m_drc_pc, &eaddr);
+    uint32_t next_opcode = mem::read_word(eaddr);
+
+    m_drc_pc += 4;
+
+    update_icounter();
+
+    inLocalLabel();
+    cmp(RS_q, 0);
+    jle(".false", T_NEAR);  // near jump: the skipped code has no fixed size
+    {
+        m_is_delay_slot = true;
+        compile_instruction(next_opcode);
+        m_is_delay_slot = false;
+        jmp_to_block(nextpc);
+    }
+
+    L(".false");
+    compile_instruction(next_opcode);
+    outLocalLabel();
+    check_icounter();
+    return false;
+}
+
+}
+
+#endif // MIPS3_X64_BRANCH
+
diff --git a/src/cpu/mips3/x64/mips3_x64_cop1.h b/src/cpu/mips3/x64/mips3_x64_cop1.h
new file mode 100644
index 000000000..7c4525247
--- /dev/null
+++ b/src/cpu/mips3/x64/mips3_x64_cop1.h
@@ -0,0 +1,253 @@
+/*
+ * Copyright (c) 2015, Marcos Medeiros
+ * Licensed under BSD 3-clause.
+ */ +#ifndef MIPS3_X64_COP1 +#define MIPS3_X64_COP1 + + +#include "../mips3.h" +#include "mips3_x64_defs.h" +#include "mips3_x64.h" +#include "xbyak/xbyak.h" + +namespace mips +{ + +bool mips3_x64::BC1F(uint32_t opcode) +{ + if (m_is_delay_slot) + return true; + + addr_t nextpc = m_drc_pc + ((int32_t)(SIMM) << 2); + addr_t eaddr = 0; + + m_core->translate(m_drc_pc, &eaddr); + uint32_t next_opcode = mem::read_word(eaddr); + + m_drc_pc += 4; + + update_icounter(); + + inLocalLabel(); + mov(rcx, FCR_ref(31)); + mov(rcx, ptr[rcx]); + not_(ecx); + and_(ecx, 0x800000); + test(ecx, ecx); + je(".false"); + { + m_is_delay_slot = true; + compile_instruction(next_opcode); + m_is_delay_slot = false; + jmp_to_block(nextpc); + } + + L(".false"); + compile_instruction(next_opcode); + outLocalLabel(); + check_icounter(); + return false; + +} + +bool mips3_x64::BC1FL(uint32_t opcode) +{ + if (m_is_delay_slot) + return true; + + addr_t nextpc = m_drc_pc + ((int32_t)(SIMM) << 2); + addr_t eaddr = 0; + + m_core->translate(m_drc_pc, &eaddr); + uint32_t next_opcode = mem::read_word(eaddr); + + m_drc_pc += 4; + + update_icounter(); + + inLocalLabel(); + mov(rcx, FCR_ref(31)); + mov(rcx, ptr[rcx]); + not_(ecx); + and_(ecx, 0x800000); + test(ecx, ecx); + je(".false"); + { + m_is_delay_slot = true; + compile_instruction(next_opcode); + m_is_delay_slot = false; + jmp_to_block(nextpc); + } + + L(".false"); + outLocalLabel(); + check_icounter(); + return false; + + return false; +} + +bool mips3_x64::BC1T(uint32_t opcode) +{ + if (m_is_delay_slot) + return true; + + addr_t nextpc = m_drc_pc + ((int32_t)(SIMM) << 2); + addr_t eaddr = 0; + + m_core->translate(m_drc_pc, &eaddr); + uint32_t next_opcode = mem::read_word(eaddr); + + m_drc_pc += 4; + + update_icounter(); + + inLocalLabel(); + mov(rcx, FCR_ref(31)); + mov(rcx, ptr[rcx]); + and_(ecx, 0x800000); + test(ecx, ecx); + je(".false"); + { + m_is_delay_slot = true; + compile_instruction(next_opcode); + m_is_delay_slot = false; + 
jmp_to_block(nextpc); + } + + L(".false"); + compile_instruction(next_opcode); + outLocalLabel(); + check_icounter(); + return false; + +} + +bool mips3_x64::BC1TL(uint32_t opcode) +{ + if (m_is_delay_slot) + return true; + + addr_t nextpc = m_drc_pc + ((int32_t)(SIMM) << 2); + addr_t eaddr = 0; + + m_core->translate(m_drc_pc, &eaddr); + uint32_t next_opcode = mem::read_word(eaddr); + + m_drc_pc += 4; + + update_icounter(); + + inLocalLabel(); + mov(rcx, FCR_ref(31)); + mov(rcx, ptr[rcx]); + and_(ecx, 0x800000); + test(ecx, ecx); + je(".false"); + { + m_is_delay_slot = true; + compile_instruction(next_opcode); + m_is_delay_slot = false; + jmp_to_block(nextpc); + } + + L(".false"); + outLocalLabel(); + check_icounter(); + return false; +} + +bool mips3_x64::compile_cop1(uint32_t opcode) +{ + bool result = false; + + switch (RSNUM) { + // MFC1 rt, rd + case 0x00: + if (RTNUM) { + mov(rax, FPR_ref(RDNUM)); + mov(rcx, RT_ref); + mov(eax, ptr[rax]); + cdqe(); + mov(ptr[rcx], rax); + } + break; + + // DMFC1 rt, rd + case 0x01: + if (RTNUM) { + mov(rax, FPR_ref(RDNUM)); + mov(rcx, RT_ref); + mov(rax, ptr[rax]); + mov(ptr[rcx], rax); + } + break; + + // CFC1 rt, fs + case 0x02: + if (RTNUM) { + mov(rax, FCR_ref(FSNUM)); + mov(rcx, RT_ref); + mov(eax, ptr[rax]); + cdqe(); + mov(ptr[rcx], rax); + } + break; + + // MTC1 rt, fs + case 0x04: + mov(rax, FPR_ref(FSNUM)); + mov(rcx, RT_ref); + mov(ecx, ptr[rcx]); + mov(ptr[rax], rcx); + break; + + // DMTC1 rt, fs + case 0x05: + mov(rax, FPR_ref(FSNUM)); + mov(rcx, RT_ref); + mov(rcx, ptr[rcx]); + mov(ptr[rax], rcx); + break; + + // CTC1 rt, fs + case 0x06: + mov(rcx, FCR_ref(FSNUM)); + mov(rax, RT_ref); + mov(eax, ptr[rax]); + cdqe(); + mov(ptr[rcx], rax); + break; + + // BC + case 0x08: + { + switch ((opcode >> 16) & 3) { + // BC1F offset + case 0x00: result = BC1F(opcode); break; + + // BC1FL offset + case 0x02: result = BC1FL(opcode); break; + + // BC1T offset + case 0x01: result = BC1T(opcode); break; + break; + + // BC1TL 
offset + case 0x03: result = BC1TL(opcode); break; + } + break; + } + + default: + fallback(opcode, &mips3::cop1_execute_32); + break; + } + return result; +} + +} + +#endif // MIPS3_X64_COP1 + diff --git a/src/cpu/mips3/x64/mips3_x64_defs.h b/src/cpu/mips3/x64/mips3_x64_defs.h new file mode 100644 index 000000000..9b4709ff5 --- /dev/null +++ b/src/cpu/mips3/x64/mips3_x64_defs.h @@ -0,0 +1,64 @@ +/* + * Copyright (c) 2015, Marcos Medeiros + * Licensed under BSD 3-clause. + */ +#ifndef MIPS3_X64_DEFS +#define MIPS3_X64_DEFS + +#include +#include "../mipsdef.h" + +#define DEBUG_DRC 1 + +#if DEBUG_DRC +# define drc_log(...) printf("drc: " __VA_ARGS__); fflush(stdout) +#else +# define drc_log(...) +#endif + +#define drc_log_error(...) printf("drc_err: " __VA_ARGS__); fflush(stdout) + +#define DEBUG_CALL(f) \ + mov(rax, (size_t)(void*)&f); \ + call(rax); + +#define CORE_PC (m_core->m_state.pc) +#define RS_ref ((size_t)&m_core->m_state.r[RSNUM]) +#define RD_ref ((size_t)&m_core->m_state.r[RDNUM]) +#define RT_ref ((size_t)&m_core->m_state.r[RTNUM]) +#define LO_ref ((size_t)&m_core->m_state.lo) +#define HI_ref ((size_t)&m_core->m_state.hi) +#define R_ref(n) ((size_t)&m_core->m_state.r[n]) +#define ADR(n) ((size_t)&n) +#define F_ADR(f) ((size_t)(void*)&f) + +#define FPR_ref(n) ((size_t)&m_core->m_state.cpr[1][n]) +#define FCR_ref(n) ((size_t)&m_core->m_state.fcr[n]) + +#define Rn_x(n) ptr[rbx + ((size_t)offsetof(mips3::cpu_state, r[n]))] +#define RS_x ptr[rbx + ((size_t)offsetof(mips3::cpu_state, r[RSNUM]))] +#define RD_x ptr[rbx + ((size_t)offsetof(mips3::cpu_state, r[RDNUM]))] +#define RT_x ptr[rbx + ((size_t)offsetof(mips3::cpu_state, r[RTNUM]))] +#define LO_x ptr[rbx + ((size_t)offsetof(mips3::cpu_state, lo))] +#define HI_x ptr[rbx + ((size_t)offsetof(mips3::cpu_state, hi))] +#define PC_x ptr[rbx + ((size_t)offsetof(mips3::cpu_state, pc))] + +#define COP0_x(n) ptr[rbx + ((size_t)offsetof(mips3::cpu_state, cpr[0][n]))] +#define COP1_x(n) ptr[rbx + 
((size_t)offsetof(mips3::cpu_state, cpr[1][n]))] + +#define TOTAL_x ptr[rbx + ((size_t)offsetof(mips3::cpu_state, total_cycles))] +#define RSTCYC_x ptr[rbx + ((size_t)offsetof(mips3::cpu_state, reset_cycle))] + +#define RS_q qword[rbx + ((size_t)offsetof(mips3::cpu_state, r[RSNUM]))] +#define RD_q qword[rbx + ((size_t)offsetof(mips3::cpu_state, r[RDNUM]))] +#define RT_q qword[rbx + ((size_t)offsetof(mips3::cpu_state, r[RTNUM]))] +#define PC_q qword[rbx + ((size_t)offsetof(mips3::cpu_state, pc))] + +#define COP0_q(n) qword[rbx + ((size_t)offsetof(mips3::cpu_state, cpr[0][n]))] +#define COP1_q(n) qword[rbx + ((size_t)offsetof(mips3::cpu_state, cpr[1][n]))] + +#define TOTAL_q qword[rbx + ((size_t)offsetof(mips3::cpu_state, total_cycles))] +#define RSTCYC_q qword[rbx + ((size_t)offsetof(mips3::cpu_state, reset_cycle))] + +#endif // MIPS3_X64_DEFS + diff --git a/src/cpu/mips3/x64/mips3_x64_misc.h b/src/cpu/mips3/x64/mips3_x64_misc.h new file mode 100644 index 000000000..6c78b2871 --- /dev/null +++ b/src/cpu/mips3/x64/mips3_x64_misc.h @@ -0,0 +1,102 @@ +/* + * Copyright (c) 2015, Marcos Medeiros + * Licensed under BSD 3-clause. 
+ */ +#ifndef MIPS3_X64_MISC +#define MIPS3_X64_MISC + + +#include "../mips3.h" +#include "mips3_x64_defs.h" +#include "mips3_x64.h" +#include "xbyak/xbyak.h" + + +namespace mips +{ + +inline bool mips3_x64::SLT(uint32_t opcode) +{ + if (RDNUM) { + mov(rax, RS_x); + cmp(rax, RT_x); + setl(dl); + movzx(edx, dl); + mov(RD_x, rdx); + } + return false; +} + +inline bool mips3_x64::SLTU(uint32_t opcode) +{ + if (RDNUM) { + mov(rax, RS_x); + cmp(rax, RT_x); + setb(dl); + movzx(edx, dl); + mov(RD_x, rdx); + } + return false; +} + +inline bool mips3_x64::SLTI(uint32_t opcode) +{ + if (RTNUM) { + mov(rax, RS_x); + cmp(rax, IMM_s64); + setl(dl); + movzx(edx, dl); + mov(RT_x, rdx); + } + return false; +} + +inline bool mips3_x64::SLTIU(uint32_t opcode) +{ + if (RTNUM) { + mov(rax, RS_x); + cmp(rax, IMM); + setb(dl); + movzx(edx, dl); + mov(RT_x, rdx); + } + return false; +} + +inline bool mips3_x64::MFHI(uint32_t opcode) +{ + if (RDNUM) { + mov(rax, HI_x); + mov(RD_x, rax); + } + return false; +} + +inline bool mips3_x64::MTHI(uint32_t opcode) +{ + mov(rax, RS_x); + mov(HI_x, rax); + return false; +} + +inline bool mips3_x64::MFLO(uint32_t opcode) +{ + if (RDNUM) { + mov(rax, LO_x); + mov(RD_x, rax); + } + return false; +} + +inline bool mips3_x64::MTLO(uint32_t opcode) +{ + mov(rax, RS_x); + mov(LO_x, rax); + return false; +} + + +} + +#endif // MIPS3_X64_MISC + diff --git a/src/cpu/mips3/x64/mips3_x64_rw.h b/src/cpu/mips3/x64/mips3_x64_rw.h new file mode 100644 index 000000000..093d4e54c --- /dev/null +++ b/src/cpu/mips3/x64/mips3_x64_rw.h @@ -0,0 +1,393 @@ +/* + * Copyright (c) 2015, Marcos Medeiros + * Licensed under BSD 3-clause. 
+ */ +#ifndef MIPS3_X64_RW +#define MIPS3_X64_RW + + +#include "../mips3.h" +#include "mips3_x64_defs.h" +#include "mips3_x64.h" +#include "xbyak/xbyak.h" +#include "../memory.h" + + +namespace mips +{ + +#define GET_EADDR_IN_RDX(ignore) \ + do { \ + auto eaddr = ptr[rbp-8];\ + mov(rdi, (size_t) m_core);\ + mov(rsi, (int64_t)(int32_t)SIMM);\ + add(rsi, RS_x);\ + and_(rsi, ~((uint64_t)ignore));\ + lea(rdx, eaddr);\ + mov(rax, F_ADR(mips3::translate));\ + call(rax);\ + mov(rdx, eaddr);\ + } while (0) + +#define GET_EA_RDX_VA_RCX(ignore) \ + do { \ + auto eaddr = ptr[rbp-8];\ + auto vaddr = ptr[rbp-16];\ + mov(rdi, (size_t) m_core);\ + mov(rsi, (int64_t)(int32_t)SIMM);\ + add(rsi, RS_x);\ + and_(rsi, ~((uint64_t)ignore));\ + lea(rdx, eaddr);\ + mov(vaddr, rsi);\ + mov(rax, F_ADR(mips3::translate));\ + call(rax);\ + mov(rdx, eaddr);\ + mov(rcx, vaddr);\ + } while (0) + +bool mips3_x64::LUI(uint32_t opcode) +{ + if (RTNUM) { + int64_t value = (int32_t)(IMM << 16); + mov(RT_q, value); + } + return false; +} + +bool mips3_x64::SB(uint32_t opcode) +{ + GET_EADDR_IN_RDX(0); + mov(rdi, rdx); + mov(rsi, RT_x); + mov(rax, F_ADR(mem::write_byte)); + call(rax); + return false; +} + +bool mips3_x64::SH(uint32_t opcode) +{ + GET_EADDR_IN_RDX(1); + mov(rdi, rdx); + mov(rsi, RT_x); + mov(rax, F_ADR(mem::write_half)); + call(rax); + return false; +} + +bool mips3_x64::SW(uint32_t opcode) +{ + GET_EADDR_IN_RDX(3); + mov(rdi, rdx); + mov(rsi, RT_x); + mov(rax, F_ADR(mem::write_word)); + call(rax); + return false; +} + +bool mips3_x64::SD(uint32_t opcode) +{ + GET_EADDR_IN_RDX(7); + mov(rdi, rdx); + mov(rsi, RT_x); + mov(rax, F_ADR(mem::write_dword)); + call(rax); + return false; +} + +// TODO: FIX IT +bool mips3_x64::SDL(uint32_t opcode) +{ + return false; +} + +// TODO: FIX IT +bool mips3_x64::SDR(uint32_t opcode) +{ + return false; +} + + +bool mips3_x64::LWL(uint32_t opcode) +{ + if (RTNUM) { + GET_EA_RDX_VA_RCX(3); + auto shift = ptr[rbp-8]; + auto mask = ptr[rbp-16]; + + mov(rax, 
rcx); + and_(rax, 3); + xor_(rax, 3); + shl(rax, 3); + mov(shift, rax); + + mov(rax, 0); + not_(rax); + mov(rcx, shift); + shl(rax, cl); + mov(mask, rax); + + mov(rdi, rdx); + mov(rax, F_ADR(mem::read_dword)); + call(rax); + + mov(rcx, shift); + shl(rax, cl); + + mov(rcx, RT_x); + mov(rdi, mask); + not_(rdi); + and_(rcx, rdi); + or_(rax, rcx); + + cdqe(); + mov(RT_x, rax); + + } + return false; +} + + +bool mips3_x64::LWR(uint32_t opcode) +{ + if (RTNUM) { + GET_EA_RDX_VA_RCX(3); + + auto shift = ptr[rbp-8]; + auto mask = ptr[rbp-16]; + + mov(rax, rcx); + and_(rax, 3); + shl(rax, 3); + mov(shift, rax); + + xor_(rax, rax); + not_(rax); + mov(rcx, shift); + shr(rax, cl); + mov(mask, rax); + + mov(rdi, rdx); + mov(rax, F_ADR(mem::read_word)); + call(rax); + + mov(rcx, shift); + shr(rax, cl); + + mov(rcx, RT_x); + mov(rdi, mask); + not_(rdi); + and_(rcx, rdi); + or_(rax, rcx); + + cdqe(); + mov(RT_x, rax); + + } + return false; +} + + +// Válido apenas para little endian. +bool mips3_x64::LDL(uint32_t opcode) +{ + if (RTNUM) { + GET_EA_RDX_VA_RCX(7); + auto shift = ptr[rbp-8]; + auto mask = ptr[rbp-16]; + + mov(rax, rcx); + and_(rax, 7); + xor_(rax, 7); + shl(rax, 3); + mov(shift, rax); + + mov(rax, 0); + not_(rax); + mov(rcx, shift); + shl(rax, cl); + mov(mask, rax); + + mov(rdi, rdx); + mov(rax, F_ADR(mem::read_dword)); + call(rax); + + mov(rcx, shift); + shl(rax, cl); + + mov(rcx, RT_x); + mov(rdi, mask); + not_(rdi); + and_(rcx, rdi); + or_(rax, rcx); + + mov(RT_x, rax); + + } + return false; +} + +// Válido apenas para little endian. 
+/* + uint32_t vaddr = ((int32_t)SIMM) + RS; + + int shift = (vaddr & 7) * 8; + uint64_t mask = (0xFFFFFFFFFFFFFFFFULL >> shift); + + addr_t eaddr; + if (translate(vaddr & ~7, &eaddr)) { + } + //d18 + auto data = mem::read_dword(eaddr); + + if (RTNUM) + RT = (RT & ~mask) | (data >> shift);*/ +bool mips3_x64::LDR(uint32_t opcode) +{ + if (RTNUM) { + GET_EA_RDX_VA_RCX(7); + + auto shift = ptr[rbp-8]; + auto mask = ptr[rbp-16]; + + mov(rax, rcx); + and_(rax, 7); + shl(rax, 3); + mov(shift, rax); + + xor_(rax, rax); + not_(rax); + mov(rcx, shift); + shr(rax, cl); + mov(mask, rax); + + mov(rdi, rdx); + mov(rax, F_ADR(mem::read_dword)); + call(rax); + + mov(rcx, shift); + shr(rax, cl); + + mov(rcx, RT_x); + mov(rdi, mask); + not_(rdi); + and_(rcx, rdi); + or_(rax, rcx); + + mov(RT_x, rax); + } + return false; +} + + +bool mips3_x64::LW(uint32_t opcode) +{ + if (RTNUM) { + GET_EADDR_IN_RDX(3); + mov(rdi, rdx); + mov(rax, F_ADR(mem::read_word)); + call(rax); + cdqe(); + mov(RT_x, rax); + } + return false; +} + +bool mips3_x64::LWU(uint32_t opcode) +{ + if (RTNUM) { + GET_EADDR_IN_RDX(3); + mov(rdi, rdx); + mov(rax, F_ADR(mem::read_word)); + call(rax); + mov(RT_x, rax); + } + return false; +} + +bool mips3_x64::LD(uint32_t opcode) +{ + if (RTNUM) { + GET_EADDR_IN_RDX(7); + mov(rdi, rdx); + mov(rax, F_ADR(mem::read_dword)); + call(rax); + mov(RT_x, rax); + } + return false; +} + +bool mips3_x64::LL(uint32_t opcode) +{ + return false; +} + +// TODO: FIX IT +bool mips3_x64::LWC1(uint32_t opcode) +{ + return false; +} + +// TODO: FIX IT +bool mips3_x64::SWC1(uint32_t opcode) +{ + return false; +} + +bool mips3_x64::LB(uint32_t opcode) +{ + if (RTNUM) { + GET_EADDR_IN_RDX(0); + mov(rdi, rdx); + mov(rax, F_ADR(mem::read_byte)); + call(rax); + movsx(eax, al); + cdqe(); + mov(RT_x, rax); + } + return false; +} + +bool mips3_x64::LBU(uint32_t opcode) +{ + if (RTNUM) { + GET_EADDR_IN_RDX(0); + mov(rdi, rdx); + mov(rax, F_ADR(mem::read_byte)); + call(rax); + movzx(eax, al); + 
mov(RT_x, rax); + } + return false; +} + +bool mips3_x64::LH(uint32_t opcode) +{ + if (RTNUM) { + GET_EADDR_IN_RDX(1); + mov(rdi, rdx); + mov(rax, F_ADR(mem::read_half)); + call(rax); + movsx(eax, ax); + cdqe(); + mov(RT_x, rax); + } + return false; +} + +bool mips3_x64::LHU(uint32_t opcode) +{ + if (RTNUM) { + GET_EADDR_IN_RDX(1); + mov(rdi, rdx); + mov(rax, F_ADR(mem::read_half)); + call(rax); + movzx(eax, ax); + mov(RT_x, rax); + } + return false; +} + +} + +#endif // MIPS3_X64_RW + diff --git a/src/cpu/mips3/x64/mips3_x64_shift.h b/src/cpu/mips3/x64/mips3_x64_shift.h new file mode 100644 index 000000000..da95f5aca --- /dev/null +++ b/src/cpu/mips3/x64/mips3_x64_shift.h @@ -0,0 +1,193 @@ +/* + * Copyright (c) 2015, Marcos Medeiros + * Licensed under BSD 3-clause. + */ +#ifndef MIPS3_X64_SHIFT +#define MIPS3_X64_SHIFT + + +#include "../mips3.h" +#include "mips3_x64_defs.h" +#include "mips3_x64.h" +#include "xbyak/xbyak.h" + + +namespace mips +{ + +inline bool mips3_x64::SLL(uint32_t opcode) +{ + if (RDNUM && SHAMT) { + mov(eax, RT_x); + shl(eax, SHAMT); + cdqe(); + mov(RD_x, rax); + } + return false; +} + +inline bool mips3_x64::SRL(uint32_t opcode) +{ + if (RDNUM && SHAMT) { + mov(eax, RT_x); + shr(eax, SHAMT); + cdqe(); + mov(RD_x, rax); + } + return false; +} + +inline bool mips3_x64::SRA(uint32_t opcode) +{ + if (RDNUM) { + mov(eax, RT_x); + sar(eax, SHAMT); + cdqe(); + mov(RD_x, rax); + } + return false; +} + + +inline bool mips3_x64::SLLV(uint32_t opcode) +{ + if (RDNUM) { + mov(eax, RT_x); + mov(ecx, RS_x); + and_(ecx, 31); + shl(eax, cl); + cdqe(); + mov(RD_x, rax); + } + return false; +} + +inline bool mips3_x64::SRLV(uint32_t opcode) +{ + if (RDNUM) { + mov(eax, RT_x); + mov(ecx, RS_x); + and_(ecx, 31); + shr(eax, cl); + cdqe(); + mov(RD_x, rax); + } + return false; +} + +inline bool mips3_x64::SRAV(uint32_t opcode) +{ + if (RDNUM) { + mov(eax, RT_x); + mov(ecx, RS_x); + and_(ecx, 31); + sar(eax, cl); + cdqe(); + mov(RD_x, rax); + } + return false; +} + 
+inline bool mips3_x64::DSLLV(uint32_t opcode) +{ + if (RDNUM) { + mov(rax, RT_x); + mov(rcx, RS_x); + and_(rcx, 63); + shl(rax, cl); + mov(RD_x, rax); + } + return false; +} + +inline bool mips3_x64::DSLRV(uint32_t opcode) +{ + if (RDNUM) { + mov(rax, RT_x); + mov(rcx, RS_x); + and_(rcx, 63); + shr(rax, cl); + mov(RD_x, rax); + } + return false; +} + +inline bool mips3_x64::DSRAV(uint32_t opcode) +{ + if (RDNUM) { + mov(rax, RT_x); + mov(rcx, RS_x); + and_(rcx, 63); + sar(rax, cl); + mov(RD_x, rax); + } + return false; +} + +inline bool mips3_x64::DSLL(uint32_t opcode) +{ + if (RDNUM) { + mov(rax, RT_x); + shl(rax, SHAMT); + mov(RD_x, rax); + } + return false; +} + +inline bool mips3_x64::DSRL(uint32_t opcode) +{ + if (RDNUM) { + mov(rax, RT_x); + shr(rax, SHAMT); + mov(RD_x, rax); + + } + return false; +} + +inline bool mips3_x64::DSRA(uint32_t opcode) +{ + if (RDNUM) { + mov(rax, RT_x); + sar(rax, SHAMT & 63); + mov(RD_x, rax); + } + return false; +} + +inline bool mips3_x64::DSLL32(uint32_t opcode) +{ + if (RDNUM) { + mov(rax, RT_x); + shl(rax, SHAMT + 32); + mov(RD_x, rax); + } + return false; +} + +inline bool mips3_x64::DSRL32(uint32_t opcode) +{ + if (RDNUM) { + mov(rax, RT_x); + shr(rax, SHAMT + 32); + mov(RD_x, rax); + } + return false; +} + +inline bool mips3_x64::DSRA32(uint32_t opcode) +{ + if (RDNUM) { + mov(rax, RT_x); + sar(rax, SHAMT + 32); + mov(RD_x, rax); + } + return false; +} + + + +} + +#endif // MIPS3_X64_SHIFT + diff --git a/src/cpu/mips3/x64/xbyak/xbyak.h b/src/cpu/mips3/x64/xbyak/xbyak.h new file mode 100644 index 000000000..19a89568f --- /dev/null +++ b/src/cpu/mips3/x64/xbyak/xbyak.h @@ -0,0 +1,2092 @@ +#pragma once +#ifndef XBYAK_XBYAK_H_ +#define XBYAK_XBYAK_H_ +/*! 
+ @file xbyak.h + @brief Xbyak ; JIT assembler for x86(IA32)/x64 by C++ + @author herumi + @url https://github.com/herumi/xbyak, http://homepage1.nifty.com/herumi/soft/xbyak_e.html + @note modified new BSD license + http://opensource.org/licenses/BSD-3-Clause +*/ +#ifndef XBYAK_NO_OP_NAMES + #if not +0 // trick to detect whether 'not' is operator or not + #error "use -fno-operator-names option if you want to use and(), or(), xor(), not() as function names, Or define XBYAK_NO_OP_NAMES and use and_(), or_(), xor_(), not_()." + #endif +#endif + +#include // for debug print +#include +#include +#include +#include +#ifndef NDEBUG +#include +#endif + +//#define XBYAK_USE_MMAP_ALLOCATOR +#ifndef __GNUC__ + #undef XBYAK_USE_MMAP_ALLOCATOR +#endif + +// This covers -std=(gnu|c)++(0x|11|1y), -stdlib=libc++, and modern Microsoft. +#if ((defined(_MSC_VER) && (_MSC_VER >= 1600)) || defined(_LIBCPP_VERSION) ||\ + ((__cplusplus >= 201103) || defined(__GXX_EXPERIMENTAL_CXX0X__))) + #include + #define XBYAK_STD_UNORDERED_MAP std::unordered_map + #define XBYAK_STD_UNORDERED_MULTIMAP std::unordered_multimap + +// Clang/llvm-gcc and ICC-EDG in 'GCC-mode' always claim to be GCC 4.2, using +// libstdcxx 20070719 (from GCC 4.2.1, the last GPL 2 version). +// These headers have been expanded/fixed in various forks. +// In F.S.F. 'real' GCC, issues with the tr headers were resolved in GCC 4.5. 
+#elif defined(__GNUC__) && (__GNUC__ >= 4) && ((__GNUC_MINOR__ >= 5) || \ + ((__GLIBCXX__ >= 20070719) && (__GNUC_MINOR__ >= 2) && \ + (defined(__INTEL_COMPILER) || defined(__llvm__)))) + #include + #define XBYAK_STD_UNORDERED_MAP std::tr1::unordered_map + #define XBYAK_STD_UNORDERED_MULTIMAP std::tr1::unordered_multimap + +#elif defined(_MSC_VER) && (_MSC_VER >= 1500) && (_MSC_VER < 1600) + #include + #define XBYAK_STD_UNORDERED_MAP std::tr1::unordered_map + #define XBYAK_STD_UNORDERED_MULTIMAP std::tr1::unordered_multimap + +#else + #include + #define XBYAK_STD_UNORDERED_MAP std::map + #define XBYAK_STD_UNORDERED_MULTIMAP std::multimap +#endif +#ifdef _WIN32 + #include + #include +#elif defined(__GNUC__) + #include + #include + #include +#endif +#if !defined(_MSC_VER) || (_MSC_VER >= 1600) + #include +#endif + +#if defined(_WIN64) || defined(__MINGW64__) || (defined(__CYGWIN__) && defined(__x86_64__)) + #define XBYAK64_WIN +#elif defined(__x86_64__) + #define XBYAK64_GCC +#endif +#if !defined(XBYAK64) && !defined(XBYAK32) + #if defined(XBYAK64_GCC) || defined(XBYAK64_WIN) + #define XBYAK64 + #else + #define XBYAK32 + #endif +#endif + +#ifdef _MSC_VER + #pragma warning(push) + #pragma warning(disable : 4514) /* remove inline function */ + #pragma warning(disable : 4786) /* identifier is too long */ + #pragma warning(disable : 4503) /* name is too long */ + #pragma warning(disable : 4127) /* constant expresison */ +#endif + +namespace Xbyak { + +#include "xbyak_bin2hex.h" + +enum { + DEFAULT_MAX_CODE_SIZE = 4096, + VERSION = 0x4710 /* 0xABCD = A.BC(D) */ +}; + +#ifndef MIE_INTEGER_TYPE_DEFINED +#define MIE_INTEGER_TYPE_DEFINED +#ifdef _MSC_VER + typedef unsigned __int64 uint64; + typedef __int64 sint64; +#else + typedef uint64_t uint64; + typedef int64_t sint64; +#endif +typedef unsigned int uint32; +typedef unsigned short uint16; +typedef unsigned char uint8; +#endif + +#ifndef MIE_ALIGN + #ifdef _MSC_VER + #define MIE_ALIGN(x) __declspec(align(x)) + #else + 
#define MIE_ALIGN(x) __attribute__((aligned(x))) + #endif +#endif +#ifndef MIE_PACK // for shufps + #define MIE_PACK(x, y, z, w) ((x) * 64 + (y) * 16 + (z) * 4 + (w)) +#endif + +enum { + ERR_NONE = 0, + ERR_BAD_ADDRESSING, + ERR_CODE_IS_TOO_BIG, + ERR_BAD_SCALE, + ERR_ESP_CANT_BE_INDEX, + ERR_BAD_COMBINATION, + ERR_BAD_SIZE_OF_REGISTER, + ERR_IMM_IS_TOO_BIG, + ERR_BAD_ALIGN, + ERR_LABEL_IS_REDEFINED, + ERR_LABEL_IS_TOO_FAR, + ERR_LABEL_IS_NOT_FOUND, + ERR_CODE_ISNOT_COPYABLE, + ERR_BAD_PARAMETER, + ERR_CANT_PROTECT, + ERR_CANT_USE_64BIT_DISP, + ERR_OFFSET_IS_TOO_BIG, + ERR_MEM_SIZE_IS_NOT_SPECIFIED, + ERR_BAD_MEM_SIZE, + ERR_BAD_ST_COMBINATION, + ERR_OVER_LOCAL_LABEL, // not used + ERR_UNDER_LOCAL_LABEL, + ERR_CANT_ALLOC, + ERR_ONLY_T_NEAR_IS_SUPPORTED_IN_AUTO_GROW, + ERR_BAD_PROTECT_MODE, + ERR_BAD_PNUM, + ERR_BAD_TNUM, + ERR_BAD_VSIB_ADDRESSING, + ERR_CANT_CONVERT, + ERR_LABEL_ISNOT_SET_BY_L, + ERR_LABEL_IS_ALREADY_SET_BY_L, + ERR_BAD_LABEL_STR, + ERR_MUNMAP, + ERR_INTERNAL +}; + +class Error : public std::exception { + int err_; +public: + explicit Error(int err) : err_(err) + { + if (err_ < 0 || err_ > ERR_INTERNAL) { + fprintf(stderr, "bad err=%d in Xbyak::Error\n", err_); + exit(1); + } + } + operator int() const { return err_; } + const char *what() const throw() + { + static const char *errTbl[] = { + "none", + "bad addressing", + "code is too big", + "bad scale", + "esp can't be index", + "bad combination", + "bad size of register", + "imm is too big", + "bad align", + "label is redefined", + "label is too far", + "label is not found", + "code is not copyable", + "bad parameter", + "can't protect", + "can't use 64bit disp(use (void*))", + "offset is too big", + "MEM size is not specified", + "bad mem size", + "bad st combination", + "over local label", + "under local label", + "can't alloc", + "T_SHORT is not supported in AutoGrow", + "bad protect mode", + "bad pNum", + "bad tNum", + "bad vsib addressing", + "can't convert", + "label is not set by L()", + 
"label is already set by L()", + "bad label string", + "err munmap", + "internal error", + }; + assert((size_t)err_ < sizeof(errTbl) / sizeof(*errTbl)); + return errTbl[err_]; + } +}; + +inline const char *ConvertErrorToString(Error err) +{ + return err.what(); +} + +inline void *AlignedMalloc(size_t size, size_t alignment) +{ +#ifdef __MINGW32__ + return __mingw_aligned_malloc(size, alignment); +#elif defined(_WIN32) + return _aligned_malloc(size, alignment); +#else + void *p; + int ret = posix_memalign(&p, alignment, size); + return (ret == 0) ? p : 0; +#endif +} + +inline void AlignedFree(void *p) +{ +#ifdef __MINGW32__ + __mingw_aligned_free(p); +#elif defined(_MSC_VER) + _aligned_free(p); +#else + free(p); +#endif +} + +template +inline const To CastTo(From p) throw() +{ + return (const To)(size_t)(p); +} +namespace inner { + +static const size_t ALIGN_PAGE_SIZE = 4096; + +inline bool IsInDisp8(uint32 x) { return 0xFFFFFF80 <= x || x <= 0x7F; } +inline bool IsInInt32(uint64 x) { return ~uint64(0x7fffffffu) <= x || x <= 0x7FFFFFFFU; } + +inline uint32 VerifyInInt32(uint64 x) +{ +#ifdef XBYAK64 + if (!IsInInt32(x)) throw Error(ERR_OFFSET_IS_TOO_BIG); +#endif + return static_cast(x); +} + +enum LabelMode { + LasIs, // as is + Labs, // absolute + LaddTop // (addr + top) for mov(reg, label) with AutoGrow +}; + +} // inner + +/* + custom allocator +*/ +struct Allocator { + virtual uint8 *alloc(size_t size) { return reinterpret_cast(AlignedMalloc(size, inner::ALIGN_PAGE_SIZE)); } + virtual void free(uint8 *p) { AlignedFree(p); } + virtual ~Allocator() {} + /* override to return false if you call protect() manually */ + virtual bool useProtect() const { return true; } +}; + +#ifdef __GNUC__ +class MmapAllocator : Allocator { + typedef XBYAK_STD_UNORDERED_MAP SizeList; + SizeList sizeList_; +public: + uint8 *alloc(size_t size) + { + const size_t alignedSizeM1 = inner::ALIGN_PAGE_SIZE - 1; + size = (size + alignedSizeM1) & ~alignedSizeM1; +#ifdef MAP_ANONYMOUS + const 
int mode = MAP_PRIVATE | MAP_ANONYMOUS; +#elif defined(MAP_ANON) + const int mode = MAP_PRIVATE | MAP_ANON; +#else + #error "not supported" +#endif + void *p = mmap(NULL, size, PROT_READ | PROT_WRITE, mode, -1, 0); + if (p == MAP_FAILED) throw Error(ERR_CANT_ALLOC); + assert(p); + sizeList_[(uintptr_t)p] = size; + return (uint8*)p; + } + void free(uint8 *p) + { + if (p == 0) return; + SizeList::iterator i = sizeList_.find((uintptr_t)p); + if (i == sizeList_.end()) throw Error(ERR_BAD_PARAMETER); + if (munmap((void*)i->first, i->second) < 0) throw Error(ERR_MUNMAP); + sizeList_.erase(i); + } +}; +#endif + +class Operand { +private: + uint8 idx_; // 0..15, MSB = 1 if spl/bpl/sil/dil + uint8 kind_; + uint16 bit_; +public: + enum Kind { + NONE = 0, + MEM = 1 << 1, + IMM = 1 << 2, + REG = 1 << 3, + MMX = 1 << 4, + XMM = 1 << 5, + FPU = 1 << 6, + YMM = 1 << 7 + }; + enum Code { +#ifdef XBYAK64 + RAX = 0, RCX, RDX, RBX, RSP, RBP, RSI, RDI, R8, R9, R10, R11, R12, R13, R14, R15, + R8D = 8, R9D, R10D, R11D, R12D, R13D, R14D, R15D, + R8W = 8, R9W, R10W, R11W, R12W, R13W, R14W, R15W, + R8B = 8, R9B, R10B, R11B, R12B, R13B, R14B, R15B, + SPL = 4, BPL, SIL, DIL, +#endif + EAX = 0, ECX, EDX, EBX, ESP, EBP, ESI, EDI, + AX = 0, CX, DX, BX, SP, BP, SI, DI, + AL = 0, CL, DL, BL, AH, CH, DH, BH + }; + Operand() : idx_(0), kind_(0), bit_(0) { } + Operand(int idx, Kind kind, int bit, bool ext8bit = 0) + : idx_(static_cast(idx | (ext8bit ? 
0x80 : 0))) + , kind_(static_cast(kind)) + , bit_(static_cast(bit)) + { + assert((bit_ & (bit_ - 1)) == 0); // bit must be power of two + } + Kind getKind() const { return static_cast(kind_); } + int getIdx() const { return idx_ & 15; } + bool isNone() const { return kind_ == 0; } + bool isMMX() const { return is(MMX); } + bool isXMM() const { return is(XMM); } + bool isYMM() const { return is(YMM); } + bool isREG(int bit = 0) const { return is(REG, bit); } + bool isMEM(int bit = 0) const { return is(MEM, bit); } + bool isFPU() const { return is(FPU); } + bool isExt8bit() const { return (idx_ & 0x80) != 0; } + // ah, ch, dh, bh? + bool isHigh8bit() const + { + if (!isBit(8)) return false; + if (isExt8bit()) return false; + const int idx = getIdx(); + return AH <= idx && idx <= BH; + } + // any bit is accetable if bit == 0 + bool is(int kind, uint32 bit = 0) const + { + return (kind_ & kind) && (bit == 0 || (bit_ & bit)); // cf. you can set (8|16) + } + bool isBit(uint32 bit) const { return (bit_ & bit) != 0; } + uint32 getBit() const { return bit_; } + const char *toString() const + { + const int idx = getIdx(); + if (kind_ == REG) { + if (isExt8bit()) { + static const char *tbl[4] = { "spl", "bpl", "sil", "dil" }; + return tbl[idx - 4]; + } + static const char *tbl[4][16] = { + { "al", "cl", "dl", "bl", "ah", "ch", "dh", "bh", "r8b", "r9b", "r10b", "r11b", "r12b", "r13b", "r14b", "r15b" }, + { "ax", "cx", "dx", "bx", "sp", "bp", "si", "di", "r8w", "r9w", "r10w", "r11w", "r12w", "r13w", "r14w", "r15w" }, + { "eax", "ecx", "edx", "ebx", "esp", "ebp", "esi", "edi", "r8d", "r9d", "r10d", "r11d", "r12d", "r13d", "r14d", "r15d" }, + { "rax", "rcx", "rdx", "rbx", "rsp", "rbp", "rsi", "rdi", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15" }, + }; + return tbl[bit_ == 8 ? 0 : bit_ == 16 ? 1 : bit_ == 32 ? 
2 : 3][idx]; + } else if (isYMM()) { + static const char *tbl[16] = { "ym0", "ym1", "ym2", "ym3", "ym4", "ym5", "ym6", "ym7", "ym8", "ym9", "ym10", "ym11", "ym12", "ym13", "ym14", "ym15" }; + return tbl[idx]; + } else if (isXMM()) { + static const char *tbl[16] = { "xm0", "xm1", "xm2", "xm3", "xm4", "xm5", "xm6", "xm7", "xm8", "xm9", "xm10", "xm11", "xm12", "xm13", "xm14", "xm15" }; + return tbl[idx]; + } else if (isMMX()) { + static const char *tbl[8] = { "mm0", "mm1", "mm2", "mm3", "mm4", "mm5", "mm6", "mm7" }; + return tbl[idx]; + } else if (isFPU()) { + static const char *tbl[8] = { "st0", "st1", "st2", "st3", "st4", "st5", "st6", "st7" }; + return tbl[idx]; + } + throw Error(ERR_INTERNAL); + } + bool operator==(const Operand& rhs) const { return idx_ == rhs.idx_ && kind_ == rhs.kind_ && bit_ == rhs.bit_; } + bool operator!=(const Operand& rhs) const { return !operator==(rhs); } +}; + +struct Reg8; +struct Reg16; +struct Reg32; +#ifdef XBYAK64 +struct Reg64; +#endif +class Reg : public Operand { + bool hasRex() const { return isExt8bit() | isREG(64) | isExtIdx(); } +public: + Reg() { } + Reg(int idx, Kind kind, int bit = 0, bool ext8bit = false) : Operand(idx, kind, bit, ext8bit) { } + Reg changeBit(int bit) const { return Reg(getIdx(), getKind(), bit, isExt8bit()); } + bool isExtIdx() const { return getIdx() > 7; } + uint8 getRex(const Reg& base = Reg()) const + { + return (hasRex() || base.hasRex()) ? uint8(0x40 | ((isREG(64) | base.isREG(64)) ? 8 : 0) | (isExtIdx() ? 4 : 0)| (base.isExtIdx() ? 
1 : 0)) : 0; + } + Reg8 cvt8() const; + Reg16 cvt16() const; + Reg32 cvt32() const; +#ifdef XBYAK64 + Reg64 cvt64() const; +#endif +}; + +struct Reg8 : public Reg { + explicit Reg8(int idx = 0, bool ext8bit = false) : Reg(idx, Operand::REG, 8, ext8bit) { } +}; + +struct Reg16 : public Reg { + explicit Reg16(int idx = 0) : Reg(idx, Operand::REG, 16) { } +}; + +struct Mmx : public Reg { + explicit Mmx(int idx = 0, Kind kind = Operand::MMX, int bit = 64) : Reg(idx, kind, bit) { } +}; + +struct Xmm : public Mmx { + explicit Xmm(int idx = 0, Kind kind = Operand::XMM, int bit = 128) : Mmx(idx, kind, bit) { } +}; + +struct Ymm : public Xmm { + explicit Ymm(int idx = 0) : Xmm(idx, Operand::YMM, 256) { } +}; + +struct Fpu : public Reg { + explicit Fpu(int idx = 0) : Reg(idx, Operand::FPU, 32) { } +}; + +struct Reg32e : public Reg { + explicit Reg32e(int idx, int bit) : Reg(idx, Operand::REG, bit) {} +}; +struct Reg32 : public Reg32e { + explicit Reg32(int idx = 0) : Reg32e(idx, 32) {} +}; +#ifdef XBYAK64 +struct Reg64 : public Reg32e { + explicit Reg64(int idx = 0) : Reg32e(idx, 64) {} +}; +struct RegRip { + uint32 disp_; + explicit RegRip(unsigned int disp = 0) : disp_(disp) {} + friend const RegRip operator+(const RegRip& r, unsigned int disp) { + return RegRip(r.disp_ + disp); + } + friend const RegRip operator-(const RegRip& r, unsigned int disp) { + return RegRip(r.disp_ - disp); + } +}; +#endif + +inline Reg8 Reg::cvt8() const +{ + const int idx = getIdx(); + if (isBit(8)) return Reg8(idx, isExt8bit()); +#ifdef XBYAK32 + if (idx >= 4) throw Error(ERR_CANT_CONVERT); +#endif + return Reg8(idx, 4 <= idx && idx < 8); +} + +inline Reg16 Reg::cvt16() const +{ + const int idx = getIdx(); + if (isBit(8) && (4 <= idx && idx < 8) && !isExt8bit()) throw Error(ERR_CANT_CONVERT); + return Reg16(idx); +} + +inline Reg32 Reg::cvt32() const +{ + const int idx = getIdx(); + if (isBit(8) && (4 <= idx && idx < 8) && !isExt8bit()) throw Error(ERR_CANT_CONVERT); + return Reg32(idx); +} + 
+#ifdef XBYAK64 +inline Reg64 Reg::cvt64() const +{ + const int idx = getIdx(); + if (isBit(8) && (4 <= idx && idx < 8) && !isExt8bit()) throw Error(ERR_CANT_CONVERT); + return Reg64(idx); +} +#endif + +class RegExp { +public: + struct SReg { + uint16 bit:9; // 32/64/128/256 none if 0 + uint16 idx:7; + SReg() : bit(0), idx(0) { } + void set(const Reg& r) { this->bit = uint16(r.getBit()); this->idx = uint16(r.getIdx()); } + bool operator==(const SReg& rhs) const { return bit == rhs.bit && idx == rhs.idx; } + }; + RegExp(size_t disp = 0) : disp_(disp), scale_(0) { } + RegExp(const Reg& r, int scale = 1) + : disp_(0) + , scale_(scale) + { + if (!r.is(Reg::REG, 32|64) && !r.is(Reg::XMM|Reg::YMM)) throw Error(ERR_BAD_SIZE_OF_REGISTER); + if (scale != 1 && scale != 2 && scale != 4 && scale != 8) throw Error(ERR_BAD_SCALE); + if (r.getBit() >= 128 || scale != 1) { // xmm/ymm is always index + index_.set(r); + } else { + base_.set(r); + } + } + bool isVsib() const { return index_.bit >= 128; } + bool isYMM() const { return index_.bit >= 256; } + RegExp optimize() const // select smaller size + { + // [reg * 2] => [reg + reg] + if (!isVsib() && !base_.bit && index_.bit && scale_ == 2) { + RegExp ret = *this; + ret.base_ = index_; + ret.scale_ = 1; + return ret; + } + return *this; + } + bool operator==(const RegExp& rhs) const + { + return base_ == rhs.base_ && index_ == rhs.index_ && disp_ == rhs.disp_; + } + const SReg& getBase() const { return base_; } + const SReg& getIndex() const { return index_; } + int getScale() const { return scale_; } + uint32 getDisp() const { return uint32(disp_); } + void verify() const + { + if (base_.bit >= 128) throw Error(ERR_BAD_SIZE_OF_REGISTER); + if (index_.bit && index_.bit <= 64) { + if (index_.idx == Operand::ESP) throw Error(ERR_ESP_CANT_BE_INDEX); + if (base_.bit && base_.bit != index_.bit) throw Error(ERR_BAD_SIZE_OF_REGISTER); + } + } +private: + friend RegExp operator+(const RegExp& a, const RegExp& b); + friend RegExp 
operator-(const RegExp& e, size_t disp); + /* + [base_ + index_ * scale_ + disp_] + base : Reg32e, index : Reg32e(w/o esp), Xmm, Ymm + */ + size_t disp_; + int scale_; + SReg base_; + SReg index_; +}; + +inline RegExp operator+(const RegExp& a, const RegExp& b) +{ + if (a.index_.bit && b.index_.bit) throw Error(ERR_BAD_ADDRESSING); + RegExp ret = a; + if (!ret.index_.bit) { ret.index_ = b.index_; ret.scale_ = b.scale_; } + if (b.base_.bit) { + if (ret.base_.bit) { + if (ret.index_.bit) throw Error(ERR_BAD_ADDRESSING); + // base + base => base + index * 1 + ret.index_ = b.base_; + // [reg + esp] => [esp + reg] + if (ret.index_.idx == Operand::ESP) std::swap(ret.base_, ret.index_); + ret.scale_ = 1; + } else { + ret.base_ = b.base_; + } + } + ret.disp_ += b.disp_; + return ret; +} +inline RegExp operator*(const Reg& r, int scale) +{ + return RegExp(r, scale); +} +inline RegExp operator-(const RegExp& e, size_t disp) +{ + RegExp ret = e; + ret.disp_ -= disp; + return ret; +} + +// 2nd parameter for constructor of CodeArray(maxSize, userPtr, alloc) +void *const AutoGrow = (void*)1; + +class CodeArray { + enum Type { + USER_BUF = 1, // use userPtr(non alignment, non protect) + ALLOC_BUF, // use new(alignment, protect) + AUTO_GROW // automatically move and grow memory if necessary + }; + CodeArray(const CodeArray& rhs); + void operator=(const CodeArray&); + bool isAllocType() const { return type_ == ALLOC_BUF || type_ == AUTO_GROW; } + struct AddrInfo { + size_t codeOffset; // position to write + size_t jmpAddr; // value to write + int jmpSize; // size of jmpAddr + inner::LabelMode mode; + AddrInfo(size_t _codeOffset, size_t _jmpAddr, int _jmpSize, inner::LabelMode _mode) + : codeOffset(_codeOffset), jmpAddr(_jmpAddr), jmpSize(_jmpSize), mode(_mode) {} + uint64 getVal(const uint8 *top) const + { + uint64 disp = (mode == inner::LaddTop) ? jmpAddr + size_t(top) : (mode == inner::LasIs) ? 
jmpAddr : jmpAddr - size_t(top); + if (jmpSize == 4) disp = inner::VerifyInInt32(disp); + return disp; + } + }; + typedef std::list AddrInfoList; + AddrInfoList addrInfoList_; + const Type type_; +#ifdef XBYAK_USE_MMAP_ALLOCATOR + MmapAllocator defaultAllocator_; +#else + Allocator defaultAllocator_; +#endif + Allocator *alloc_; +protected: + size_t maxSize_; + uint8 *top_; + size_t size_; + + /* + allocate new memory and copy old data to the new area + */ + void growMemory() + { + const size_t newSize = (std::max)(DEFAULT_MAX_CODE_SIZE, maxSize_ * 2); + uint8 *newTop = alloc_->alloc(newSize); + if (newTop == 0) throw Error(ERR_CANT_ALLOC); + for (size_t i = 0; i < size_; i++) newTop[i] = top_[i]; + alloc_->free(top_); + top_ = newTop; + maxSize_ = newSize; + } + /* + calc jmp address for AutoGrow mode + */ + void calcJmpAddress() + { + for (AddrInfoList::const_iterator i = addrInfoList_.begin(), ie = addrInfoList_.end(); i != ie; ++i) { + uint64 disp = i->getVal(top_); + rewrite(i->codeOffset, disp, i->jmpSize); + } + if (alloc_->useProtect() && !protect(top_, size_, true)) throw Error(ERR_CANT_PROTECT); + } +public: + explicit CodeArray(size_t maxSize, void *userPtr = 0, Allocator *allocator = 0) + : type_(userPtr == AutoGrow ? AUTO_GROW : userPtr ? USER_BUF : ALLOC_BUF) + , alloc_(allocator ? allocator : (Allocator*)&defaultAllocator_) + , maxSize_(maxSize) + , top_(type_ == USER_BUF ? 
reinterpret_cast(userPtr) : alloc_->alloc((std::max)(maxSize, 1))) + , size_(0) + { + if (maxSize_ > 0 && top_ == 0) throw Error(ERR_CANT_ALLOC); + if ((type_ == ALLOC_BUF && alloc_->useProtect()) && !protect(top_, maxSize, true)) { + alloc_->free(top_); + throw Error(ERR_CANT_PROTECT); + } + } + virtual ~CodeArray() + { + if (isAllocType()) { + if (alloc_->useProtect()) protect(top_, maxSize_, false); + alloc_->free(top_); + } + } + void resetSize() + { + size_ = 0; + addrInfoList_.clear(); + } + void db(int code) + { + if (size_ >= maxSize_) { + if (type_ == AUTO_GROW) { + growMemory(); + } else { + throw Error(ERR_CODE_IS_TOO_BIG); + } + } + top_[size_++] = static_cast(code); + } + void db(const uint8 *code, int codeSize) + { + for (int i = 0; i < codeSize; i++) db(code[i]); + } + void db(uint64 code, int codeSize) + { + if (codeSize > 8) throw Error(ERR_BAD_PARAMETER); + for (int i = 0; i < codeSize; i++) db(static_cast(code >> (i * 8))); + } + void dw(uint32 code) { db(code, 2); } + void dd(uint32 code) { db(code, 4); } + const uint8 *getCode() const { return top_; } + template + const F getCode() const { return CastTo(top_); } + const uint8 *getCurr() const { return &top_[size_]; } + template + const F getCurr() const { return CastTo(&top_[size_]); } + size_t getSize() const { return size_; } + void setSize(size_t size) + { + if (size >= maxSize_) throw Error(ERR_OFFSET_IS_TOO_BIG); + size_ = size; + } + void dump() const + { + const uint8 *p = getCode(); + size_t bufSize = getSize(); + size_t remain = bufSize; + for (int i = 0; i < 4; i++) { + size_t disp = 16; + if (remain < 16) { + disp = remain; + } + for (size_t j = 0; j < 16; j++) { + if (j < disp) { + printf("%02X", p[i * 16 + j]); + } + } + putchar('\n'); + remain -= disp; + if (remain <= 0) { + break; + } + } + } + /* + @param offset [in] offset from top + @param disp [in] offset from the next of jmp + @param size [in] write size(1, 2, 4, 8) + */ + void rewrite(size_t offset, uint64 disp, size_t 
size) + { + assert(offset < maxSize_); + if (size != 1 && size != 2 && size != 4 && size != 8) throw Error(ERR_BAD_PARAMETER); + uint8 *const data = top_ + offset; + for (size_t i = 0; i < size; i++) { + data[i] = static_cast(disp >> (i * 8)); + } + } + void save(size_t offset, size_t val, int size, inner::LabelMode mode) + { + addrInfoList_.push_back(AddrInfo(offset, val, size, mode)); + } + bool isAutoGrow() const { return type_ == AUTO_GROW; } + /** + change exec permission of memory + @param addr [in] buffer address + @param size [in] buffer size + @param canExec [in] true(enable to exec), false(disable to exec) + @return true(success), false(failure) + */ + static inline bool protect(const void *addr, size_t size, bool canExec) + { +#if defined(_WIN32) + DWORD oldProtect; + return VirtualProtect(const_cast(addr), size, canExec ? PAGE_EXECUTE_READWRITE : PAGE_READWRITE, &oldProtect) != 0; +#elif defined(__GNUC__) + size_t pageSize = sysconf(_SC_PAGESIZE); + size_t iaddr = reinterpret_cast(addr); + size_t roundAddr = iaddr & ~(pageSize - static_cast(1)); + int mode = PROT_READ | PROT_WRITE | (canExec ? 
PROT_EXEC : 0); + return mprotect(reinterpret_cast(roundAddr), size + (iaddr - roundAddr), mode) == 0; +#else + return true; +#endif + } + /** + get aligned memory pointer + @param addr [in] address + @param alingedSize [in] power of two + @return aligned addr by alingedSize + */ + static inline uint8 *getAlignedAddress(uint8 *addr, size_t alignedSize = 16) + { + return reinterpret_cast((reinterpret_cast(addr) + alignedSize - 1) & ~(alignedSize - static_cast(1))); + } +}; + +class Address : public Operand { + mutable uint8 top_[6]; // 6 = 1(ModRM) + 1(SIB) + 4(disp) + uint8 size_; + uint8 rex_; + uint64 disp_; + bool isOnlyDisp_; + bool is64bitDisp_; + bool is32bit_; + mutable bool isVsib_; + bool isYMM_; + void verify() const { if (isVsib_) throw Error(ERR_BAD_VSIB_ADDRESSING); } +public: + Address(uint32 sizeBit, bool isOnlyDisp, uint64 disp, bool is32bit, bool is64bitDisp = false, bool isVsib = false, bool isYMM = false) + : Operand(0, MEM, sizeBit) + , size_(0) + , rex_(0) + , disp_(disp) + , isOnlyDisp_(isOnlyDisp) + , is64bitDisp_(is64bitDisp) + , is32bit_(is32bit) + , isVsib_(isVsib) + , isYMM_(isYMM) + { + } + void db(int code) + { + if (size_ >= sizeof(top_)) throw Error(ERR_CODE_IS_TOO_BIG); + top_[size_++] = static_cast(code); + } + void dd(uint32 code) { for (int i = 0; i < 4; i++) db(code >> (i * 8)); } + const uint8 *getCode() const { return top_; } + size_t getSize() const { return size_; } + void updateRegField(uint8 regIdx) const + { + *top_ = (*top_ & B11000111) | ((regIdx << 3) & B00111000); + } + void setVsib(bool isVsib) const { isVsib_ = isVsib; } + bool isVsib() const { return isVsib_; } + bool isYMM() const { return isYMM_; } + bool is32bit() const { verify(); return is32bit_; } + bool isOnlyDisp() const { verify(); return isOnlyDisp_; } // for mov eax + uint64 getDisp() const { verify(); return disp_; } + uint8 getRex() const { verify(); return rex_; } + bool is64bitDisp() const { verify(); return is64bitDisp_; } // for moffset + void 
setRex(uint8 rex) { rex_ = rex; } +}; + +class AddressFrame { +private: + void operator=(const AddressFrame&); + Address makeAddress(const RegExp& e) const + { + e.verify(); + const bool isVsib = e.isVsib(); + const bool isYMM = e.isYMM(); + const RegExp::SReg& base = e.getBase(); + const RegExp::SReg& index = e.getIndex(); + const uint32 disp = e.getDisp(); + Address frame(bit_, (!base.bit && !index.bit), disp, base.bit == 32 || index.bit == 32, false, isVsib, isYMM); + enum { + mod00 = 0, mod01 = 1, mod10 = 2 + }; + int mod; + if (!base.bit || ((base.idx & 7) != Operand::EBP && disp == 0)) { + mod = mod00; + } else if (inner::IsInDisp8(disp)) { + mod = mod01; + } else { + mod = mod10; + } + const int baseIdx = base.bit ? (base.idx & 7) : Operand::EBP; + /* ModR/M = [2:3:3] = [Mod:reg/code:R/M] */ + bool hasSIB = index.bit || (base.idx & 7) == Operand::ESP; +#ifdef XBYAK64 + if (!base.bit && !index.bit) hasSIB = true; +#endif + if (hasSIB) { + frame.db((mod << 6) | Operand::ESP); + /* SIB = [2:3:3] = [SS:index:base(=rm)] */ + const int indexIdx = index.bit ? (index.idx & 7) : Operand::ESP; + const int scale = e.getScale(); + const int ss = (scale == 8) ? 3 : (scale == 4) ? 2 : (scale == 2) ? 
1 : 0; + frame.db((ss << 6) | (indexIdx << 3) | baseIdx); + } else { + frame.db((mod << 6) | baseIdx); + } + if (mod == mod01) { + frame.db(disp); + } else if (mod == mod10 || (mod == mod00 && !base.bit)) { + frame.dd(disp); + } + int rex = ((index.idx >> 3) << 1) | (base.idx >> 3); + if (rex) rex |= 0x40; + frame.setRex(uint8(rex)); + return frame; + } +public: + const uint32 bit_; + explicit AddressFrame(uint32 bit) : bit_(bit) { } + Address operator[](const void *disp) const + { + size_t adr = reinterpret_cast(disp); +#ifdef XBYAK64 + if (adr > 0xFFFFFFFFU) throw Error(ERR_OFFSET_IS_TOO_BIG); +#endif + RegExp e(static_cast(adr)); + return operator[](e); + } +#ifdef XBYAK64 + Address operator[](uint64 disp) const + { + return Address(64, true, disp, false, true); + } + Address operator[](const RegRip& addr) const + { + Address frame(bit_, true, addr.disp_, false); + frame.db(B00000101); + frame.dd(addr.disp_); + return frame; + } +#endif + Address operator[](const RegExp& e) const + { + return makeAddress(e.optimize()); + } +}; + +struct JmpLabel { + size_t endOfJmp; /* offset from top to the end address of jmp */ + int jmpSize; + inner::LabelMode mode; +}; + +class LabelManager; + +class Label { + mutable LabelManager *mgr; + mutable int id; + friend class LabelManager; +public: + Label() : mgr(0), id(0) {} + Label(const Label& rhs); + Label& operator=(const Label& rhs); + ~Label(); + int getId() const { return id; } + + // backward compatibility + static std::string toStr(int num) + { + char buf[16]; +#ifdef _MSC_VER + _snprintf_s +#else + snprintf +#endif + (buf, sizeof(buf), ".%08x", num); + return buf; + } +}; + +class LabelManager { + // for string label + struct SlabelVal { + size_t offset; + SlabelVal(size_t offset) : offset(offset) {} + }; + typedef XBYAK_STD_UNORDERED_MAP SlabelDefList; + typedef XBYAK_STD_UNORDERED_MULTIMAP SlabelUndefList; + struct SlabelState { + SlabelDefList defList; + SlabelUndefList undefList; + }; + typedef std::list StateList; 
+ // for Label class + struct ClabelVal { + ClabelVal(size_t offset = 0) : offset(offset), refCount(1) {} + size_t offset; + int refCount; + }; + typedef XBYAK_STD_UNORDERED_MAP ClabelDefList; + typedef XBYAK_STD_UNORDERED_MULTIMAP ClabelUndefList; + + CodeArray *base_; + // global : stateList_.front(), local : stateList_.back() + StateList stateList_; + mutable int labelId_; + ClabelDefList clabelDefList_; + ClabelUndefList clabelUndefList_; + + int getId(const Label& label) const + { + if (label.id == 0) label.id = labelId_++; + return label.id; + } + template + void define_inner(DefList& defList, UndefList& undefList, const T& labelId, size_t addrOffset) + { + // add label + typename DefList::value_type item(labelId, addrOffset); + std::pair ret = defList.insert(item); + if (!ret.second) throw Error(ERR_LABEL_IS_REDEFINED); + // search undefined label + for (;;) { + typename UndefList::iterator itr = undefList.find(labelId); + if (itr == undefList.end()) break; + const JmpLabel *jmp = &itr->second; + const size_t offset = jmp->endOfJmp - jmp->jmpSize; + size_t disp; + if (jmp->mode == inner::LaddTop) { + disp = addrOffset; + } else if (jmp->mode == inner::Labs) { + disp = size_t(base_->getCurr()); + } else { + disp = addrOffset - jmp->endOfJmp; +#ifdef XBYAK64 + if (jmp->jmpSize <= 4 && !inner::IsInInt32(disp)) throw Error(ERR_OFFSET_IS_TOO_BIG); +#endif + if (jmp->jmpSize == 1 && !inner::IsInDisp8((uint32)disp)) throw Error(ERR_LABEL_IS_TOO_FAR); + } + if (base_->isAutoGrow()) { + base_->save(offset, disp, jmp->jmpSize, jmp->mode); + } else { + base_->rewrite(offset, disp, jmp->jmpSize); + } + undefList.erase(itr); + } + } + template + bool getOffset_inner(const DefList& defList, size_t *offset, const T& label) const + { + typename DefList::const_iterator i = defList.find(label); + if (i == defList.end()) return false; + *offset = i->second.offset; + return true; + } + friend class Label; + void incRefCount(int id) { clabelDefList_[id].refCount++; } + void 
decRefCount(int id) + { + ClabelDefList::iterator i = clabelDefList_.find(id); + if (i == clabelDefList_.end()) return; + if (i->second.refCount == 1) { + clabelDefList_.erase(id); + } else { + --i->second.refCount; + } + } + template + bool hasUndefinedLabel_inner(const T& list) const + { +#ifndef NDEBUG + for (typename T::const_iterator i = list.begin(); i != list.end(); ++i) { + std::cerr << "undefined label:" << i->first << std::endl; + } +#endif + return !list.empty(); + } +public: + LabelManager() + { + reset(); + } + void reset() + { + base_ = 0; + labelId_ = 1; + stateList_.clear(); + stateList_.push_back(SlabelState()); + stateList_.push_back(SlabelState()); + } + void enterLocal() + { + stateList_.push_back(SlabelState()); + } + void leaveLocal() + { + if (stateList_.size() <= 2) throw Error(ERR_UNDER_LOCAL_LABEL); + if (hasUndefinedLabel_inner(stateList_.back().undefList)) throw Error(ERR_LABEL_IS_NOT_FOUND); + stateList_.pop_back(); + } + void set(CodeArray *base) { base_ = base; } + void defineSlabel(std::string label) + { + if (label == "@b" || label == "@f") throw Error(ERR_BAD_LABEL_STR); + if (label == "@@") { + SlabelDefList& defList = stateList_.front().defList; + SlabelDefList::iterator i = defList.find("@f"); + if (i != defList.end()) { + defList.erase(i); + label = "@b"; + } else { + i = defList.find("@b"); + if (i != defList.end()) { + defList.erase(i); + } + label = "@f"; + } + } + SlabelState& st = *label.c_str() == '.' ? 
stateList_.back() : stateList_.front(); + define_inner(st.defList, st.undefList, label, base_->getSize()); + } + void defineClabel(const Label& label) + { + define_inner(clabelDefList_, clabelUndefList_, getId(label), base_->getSize()); + label.mgr = this; + } + void assign(Label& dst, const Label& src) + { + ClabelDefList::const_iterator i = clabelDefList_.find(src.id); + if (i == clabelDefList_.end()) throw Error(ERR_LABEL_ISNOT_SET_BY_L); + define_inner(clabelDefList_, clabelUndefList_, dst.id, i->second.offset); + dst.mgr = this; + } + bool getOffset(size_t *offset, std::string& label) const + { + const SlabelDefList& defList = stateList_.front().defList; + if (label == "@b") { + if (defList.find("@f") != defList.end()) { + label = "@f"; + } else if (defList.find("@b") == defList.end()) { + throw Error(ERR_LABEL_IS_NOT_FOUND); + } + } else if (label == "@f") { + if (defList.find("@f") != defList.end()) { + label = "@b"; + } + } + const SlabelState& st = *label.c_str() == '.' ? stateList_.back() : stateList_.front(); + return getOffset_inner(st.defList, offset, label); + } + bool getOffset(size_t *offset, const Label& label) const + { + return getOffset_inner(clabelDefList_, offset, getId(label)); + } + void addUndefinedLabel(const std::string& label, const JmpLabel& jmp) + { + SlabelState& st = *label.c_str() == '.' ? 
stateList_.back() : stateList_.front(); + st.undefList.insert(SlabelUndefList::value_type(label, jmp)); + } + void addUndefinedLabel(const Label& label, const JmpLabel& jmp) + { + clabelUndefList_.insert(ClabelUndefList::value_type(label.id, jmp)); + } + bool hasUndefSlabel() const + { + for (StateList::const_iterator i = stateList_.begin(), ie = stateList_.end(); i != ie; ++i) { + if (hasUndefinedLabel_inner(i->undefList)) return true; + } + return false; + } + bool hasUndefClabel() const { return hasUndefinedLabel_inner(clabelUndefList_); } +}; + +inline Label::Label(const Label& rhs) +{ + id = rhs.id; + mgr = rhs.mgr; + if (mgr) mgr->incRefCount(id); +} +inline Label& Label::operator=(const Label& rhs) +{ + if (id) throw Error(ERR_LABEL_IS_ALREADY_SET_BY_L); + id = rhs.id; + mgr = rhs.mgr; + if (mgr) mgr->incRefCount(id); + return *this; +} +inline Label::~Label() +{ + if (id && mgr) mgr->decRefCount(id); +} + +class CodeGenerator : public CodeArray { +public: + enum LabelType { + T_SHORT, + T_NEAR, + T_AUTO // T_SHORT if possible + }; +private: + CodeGenerator operator=(const CodeGenerator&); // don't call +#ifdef XBYAK64 + enum { i32e = 32 | 64, BIT = 64 }; + static const size_t dummyAddr = (size_t(0x11223344) << 32) | 55667788; + typedef Reg64 NativeReg; +#else + enum { i32e = 32, BIT = 32 }; + static const size_t dummyAddr = 0x12345678; + typedef Reg32 NativeReg; +#endif + // (XMM, XMM|MEM) + static inline bool isXMM_XMMorMEM(const Operand& op1, const Operand& op2) + { + return op1.isXMM() && (op2.isXMM() || op2.isMEM()); + } + // (MMX, MMX|MEM) or (XMM, XMM|MEM) + static inline bool isXMMorMMX_MEM(const Operand& op1, const Operand& op2) + { + return (op1.isMMX() && (op2.isMMX() || op2.isMEM())) || isXMM_XMMorMEM(op1, op2); + } + // (XMM, MMX|MEM) + static inline bool isXMM_MMXorMEM(const Operand& op1, const Operand& op2) + { + return op1.isXMM() && (op2.isMMX() || op2.isMEM()); + } + // (MMX, XMM|MEM) + static inline bool isMMX_XMMorMEM(const Operand& op1, 
const Operand& op2) + { + return op1.isMMX() && (op2.isXMM() || op2.isMEM()); + } + // (XMM, REG32|MEM) + static inline bool isXMM_REG32orMEM(const Operand& op1, const Operand& op2) + { + return op1.isXMM() && (op2.isREG(i32e) || op2.isMEM()); + } + // (REG32, XMM|MEM) + static inline bool isREG32_XMMorMEM(const Operand& op1, const Operand& op2) + { + return op1.isREG(i32e) && (op2.isXMM() || op2.isMEM()); + } + // (REG32, REG32|MEM) + static inline bool isREG32_REG32orMEM(const Operand& op1, const Operand& op2) + { + return op1.isREG(i32e) && ((op2.isREG(i32e) && op1.getBit() == op2.getBit()) || op2.isMEM()); + } + void rex(const Operand& op1, const Operand& op2 = Operand()) + { + uint8 rex = 0; + const Operand *p1 = &op1, *p2 = &op2; + if (p1->isMEM()) std::swap(p1, p2); + if (p1->isMEM()) throw Error(ERR_BAD_COMBINATION); + if (p2->isMEM()) { + const Address& addr = static_cast(*p2); + if (BIT == 64 && addr.is32bit()) db(0x67); + rex = addr.getRex() | static_cast(*p1).getRex(); + } else { + // ModRM(reg, base); + rex = static_cast(op2).getRex(static_cast(op1)); + } + // except movsx(16bit, 32/64bit) + if ((op1.isBit(16) && !op2.isBit(i32e)) || (op2.isBit(16) && !op1.isBit(i32e))) db(0x66); + if (rex) db(rex); + } + enum AVXtype { + PP_NONE = 1 << 0, + PP_66 = 1 << 1, + PP_F3 = 1 << 2, + PP_F2 = 1 << 3, + MM_RESERVED = 1 << 4, + MM_0F = 1 << 5, + MM_0F38 = 1 << 6, + MM_0F3A = 1 << 7 + }; + void vex(bool r, int idx, bool is256, int type, bool x = false, bool b = false, int w = 1) + { + uint32 pp = (type & PP_66) ? 1 : (type & PP_F3) ? 2 : (type & PP_F2) ? 3 : 0; + uint32 vvvv = (((~idx) & 15) << 3) | (is256 ? 4 : 0) | pp; + if (!b && !x && !w && (type & MM_0F)) { + db(0xC5); db((r ? 0 : 0x80) | vvvv); + } else { + uint32 mmmm = (type & MM_0F) ? 1 : (type & MM_0F38) ? 2 : (type & MM_0F3A) ? 3 : 0; + db(0xC4); db((r ? 0 : 0x80) | (x ? 0 : 0x40) | (b ? 
0 : 0x20) | mmmm); db((w << 7) | vvvv); + } + } + LabelManager labelMgr_; + bool isInDisp16(uint32 x) const { return 0xFFFF8000 <= x || x <= 0x7FFF; } + uint8 getModRM(int mod, int r1, int r2) const { return static_cast((mod << 6) | ((r1 & 7) << 3) | (r2 & 7)); } + void opModR(const Reg& reg1, const Reg& reg2, int code0, int code1 = NONE, int code2 = NONE) + { + rex(reg2, reg1); + db(code0 | (reg1.isBit(8) ? 0 : 1)); if (code1 != NONE) db(code1); if (code2 != NONE) db(code2); + db(getModRM(3, reg1.getIdx(), reg2.getIdx())); + } + void opModM(const Address& addr, const Reg& reg, int code0, int code1 = NONE, int code2 = NONE) + { + if (addr.is64bitDisp()) throw Error(ERR_CANT_USE_64BIT_DISP); + rex(addr, reg); + db(code0 | (reg.isBit(8) ? 0 : 1)); if (code1 != NONE) db(code1); if (code2 != NONE) db(code2); + addr.updateRegField(static_cast(reg.getIdx())); + db(addr.getCode(), static_cast(addr.getSize())); + } + void makeJmp(uint32 disp, LabelType type, uint8 shortCode, uint8 longCode, uint8 longPref) + { + const int shortJmpSize = 2; + const int longHeaderSize = longPref ? 
2 : 1; + const int longJmpSize = longHeaderSize + 4; + if (type != T_NEAR && inner::IsInDisp8(disp - shortJmpSize)) { + db(shortCode); db(disp - shortJmpSize); + } else { + if (type == T_SHORT) throw Error(ERR_LABEL_IS_TOO_FAR); + if (longPref) db(longPref); + db(longCode); dd(disp - longJmpSize); + } + } + template + void opJmp(T& label, LabelType type, uint8 shortCode, uint8 longCode, uint8 longPref) + { + if (isAutoGrow() && size_ + 16 >= maxSize_) growMemory(); /* avoid splitting code of jmp */ + size_t offset = 0; + if (labelMgr_.getOffset(&offset, label)) { /* label exists */ + makeJmp(inner::VerifyInInt32(offset - size_), type, shortCode, longCode, longPref); + } else { + JmpLabel jmp; + if (type == T_NEAR) { + jmp.jmpSize = 4; + if (longPref) db(longPref); + db(longCode); dd(0); + } else { + jmp.jmpSize = 1; + db(shortCode); db(0); + } + jmp.mode = inner::LasIs; + jmp.endOfJmp = size_; + labelMgr_.addUndefinedLabel(label, jmp); + } + } + void opJmpAbs(const void *addr, LabelType type, uint8 shortCode, uint8 longCode) + { + if (isAutoGrow()) { + if (type != T_NEAR) throw Error(ERR_ONLY_T_NEAR_IS_SUPPORTED_IN_AUTO_GROW); + if (size_ + 16 >= maxSize_) growMemory(); + db(longCode); + dd(0); + save(size_ - 4, size_t(addr) - size_, 4, inner::Labs); + } else { + makeJmp(inner::VerifyInInt32(reinterpret_cast(addr) - getCurr()), type, shortCode, longCode, 0); + } + + } + /* preCode is for SSSE3/SSE4 */ + void opGen(const Operand& reg, const Operand& op, int code, int pref, bool isValid(const Operand&, const Operand&), int imm8 = NONE, int preCode = NONE) + { + if (isValid && !isValid(reg, op)) throw Error(ERR_BAD_COMBINATION); + if (pref != NONE) db(pref); + if (op.isMEM()) { + opModM(static_cast(op), static_cast(reg), 0x0F, preCode, code); + } else { + opModR(static_cast(reg), static_cast(op), 0x0F, preCode, code); + } + if (imm8 != NONE) db(imm8); + } + void opMMX_IMM(const Mmx& mmx, int imm8, int code, int ext) + { + if (mmx.isXMM()) db(0x66); + 
opModR(Reg32(ext), mmx, 0x0F, code); + db(imm8); + } + void opMMX(const Mmx& mmx, const Operand& op, int code, int pref = 0x66, int imm8 = NONE, int preCode = NONE) + { + opGen(mmx, op, code, mmx.isXMM() ? pref : NONE, isXMMorMMX_MEM, imm8, preCode); + } + void opMovXMM(const Operand& op1, const Operand& op2, int code, int pref) + { + if (pref != NONE) db(pref); + if (op1.isXMM() && op2.isMEM()) { + opModM(static_cast(op2), static_cast(op1), 0x0F, code); + } else if (op1.isMEM() && op2.isXMM()) { + opModM(static_cast(op1), static_cast(op2), 0x0F, code | 1); + } else { + throw Error(ERR_BAD_COMBINATION); + } + } + void opExt(const Operand& op, const Mmx& mmx, int code, int imm, bool hasMMX2 = false) + { + if (hasMMX2 && op.isREG(i32e)) { /* pextrw is special */ + if (mmx.isXMM()) db(0x66); + opModR(static_cast(op), mmx, 0x0F, B11000101); db(imm); + } else { + opGen(mmx, op, code, 0x66, isXMM_REG32orMEM, imm, B00111010); + } + } + void opR_ModM(const Operand& op, int bit, int ext, int code0, int code1 = NONE, int code2 = NONE, bool disableRex = false) + { + int opBit = op.getBit(); + if (disableRex && opBit == 64) opBit = 32; + if (op.isREG(bit)) { + opModR(Reg(ext, Operand::REG, opBit), static_cast(op).changeBit(opBit), code0, code1, code2); + } else if (op.isMEM()) { + opModM(static_cast(op), Reg(ext, Operand::REG, opBit), code0, code1, code2); + } else { + throw Error(ERR_BAD_COMBINATION); + } + } + void opShift(const Operand& op, int imm, int ext) + { + verifyMemHasSize(op); + opR_ModM(op, 0, ext, (B11000000 | ((imm == 1 ? 
1 : 0) << 4))); + if (imm != 1) db(imm); + } + void opShift(const Operand& op, const Reg8& cl, int ext) + { + if (cl.getIdx() != Operand::CL) throw Error(ERR_BAD_COMBINATION); + opR_ModM(op, 0, ext, B11010010); + } + void opModRM(const Operand& op1, const Operand& op2, bool condR, bool condM, int code0, int code1 = NONE, int code2 = NONE) + { + if (condR) { + opModR(static_cast(op1), static_cast(op2), code0, code1, code2); + } else if (condM) { + opModM(static_cast(op2), static_cast(op1), code0, code1, code2); + } else { + throw Error(ERR_BAD_COMBINATION); + } + } + void opShxd(const Operand& op, const Reg& reg, uint8 imm, int code, const Reg8 *cl = 0) + { + if (cl && cl->getIdx() != Operand::CL) throw Error(ERR_BAD_COMBINATION); + opModRM(reg, op, (op.isREG(16 | i32e) && op.getBit() == reg.getBit()), op.isMEM() && (reg.isREG(16 | i32e)), 0x0F, code | (cl ? 1 : 0)); + if (!cl) db(imm); + } + // (REG, REG|MEM), (MEM, REG) + void opRM_RM(const Operand& op1, const Operand& op2, int code) + { + if (op1.isREG() && op2.isMEM()) { + opModM(static_cast(op2), static_cast(op1), code | 2); + } else { + opModRM(op2, op1, op1.isREG() && op1.getKind() == op2.getKind(), op1.isMEM() && op2.isREG(), code); + } + } + // (REG|MEM, IMM) + void opRM_I(const Operand& op, uint32 imm, int code, int ext) + { + verifyMemHasSize(op); + uint32 immBit = inner::IsInDisp8(imm) ? 8 : isInDisp16(imm) ? 16 : 32; + if (op.isBit(8)) immBit = 8; + if (op.getBit() < immBit) throw Error(ERR_IMM_IS_TOO_BIG); + if (op.isBit(32|64) && immBit == 16) immBit = 32; /* don't use MEM16 if 32/64bit mode */ + if (op.isREG() && op.getIdx() == 0 && (op.getBit() == immBit || (op.isBit(64) && immBit == 32))) { // rax, eax, ax, al + rex(op); + db(code | 4 | (immBit == 8 ? 0 : 1)); + } else { + int tmp = immBit < (std::min)(op.getBit(), 32U) ? 
2 : 0; + opR_ModM(op, 0, ext, B10000000 | tmp); + } + db(imm, immBit / 8); + } + void opIncDec(const Operand& op, int code, int ext) + { + verifyMemHasSize(op); +#ifndef XBYAK64 + if (op.isREG() && !op.isBit(8)) { + rex(op); db(code | op.getIdx()); + return; + } +#endif + code = B11111110; + if (op.isREG()) { + opModR(Reg(ext, Operand::REG, op.getBit()), static_cast(op), code); + } else { + opModM(static_cast(op), Reg(ext, Operand::REG, op.getBit()), code); + } + } + void opPushPop(const Operand& op, int code, int ext, int alt) + { + if (op.isREG()) { + if (op.isBit(16)) db(0x66); + if (static_cast(op).getIdx() >= 8) db(0x41); + db(alt | (op.getIdx() & 7)); + } else if (op.isMEM()) { + opModM(static_cast(op), Reg(ext, Operand::REG, op.getBit()), code); + } else { + throw Error(ERR_BAD_COMBINATION); + } + } + void verifyMemHasSize(const Operand& op) const + { + if (op.isMEM() && op.getBit() == 0) throw Error(ERR_MEM_SIZE_IS_NOT_SPECIFIED); + } + void opMovxx(const Reg& reg, const Operand& op, uint8 code) + { + if (op.isBit(32)) throw Error(ERR_BAD_COMBINATION); + int w = op.isBit(16); +#ifdef XBYAK64 + if (op.isHigh8bit()) throw Error(ERR_BAD_COMBINATION); +#endif + bool cond = reg.isREG() && (reg.getBit() > op.getBit()); + opModRM(reg, op, cond && op.isREG(), cond && op.isMEM(), 0x0F, code | w); + } + void opFpuMem(const Address& addr, uint8 m16, uint8 m32, uint8 m64, uint8 ext, uint8 m64ext) + { + if (addr.is64bitDisp()) throw Error(ERR_CANT_USE_64BIT_DISP); + uint8 code = addr.isBit(16) ? m16 : addr.isBit(32) ? m32 : addr.isBit(64) ? m64 : 0; + if (!code) throw Error(ERR_BAD_MEM_SIZE); + if (m64ext && addr.isBit(64)) ext = m64ext; + + rex(addr, st0); + db(code); + addr.updateRegField(ext); + db(addr.getCode(), static_cast(addr.getSize())); + } + // use code1 if reg1 == st0 + // use code2 if reg1 != st0 && reg2 == st0 + void opFpuFpu(const Fpu& reg1, const Fpu& reg2, uint32 code1, uint32 code2) + { + uint32 code = reg1.getIdx() == 0 ? code1 : reg2.getIdx() == 0 ? 
code2 : 0; + if (!code) throw Error(ERR_BAD_ST_COMBINATION); + db(uint8(code >> 8)); + db(uint8(code | (reg1.getIdx() | reg2.getIdx()))); + } + void opFpu(const Fpu& reg, uint8 code1, uint8 code2) + { + db(code1); db(code2 | reg.getIdx()); + } + void opVex(const Reg& r, const Operand *p1, const Operand *p2, int type, int code, int w) + { + bool x, b; + if (p2->isMEM()) { + const Address& addr = static_cast(*p2); + uint8 rex = addr.getRex(); + x = (rex & 2) != 0; + b = (rex & 1) != 0; + if (BIT == 64 && addr.is32bit()) db(0x67); + if (BIT == 64 && w == -1) w = (rex & 4) ? 1 : 0; + } else { + x = false; + b = static_cast(*p2).isExtIdx(); + } + if (w == -1) w = 0; + vex(r.isExtIdx(), p1 ? p1->getIdx() : 0, r.isYMM(), type, x, b, w); + db(code); + if (p2->isMEM()) { + const Address& addr = static_cast(*p2); + addr.updateRegField(static_cast(r.getIdx())); + db(addr.getCode(), static_cast(addr.getSize())); + } else { + db(getModRM(3, r.getIdx(), p2->getIdx())); + } + } + // (r, r, r/m) if isR_R_RM + // (r, r/m, r) + void opGpr(const Reg32e& r, const Operand& op1, const Operand& op2, int type, uint8 code, bool isR_R_RM) + { + const Operand *p1 = &op1; + const Operand *p2 = &op2; + if (!isR_R_RM) std::swap(p1, p2); + const unsigned int bit = r.getBit(); + if (p1->getBit() != bit || (p2->isREG() && p2->getBit() != bit)) throw Error(ERR_BAD_COMBINATION); + int w = bit == 64; + opVex(r, p1, p2, type, code, w); + } + void opAVX_X_X_XM(const Xmm& x1, const Operand& op1, const Operand& op2, int type, int code0, bool supportYMM, int w = -1) + { + const Xmm *x2; + const Operand *op; + if (op2.isNone()) { + x2 = &x1; + op = &op1; + } else { + if (!(op1.isXMM() || (supportYMM && op1.isYMM()))) throw Error(ERR_BAD_COMBINATION); + x2 = static_cast(&op1); + op = &op2; + } + // (x1, x2, op) + if (!((x1.isXMM() && x2->isXMM()) || (supportYMM && x1.isYMM() && x2->isYMM()))) throw Error(ERR_BAD_COMBINATION); + opVex(x1, x2, op, type, code0, w); + } + // if cvt then return pointer to 
Xmm(idx) (or Ymm(idx)), otherwise return op + void opAVX_X_X_XMcvt(const Xmm& x1, const Operand& op1, const Operand& op2, bool cvt, Operand::Kind kind, int type, int code0, bool supportYMM, int w = -1) + { + // use static_cast to avoid calling unintentional copy constructor on gcc + opAVX_X_X_XM(x1, op1, cvt ? kind == Operand::XMM ? static_cast(Xmm(op2.getIdx())) : static_cast(Ymm(op2.getIdx())) : op2, type, code0, supportYMM, w); + } + // support (x, x/m, imm), (y, y/m, imm) + void opAVX_X_XM_IMM(const Xmm& x, const Operand& op, int type, int code, bool supportYMM, int w = -1, int imm = NONE) + { + opAVX_X_X_XM(x, x.isXMM() ? xm0 : ym0, op, type, code, supportYMM, w); if (imm != NONE) db((uint8)imm); + } + // QQQ:need to refactor + void opSp1(const Reg& reg, const Operand& op, uint8 pref, uint8 code0, uint8 code1) + { + if (reg.isBit(8)) throw Error(ERR_BAD_SIZE_OF_REGISTER); + bool is16bit = reg.isREG(16) && (op.isREG(16) || op.isMEM()); + if (!is16bit && !(reg.isREG(i32e) && (op.isREG(reg.getBit()) || op.isMEM()))) throw Error(ERR_BAD_COMBINATION); + if (is16bit) db(0x66); + db(pref); opModRM(reg.changeBit(i32e == 32 ? 32 : reg.getBit()), op, op.isREG(), true, code0, code1); + } + void opGather(const Xmm& x1, const Address& addr, const Xmm& x2, int type, uint8 code, int w, int mode) + { + if (!addr.isVsib()) throw Error(ERR_BAD_VSIB_ADDRESSING); + const int y_vx_y = 0; + const int y_vy_y = 1; +// const int x_vy_x = 2; + const bool isAddrYMM = addr.isYMM(); + if (!x1.isXMM() || isAddrYMM || !x2.isXMM()) { + bool isOK = false; + if (mode == y_vx_y) { + isOK = x1.isYMM() && !isAddrYMM && x2.isYMM(); + } else if (mode == y_vy_y) { + isOK = x1.isYMM() && isAddrYMM && x2.isYMM(); + } else { // x_vy_x + isOK = !x1.isYMM() && isAddrYMM && !x2.isYMM(); + } + if (!isOK) throw Error(ERR_BAD_VSIB_ADDRESSING); + } + addr.setVsib(false); + opAVX_X_X_XM(isAddrYMM ? Ymm(x1.getIdx()) : x1, isAddrYMM ? 
Ymm(x2.getIdx()) : x2, addr, type, code, true, w); + addr.setVsib(true); + } +public: + unsigned int getVersion() const { return VERSION; } + using CodeArray::db; + const Mmx mm0, mm1, mm2, mm3, mm4, mm5, mm6, mm7; + const Xmm xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7; + const Ymm ymm0, ymm1, ymm2, ymm3, ymm4, ymm5, ymm6, ymm7; + const Xmm &xm0, &xm1, &xm2, &xm3, &xm4, &xm5, &xm6, &xm7; + const Ymm &ym0, &ym1, &ym2, &ym3, &ym4, &ym5, &ym6, &ym7; + const Reg32 eax, ecx, edx, ebx, esp, ebp, esi, edi; + const Reg16 ax, cx, dx, bx, sp, bp, si, di; + const Reg8 al, cl, dl, bl, ah, ch, dh, bh; + const AddressFrame ptr, byte, word, dword, qword; + const Fpu st0, st1, st2, st3, st4, st5, st6, st7; +#ifdef XBYAK64 + const Reg64 rax, rcx, rdx, rbx, rsp, rbp, rsi, rdi, r8, r9, r10, r11, r12, r13, r14, r15; + const Reg32 r8d, r9d, r10d, r11d, r12d, r13d, r14d, r15d; + const Reg16 r8w, r9w, r10w, r11w, r12w, r13w, r14w, r15w; + const Reg8 r8b, r9b, r10b, r11b, r12b, r13b, r14b, r15b; + const Reg8 spl, bpl, sil, dil; + const Xmm xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15; + const Ymm ymm8, ymm9, ymm10, ymm11, ymm12, ymm13, ymm14, ymm15; + const Xmm &xm8, &xm9, &xm10, &xm11, &xm12, &xm13, &xm14, &xm15; // for my convenience + const Ymm &ym8, &ym9, &ym10, &ym11, &ym12, &ym13, &ym14, &ym15; + const RegRip rip; +#endif + void L(const std::string& label) { labelMgr_.defineSlabel(label); } + void L(const Label& label) { labelMgr_.defineClabel(label); } + /* + assign src to dst + require + dst : does not used by L() + src : used by L() + */ + void assignL(Label& dst, const Label& src) { labelMgr_.assign(dst, src); } + void inLocalLabel() { labelMgr_.enterLocal(); } + void outLocalLabel() { labelMgr_.leaveLocal(); } + void jmp(std::string label, LabelType type = T_AUTO) + { + opJmp(label, type, B11101011, B11101001, 0); + } + void jmp(const Label& label, LabelType type = T_AUTO) + { + opJmp(label, type, B11101011, B11101001, 0); + } + void jmp(const char *label, LabelType 
type = T_AUTO) { jmp(std::string(label), type); } + void jmp(const void *addr, LabelType type = T_AUTO) + { + opJmpAbs(addr, type, B11101011, B11101001); + } + void jmp(const Operand& op) + { + opR_ModM(op, BIT, 4, 0xFF, NONE, NONE, true); + } + void call(const Operand& op) + { + opR_ModM(op, 16 | i32e, 2, 0xFF, NONE, NONE, true); + } + // (REG|MEM, REG) + void test(const Operand& op, const Reg& reg) + { + opModRM(reg, op, op.isREG() && (op.getKind() == reg.getKind()), op.isMEM(), B10000100); + } + // (REG|MEM, IMM) + void test(const Operand& op, uint32 imm) + { + verifyMemHasSize(op); + if (op.isREG() && op.getIdx() == 0) { // al, ax, eax + rex(op); + db(B10101000 | (op.isBit(8) ? 0 : 1)); + } else { + opR_ModM(op, 0, 0, B11110110); + } + db(imm, (std::min)(op.getBit() / 8, 4U)); + } + void ret(int imm = 0) + { + if (imm) { + db(B11000010); dw(imm); + } else { + db(B11000011); + } + } + // (REG16|REG32, REG16|REG32|MEM) + void imul(const Reg& reg, const Operand& op) + { + opModRM(reg, op, op.isREG() && (reg.getKind() == op.getKind()), op.isMEM(), 0x0F, B10101111); + } + void imul(const Reg& reg, const Operand& op, int imm) + { + int s = inner::IsInDisp8(imm) ? 1 : 0; + opModRM(reg, op, op.isREG() && (reg.getKind() == op.getKind()), op.isMEM(), B01101001 | (s << 1)); + int size = s ? 1 : reg.isREG(16) ? 
2 : 4; + db(imm, size); + } + void pop(const Operand& op) + { + opPushPop(op, B10001111, 0, B01011000); + } + void push(const Operand& op) + { + opPushPop(op, B11111111, 6, B01010000); + } + void push(const AddressFrame& af, uint32 imm) + { + if (af.bit_ == 8 && inner::IsInDisp8(imm)) { + db(B01101010); db(imm); + } else if (af.bit_ == 16 && isInDisp16(imm)) { + db(0x66); db(B01101000); dw(imm); + } else { + db(B01101000); dd(imm); + } + } + /* use "push(word, 4)" if you want "push word 4" */ + void push(uint32 imm) + { + if (inner::IsInDisp8(imm)) { + push(byte, imm); + } else { + push(dword, imm); + } + } + void bswap(const Reg32e& reg) + { + opModR(Reg32(1), reg, 0x0F); + } + void mov(const Operand& reg1, const Operand& reg2) + { + const Reg *reg = 0; + const Address *addr = 0; + uint8 code = 0; + if (reg1.isREG() && reg1.getIdx() == 0 && reg2.isMEM()) { // mov eax|ax|al, [disp] + reg = &static_cast(reg1); + addr= &static_cast(reg2); + code = B10100000; + } else + if (reg1.isMEM() && reg2.isREG() && reg2.getIdx() == 0) { // mov [disp], eax|ax|al + reg = &static_cast(reg2); + addr= &static_cast(reg1); + code = B10100010; + } +#ifdef XBYAK64 + if (addr && addr->is64bitDisp()) { + if (code) { + rex(*reg); + db(reg1.isREG(8) ? 0xA0 : reg1.isREG() ? 0xA1 : reg2.isREG(8) ? 0xA2 : 0xA3); + db(addr->getDisp(), 8); + } else { + throw Error(ERR_BAD_COMBINATION); + } + } else +#else + if (code && addr->isOnlyDisp()) { + rex(*reg, *addr); + db(code | (reg->isBit(8) ? 0 : 1)); + dd(static_cast(addr->getDisp())); + } else +#endif + { + opRM_RM(reg1, reg2, B10001000); + } + } +private: + /* + mov(r, imm) = db(imm, mov_imm(r, imm)) + */ + int mov_imm(const Reg& reg, size_t imm) + { + int bit = reg.getBit(); + const int idx = reg.getIdx(); + int code = B10110000 | ((bit == 8 ? 
0 : 1) << 3); + if (bit == 64 && (imm & ~size_t(0xffffffffu)) == 0) { + rex(Reg32(idx)); + bit = 32; + } else { + rex(reg); + if (bit == 64 && inner::IsInInt32(imm)) { + db(B11000111); + code = B11000000; + bit = 32; + } + } + db(code | (idx & 7)); + return bit / 8; + } + template + void putL_inner(T& label) + { + const int jmpSize = (int)sizeof(size_t); + if (isAutoGrow() && size_ + 16 >= maxSize_) growMemory(); + size_t offset = 0; + if (labelMgr_.getOffset(&offset, label)) { + if (isAutoGrow()) { + db(uint64(0), jmpSize); + save(size_ - jmpSize, offset, jmpSize, inner::LaddTop); + } else { + db(size_t(top_) + offset, jmpSize); + } + return; + } + db(uint64(0), jmpSize); + JmpLabel jmp; + jmp.endOfJmp = size_; + jmp.jmpSize = jmpSize; + jmp.mode = isAutoGrow() ? inner::LaddTop : inner::Labs; + labelMgr_.addUndefinedLabel(label, jmp); + } +public: + void mov(const Operand& op, size_t imm) + { + verifyMemHasSize(op); + if (op.isREG()) { + const int size = mov_imm(static_cast(op), imm); + db(imm, size); + } else if (op.isMEM()) { + opModM(static_cast(op), Reg(0, Operand::REG, op.getBit()), B11000110); + int size = op.getBit() / 8; if (size > 4) size = 4; + db(static_cast(imm), size); + } else { + throw Error(ERR_BAD_COMBINATION); + } + } + void mov(const NativeReg& reg, const char *label) // can't use std::string + { + if (label == 0) { + mov(static_cast(reg), 0); // call imm + return; + } + mov_imm(reg, dummyAddr); + putL(label); + } + void mov(const NativeReg& reg, const Label& label) + { + mov_imm(reg, dummyAddr); + putL(label); + } + /* + put address of label to buffer + @note the put size is 4(32-bit), 8(64-bit) + */ + void putL(std::string label) { putL_inner(label); } + void putL(const Label& label) { putL_inner(label); } + void adcx(const Reg32e& reg, const Operand& op) { opGen(reg, op, 0xF6, 0x66, isREG32_REG32orMEM, NONE, 0x38); } + void adox(const Reg32e& reg, const Operand& op) { opGen(reg, op, 0xF6, 0xF3, isREG32_REG32orMEM, NONE, 0x38); } + void 
cmpxchg8b(const Address& addr) { opModM(addr, Reg32(1), 0x0F, B11000111); } +#ifdef XBYAK64 + void cmpxchg16b(const Address& addr) { opModM(addr, Reg64(1), 0x0F, B11000111); } +#endif + void xadd(const Operand& op, const Reg& reg) + { + opModRM(reg, op, (op.isREG() && reg.isREG() && op.getBit() == reg.getBit()), op.isMEM(), 0x0F, B11000000 | (reg.isBit(8) ? 0 : 1)); + } + void cmpxchg(const Operand& op, const Reg& reg) + { + opModRM(reg, op, (op.isREG() && reg.isREG() && op.getBit() == reg.getBit()), op.isMEM(), 0x0F, 0xb0 | (reg.isBit(8) ? 0 : 1)); + } + void xchg(const Operand& op1, const Operand& op2) + { + const Operand *p1 = &op1, *p2 = &op2; + if (p1->isMEM() || (p2->isREG(16 | i32e) && p2->getIdx() == 0)) { + p1 = &op2; p2 = &op1; + } + if (p1->isMEM()) throw Error(ERR_BAD_COMBINATION); + if (p2->isREG() && (p1->isREG(16 | i32e) && p1->getIdx() == 0) +#ifdef XBYAK64 + && (p2->getIdx() != 0 || !p1->isREG(32)) +#endif + ) { + rex(*p2, *p1); db(0x90 | (p2->getIdx() & 7)); + return; + } + opModRM(*p1, *p2, (p1->isREG() && p2->isREG() && (p1->getBit() == p2->getBit())), p2->isMEM(), B10000110 | (p1->isBit(8) ? 
0 : 1)); + } + void call(std::string label) { opJmp(label, T_NEAR, 0, B11101000, 0); } + // call(string label) + void call(const char *label) { call(std::string(label)); } + void call(const Label& label) { opJmp(label, T_NEAR, 0, B11101000, 0); } + // call(function pointer) + void call(const void *addr) { opJmpAbs(addr, T_NEAR, 0, B11101000); } + // special case + void movd(const Address& addr, const Mmx& mmx) + { + if (mmx.isXMM()) db(0x66); + opModM(addr, mmx, 0x0F, B01111110); + } + void movd(const Reg32& reg, const Mmx& mmx) + { + if (mmx.isXMM()) db(0x66); + opModR(mmx, reg, 0x0F, B01111110); + } + void movd(const Mmx& mmx, const Address& addr) + { + if (mmx.isXMM()) db(0x66); + opModM(addr, mmx, 0x0F, B01101110); + } + void movd(const Mmx& mmx, const Reg32& reg) + { + if (mmx.isXMM()) db(0x66); + opModR(mmx, reg, 0x0F, B01101110); + } + void movq2dq(const Xmm& xmm, const Mmx& mmx) + { + db(0xF3); opModR(xmm, mmx, 0x0F, B11010110); + } + void movdq2q(const Mmx& mmx, const Xmm& xmm) + { + db(0xF2); opModR(mmx, xmm, 0x0F, B11010110); + } + void movq(const Mmx& mmx, const Operand& op) + { + if (mmx.isXMM()) db(0xF3); + opModRM(mmx, op, (mmx.getKind() == op.getKind()), op.isMEM(), 0x0F, mmx.isXMM() ? B01111110 : B01101111); + } + void movq(const Address& addr, const Mmx& mmx) + { + if (mmx.isXMM()) db(0x66); + opModM(addr, mmx, 0x0F, mmx.isXMM() ? 
B11010110 : B01111111); + } +#ifdef XBYAK64 + void movq(const Reg64& reg, const Mmx& mmx) + { + if (mmx.isXMM()) db(0x66); + opModR(mmx, reg, 0x0F, B01111110); + } + void movq(const Mmx& mmx, const Reg64& reg) + { + if (mmx.isXMM()) db(0x66); + opModR(mmx, reg, 0x0F, B01101110); + } + void pextrq(const Operand& op, const Xmm& xmm, uint8 imm) + { + if (!op.isREG(64) && !op.isMEM()) throw Error(ERR_BAD_COMBINATION); + opGen(Reg64(xmm.getIdx()), op, 0x16, 0x66, 0, imm, B00111010); // force to 64bit + } + void pinsrq(const Xmm& xmm, const Operand& op, uint8 imm) + { + if (!op.isREG(64) && !op.isMEM()) throw Error(ERR_BAD_COMBINATION); + opGen(Reg64(xmm.getIdx()), op, 0x22, 0x66, 0, imm, B00111010); // force to 64bit + } + void movsxd(const Reg64& reg, const Operand& op) + { + if (!op.isBit(32)) throw Error(ERR_BAD_COMBINATION); + opModRM(reg, op, op.isREG(), op.isMEM(), 0x63); + } +#endif + // MMX2 : pextrw : reg, mmx/xmm, imm + // SSE4 : pextrw, pextrb, pextrd, extractps : reg/mem, mmx/xmm, imm + void pextrw(const Operand& op, const Mmx& xmm, uint8 imm) { opExt(op, xmm, 0x15, imm, true); } + void pextrb(const Operand& op, const Xmm& xmm, uint8 imm) { opExt(op, xmm, 0x14, imm); } + void pextrd(const Operand& op, const Xmm& xmm, uint8 imm) { opExt(op, xmm, 0x16, imm); } + void extractps(const Operand& op, const Xmm& xmm, uint8 imm) { opExt(op, xmm, 0x17, imm); } + void pinsrw(const Mmx& mmx, const Operand& op, int imm) + { + if (!op.isREG(32) && !op.isMEM()) throw Error(ERR_BAD_COMBINATION); + opGen(mmx, op, B11000100, mmx.isXMM() ? 
0x66 : NONE, 0, imm); + } + void insertps(const Xmm& xmm, const Operand& op, uint8 imm) { opGen(xmm, op, 0x21, 0x66, isXMM_XMMorMEM, imm, B00111010); } + void pinsrb(const Xmm& xmm, const Operand& op, uint8 imm) { opGen(xmm, op, 0x20, 0x66, isXMM_REG32orMEM, imm, B00111010); } + void pinsrd(const Xmm& xmm, const Operand& op, uint8 imm) { opGen(xmm, op, 0x22, 0x66, isXMM_REG32orMEM, imm, B00111010); } + + void pmovmskb(const Reg32e& reg, const Mmx& mmx) + { + if (mmx.isXMM()) db(0x66); + opModR(reg, mmx, 0x0F, B11010111); + } + void maskmovq(const Mmx& reg1, const Mmx& reg2) + { + if (!reg1.isMMX() || !reg2.isMMX()) throw Error(ERR_BAD_COMBINATION); + opModR(reg1, reg2, 0x0F, B11110111); + } + void lea(const Reg32e& reg, const Address& addr) { opModM(addr, reg, B10001101); } + + void movmskps(const Reg32e& reg, const Xmm& xmm) { opModR(reg, xmm, 0x0F, B01010000); } + void movmskpd(const Reg32e& reg, const Xmm& xmm) { db(0x66); movmskps(reg, xmm); } + void movntps(const Address& addr, const Xmm& xmm) { opModM(addr, Mmx(xmm.getIdx()), 0x0F, B00101011); } + void movntdqa(const Xmm& xmm, const Address& addr) { db(0x66); opModM(addr, xmm, 0x0F, 0x38, 0x2A); } + void lddqu(const Xmm& xmm, const Address& addr) { db(0xF2); opModM(addr, xmm, 0x0F, B11110000); } + void movnti(const Address& addr, const Reg32e& reg) { opModM(addr, reg, 0x0F, B11000011); } + void movntq(const Address& addr, const Mmx& mmx) + { + if (!mmx.isMMX()) throw Error(ERR_BAD_COMBINATION); + opModM(addr, mmx, 0x0F, B11100111); + } + void crc32(const Reg32e& reg, const Operand& op) + { + if (reg.isBit(32) && op.isBit(16)) db(0x66); + db(0xF2); + opModRM(reg, op, op.isREG(), op.isMEM(), 0x0F, 0x38, 0xF0 | (op.isBit(8) ? 
0 : 1)); + } + void rdrand(const Reg& r) { if (r.isBit(8)) throw Error(ERR_BAD_SIZE_OF_REGISTER); opModR(Reg(6, Operand::REG, r.getBit()), r, 0x0f, 0xc7); } + void rdseed(const Reg& r) { if (r.isBit(8)) throw Error(ERR_BAD_SIZE_OF_REGISTER); opModR(Reg(7, Operand::REG, r.getBit()), r, 0x0f, 0xc7); } + void rorx(const Reg32e& r, const Operand& op, uint8 imm) { opGpr(r, op, Reg32e(0, r.getBit()), MM_0F3A | PP_F2, 0xF0, false); db(imm); } + enum { NONE = 256 }; + CodeGenerator(size_t maxSize = DEFAULT_MAX_CODE_SIZE, void *userPtr = 0, Allocator *allocator = 0) + : CodeArray(maxSize, userPtr, allocator) + , mm0(0), mm1(1), mm2(2), mm3(3), mm4(4), mm5(5), mm6(6), mm7(7) + , xmm0(0), xmm1(1), xmm2(2), xmm3(3), xmm4(4), xmm5(5), xmm6(6), xmm7(7) + , ymm0(0), ymm1(1), ymm2(2), ymm3(3), ymm4(4), ymm5(5), ymm6(6), ymm7(7) + , xm0(xmm0), xm1(xmm1), xm2(xmm2), xm3(xmm3), xm4(xmm4), xm5(xmm5), xm6(xmm6), xm7(xmm7) // for my convenience + , ym0(ymm0), ym1(ymm1), ym2(ymm2), ym3(ymm3), ym4(ymm4), ym5(ymm5), ym6(ymm6), ym7(ymm7) // for my convenience + , eax(Operand::EAX), ecx(Operand::ECX), edx(Operand::EDX), ebx(Operand::EBX), esp(Operand::ESP), ebp(Operand::EBP), esi(Operand::ESI), edi(Operand::EDI) + , ax(Operand::AX), cx(Operand::CX), dx(Operand::DX), bx(Operand::BX), sp(Operand::SP), bp(Operand::BP), si(Operand::SI), di(Operand::DI) + , al(Operand::AL), cl(Operand::CL), dl(Operand::DL), bl(Operand::BL), ah(Operand::AH), ch(Operand::CH), dh(Operand::DH), bh(Operand::BH) + , ptr(0), byte(8), word(16), dword(32), qword(64) + , st0(0), st1(1), st2(2), st3(3), st4(4), st5(5), st6(6), st7(7) +#ifdef XBYAK64 + , rax(Operand::RAX), rcx(Operand::RCX), rdx(Operand::RDX), rbx(Operand::RBX), rsp(Operand::RSP), rbp(Operand::RBP), rsi(Operand::RSI), rdi(Operand::RDI), r8(Operand::R8), r9(Operand::R9), r10(Operand::R10), r11(Operand::R11), r12(Operand::R12), r13(Operand::R13), r14(Operand::R14), r15(Operand::R15) + , r8d(Operand::R8D), r9d(Operand::R9D), r10d(Operand::R10D), 
r11d(Operand::R11D), r12d(Operand::R12D), r13d(Operand::R13D), r14d(Operand::R14D), r15d(Operand::R15D) + , r8w(Operand::R8W), r9w(Operand::R9W), r10w(Operand::R10W), r11w(Operand::R11W), r12w(Operand::R12W), r13w(Operand::R13W), r14w(Operand::R14W), r15w(Operand::R15W) + , r8b(Operand::R8B), r9b(Operand::R9B), r10b(Operand::R10B), r11b(Operand::R11B), r12b(Operand::R12B), r13b(Operand::R13B), r14b(Operand::R14B), r15b(Operand::R15B) + , spl(Operand::SPL, true), bpl(Operand::BPL, true), sil(Operand::SIL, true), dil(Operand::DIL, true) + , xmm8(8), xmm9(9), xmm10(10), xmm11(11), xmm12(12), xmm13(13), xmm14(14), xmm15(15) + , ymm8(8), ymm9(9), ymm10(10), ymm11(11), ymm12(12), ymm13(13), ymm14(14), ymm15(15) + , xm8(xmm8), xm9(xmm9), xm10(xmm10), xm11(xmm11), xm12(xmm12), xm13(xmm13), xm14(xmm14), xm15(xmm15) // for my convenience + , ym8(ymm8), ym9(ymm9), ym10(ymm10), ym11(ymm11), ym12(ymm12), ym13(ymm13), ym14(ymm14), ym15(ymm15) // for my convenience + , rip() +#endif + { + labelMgr_.set(this); + } + void reset() + { + resetSize(); + labelMgr_.reset(); + labelMgr_.set(this); + } + bool hasUndefinedLabel() const { return labelMgr_.hasUndefSlabel() || labelMgr_.hasUndefClabel(); } + /* + call ready() to complete generating code on AutoGrow + */ + void ready() + { + if (hasUndefinedLabel()) throw Error(ERR_LABEL_IS_NOT_FOUND); + calcJmpAddress(); + } +#ifdef XBYAK_TEST + void dump(bool doClear = true) + { + CodeArray::dump(); + if (doClear) size_ = 0; + } +#endif + +#ifndef XBYAK_DONT_READ_LIST +#include "xbyak_mnemonic.h" + void align(int x = 16) + { + if (x == 1) return; + if (x < 1 || (x & (x - 1))) throw Error(ERR_BAD_ALIGN); + if (isAutoGrow() && x > (int)inner::ALIGN_PAGE_SIZE) fprintf(stderr, "warning:autoGrow mode does not support %d align\n", x); + while (size_t(getCurr()) % x) { + nop(); + } + } +#endif +}; + +namespace util { +static const Mmx mm0(0), mm1(1), mm2(2), mm3(3), mm4(4), mm5(5), mm6(6), mm7(7); +static const Xmm xmm0(0), xmm1(1), xmm2(2), 
xmm3(3), xmm4(4), xmm5(5), xmm6(6), xmm7(7); +static const Ymm ymm0(0), ymm1(1), ymm2(2), ymm3(3), ymm4(4), ymm5(5), ymm6(6), ymm7(7); +static const Reg32 eax(Operand::EAX), ecx(Operand::ECX), edx(Operand::EDX), ebx(Operand::EBX), esp(Operand::ESP), ebp(Operand::EBP), esi(Operand::ESI), edi(Operand::EDI); +static const Reg16 ax(Operand::AX), cx(Operand::CX), dx(Operand::DX), bx(Operand::BX), sp(Operand::SP), bp(Operand::BP), si(Operand::SI), di(Operand::DI); +static const Reg8 al(Operand::AL), cl(Operand::CL), dl(Operand::DL), bl(Operand::BL), ah(Operand::AH), ch(Operand::CH), dh(Operand::DH), bh(Operand::BH); +static const AddressFrame ptr(0), byte(8), word(16), dword(32), qword(64); +static const Fpu st0(0), st1(1), st2(2), st3(3), st4(4), st5(5), st6(6), st7(7); +#ifdef XBYAK64 +static const Reg64 rax(Operand::RAX), rcx(Operand::RCX), rdx(Operand::RDX), rbx(Operand::RBX), rsp(Operand::RSP), rbp(Operand::RBP), rsi(Operand::RSI), rdi(Operand::RDI), r8(Operand::R8), r9(Operand::R9), r10(Operand::R10), r11(Operand::R11), r12(Operand::R12), r13(Operand::R13), r14(Operand::R14), r15(Operand::R15); +static const Reg32 r8d(Operand::R8D), r9d(Operand::R9D), r10d(Operand::R10D), r11d(Operand::R11D), r12d(Operand::R12D), r13d(Operand::R13D), r14d(Operand::R14D), r15d(Operand::R15D); +static const Reg16 r8w(Operand::R8W), r9w(Operand::R9W), r10w(Operand::R10W), r11w(Operand::R11W), r12w(Operand::R12W), r13w(Operand::R13W), r14w(Operand::R14W), r15w(Operand::R15W); +static const Reg8 r8b(Operand::R8B), r9b(Operand::R9B), r10b(Operand::R10B), r11b(Operand::R11B), r12b(Operand::R12B), r13b(Operand::R13B), r14b(Operand::R14B), r15b(Operand::R15B), spl(Operand::SPL, 1), bpl(Operand::BPL, 1), sil(Operand::SIL, 1), dil(Operand::DIL, 1); +static const Xmm xmm8(8), xmm9(9), xmm10(10), xmm11(11), xmm12(12), xmm13(13), xmm14(14), xmm15(15); +static const Ymm ymm8(8), ymm9(9), ymm10(10), ymm11(11), ymm12(12), ymm13(13), ymm14(14), ymm15(15); +static const RegRip rip; +#endif +} // util 
+ +#ifdef _MSC_VER + #pragma warning(pop) +#endif + +} // end of namespace + +#endif // XBYAK_XBYAK_H_ diff --git a/src/cpu/mips3/x64/xbyak/xbyak_bin2hex.h b/src/cpu/mips3/x64/xbyak/xbyak_bin2hex.h new file mode 100644 index 000000000..69ecdbfed --- /dev/null +++ b/src/cpu/mips3/x64/xbyak/xbyak_bin2hex.h @@ -0,0 +1,258 @@ +enum { + B00000000= 0, + B00000001= 1, + B00000010= 2, + B00000011= 3, + B00000100= 4, + B00000101= 5, + B00000110= 6, + B00000111= 7, + B00001000= 8, + B00001001= 9, + B00001010= 10, + B00001011= 11, + B00001100= 12, + B00001101= 13, + B00001110= 14, + B00001111= 15, + B00010000= 16, + B00010001= 17, + B00010010= 18, + B00010011= 19, + B00010100= 20, + B00010101= 21, + B00010110= 22, + B00010111= 23, + B00011000= 24, + B00011001= 25, + B00011010= 26, + B00011011= 27, + B00011100= 28, + B00011101= 29, + B00011110= 30, + B00011111= 31, + B00100000= 32, + B00100001= 33, + B00100010= 34, + B00100011= 35, + B00100100= 36, + B00100101= 37, + B00100110= 38, + B00100111= 39, + B00101000= 40, + B00101001= 41, + B00101010= 42, + B00101011= 43, + B00101100= 44, + B00101101= 45, + B00101110= 46, + B00101111= 47, + B00110000= 48, + B00110001= 49, + B00110010= 50, + B00110011= 51, + B00110100= 52, + B00110101= 53, + B00110110= 54, + B00110111= 55, + B00111000= 56, + B00111001= 57, + B00111010= 58, + B00111011= 59, + B00111100= 60, + B00111101= 61, + B00111110= 62, + B00111111= 63, + B01000000= 64, + B01000001= 65, + B01000010= 66, + B01000011= 67, + B01000100= 68, + B01000101= 69, + B01000110= 70, + B01000111= 71, + B01001000= 72, + B01001001= 73, + B01001010= 74, + B01001011= 75, + B01001100= 76, + B01001101= 77, + B01001110= 78, + B01001111= 79, + B01010000= 80, + B01010001= 81, + B01010010= 82, + B01010011= 83, + B01010100= 84, + B01010101= 85, + B01010110= 86, + B01010111= 87, + B01011000= 88, + B01011001= 89, + B01011010= 90, + B01011011= 91, + B01011100= 92, + B01011101= 93, + B01011110= 94, + B01011111= 95, + B01100000= 96, + B01100001= 97, + 
B01100010= 98, + B01100011= 99, + B01100100= 100, + B01100101= 101, + B01100110= 102, + B01100111= 103, + B01101000= 104, + B01101001= 105, + B01101010= 106, + B01101011= 107, + B01101100= 108, + B01101101= 109, + B01101110= 110, + B01101111= 111, + B01110000= 112, + B01110001= 113, + B01110010= 114, + B01110011= 115, + B01110100= 116, + B01110101= 117, + B01110110= 118, + B01110111= 119, + B01111000= 120, + B01111001= 121, + B01111010= 122, + B01111011= 123, + B01111100= 124, + B01111101= 125, + B01111110= 126, + B01111111= 127, + B10000000= 128, + B10000001= 129, + B10000010= 130, + B10000011= 131, + B10000100= 132, + B10000101= 133, + B10000110= 134, + B10000111= 135, + B10001000= 136, + B10001001= 137, + B10001010= 138, + B10001011= 139, + B10001100= 140, + B10001101= 141, + B10001110= 142, + B10001111= 143, + B10010000= 144, + B10010001= 145, + B10010010= 146, + B10010011= 147, + B10010100= 148, + B10010101= 149, + B10010110= 150, + B10010111= 151, + B10011000= 152, + B10011001= 153, + B10011010= 154, + B10011011= 155, + B10011100= 156, + B10011101= 157, + B10011110= 158, + B10011111= 159, + B10100000= 160, + B10100001= 161, + B10100010= 162, + B10100011= 163, + B10100100= 164, + B10100101= 165, + B10100110= 166, + B10100111= 167, + B10101000= 168, + B10101001= 169, + B10101010= 170, + B10101011= 171, + B10101100= 172, + B10101101= 173, + B10101110= 174, + B10101111= 175, + B10110000= 176, + B10110001= 177, + B10110010= 178, + B10110011= 179, + B10110100= 180, + B10110101= 181, + B10110110= 182, + B10110111= 183, + B10111000= 184, + B10111001= 185, + B10111010= 186, + B10111011= 187, + B10111100= 188, + B10111101= 189, + B10111110= 190, + B10111111= 191, + B11000000= 192, + B11000001= 193, + B11000010= 194, + B11000011= 195, + B11000100= 196, + B11000101= 197, + B11000110= 198, + B11000111= 199, + B11001000= 200, + B11001001= 201, + B11001010= 202, + B11001011= 203, + B11001100= 204, + B11001101= 205, + B11001110= 206, + B11001111= 207, + B11010000= 208, + 
B11010001= 209, + B11010010= 210, + B11010011= 211, + B11010100= 212, + B11010101= 213, + B11010110= 214, + B11010111= 215, + B11011000= 216, + B11011001= 217, + B11011010= 218, + B11011011= 219, + B11011100= 220, + B11011101= 221, + B11011110= 222, + B11011111= 223, + B11100000= 224, + B11100001= 225, + B11100010= 226, + B11100011= 227, + B11100100= 228, + B11100101= 229, + B11100110= 230, + B11100111= 231, + B11101000= 232, + B11101001= 233, + B11101010= 234, + B11101011= 235, + B11101100= 236, + B11101101= 237, + B11101110= 238, + B11101111= 239, + B11110000= 240, + B11110001= 241, + B11110010= 242, + B11110011= 243, + B11110100= 244, + B11110101= 245, + B11110110= 246, + B11110111= 247, + B11111000= 248, + B11111001= 249, + B11111010= 250, + B11111011= 251, + B11111100= 252, + B11111101= 253, + B11111110= 254, + B11111111= 255 +}; diff --git a/src/cpu/mips3/x64/xbyak/xbyak_mnemonic.h b/src/cpu/mips3/x64/xbyak/xbyak_mnemonic.h new file mode 100644 index 000000000..d460e4cef --- /dev/null +++ b/src/cpu/mips3/x64/xbyak/xbyak_mnemonic.h @@ -0,0 +1,1461 @@ +const char *getVersionString() const { return "4.71"; } +void packssdw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x6B); } +void packsswb(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x63); } +void packuswb(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x67); } +void pand(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xDB); } +void pandn(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xDF); } +void pmaddwd(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xF5); } +void pmulhuw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xE4); } +void pmulhw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xE5); } +void pmullw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xD5); } +void por(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xEB); } +void punpckhbw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x68); } +void punpckhwd(const Mmx& mmx, const Operand& op) 
{ opMMX(mmx, op, 0x69); } +void punpckhdq(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x6A); } +void punpcklbw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x60); } +void punpcklwd(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x61); } +void punpckldq(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x62); } +void pxor(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xEF); } +void pavgb(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xE0); } +void pavgw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xE3); } +void pmaxsw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xEE); } +void pmaxub(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xDE); } +void pminsw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xEA); } +void pminub(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xDA); } +void psadbw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xF6); } +void paddq(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xD4); } +void pmuludq(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xF4); } +void psubq(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xFB); } +void paddb(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xFC); } +void paddw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xFD); } +void paddd(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xFE); } +void paddsb(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xEC); } +void paddsw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xED); } +void paddusb(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xDC); } +void paddusw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xDD); } +void pcmpeqb(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x74); } +void pcmpeqw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x75); } +void pcmpeqd(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x76); } +void pcmpgtb(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x64); } +void pcmpgtw(const Mmx& mmx, const 
Operand& op) { opMMX(mmx, op, 0x65); } +void pcmpgtd(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x66); } +void psllw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xF1); } +void pslld(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xF2); } +void psllq(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xF3); } +void psraw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xE1); } +void psrad(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xE2); } +void psrlw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xD1); } +void psrld(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xD2); } +void psrlq(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xD3); } +void psubb(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xF8); } +void psubw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xF9); } +void psubd(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xFA); } +void psubsb(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xE8); } +void psubsw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xE9); } +void psubusb(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xD8); } +void psubusw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xD9); } +void psllw(const Mmx& mmx, int imm8) { opMMX_IMM(mmx, imm8, 0x71, 6); } +void pslld(const Mmx& mmx, int imm8) { opMMX_IMM(mmx, imm8, 0x72, 6); } +void psllq(const Mmx& mmx, int imm8) { opMMX_IMM(mmx, imm8, 0x73, 6); } +void psraw(const Mmx& mmx, int imm8) { opMMX_IMM(mmx, imm8, 0x71, 4); } +void psrad(const Mmx& mmx, int imm8) { opMMX_IMM(mmx, imm8, 0x72, 4); } +void psrlw(const Mmx& mmx, int imm8) { opMMX_IMM(mmx, imm8, 0x71, 2); } +void psrld(const Mmx& mmx, int imm8) { opMMX_IMM(mmx, imm8, 0x72, 2); } +void psrlq(const Mmx& mmx, int imm8) { opMMX_IMM(mmx, imm8, 0x73, 2); } +void pslldq(const Xmm& xmm, int imm8) { opMMX_IMM(xmm, imm8, 0x73, 7); } +void psrldq(const Xmm& xmm, int imm8) { opMMX_IMM(xmm, imm8, 0x73, 3); } +void pshufw(const Mmx& mmx, const Operand& op, 
uint8 imm8) { opMMX(mmx, op, 0x70, 0x00, imm8); } +void pshuflw(const Mmx& mmx, const Operand& op, uint8 imm8) { opMMX(mmx, op, 0x70, 0xF2, imm8); } +void pshufhw(const Mmx& mmx, const Operand& op, uint8 imm8) { opMMX(mmx, op, 0x70, 0xF3, imm8); } +void pshufd(const Mmx& mmx, const Operand& op, uint8 imm8) { opMMX(mmx, op, 0x70, 0x66, imm8); } +void movdqa(const Xmm& xmm, const Operand& op) { opMMX(xmm, op, 0x6F, 0x66); } +void movdqa(const Address& addr, const Xmm& xmm) { db(0x66); opModM(addr, xmm, 0x0F, 0x7F); } +void movdqu(const Xmm& xmm, const Operand& op) { opMMX(xmm, op, 0x6F, 0xF3); } +void movdqu(const Address& addr, const Xmm& xmm) { db(0xF3); opModM(addr, xmm, 0x0F, 0x7F); } +void movaps(const Xmm& xmm, const Operand& op) { opMMX(xmm, op, 0x28, 0x100); } +void movaps(const Address& addr, const Xmm& xmm) { opModM(addr, xmm, 0x0F, 0x29); } +void movss(const Xmm& xmm, const Operand& op) { opMMX(xmm, op, 0x10, 0xF3); } +void movss(const Address& addr, const Xmm& xmm) { db(0xF3); opModM(addr, xmm, 0x0F, 0x11); } +void movups(const Xmm& xmm, const Operand& op) { opMMX(xmm, op, 0x10, 0x100); } +void movups(const Address& addr, const Xmm& xmm) { opModM(addr, xmm, 0x0F, 0x11); } +void movapd(const Xmm& xmm, const Operand& op) { opMMX(xmm, op, 0x28, 0x66); } +void movapd(const Address& addr, const Xmm& xmm) { db(0x66); opModM(addr, xmm, 0x0F, 0x29); } +void movsd(const Xmm& xmm, const Operand& op) { opMMX(xmm, op, 0x10, 0xF2); } +void movsd(const Address& addr, const Xmm& xmm) { db(0xF2); opModM(addr, xmm, 0x0F, 0x11); } +void movupd(const Xmm& xmm, const Operand& op) { opMMX(xmm, op, 0x10, 0x66); } +void movupd(const Address& addr, const Xmm& xmm) { db(0x66); opModM(addr, xmm, 0x0F, 0x11); } +void addps(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x58, 0x100, isXMM_XMMorMEM); } +void addss(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x58, 0xF3, isXMM_XMMorMEM); } +void addpd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x58, 0x66, 
isXMM_XMMorMEM); } +void addsd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x58, 0xF2, isXMM_XMMorMEM); } +void andnps(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x55, 0x100, isXMM_XMMorMEM); } +void andnpd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x55, 0x66, isXMM_XMMorMEM); } +void andps(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x54, 0x100, isXMM_XMMorMEM); } +void andpd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x54, 0x66, isXMM_XMMorMEM); } +void cmpps(const Xmm& xmm, const Operand& op, uint8 imm8) { opGen(xmm, op, 0xC2, 0x100, isXMM_XMMorMEM, imm8); } +void cmpss(const Xmm& xmm, const Operand& op, uint8 imm8) { opGen(xmm, op, 0xC2, 0xF3, isXMM_XMMorMEM, imm8); } +void cmppd(const Xmm& xmm, const Operand& op, uint8 imm8) { opGen(xmm, op, 0xC2, 0x66, isXMM_XMMorMEM, imm8); } +void cmpsd(const Xmm& xmm, const Operand& op, uint8 imm8) { opGen(xmm, op, 0xC2, 0xF2, isXMM_XMMorMEM, imm8); } +void divps(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x5E, 0x100, isXMM_XMMorMEM); } +void divss(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x5E, 0xF3, isXMM_XMMorMEM); } +void divpd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x5E, 0x66, isXMM_XMMorMEM); } +void divsd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x5E, 0xF2, isXMM_XMMorMEM); } +void maxps(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x5F, 0x100, isXMM_XMMorMEM); } +void maxss(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x5F, 0xF3, isXMM_XMMorMEM); } +void maxpd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x5F, 0x66, isXMM_XMMorMEM); } +void maxsd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x5F, 0xF2, isXMM_XMMorMEM); } +void minps(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x5D, 0x100, isXMM_XMMorMEM); } +void minss(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x5D, 0xF3, isXMM_XMMorMEM); } +void minpd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x5D, 0x66, isXMM_XMMorMEM); } 
+void minsd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x5D, 0xF2, isXMM_XMMorMEM); } +void mulps(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x59, 0x100, isXMM_XMMorMEM); } +void mulss(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x59, 0xF3, isXMM_XMMorMEM); } +void mulpd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x59, 0x66, isXMM_XMMorMEM); } +void mulsd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x59, 0xF2, isXMM_XMMorMEM); } +void orps(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x56, 0x100, isXMM_XMMorMEM); } +void orpd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x56, 0x66, isXMM_XMMorMEM); } +void rcpps(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x53, 0x100, isXMM_XMMorMEM); } +void rcpss(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x53, 0xF3, isXMM_XMMorMEM); } +void rsqrtps(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x52, 0x100, isXMM_XMMorMEM); } +void rsqrtss(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x52, 0xF3, isXMM_XMMorMEM); } +void shufps(const Xmm& xmm, const Operand& op, uint8 imm8) { opGen(xmm, op, 0xC6, 0x100, isXMM_XMMorMEM, imm8); } +void shufpd(const Xmm& xmm, const Operand& op, uint8 imm8) { opGen(xmm, op, 0xC6, 0x66, isXMM_XMMorMEM, imm8); } +void sqrtps(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x51, 0x100, isXMM_XMMorMEM); } +void sqrtss(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x51, 0xF3, isXMM_XMMorMEM); } +void sqrtpd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x51, 0x66, isXMM_XMMorMEM); } +void sqrtsd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x51, 0xF2, isXMM_XMMorMEM); } +void subps(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x5C, 0x100, isXMM_XMMorMEM); } +void subss(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x5C, 0xF3, isXMM_XMMorMEM); } +void subpd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x5C, 0x66, isXMM_XMMorMEM); } +void subsd(const Xmm& xmm, const Operand& op) { 
opGen(xmm, op, 0x5C, 0xF2, isXMM_XMMorMEM); } +void unpckhps(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x15, 0x100, isXMM_XMMorMEM); } +void unpckhpd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x15, 0x66, isXMM_XMMorMEM); } +void unpcklps(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x14, 0x100, isXMM_XMMorMEM); } +void unpcklpd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x14, 0x66, isXMM_XMMorMEM); } +void xorps(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x57, 0x100, isXMM_XMMorMEM); } +void xorpd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x57, 0x66, isXMM_XMMorMEM); } +void maskmovdqu(const Xmm& reg1, const Xmm& reg2) { db(0x66); opModR(reg1, reg2, 0x0F, 0xF7); } +void movhlps(const Xmm& reg1, const Xmm& reg2) { opModR(reg1, reg2, 0x0F, 0x12); } +void movlhps(const Xmm& reg1, const Xmm& reg2) { opModR(reg1, reg2, 0x0F, 0x16); } +void punpckhqdq(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x6D, 0x66, isXMM_XMMorMEM); } +void punpcklqdq(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x6C, 0x66, isXMM_XMMorMEM); } +void comiss(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x2F, 0x100, isXMM_XMMorMEM); } +void ucomiss(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x2E, 0x100, isXMM_XMMorMEM); } +void comisd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x2F, 0x66, isXMM_XMMorMEM); } +void ucomisd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x2E, 0x66, isXMM_XMMorMEM); } +void cvtpd2ps(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x5A, 0x66, isXMM_XMMorMEM); } +void cvtps2pd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x5A, 0x100, isXMM_XMMorMEM); } +void cvtsd2ss(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x5A, 0xF2, isXMM_XMMorMEM); } +void cvtss2sd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x5A, 0xF3, isXMM_XMMorMEM); } +void cvtpd2dq(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0xE6, 0xF2, isXMM_XMMorMEM); } +void cvttpd2dq(const 
Xmm& xmm, const Operand& op) { opGen(xmm, op, 0xE6, 0x66, isXMM_XMMorMEM); } +void cvtdq2pd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0xE6, 0xF3, isXMM_XMMorMEM); } +void cvtps2dq(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x5B, 0x66, isXMM_XMMorMEM); } +void cvttps2dq(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x5B, 0xF3, isXMM_XMMorMEM); } +void cvtdq2ps(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x5B, 0x100, isXMM_XMMorMEM); } +void addsubpd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0xD0, 0x66, isXMM_XMMorMEM); } +void addsubps(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0xD0, 0xF2, isXMM_XMMorMEM); } +void haddpd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x7C, 0x66, isXMM_XMMorMEM); } +void haddps(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x7C, 0xF2, isXMM_XMMorMEM); } +void hsubpd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x7D, 0x66, isXMM_XMMorMEM); } +void hsubps(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x7D, 0xF2, isXMM_XMMorMEM); } +void movddup(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x12, 0xF2, isXMM_XMMorMEM); } +void movshdup(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x16, 0xF3, isXMM_XMMorMEM); } +void movsldup(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x12, 0xF3, isXMM_XMMorMEM); } +void cvtpi2ps(const Operand& reg, const Operand& op) { opGen(reg, op, 0x2A, 0x100, isXMM_MMXorMEM); } +void cvtps2pi(const Operand& reg, const Operand& op) { opGen(reg, op, 0x2D, 0x100, isMMX_XMMorMEM); } +void cvtsi2ss(const Operand& reg, const Operand& op) { opGen(reg, op, 0x2A, 0xF3, isXMM_REG32orMEM); } +void cvtss2si(const Operand& reg, const Operand& op) { opGen(reg, op, 0x2D, 0xF3, isREG32_XMMorMEM); } +void cvttps2pi(const Operand& reg, const Operand& op) { opGen(reg, op, 0x2C, 0x100, isMMX_XMMorMEM); } +void cvttss2si(const Operand& reg, const Operand& op) { opGen(reg, op, 0x2C, 0xF3, isREG32_XMMorMEM); } +void cvtpi2pd(const Operand& 
reg, const Operand& op) { opGen(reg, op, 0x2A, 0x66, isXMM_MMXorMEM); } +void cvtpd2pi(const Operand& reg, const Operand& op) { opGen(reg, op, 0x2D, 0x66, isMMX_XMMorMEM); } +void cvtsi2sd(const Operand& reg, const Operand& op) { opGen(reg, op, 0x2A, 0xF2, isXMM_REG32orMEM); } +void cvtsd2si(const Operand& reg, const Operand& op) { opGen(reg, op, 0x2D, 0xF2, isREG32_XMMorMEM); } +void cvttpd2pi(const Operand& reg, const Operand& op) { opGen(reg, op, 0x2C, 0x66, isMMX_XMMorMEM); } +void cvttsd2si(const Operand& reg, const Operand& op) { opGen(reg, op, 0x2C, 0xF2, isREG32_XMMorMEM); } +void prefetcht0(const Address& addr) { opModM(addr, Reg32(1), 0x0F, B00011000); } +void prefetcht1(const Address& addr) { opModM(addr, Reg32(2), 0x0F, B00011000); } +void prefetcht2(const Address& addr) { opModM(addr, Reg32(3), 0x0F, B00011000); } +void prefetchnta(const Address& addr) { opModM(addr, Reg32(0), 0x0F, B00011000); } +void movhps(const Operand& op1, const Operand& op2) { opMovXMM(op1, op2, 0x16, 0x100); } +void movlps(const Operand& op1, const Operand& op2) { opMovXMM(op1, op2, 0x12, 0x100); } +void movhpd(const Operand& op1, const Operand& op2) { opMovXMM(op1, op2, 0x16, 0x66); } +void movlpd(const Operand& op1, const Operand& op2) { opMovXMM(op1, op2, 0x12, 0x66); } +void cmovo(const Reg32e& reg, const Operand& op) { opModRM(reg, op, op.isREG(i32e), op.isMEM(), 0x0F, B01000000 | 0); } +void jo(std::string label, LabelType type = T_AUTO) { opJmp(label, type, 0x70, 0x80, 0x0F); } +void jo(const Label& label, LabelType type = T_AUTO) { opJmp(label, type, 0x70, 0x80, 0x0F); } +void seto(const Operand& op) { opR_ModM(op, 8, 0, 0x0F, B10010000 | 0); } +void cmovno(const Reg32e& reg, const Operand& op) { opModRM(reg, op, op.isREG(i32e), op.isMEM(), 0x0F, B01000000 | 1); } +void jno(std::string label, LabelType type = T_AUTO) { opJmp(label, type, 0x71, 0x81, 0x0F); } +void jno(const Label& label, LabelType type = T_AUTO) { opJmp(label, type, 0x71, 0x81, 0x0F); } +void 
setno(const Operand& op) { opR_ModM(op, 8, 0, 0x0F, B10010000 | 1); } +void cmovb(const Reg32e& reg, const Operand& op) { opModRM(reg, op, op.isREG(i32e), op.isMEM(), 0x0F, B01000000 | 2); } +void jb(std::string label, LabelType type = T_AUTO) { opJmp(label, type, 0x72, 0x82, 0x0F); } +void jb(const Label& label, LabelType type = T_AUTO) { opJmp(label, type, 0x72, 0x82, 0x0F); } +void setb(const Operand& op) { opR_ModM(op, 8, 0, 0x0F, B10010000 | 2); } +void cmovc(const Reg32e& reg, const Operand& op) { opModRM(reg, op, op.isREG(i32e), op.isMEM(), 0x0F, B01000000 | 2); } +void jc(std::string label, LabelType type = T_AUTO) { opJmp(label, type, 0x72, 0x82, 0x0F); } +void jc(const Label& label, LabelType type = T_AUTO) { opJmp(label, type, 0x72, 0x82, 0x0F); } +void setc(const Operand& op) { opR_ModM(op, 8, 0, 0x0F, B10010000 | 2); } +void cmovnae(const Reg32e& reg, const Operand& op) { opModRM(reg, op, op.isREG(i32e), op.isMEM(), 0x0F, B01000000 | 2); } +void jnae(std::string label, LabelType type = T_AUTO) { opJmp(label, type, 0x72, 0x82, 0x0F); } +void jnae(const Label& label, LabelType type = T_AUTO) { opJmp(label, type, 0x72, 0x82, 0x0F); } +void setnae(const Operand& op) { opR_ModM(op, 8, 0, 0x0F, B10010000 | 2); } +void cmovnb(const Reg32e& reg, const Operand& op) { opModRM(reg, op, op.isREG(i32e), op.isMEM(), 0x0F, B01000000 | 3); } +void jnb(std::string label, LabelType type = T_AUTO) { opJmp(label, type, 0x73, 0x83, 0x0F); } +void jnb(const Label& label, LabelType type = T_AUTO) { opJmp(label, type, 0x73, 0x83, 0x0F); } +void setnb(const Operand& op) { opR_ModM(op, 8, 0, 0x0F, B10010000 | 3); } +void cmovae(const Reg32e& reg, const Operand& op) { opModRM(reg, op, op.isREG(i32e), op.isMEM(), 0x0F, B01000000 | 3); } +void jae(std::string label, LabelType type = T_AUTO) { opJmp(label, type, 0x73, 0x83, 0x0F); } +void jae(const Label& label, LabelType type = T_AUTO) { opJmp(label, type, 0x73, 0x83, 0x0F); } +void setae(const Operand& op) { opR_ModM(op, 8, 0, 
0x0F, B10010000 | 3); } +void cmovnc(const Reg32e& reg, const Operand& op) { opModRM(reg, op, op.isREG(i32e), op.isMEM(), 0x0F, B01000000 | 3); } +void jnc(std::string label, LabelType type = T_AUTO) { opJmp(label, type, 0x73, 0x83, 0x0F); } +void jnc(const Label& label, LabelType type = T_AUTO) { opJmp(label, type, 0x73, 0x83, 0x0F); } +void setnc(const Operand& op) { opR_ModM(op, 8, 0, 0x0F, B10010000 | 3); } +void cmove(const Reg32e& reg, const Operand& op) { opModRM(reg, op, op.isREG(i32e), op.isMEM(), 0x0F, B01000000 | 4); } +void je(std::string label, LabelType type = T_AUTO) { opJmp(label, type, 0x74, 0x84, 0x0F); } +void je(const Label& label, LabelType type = T_AUTO) { opJmp(label, type, 0x74, 0x84, 0x0F); } +void sete(const Operand& op) { opR_ModM(op, 8, 0, 0x0F, B10010000 | 4); } +void cmovz(const Reg32e& reg, const Operand& op) { opModRM(reg, op, op.isREG(i32e), op.isMEM(), 0x0F, B01000000 | 4); } +void jz(std::string label, LabelType type = T_AUTO) { opJmp(label, type, 0x74, 0x84, 0x0F); } +void jz(const Label& label, LabelType type = T_AUTO) { opJmp(label, type, 0x74, 0x84, 0x0F); } +void setz(const Operand& op) { opR_ModM(op, 8, 0, 0x0F, B10010000 | 4); } +void cmovne(const Reg32e& reg, const Operand& op) { opModRM(reg, op, op.isREG(i32e), op.isMEM(), 0x0F, B01000000 | 5); } +void jne(std::string label, LabelType type = T_AUTO) { opJmp(label, type, 0x75, 0x85, 0x0F); } +void jne(const Label& label, LabelType type = T_AUTO) { opJmp(label, type, 0x75, 0x85, 0x0F); } +void setne(const Operand& op) { opR_ModM(op, 8, 0, 0x0F, B10010000 | 5); } +void cmovnz(const Reg32e& reg, const Operand& op) { opModRM(reg, op, op.isREG(i32e), op.isMEM(), 0x0F, B01000000 | 5); } +void jnz(std::string label, LabelType type = T_AUTO) { opJmp(label, type, 0x75, 0x85, 0x0F); } +void jnz(const Label& label, LabelType type = T_AUTO) { opJmp(label, type, 0x75, 0x85, 0x0F); } +void setnz(const Operand& op) { opR_ModM(op, 8, 0, 0x0F, B10010000 | 5); } +void cmovbe(const Reg32e& 
reg, const Operand& op) { opModRM(reg, op, op.isREG(i32e), op.isMEM(), 0x0F, B01000000 | 6); } +void jbe(std::string label, LabelType type = T_AUTO) { opJmp(label, type, 0x76, 0x86, 0x0F); } +void jbe(const Label& label, LabelType type = T_AUTO) { opJmp(label, type, 0x76, 0x86, 0x0F); } +void setbe(const Operand& op) { opR_ModM(op, 8, 0, 0x0F, B10010000 | 6); } +void cmovna(const Reg32e& reg, const Operand& op) { opModRM(reg, op, op.isREG(i32e), op.isMEM(), 0x0F, B01000000 | 6); } +void jna(std::string label, LabelType type = T_AUTO) { opJmp(label, type, 0x76, 0x86, 0x0F); } +void jna(const Label& label, LabelType type = T_AUTO) { opJmp(label, type, 0x76, 0x86, 0x0F); } +void setna(const Operand& op) { opR_ModM(op, 8, 0, 0x0F, B10010000 | 6); } +void cmovnbe(const Reg32e& reg, const Operand& op) { opModRM(reg, op, op.isREG(i32e), op.isMEM(), 0x0F, B01000000 | 7); } +void jnbe(std::string label, LabelType type = T_AUTO) { opJmp(label, type, 0x77, 0x87, 0x0F); } +void jnbe(const Label& label, LabelType type = T_AUTO) { opJmp(label, type, 0x77, 0x87, 0x0F); } +void setnbe(const Operand& op) { opR_ModM(op, 8, 0, 0x0F, B10010000 | 7); } +void cmova(const Reg32e& reg, const Operand& op) { opModRM(reg, op, op.isREG(i32e), op.isMEM(), 0x0F, B01000000 | 7); } +void ja(std::string label, LabelType type = T_AUTO) { opJmp(label, type, 0x77, 0x87, 0x0F); } +void ja(const Label& label, LabelType type = T_AUTO) { opJmp(label, type, 0x77, 0x87, 0x0F); } +void seta(const Operand& op) { opR_ModM(op, 8, 0, 0x0F, B10010000 | 7); } +void cmovs(const Reg32e& reg, const Operand& op) { opModRM(reg, op, op.isREG(i32e), op.isMEM(), 0x0F, B01000000 | 8); } +void js(std::string label, LabelType type = T_AUTO) { opJmp(label, type, 0x78, 0x88, 0x0F); } +void js(const Label& label, LabelType type = T_AUTO) { opJmp(label, type, 0x78, 0x88, 0x0F); } +void sets(const Operand& op) { opR_ModM(op, 8, 0, 0x0F, B10010000 | 8); } +void cmovns(const Reg32e& reg, const Operand& op) { opModRM(reg, op, 
op.isREG(i32e), op.isMEM(), 0x0F, B01000000 | 9); } +void jns(std::string label, LabelType type = T_AUTO) { opJmp(label, type, 0x79, 0x89, 0x0F); } +void jns(const Label& label, LabelType type = T_AUTO) { opJmp(label, type, 0x79, 0x89, 0x0F); } +void setns(const Operand& op) { opR_ModM(op, 8, 0, 0x0F, B10010000 | 9); } +void cmovp(const Reg32e& reg, const Operand& op) { opModRM(reg, op, op.isREG(i32e), op.isMEM(), 0x0F, B01000000 | 10); } +void jp(std::string label, LabelType type = T_AUTO) { opJmp(label, type, 0x7A, 0x8A, 0x0F); } +void jp(const Label& label, LabelType type = T_AUTO) { opJmp(label, type, 0x7A, 0x8A, 0x0F); } +void setp(const Operand& op) { opR_ModM(op, 8, 0, 0x0F, B10010000 | 10); } +void cmovpe(const Reg32e& reg, const Operand& op) { opModRM(reg, op, op.isREG(i32e), op.isMEM(), 0x0F, B01000000 | 10); } +void jpe(std::string label, LabelType type = T_AUTO) { opJmp(label, type, 0x7A, 0x8A, 0x0F); } +void jpe(const Label& label, LabelType type = T_AUTO) { opJmp(label, type, 0x7A, 0x8A, 0x0F); } +void setpe(const Operand& op) { opR_ModM(op, 8, 0, 0x0F, B10010000 | 10); } +void cmovnp(const Reg32e& reg, const Operand& op) { opModRM(reg, op, op.isREG(i32e), op.isMEM(), 0x0F, B01000000 | 11); } +void jnp(std::string label, LabelType type = T_AUTO) { opJmp(label, type, 0x7B, 0x8B, 0x0F); } +void jnp(const Label& label, LabelType type = T_AUTO) { opJmp(label, type, 0x7B, 0x8B, 0x0F); } +void setnp(const Operand& op) { opR_ModM(op, 8, 0, 0x0F, B10010000 | 11); } +void cmovpo(const Reg32e& reg, const Operand& op) { opModRM(reg, op, op.isREG(i32e), op.isMEM(), 0x0F, B01000000 | 11); } +void jpo(std::string label, LabelType type = T_AUTO) { opJmp(label, type, 0x7B, 0x8B, 0x0F); } +void jpo(const Label& label, LabelType type = T_AUTO) { opJmp(label, type, 0x7B, 0x8B, 0x0F); } +void setpo(const Operand& op) { opR_ModM(op, 8, 0, 0x0F, B10010000 | 11); } +void cmovl(const Reg32e& reg, const Operand& op) { opModRM(reg, op, op.isREG(i32e), op.isMEM(), 0x0F, 
B01000000 | 12); } +void jl(std::string label, LabelType type = T_AUTO) { opJmp(label, type, 0x7C, 0x8C, 0x0F); } +void jl(const Label& label, LabelType type = T_AUTO) { opJmp(label, type, 0x7C, 0x8C, 0x0F); } +void setl(const Operand& op) { opR_ModM(op, 8, 0, 0x0F, B10010000 | 12); } +void cmovnge(const Reg32e& reg, const Operand& op) { opModRM(reg, op, op.isREG(i32e), op.isMEM(), 0x0F, B01000000 | 12); } +void jnge(std::string label, LabelType type = T_AUTO) { opJmp(label, type, 0x7C, 0x8C, 0x0F); } +void jnge(const Label& label, LabelType type = T_AUTO) { opJmp(label, type, 0x7C, 0x8C, 0x0F); } +void setnge(const Operand& op) { opR_ModM(op, 8, 0, 0x0F, B10010000 | 12); } +void cmovnl(const Reg32e& reg, const Operand& op) { opModRM(reg, op, op.isREG(i32e), op.isMEM(), 0x0F, B01000000 | 13); } +void jnl(std::string label, LabelType type = T_AUTO) { opJmp(label, type, 0x7D, 0x8D, 0x0F); } +void jnl(const Label& label, LabelType type = T_AUTO) { opJmp(label, type, 0x7D, 0x8D, 0x0F); } +void setnl(const Operand& op) { opR_ModM(op, 8, 0, 0x0F, B10010000 | 13); } +void cmovge(const Reg32e& reg, const Operand& op) { opModRM(reg, op, op.isREG(i32e), op.isMEM(), 0x0F, B01000000 | 13); } +void jge(std::string label, LabelType type = T_AUTO) { opJmp(label, type, 0x7D, 0x8D, 0x0F); } +void jge(const Label& label, LabelType type = T_AUTO) { opJmp(label, type, 0x7D, 0x8D, 0x0F); } +void setge(const Operand& op) { opR_ModM(op, 8, 0, 0x0F, B10010000 | 13); } +void cmovle(const Reg32e& reg, const Operand& op) { opModRM(reg, op, op.isREG(i32e), op.isMEM(), 0x0F, B01000000 | 14); } +void jle(std::string label, LabelType type = T_AUTO) { opJmp(label, type, 0x7E, 0x8E, 0x0F); } +void jle(const Label& label, LabelType type = T_AUTO) { opJmp(label, type, 0x7E, 0x8E, 0x0F); } +void setle(const Operand& op) { opR_ModM(op, 8, 0, 0x0F, B10010000 | 14); } +void cmovng(const Reg32e& reg, const Operand& op) { opModRM(reg, op, op.isREG(i32e), op.isMEM(), 0x0F, B01000000 | 14); } +void 
jng(std::string label, LabelType type = T_AUTO) { opJmp(label, type, 0x7E, 0x8E, 0x0F); } +void jng(const Label& label, LabelType type = T_AUTO) { opJmp(label, type, 0x7E, 0x8E, 0x0F); } +void setng(const Operand& op) { opR_ModM(op, 8, 0, 0x0F, B10010000 | 14); } +void cmovnle(const Reg32e& reg, const Operand& op) { opModRM(reg, op, op.isREG(i32e), op.isMEM(), 0x0F, B01000000 | 15); } +void jnle(std::string label, LabelType type = T_AUTO) { opJmp(label, type, 0x7F, 0x8F, 0x0F); } +void jnle(const Label& label, LabelType type = T_AUTO) { opJmp(label, type, 0x7F, 0x8F, 0x0F); } +void setnle(const Operand& op) { opR_ModM(op, 8, 0, 0x0F, B10010000 | 15); } +void cmovg(const Reg32e& reg, const Operand& op) { opModRM(reg, op, op.isREG(i32e), op.isMEM(), 0x0F, B01000000 | 15); } +void jg(std::string label, LabelType type = T_AUTO) { opJmp(label, type, 0x7F, 0x8F, 0x0F); } +void jg(const Label& label, LabelType type = T_AUTO) { opJmp(label, type, 0x7F, 0x8F, 0x0F); } +void setg(const Operand& op) { opR_ModM(op, 8, 0, 0x0F, B10010000 | 15); } +#ifdef XBYAK32 +void jcxz(std::string label) { db(0x67); opJmp(label, T_SHORT, 0xe3, 0, 0); } +void jcxz(const Label& label) { db(0x67); opJmp(label, T_SHORT, 0xe3, 0, 0); } +void jecxz(std::string label) { opJmp(label, T_SHORT, 0xe3, 0, 0); } +void jecxz(const Label& label) { opJmp(label, T_SHORT, 0xe3, 0, 0); } +#else +void jecxz(std::string label) { db(0x67); opJmp(label, T_SHORT, 0xe3, 0, 0); } +void jecxz(const Label& label) { db(0x67); opJmp(label, T_SHORT, 0xe3, 0, 0); } +void jrcxz(std::string label) { opJmp(label, T_SHORT, 0xe3, 0, 0); } +void jrcxz(const Label& label) { opJmp(label, T_SHORT, 0xe3, 0, 0); } +#endif +#ifdef XBYAK64 +void cdqe() { db(0x48); db(0x98); } +void cqo() { db(0x48); db(0x99); } +#else +void aaa() { db(0x37); } +void aad() { db(0xD5); db(0x0A); } +void aam() { db(0xD4); db(0x0A); } +void aas() { db(0x3F); } +void daa() { db(0x27); } +void das() { db(0x2F); } +void popad() { db(0x61); } +void popfd() { 
db(0x9D); } +void pusha() { db(0x60); } +void pushad() { db(0x60); } +void pushfd() { db(0x9C); } +void popa() { db(0x61); } +#endif +void cbw() { db(0x66); db(0x98); } +void cdq() { db(0x99); } +void clc() { db(0xF8); } +void cld() { db(0xFC); } +void cli() { db(0xFA); } +void cmc() { db(0xF5); } +void cpuid() { db(0x0F); db(0xA2); } +void cwd() { db(0x66); db(0x99); } +void cwde() { db(0x98); } +void lahf() { db(0x9F); } +void lock() { db(0xF0); } +void nop() { db(0x90); } +void sahf() { db(0x9E); } +void stc() { db(0xF9); } +void std() { db(0xFD); } +void sti() { db(0xFB); } +void emms() { db(0x0F); db(0x77); } +void pause() { db(0xF3); db(0x90); } +void sfence() { db(0x0F); db(0xAE); db(0xF8); } +void lfence() { db(0x0F); db(0xAE); db(0xE8); } +void mfence() { db(0x0F); db(0xAE); db(0xF0); } +void monitor() { db(0x0F); db(0x01); db(0xC8); } +void mwait() { db(0x0F); db(0x01); db(0xC9); } +void rdmsr() { db(0x0F); db(0x32); } +void rdpmc() { db(0x0F); db(0x33); } +void rdtsc() { db(0x0F); db(0x31); } +void rdtscp() { db(0x0F); db(0x01); db(0xF9); } +void ud2() { db(0x0F); db(0x0B); } +void wait() { db(0x9B); } +void fwait() { db(0x9B); } +void wbinvd() { db(0x0F); db(0x09); } +void wrmsr() { db(0x0F); db(0x30); } +void xlatb() { db(0xD7); } +void popf() { db(0x9D); } +void pushf() { db(0x9C); } +void stac() { db(0x0F); db(0x01); db(0xCB); } +void vzeroall() { db(0xC5); db(0xFC); db(0x77); } +void vzeroupper() { db(0xC5); db(0xF8); db(0x77); } +void xgetbv() { db(0x0F); db(0x01); db(0xD0); } +void f2xm1() { db(0xD9); db(0xF0); } +void fabs() { db(0xD9); db(0xE1); } +void faddp() { db(0xDE); db(0xC1); } +void fchs() { db(0xD9); db(0xE0); } +void fcom() { db(0xD8); db(0xD1); } +void fcomp() { db(0xD8); db(0xD9); } +void fcompp() { db(0xDE); db(0xD9); } +void fcos() { db(0xD9); db(0xFF); } +void fdecstp() { db(0xD9); db(0xF6); } +void fdivp() { db(0xDE); db(0xF9); } +void fdivrp() { db(0xDE); db(0xF1); } +void fincstp() { db(0xD9); db(0xF7); } +void finit() { 
db(0x9B); db(0xDB); db(0xE3); } +void fninit() { db(0xDB); db(0xE3); } +void fld1() { db(0xD9); db(0xE8); } +void fldl2t() { db(0xD9); db(0xE9); } +void fldl2e() { db(0xD9); db(0xEA); } +void fldpi() { db(0xD9); db(0xEB); } +void fldlg2() { db(0xD9); db(0xEC); } +void fldln2() { db(0xD9); db(0xED); } +void fldz() { db(0xD9); db(0xEE); } +void fmulp() { db(0xDE); db(0xC9); } +void fnop() { db(0xD9); db(0xD0); } +void fpatan() { db(0xD9); db(0xF3); } +void fprem() { db(0xD9); db(0xF8); } +void fprem1() { db(0xD9); db(0xF5); } +void fptan() { db(0xD9); db(0xF2); } +void frndint() { db(0xD9); db(0xFC); } +void fscale() { db(0xD9); db(0xFD); } +void fsin() { db(0xD9); db(0xFE); } +void fsincos() { db(0xD9); db(0xFB); } +void fsqrt() { db(0xD9); db(0xFA); } +void fsubp() { db(0xDE); db(0xE9); } +void fsubrp() { db(0xDE); db(0xE1); } +void ftst() { db(0xD9); db(0xE4); } +void fucom() { db(0xDD); db(0xE1); } +void fucomp() { db(0xDD); db(0xE9); } +void fucompp() { db(0xDA); db(0xE9); } +void fxam() { db(0xD9); db(0xE5); } +void fxch() { db(0xD9); db(0xC9); } +void fxtract() { db(0xD9); db(0xF4); } +void fyl2x() { db(0xD9); db(0xF1); } +void fyl2xp1() { db(0xD9); db(0xF9); } +void adc(const Operand& op1, const Operand& op2) { opRM_RM(op1, op2, 0x10); } +void adc(const Operand& op, uint32 imm) { opRM_I(op, imm, 0x10, 2); } +void add(const Operand& op1, const Operand& op2) { opRM_RM(op1, op2, 0x00); } +void add(const Operand& op, uint32 imm) { opRM_I(op, imm, 0x00, 0); } +void and_(const Operand& op1, const Operand& op2) { opRM_RM(op1, op2, 0x20); } +void and_(const Operand& op, uint32 imm) { opRM_I(op, imm, 0x20, 4); } +#ifndef XBYAK_NO_OP_NAMES +void and(const Operand& op1, const Operand& op2) { opRM_RM(op1, op2, 0x20); } +void and(const Operand& op, uint32 imm) { opRM_I(op, imm, 0x20, 4); } +#endif +void cmp(const Operand& op1, const Operand& op2) { opRM_RM(op1, op2, 0x38); } +void cmp(const Operand& op, uint32 imm) { opRM_I(op, imm, 0x38, 7); } +void or_(const Operand& 
op1, const Operand& op2) { opRM_RM(op1, op2, 0x08); } +void or_(const Operand& op, uint32 imm) { opRM_I(op, imm, 0x08, 1); } +#ifndef XBYAK_NO_OP_NAMES +void or(const Operand& op1, const Operand& op2) { opRM_RM(op1, op2, 0x08); } +void or(const Operand& op, uint32 imm) { opRM_I(op, imm, 0x08, 1); } +#endif +void sbb(const Operand& op1, const Operand& op2) { opRM_RM(op1, op2, 0x18); } +void sbb(const Operand& op, uint32 imm) { opRM_I(op, imm, 0x18, 3); } +void sub(const Operand& op1, const Operand& op2) { opRM_RM(op1, op2, 0x28); } +void sub(const Operand& op, uint32 imm) { opRM_I(op, imm, 0x28, 5); } +void xor_(const Operand& op1, const Operand& op2) { opRM_RM(op1, op2, 0x30); } +void xor_(const Operand& op, uint32 imm) { opRM_I(op, imm, 0x30, 6); } +#ifndef XBYAK_NO_OP_NAMES +void xor(const Operand& op1, const Operand& op2) { opRM_RM(op1, op2, 0x30); } +void xor(const Operand& op, uint32 imm) { opRM_I(op, imm, 0x30, 6); } +#endif +void dec(const Operand& op) { opIncDec(op, 0x48, 1); } +void inc(const Operand& op) { opIncDec(op, 0x40, 0); } +void bt(const Operand& op, const Reg& reg) { opModRM(reg, op, op.isREG(16|32|64) && op.getBit() == reg.getBit(), op.isMEM(), 0x0f, 0xa3); } +void bt(const Operand& op, uint8 imm) { opR_ModM(op, 16|32|64, 4, 0x0f, 0xba); db(imm); } +void bts(const Operand& op, const Reg& reg) { opModRM(reg, op, op.isREG(16|32|64) && op.getBit() == reg.getBit(), op.isMEM(), 0x0f, 0xab); } +void bts(const Operand& op, uint8 imm) { opR_ModM(op, 16|32|64, 5, 0x0f, 0xba); db(imm); } +void btr(const Operand& op, const Reg& reg) { opModRM(reg, op, op.isREG(16|32|64) && op.getBit() == reg.getBit(), op.isMEM(), 0x0f, 0xb3); } +void btr(const Operand& op, uint8 imm) { opR_ModM(op, 16|32|64, 6, 0x0f, 0xba); db(imm); } +void btc(const Operand& op, const Reg& reg) { opModRM(reg, op, op.isREG(16|32|64) && op.getBit() == reg.getBit(), op.isMEM(), 0x0f, 0xbb); } +void btc(const Operand& op, uint8 imm) { opR_ModM(op, 16|32|64, 7, 0x0f, 0xba); db(imm); } +void 
div(const Operand& op) { opR_ModM(op, 0, 6, 0xF6); } +void idiv(const Operand& op) { opR_ModM(op, 0, 7, 0xF6); } +void imul(const Operand& op) { opR_ModM(op, 0, 5, 0xF6); } +void mul(const Operand& op) { opR_ModM(op, 0, 4, 0xF6); } +void neg(const Operand& op) { opR_ModM(op, 0, 3, 0xF6); } +void not_(const Operand& op) { opR_ModM(op, 0, 2, 0xF6); } +#ifndef XBYAK_NO_OP_NAMES +void not(const Operand& op) { opR_ModM(op, 0, 2, 0xF6); } +#endif +void rcl(const Operand& op, int imm) { opShift(op, imm, 2); } +void rcl(const Operand& op, const Reg8& _cl) { opShift(op, _cl, 2); } +void rcr(const Operand& op, int imm) { opShift(op, imm, 3); } +void rcr(const Operand& op, const Reg8& _cl) { opShift(op, _cl, 3); } +void rol(const Operand& op, int imm) { opShift(op, imm, 0); } +void rol(const Operand& op, const Reg8& _cl) { opShift(op, _cl, 0); } +void ror(const Operand& op, int imm) { opShift(op, imm, 1); } +void ror(const Operand& op, const Reg8& _cl) { opShift(op, _cl, 1); } +void sar(const Operand& op, int imm) { opShift(op, imm, 7); } +void sar(const Operand& op, const Reg8& _cl) { opShift(op, _cl, 7); } +void shl(const Operand& op, int imm) { opShift(op, imm, 4); } +void shl(const Operand& op, const Reg8& _cl) { opShift(op, _cl, 4); } +void shr(const Operand& op, int imm) { opShift(op, imm, 5); } +void shr(const Operand& op, const Reg8& _cl) { opShift(op, _cl, 5); } +void sal(const Operand& op, int imm) { opShift(op, imm, 4); } +void sal(const Operand& op, const Reg8& _cl) { opShift(op, _cl, 4); } +void shld(const Operand& op, const Reg& reg, uint8 imm) { opShxd(op, reg, imm, 0xA4); } +void shld(const Operand& op, const Reg& reg, const Reg8& _cl) { opShxd(op, reg, 0, 0xA4, &_cl); } +void shrd(const Operand& op, const Reg& reg, uint8 imm) { opShxd(op, reg, imm, 0xAC); } +void shrd(const Operand& op, const Reg& reg, const Reg8& _cl) { opShxd(op, reg, 0, 0xAC, &_cl); } +void bsf(const Reg®, const Operand& op) { opModRM(reg, op, op.isREG(16 | i32e), op.isMEM(), 0x0F, 0xBC); 
} +void bsr(const Reg®, const Operand& op) { opModRM(reg, op, op.isREG(16 | i32e), op.isMEM(), 0x0F, 0xBD); } +void popcnt(const Reg®, const Operand& op) { opSp1(reg, op, 0xF3, 0x0F, 0xB8); } +void tzcnt(const Reg®, const Operand& op) { opSp1(reg, op, 0xF3, 0x0F, 0xBC); } +void lzcnt(const Reg®, const Operand& op) { opSp1(reg, op, 0xF3, 0x0F, 0xBD); } +void pshufb(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x00, 0x66, NONE, 0x38); } +void phaddw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x01, 0x66, NONE, 0x38); } +void phaddd(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x02, 0x66, NONE, 0x38); } +void phaddsw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x03, 0x66, NONE, 0x38); } +void pmaddubsw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x04, 0x66, NONE, 0x38); } +void phsubw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x05, 0x66, NONE, 0x38); } +void phsubd(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x06, 0x66, NONE, 0x38); } +void phsubsw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x07, 0x66, NONE, 0x38); } +void psignb(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x08, 0x66, NONE, 0x38); } +void psignw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x09, 0x66, NONE, 0x38); } +void psignd(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x0A, 0x66, NONE, 0x38); } +void pmulhrsw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x0B, 0x66, NONE, 0x38); } +void pabsb(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x1C, 0x66, NONE, 0x38); } +void pabsw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x1D, 0x66, NONE, 0x38); } +void pabsd(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x1E, 0x66, NONE, 0x38); } +void palignr(const Mmx& mmx, const Operand& op, int imm) { opMMX(mmx, op, 0x0f, 0x66, static_cast(imm), 0x3a); } +void blendvpd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x15, 0x66, isXMM_XMMorMEM, NONE, 0x38); } +void blendvps(const Xmm& xmm, const 
Operand& op) { opGen(xmm, op, 0x14, 0x66, isXMM_XMMorMEM, NONE, 0x38); } +void packusdw(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x2B, 0x66, isXMM_XMMorMEM, NONE, 0x38); } +void pblendvb(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x10, 0x66, isXMM_XMMorMEM, NONE, 0x38); } +void pcmpeqq(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x29, 0x66, isXMM_XMMorMEM, NONE, 0x38); } +void ptest(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x17, 0x66, isXMM_XMMorMEM, NONE, 0x38); } +void pmovsxbw(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x20, 0x66, isXMM_XMMorMEM, NONE, 0x38); } +void pmovsxbd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x21, 0x66, isXMM_XMMorMEM, NONE, 0x38); } +void pmovsxbq(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x22, 0x66, isXMM_XMMorMEM, NONE, 0x38); } +void pmovsxwd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x23, 0x66, isXMM_XMMorMEM, NONE, 0x38); } +void pmovsxwq(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x24, 0x66, isXMM_XMMorMEM, NONE, 0x38); } +void pmovsxdq(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x25, 0x66, isXMM_XMMorMEM, NONE, 0x38); } +void pmovzxbw(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x30, 0x66, isXMM_XMMorMEM, NONE, 0x38); } +void pmovzxbd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x31, 0x66, isXMM_XMMorMEM, NONE, 0x38); } +void pmovzxbq(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x32, 0x66, isXMM_XMMorMEM, NONE, 0x38); } +void pmovzxwd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x33, 0x66, isXMM_XMMorMEM, NONE, 0x38); } +void pmovzxwq(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x34, 0x66, isXMM_XMMorMEM, NONE, 0x38); } +void pmovzxdq(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x35, 0x66, isXMM_XMMorMEM, NONE, 0x38); } +void pminsb(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x38, 0x66, isXMM_XMMorMEM, NONE, 0x38); } +void pminsd(const Xmm& xmm, const Operand& op) { opGen(xmm, 
op, 0x39, 0x66, isXMM_XMMorMEM, NONE, 0x38); } +void pminuw(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x3A, 0x66, isXMM_XMMorMEM, NONE, 0x38); } +void pminud(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x3B, 0x66, isXMM_XMMorMEM, NONE, 0x38); } +void pmaxsb(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x3C, 0x66, isXMM_XMMorMEM, NONE, 0x38); } +void pmaxsd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x3D, 0x66, isXMM_XMMorMEM, NONE, 0x38); } +void pmaxuw(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x3E, 0x66, isXMM_XMMorMEM, NONE, 0x38); } +void pmaxud(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x3F, 0x66, isXMM_XMMorMEM, NONE, 0x38); } +void pmuldq(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x28, 0x66, isXMM_XMMorMEM, NONE, 0x38); } +void pmulld(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x40, 0x66, isXMM_XMMorMEM, NONE, 0x38); } +void phminposuw(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x41, 0x66, isXMM_XMMorMEM, NONE, 0x38); } +void pcmpgtq(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x37, 0x66, isXMM_XMMorMEM, NONE, 0x38); } +void aesdec(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0xDE, 0x66, isXMM_XMMorMEM, NONE, 0x38); } +void aesdeclast(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0xDF, 0x66, isXMM_XMMorMEM, NONE, 0x38); } +void aesenc(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0xDC, 0x66, isXMM_XMMorMEM, NONE, 0x38); } +void aesenclast(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0xDD, 0x66, isXMM_XMMorMEM, NONE, 0x38); } +void aesimc(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0xDB, 0x66, isXMM_XMMorMEM, NONE, 0x38); } +/* The emitters below that take `int imm` narrow it to uint8 before handing it to opGen. */ +void blendpd(const Xmm& xmm, const Operand& op, int imm) { opGen(xmm, op, 0x0D, 0x66, isXMM_XMMorMEM, static_cast<uint8>(imm), 0x3A); } +void blendps(const Xmm& xmm, const Operand& op, int imm) { opGen(xmm, op, 0x0C, 0x66, isXMM_XMMorMEM, static_cast<uint8>(imm), 0x3A); } +void dppd(const Xmm& xmm, const Operand& op, int imm) {
opGen(xmm, op, 0x41, 0x66, isXMM_XMMorMEM, static_cast<uint8>(imm), 0x3A); } +void dpps(const Xmm& xmm, const Operand& op, int imm) { opGen(xmm, op, 0x40, 0x66, isXMM_XMMorMEM, static_cast<uint8>(imm), 0x3A); } +void mpsadbw(const Xmm& xmm, const Operand& op, int imm) { opGen(xmm, op, 0x42, 0x66, isXMM_XMMorMEM, static_cast<uint8>(imm), 0x3A); } +void pblendw(const Xmm& xmm, const Operand& op, int imm) { opGen(xmm, op, 0x0E, 0x66, isXMM_XMMorMEM, static_cast<uint8>(imm), 0x3A); } +void roundps(const Xmm& xmm, const Operand& op, int imm) { opGen(xmm, op, 0x08, 0x66, isXMM_XMMorMEM, static_cast<uint8>(imm), 0x3A); } +void roundpd(const Xmm& xmm, const Operand& op, int imm) { opGen(xmm, op, 0x09, 0x66, isXMM_XMMorMEM, static_cast<uint8>(imm), 0x3A); } +void roundss(const Xmm& xmm, const Operand& op, int imm) { opGen(xmm, op, 0x0A, 0x66, isXMM_XMMorMEM, static_cast<uint8>(imm), 0x3A); } +void roundsd(const Xmm& xmm, const Operand& op, int imm) { opGen(xmm, op, 0x0B, 0x66, isXMM_XMMorMEM, static_cast<uint8>(imm), 0x3A); } +void pcmpestrm(const Xmm& xmm, const Operand& op, int imm) { opGen(xmm, op, 0x60, 0x66, isXMM_XMMorMEM, static_cast<uint8>(imm), 0x3A); } +void pcmpestri(const Xmm& xmm, const Operand& op, int imm) { opGen(xmm, op, 0x61, 0x66, isXMM_XMMorMEM, static_cast<uint8>(imm), 0x3A); } +void pcmpistrm(const Xmm& xmm, const Operand& op, int imm) { opGen(xmm, op, 0x62, 0x66, isXMM_XMMorMEM, static_cast<uint8>(imm), 0x3A); } +void pcmpistri(const Xmm& xmm, const Operand& op, int imm) { opGen(xmm, op, 0x63, 0x66, isXMM_XMMorMEM, static_cast<uint8>(imm), 0x3A); } +void pclmulqdq(const Xmm& xmm, const Operand& op, int imm) { opGen(xmm, op, 0x44, 0x66, isXMM_XMMorMEM, static_cast<uint8>(imm), 0x3A); } +void aeskeygenassist(const Xmm& xmm, const Operand& op, int imm) { opGen(xmm, op, 0xDF, 0x66, isXMM_XMMorMEM, static_cast<uint8>(imm), 0x3A); } +void pclmullqlqdq(const Xmm& xmm, const Operand& op) { pclmulqdq(xmm, op, 0x00); } +void pclmulhqlqdq(const Xmm& xmm, const Operand& op) { pclmulqdq(xmm, op, 0x01); } +void pclmullqhdq(const Xmm& xmm, const Operand& op) {
pclmulqdq(xmm, op, 0x10); } +void pclmulhqhdq(const Xmm& xmm, const Operand& op) { pclmulqdq(xmm, op, 0x11); } +void ldmxcsr(const Address& addr) { opModM(addr, Reg32(2), 0x0F, 0xAE); } +void stmxcsr(const Address& addr) { opModM(addr, Reg32(3), 0x0F, 0xAE); } +void clflush(const Address& addr) { opModM(addr, Reg32(7), 0x0F, 0xAE); } +void fldcw(const Address& addr) { opModM(addr, Reg32(5), 0xD9, 0x100); } +void fstcw(const Address& addr) { db(0x9B); opModM(addr, Reg32(7), 0xD9, NONE); } +void movntpd(const Address& addr, const Xmm& reg) { opModM(addr, Reg16(reg.getIdx()), 0x0F, 0x2B); } +void movntdq(const Address& addr, const Xmm& reg) { opModM(addr, Reg16(reg.getIdx()), 0x0F, 0xE7); } +void movsx(const Reg& reg, const Operand& op) { opMovxx(reg, op, 0xBE); } +void movzx(const Reg& reg, const Operand& op) { opMovxx(reg, op, 0xB6); } +void fadd(const Address& addr) { opFpuMem(addr, 0x00, 0xD8, 0xDC, 0, 0); } +void fiadd(const Address& addr) { opFpuMem(addr, 0xDE, 0xDA, 0x00, 0, 0); } +void fcom(const Address& addr) { opFpuMem(addr, 0x00, 0xD8, 0xDC, 2, 0); } +void fcomp(const Address& addr) { opFpuMem(addr, 0x00, 0xD8, 0xDC, 3, 0); } +void fdiv(const Address& addr) { opFpuMem(addr, 0x00, 0xD8, 0xDC, 6, 0); } +void fidiv(const Address& addr) { opFpuMem(addr, 0xDE, 0xDA, 0x00, 6, 0); } +void fdivr(const Address& addr) { opFpuMem(addr, 0x00, 0xD8, 0xDC, 7, 0); } +void fidivr(const Address& addr) { opFpuMem(addr, 0xDE, 0xDA, 0x00, 7, 0); } +void ficom(const Address& addr) { opFpuMem(addr, 0xDE, 0xDA, 0x00, 2, 0); } +void ficomp(const Address& addr) { opFpuMem(addr, 0xDE, 0xDA, 0x00, 3, 0); } +void fild(const Address& addr) { opFpuMem(addr, 0xDF, 0xDB, 0xDF, 0, 5); } +void fist(const Address& addr) { opFpuMem(addr, 0xDF, 0xDB, 0x00, 2, 0); } +void fistp(const Address& addr) { opFpuMem(addr, 0xDF, 0xDB, 0xDF, 3, 7); } +void fisttp(const Address& addr) { opFpuMem(addr, 0xDF, 0xDB, 0xDD, 1, 0); } +void fld(const Address& addr) { opFpuMem(addr, 0x00, 0xD9, 0xDD, 0, 0); } 
+void fmul(const Address& addr) { opFpuMem(addr, 0x00, 0xD8, 0xDC, 1, 0); } +void fimul(const Address& addr) { opFpuMem(addr, 0xDE, 0xDA, 0x00, 1, 0); } +void fst(const Address& addr) { opFpuMem(addr, 0x00, 0xD9, 0xDD, 2, 0); } +void fstp(const Address& addr) { opFpuMem(addr, 0x00, 0xD9, 0xDD, 3, 0); } +void fsub(const Address& addr) { opFpuMem(addr, 0x00, 0xD8, 0xDC, 4, 0); } +void fisub(const Address& addr) { opFpuMem(addr, 0xDE, 0xDA, 0x00, 4, 0); } +void fsubr(const Address& addr) { opFpuMem(addr, 0x00, 0xD8, 0xDC, 5, 0); } +void fisubr(const Address& addr) { opFpuMem(addr, 0xDE, 0xDA, 0x00, 5, 0); } +void fadd(const Fpu& reg1, const Fpu& reg2) { opFpuFpu(reg1, reg2, 0xD8C0, 0xDCC0); } +void fadd(const Fpu& reg1) { opFpuFpu(st0, reg1, 0xD8C0, 0xDCC0); } +void faddp(const Fpu& reg1, const Fpu& reg2) { opFpuFpu(reg1, reg2, 0x0000, 0xDEC0); } +void faddp(const Fpu& reg1) { opFpuFpu(reg1, st0, 0x0000, 0xDEC0); } +void fcmovb(const Fpu& reg1, const Fpu& reg2) { opFpuFpu(reg1, reg2, 0xDAC0, 0x00C0); } +void fcmovb(const Fpu& reg1) { opFpuFpu(st0, reg1, 0xDAC0, 0x00C0); } +void fcmove(const Fpu& reg1, const Fpu& reg2) { opFpuFpu(reg1, reg2, 0xDAC8, 0x00C8); } +void fcmove(const Fpu& reg1) { opFpuFpu(st0, reg1, 0xDAC8, 0x00C8); } +void fcmovbe(const Fpu& reg1, const Fpu& reg2) { opFpuFpu(reg1, reg2, 0xDAD0, 0x00D0); } +void fcmovbe(const Fpu& reg1) { opFpuFpu(st0, reg1, 0xDAD0, 0x00D0); } +void fcmovu(const Fpu& reg1, const Fpu& reg2) { opFpuFpu(reg1, reg2, 0xDAD8, 0x00D8); } +void fcmovu(const Fpu& reg1) { opFpuFpu(st0, reg1, 0xDAD8, 0x00D8); } +void fcmovnb(const Fpu& reg1, const Fpu& reg2) { opFpuFpu(reg1, reg2, 0xDBC0, 0x00C0); } +void fcmovnb(const Fpu& reg1) { opFpuFpu(st0, reg1, 0xDBC0, 0x00C0); } +void fcmovne(const Fpu& reg1, const Fpu& reg2) { opFpuFpu(reg1, reg2, 0xDBC8, 0x00C8); } +void fcmovne(const Fpu& reg1) { opFpuFpu(st0, reg1, 0xDBC8, 0x00C8); } +void fcmovnbe(const Fpu& reg1, const Fpu& reg2) { opFpuFpu(reg1, reg2, 0xDBD0, 0x00D0); } +void 
fcmovnbe(const Fpu& reg1) { opFpuFpu(st0, reg1, 0xDBD0, 0x00D0); } +void fcmovnu(const Fpu& reg1, const Fpu& reg2) { opFpuFpu(reg1, reg2, 0xDBD8, 0x00D8); } +void fcmovnu(const Fpu& reg1) { opFpuFpu(st0, reg1, 0xDBD8, 0x00D8); } +void fcomi(const Fpu& reg1, const Fpu& reg2) { opFpuFpu(reg1, reg2, 0xDBF0, 0x00F0); } +void fcomi(const Fpu& reg1) { opFpuFpu(st0, reg1, 0xDBF0, 0x00F0); } +void fcomip(const Fpu& reg1, const Fpu& reg2) { opFpuFpu(reg1, reg2, 0xDFF0, 0x00F0); } +void fcomip(const Fpu& reg1) { opFpuFpu(st0, reg1, 0xDFF0, 0x00F0); } +void fucomi(const Fpu& reg1, const Fpu& reg2) { opFpuFpu(reg1, reg2, 0xDBE8, 0x00E8); } +void fucomi(const Fpu& reg1) { opFpuFpu(st0, reg1, 0xDBE8, 0x00E8); } +void fucomip(const Fpu& reg1, const Fpu& reg2) { opFpuFpu(reg1, reg2, 0xDFE8, 0x00E8); } +void fucomip(const Fpu& reg1) { opFpuFpu(st0, reg1, 0xDFE8, 0x00E8); } +void fdiv(const Fpu& reg1, const Fpu& reg2) { opFpuFpu(reg1, reg2, 0xD8F0, 0xDCF8); } +void fdiv(const Fpu& reg1) { opFpuFpu(st0, reg1, 0xD8F0, 0xDCF8); } +void fdivp(const Fpu& reg1, const Fpu& reg2) { opFpuFpu(reg1, reg2, 0x0000, 0xDEF8); } +void fdivp(const Fpu& reg1) { opFpuFpu(reg1, st0, 0x0000, 0xDEF8); } +void fdivr(const Fpu& reg1, const Fpu& reg2) { opFpuFpu(reg1, reg2, 0xD8F8, 0xDCF0); } +void fdivr(const Fpu& reg1) { opFpuFpu(st0, reg1, 0xD8F8, 0xDCF0); } +void fdivrp(const Fpu& reg1, const Fpu& reg2) { opFpuFpu(reg1, reg2, 0x0000, 0xDEF0); } +void fdivrp(const Fpu& reg1) { opFpuFpu(reg1, st0, 0x0000, 0xDEF0); } +void fmul(const Fpu& reg1, const Fpu& reg2) { opFpuFpu(reg1, reg2, 0xD8C8, 0xDCC8); } +void fmul(const Fpu& reg1) { opFpuFpu(st0, reg1, 0xD8C8, 0xDCC8); } +void fmulp(const Fpu& reg1, const Fpu& reg2) { opFpuFpu(reg1, reg2, 0x0000, 0xDEC8); } +void fmulp(const Fpu& reg1) { opFpuFpu(reg1, st0, 0x0000, 0xDEC8); } +void fsub(const Fpu& reg1, const Fpu& reg2) { opFpuFpu(reg1, reg2, 0xD8E0, 0xDCE8); } +void fsub(const Fpu& reg1) { opFpuFpu(st0, reg1, 0xD8E0, 0xDCE8); } +void fsubp(const Fpu& reg1, 
const Fpu& reg2) { opFpuFpu(reg1, reg2, 0x0000, 0xDEE8); } +void fsubp(const Fpu& reg1) { opFpuFpu(reg1, st0, 0x0000, 0xDEE8); } +void fsubr(const Fpu& reg1, const Fpu& reg2) { opFpuFpu(reg1, reg2, 0xD8E8, 0xDCE0); } +void fsubr(const Fpu& reg1) { opFpuFpu(st0, reg1, 0xD8E8, 0xDCE0); } +void fsubrp(const Fpu& reg1, const Fpu& reg2) { opFpuFpu(reg1, reg2, 0x0000, 0xDEE0); } +void fsubrp(const Fpu& reg1) { opFpuFpu(reg1, st0, 0x0000, 0xDEE0); } +void fcom(const Fpu& reg) { opFpu(reg, 0xD8, 0xD0); } +void fcomp(const Fpu& reg) { opFpu(reg, 0xD8, 0xD8); } +void ffree(const Fpu& reg) { opFpu(reg, 0xDD, 0xC0); } +void fld(const Fpu& reg) { opFpu(reg, 0xD9, 0xC0); } +void fst(const Fpu& reg) { opFpu(reg, 0xDD, 0xD0); } +void fstp(const Fpu& reg) { opFpu(reg, 0xDD, 0xD8); } +void fucom(const Fpu& reg) { opFpu(reg, 0xDD, 0xE0); } +void fucomp(const Fpu& reg) { opFpu(reg, 0xDD, 0xE8); } +void fxch(const Fpu& reg) { opFpu(reg, 0xD9, 0xC8); } +void vaddpd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F | PP_66, 0x58, true); } +void vaddps(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F, 0x58, true); } +void vaddsd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F | PP_F2, 0x58, false); } +void vaddss(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F | PP_F3, 0x58, false); } +void vsubpd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F | PP_66, 0x5C, true); } +void vsubps(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F, 0x5C, true); } +void vsubsd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F | PP_F2, 0x5C, false); } +void vsubss(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) 
{ opAVX_X_X_XM(xmm, op1, op2, MM_0F | PP_F3, 0x5C, false); } +void vmulpd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F | PP_66, 0x59, true); } +void vmulps(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F, 0x59, true); } +void vmulsd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F | PP_F2, 0x59, false); } +void vmulss(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F | PP_F3, 0x59, false); } +void vdivpd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F | PP_66, 0x5E, true); } +void vdivps(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F, 0x5E, true); } +void vdivsd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F | PP_F2, 0x5E, false); } +void vdivss(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F | PP_F3, 0x5E, false); } +void vmaxpd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F | PP_66, 0x5F, true); } +void vmaxps(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F, 0x5F, true); } +void vmaxsd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F | PP_F2, 0x5F, false); } +void vmaxss(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F | PP_F3, 0x5F, false); } +void vminpd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F | PP_66, 0x5D, true); } +void vminps(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F, 0x5D, 
true); } +void vminsd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F | PP_F2, 0x5D, false); } +void vminss(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F | PP_F3, 0x5D, false); } +void vandpd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F | PP_66, 0x54, true); } +void vandps(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F, 0x54, true); } +void vandnpd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F | PP_66, 0x55, true); } +void vandnps(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F, 0x55, true); } +void vorpd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F | PP_66, 0x56, true); } +void vorps(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F, 0x56, true); } +void vxorpd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F | PP_66, 0x57, true); } +void vxorps(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F, 0x57, true); } +void vblendpd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, MM_0F3A | PP_66, 0x0D, true, 0); db(imm); } +void vblendpd(const Xmm& xmm, const Operand& op, uint8 imm) { opAVX_X_X_XM(xmm, xmm, op, MM_0F3A | PP_66, 0x0D, true, 0); db(imm); } +void vblendps(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, MM_0F3A | PP_66, 0x0C, true, 0); db(imm); } +void vblendps(const Xmm& xmm, const Operand& op, uint8 imm) { opAVX_X_X_XM(xmm, xmm, op, MM_0F3A | PP_66, 0x0C, true, 0); db(imm); } +void vdppd(const Xmm& x1, const Xmm& x2, const 
Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, MM_0F3A | PP_66, 0x41, false, 0); db(imm); } +void vdppd(const Xmm& xmm, const Operand& op, uint8 imm) { opAVX_X_X_XM(xmm, xmm, op, MM_0F3A | PP_66, 0x41, false, 0); db(imm); } +void vdpps(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, MM_0F3A | PP_66, 0x40, true, 0); db(imm); } +void vdpps(const Xmm& xmm, const Operand& op, uint8 imm) { opAVX_X_X_XM(xmm, xmm, op, MM_0F3A | PP_66, 0x40, true, 0); db(imm); } +void vmpsadbw(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, MM_0F3A | PP_66, 0x42, true, 0); db(imm); } +void vmpsadbw(const Xmm& xmm, const Operand& op, uint8 imm) { opAVX_X_X_XM(xmm, xmm, op, MM_0F3A | PP_66, 0x42, true, 0); db(imm); } +void vpblendw(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, MM_0F3A | PP_66, 0x0E, true, 0); db(imm); } +void vpblendw(const Xmm& xmm, const Operand& op, uint8 imm) { opAVX_X_X_XM(xmm, xmm, op, MM_0F3A | PP_66, 0x0E, true, 0); db(imm); } +void vpblendd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, MM_0F3A | PP_66, 0x02, true, 0); db(imm); } +void vpblendd(const Xmm& xmm, const Operand& op, uint8 imm) { opAVX_X_X_XM(xmm, xmm, op, MM_0F3A | PP_66, 0x02, true, 0); db(imm); } +void vroundsd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, MM_0F3A | PP_66, 0x0B, false, 0); db(imm); } +void vroundsd(const Xmm& xmm, const Operand& op, uint8 imm) { opAVX_X_X_XM(xmm, xmm, op, MM_0F3A | PP_66, 0x0B, false, 0); db(imm); } +void vroundss(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, MM_0F3A | PP_66, 0x0A, false, 0); db(imm); } +void vroundss(const Xmm& xmm, const Operand& op, uint8 imm) { opAVX_X_X_XM(xmm, xmm, op, MM_0F3A | PP_66, 0x0A, false, 0); db(imm); } +void vpclmulqdq(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { 
opAVX_X_X_XM(x1, x2, op, MM_0F3A | PP_66, 0x44, false, 0); db(imm); } +void vpclmulqdq(const Xmm& xmm, const Operand& op, uint8 imm) { opAVX_X_X_XM(xmm, xmm, op, MM_0F3A | PP_66, 0x44, false, 0); db(imm); } +void vpermilps(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, MM_0F38 | PP_66, 0x0C, true, 0); } +void vpermilpd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, MM_0F38 | PP_66, 0x0D, true, 0); } +void vpsllvd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, MM_0F38 | PP_66, 0x47, true, 0); } +void vpsllvq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, MM_0F38 | PP_66, 0x47, true, 1); } +void vpsravd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, MM_0F38 | PP_66, 0x46, true, 0); } +void vpsrlvd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, MM_0F38 | PP_66, 0x45, true, 0); } +void vpsrlvq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, MM_0F38 | PP_66, 0x45, true, 1); } +void vcmppd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, MM_0F | PP_66, 0xC2, true, -1); db(imm); } +void vcmppd(const Xmm& xmm, const Operand& op, uint8 imm) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xC2, true, -1); db(imm); } +void vcmpps(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, MM_0F, 0xC2, true, -1); db(imm); } +void vcmpps(const Xmm& xmm, const Operand& op, uint8 imm) { opAVX_X_X_XM(xmm, xmm, op, MM_0F, 0xC2, true, -1); db(imm); } +void vcmpsd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, MM_0F | PP_F2, 0xC2, false, -1); db(imm); } +void vcmpsd(const Xmm& xmm, const Operand& op, uint8 imm) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_F2, 0xC2, false, -1); db(imm); } +void vcmpss(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, MM_0F 
| PP_F3, 0xC2, false, -1); db(imm); } +void vcmpss(const Xmm& xmm, const Operand& op, uint8 imm) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_F3, 0xC2, false, -1); db(imm); } +void vcvtsd2ss(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, MM_0F | PP_F2, 0x5A, false, -1); } +void vcvtsd2ss(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_F2, 0x5A, false, -1); } +void vcvtss2sd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, MM_0F | PP_F3, 0x5A, false, -1); } +void vcvtss2sd(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_F3, 0x5A, false, -1); } +void vinsertps(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, MM_0F3A | PP_66, 0x21, false, 0); db(imm); } +void vinsertps(const Xmm& xmm, const Operand& op, uint8 imm) { opAVX_X_X_XM(xmm, xmm, op, MM_0F3A | PP_66, 0x21, false, 0); db(imm); } +void vpacksswb(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, MM_0F | PP_66, 0x63, true, -1); } +void vpacksswb(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0x63, true, -1); } +void vpackssdw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, MM_0F | PP_66, 0x6B, true, -1); } +void vpackssdw(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0x6B, true, -1); } +void vpackuswb(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, MM_0F | PP_66, 0x67, true, -1); } +void vpackuswb(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0x67, true, -1); } +void vpackusdw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, MM_0F38 | PP_66, 0x2B, true, -1); } +void vpackusdw(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F38 | PP_66, 0x2B, true, -1); } +void vpaddb(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, MM_0F | 
PP_66, 0xFC, true, -1); } +void vpaddb(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xFC, true, -1); } +void vpaddw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, MM_0F | PP_66, 0xFD, true, -1); } +void vpaddw(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xFD, true, -1); } +void vpaddd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, MM_0F | PP_66, 0xFE, true, -1); } +void vpaddd(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xFE, true, -1); } +void vpaddq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, MM_0F | PP_66, 0xD4, true, -1); } +void vpaddq(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xD4, true, -1); } +void vpaddsb(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, MM_0F | PP_66, 0xEC, true, -1); } +void vpaddsb(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xEC, true, -1); } +void vpaddsw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, MM_0F | PP_66, 0xED, true, -1); } +void vpaddsw(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xED, true, -1); } +void vpaddusb(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, MM_0F | PP_66, 0xDC, true, -1); } +void vpaddusb(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xDC, true, -1); } +void vpaddusw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, MM_0F | PP_66, 0xDD, true, -1); } +void vpaddusw(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xDD, true, -1); } +void vpalignr(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, MM_0F3A | PP_66, 0x0F, true, -1); db(imm); } +void vpalignr(const Xmm& xmm, const Operand& op, uint8 imm) { 
opAVX_X_X_XM(xmm, xmm, op, MM_0F3A | PP_66, 0x0F, true, -1); db(imm); } +void vpand(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, MM_0F | PP_66, 0xDB, true, -1); } +void vpand(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xDB, true, -1); } +void vpandn(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, MM_0F | PP_66, 0xDF, true, -1); } +void vpandn(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xDF, true, -1); } +void vpavgb(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, MM_0F | PP_66, 0xE0, true, -1); } +void vpavgb(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xE0, true, -1); } +void vpavgw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, MM_0F | PP_66, 0xE3, true, -1); } +void vpavgw(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xE3, true, -1); } +void vpcmpeqb(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, MM_0F | PP_66, 0x74, true, -1); } +void vpcmpeqb(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0x74, true, -1); } +void vpcmpeqw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, MM_0F | PP_66, 0x75, true, -1); } +void vpcmpeqw(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0x75, true, -1); } +void vpcmpeqd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, MM_0F | PP_66, 0x76, true, -1); } +void vpcmpeqd(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0x76, true, -1); } +void vpcmpeqq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, MM_0F38 | PP_66, 0x29, true, -1); } +void vpcmpeqq(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F38 | PP_66, 0x29, true, -1); } +void vpcmpgtb(const Xmm& x1, const 
Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, MM_0F | PP_66, 0x64, true, -1); } +void vpcmpgtb(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0x64, true, -1); } +void vpcmpgtw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, MM_0F | PP_66, 0x65, true, -1); } +void vpcmpgtw(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0x65, true, -1); } +void vpcmpgtd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, MM_0F | PP_66, 0x66, true, -1); } +void vpcmpgtd(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0x66, true, -1); } +void vpcmpgtq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, MM_0F38 | PP_66, 0x37, true, -1); } +void vpcmpgtq(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F38 | PP_66, 0x37, true, -1); } +void vphaddw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, MM_0F38 | PP_66, 0x01, true, -1); } +void vphaddw(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F38 | PP_66, 0x01, true, -1); } +void vphaddd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, MM_0F38 | PP_66, 0x02, true, -1); } +void vphaddd(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F38 | PP_66, 0x02, true, -1); } +void vphaddsw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, MM_0F38 | PP_66, 0x03, true, -1); } +void vphaddsw(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F38 | PP_66, 0x03, true, -1); } +void vphsubw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, MM_0F38 | PP_66, 0x05, true, -1); } +void vphsubw(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F38 | PP_66, 0x05, true, -1); } +void vphsubd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, MM_0F38 | PP_66, 0x06, 
true, -1); } +void vphsubd(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F38 | PP_66, 0x06, true, -1); } +void vphsubsw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, MM_0F38 | PP_66, 0x07, true, -1); } +void vphsubsw(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F38 | PP_66, 0x07, true, -1); } +void vpmaddwd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, MM_0F | PP_66, 0xF5, true, -1); } +void vpmaddwd(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xF5, true, -1); } +void vpmaddubsw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, MM_0F38 | PP_66, 0x04, true, -1); } +void vpmaddubsw(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F38 | PP_66, 0x04, true, -1); } +void vpmaxsb(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, MM_0F38 | PP_66, 0x3C, true, -1); } +void vpmaxsb(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F38 | PP_66, 0x3C, true, -1); } +void vpmaxsw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, MM_0F | PP_66, 0xEE, true, -1); } +void vpmaxsw(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xEE, true, -1); } +void vpmaxsd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, MM_0F38 | PP_66, 0x3D, true, -1); } +void vpmaxsd(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F38 | PP_66, 0x3D, true, -1); } +void vpmaxub(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, MM_0F | PP_66, 0xDE, true, -1); } +void vpmaxub(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xDE, true, -1); } +void vpmaxuw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, MM_0F38 | PP_66, 0x3E, true, -1); } +void vpmaxuw(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, 
xmm, op, MM_0F38 | PP_66, 0x3E, true, -1); } +void vpmaxud(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, MM_0F38 | PP_66, 0x3F, true, -1); } +void vpmaxud(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F38 | PP_66, 0x3F, true, -1); } +void vpminsb(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, MM_0F38 | PP_66, 0x38, true, -1); } +void vpminsb(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F38 | PP_66, 0x38, true, -1); } +void vpminsw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, MM_0F | PP_66, 0xEA, true, -1); } +void vpminsw(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xEA, true, -1); } +void vpminsd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, MM_0F38 | PP_66, 0x39, true, -1); } +void vpminsd(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F38 | PP_66, 0x39, true, -1); } +void vpminub(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, MM_0F | PP_66, 0xDA, true, -1); } +void vpminub(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xDA, true, -1); } +void vpminuw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, MM_0F38 | PP_66, 0x3A, true, -1); } +void vpminuw(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F38 | PP_66, 0x3A, true, -1); } +void vpminud(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, MM_0F38 | PP_66, 0x3B, true, -1); } +void vpminud(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F38 | PP_66, 0x3B, true, -1); } +void vpmulhuw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, MM_0F | PP_66, 0xE4, true, -1); } +void vpmulhuw(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xE4, true, -1); } +void vpmulhrsw(const Xmm& x1, const Xmm& 
x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, MM_0F38 | PP_66, 0x0B, true, -1); } +void vpmulhrsw(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F38 | PP_66, 0x0B, true, -1); } +void vpmulhw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, MM_0F | PP_66, 0xE5, true, -1); } +void vpmulhw(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xE5, true, -1); } +void vpmullw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, MM_0F | PP_66, 0xD5, true, -1); } +void vpmullw(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xD5, true, -1); } +void vpmulld(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, MM_0F38 | PP_66, 0x40, true, -1); } +void vpmulld(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F38 | PP_66, 0x40, true, -1); } +void vpmuludq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, MM_0F | PP_66, 0xF4, false, -1); } +void vpmuludq(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xF4, false, -1); } +void vpmuldq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, MM_0F38 | PP_66, 0x28, true, -1); } +void vpmuldq(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F38 | PP_66, 0x28, true, -1); } +void vpor(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, MM_0F | PP_66, 0xEB, true, -1); } +void vpor(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xEB, true, -1); } +void vpsadbw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, MM_0F | PP_66, 0xF6, true, -1); } +void vpsadbw(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xF6, true, -1); } +void vpshufb(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, MM_0F38 | PP_66, 0x00, true, -1); } +void 
vpsignb(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, MM_0F38 | PP_66, 0x08, true, -1); } +void vpsignb(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F38 | PP_66, 0x08, true, -1); } +void vpsignw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, MM_0F38 | PP_66, 0x09, true, -1); } +void vpsignw(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F38 | PP_66, 0x09, true, -1); } +void vpsignd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, MM_0F38 | PP_66, 0x0A, true, -1); } +void vpsignd(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F38 | PP_66, 0x0A, true, -1); } +void vpsllw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, MM_0F | PP_66, 0xF1, true, -1); } +void vpsllw(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xF1, true, -1); } +void vpslld(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, MM_0F | PP_66, 0xF2, true, -1); } +void vpslld(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xF2, true, -1); } +void vpsllq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, MM_0F | PP_66, 0xF3, true, -1); } +void vpsllq(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xF3, true, -1); } +void vpsraw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, MM_0F | PP_66, 0xE1, true, -1); } +void vpsraw(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xE1, true, -1); } +void vpsrad(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, MM_0F | PP_66, 0xE2, true, -1); } +void vpsrad(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xE2, true, -1); } +void vpsrlw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, MM_0F | PP_66, 0xD1, true, 
-1); } +void vpsrlw(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xD1, true, -1); } +void vpsrld(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, MM_0F | PP_66, 0xD2, true, -1); } +void vpsrld(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xD2, true, -1); } +void vpsrlq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, MM_0F | PP_66, 0xD3, true, -1); } +void vpsrlq(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xD3, true, -1); } +void vpsubb(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, MM_0F | PP_66, 0xF8, true, -1); } +void vpsubb(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xF8, true, -1); } +void vpsubw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, MM_0F | PP_66, 0xF9, true, -1); } +void vpsubw(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xF9, true, -1); } +void vpsubd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, MM_0F | PP_66, 0xFA, true, -1); } +void vpsubd(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xFA, true, -1); } +void vpsubq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, MM_0F | PP_66, 0xFB, true, -1); } +void vpsubq(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xFB, true, -1); } +void vpsubsb(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, MM_0F | PP_66, 0xE8, true, -1); } +void vpsubsb(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xE8, true, -1); } +void vpsubsw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, MM_0F | PP_66, 0xE9, true, -1); } +void vpsubsw(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xE9, true, -1); } +void 
vpsubusb(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, MM_0F | PP_66, 0xD8, true, -1); } +void vpsubusb(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xD8, true, -1); } +void vpsubusw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, MM_0F | PP_66, 0xD9, true, -1); } +void vpsubusw(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xD9, true, -1); } +void vpunpckhbw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, MM_0F | PP_66, 0x68, true, -1); } +void vpunpckhbw(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0x68, true, -1); } +void vpunpckhwd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, MM_0F | PP_66, 0x69, true, -1); } +void vpunpckhwd(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0x69, true, -1); } +void vpunpckhdq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, MM_0F | PP_66, 0x6A, true, -1); } +void vpunpckhdq(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0x6A, true, -1); } +void vpunpckhqdq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, MM_0F | PP_66, 0x6D, true, -1); } +void vpunpckhqdq(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0x6D, true, -1); } +void vpunpcklbw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, MM_0F | PP_66, 0x60, true, -1); } +void vpunpcklbw(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0x60, true, -1); } +void vpunpcklwd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, MM_0F | PP_66, 0x61, true, -1); } +void vpunpcklwd(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0x61, true, -1); } +void vpunpckldq(const Xmm& x1, const Xmm& x2, const Operand& op) { 
opAVX_X_X_XM(x1, x2, op, MM_0F | PP_66, 0x62, true, -1); } +void vpunpckldq(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0x62, true, -1); } +void vpunpcklqdq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, MM_0F | PP_66, 0x6C, true, -1); } +void vpunpcklqdq(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0x6C, true, -1); } +void vpxor(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, MM_0F | PP_66, 0xEF, true, -1); } +void vpxor(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xEF, true, -1); } +void vrcpss(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, MM_0F | PP_F3, 0x53, false, -1); } +void vrcpss(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_F3, 0x53, false, -1); } +void vrsqrtss(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, MM_0F | PP_F3, 0x52, false, -1); } +void vrsqrtss(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_F3, 0x52, false, -1); } +void vshufpd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, MM_0F | PP_66, 0xC6, true, -1); db(imm); } +void vshufpd(const Xmm& xmm, const Operand& op, uint8 imm) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xC6, true, -1); db(imm); } +void vshufps(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, MM_0F, 0xC6, true, -1); db(imm); } +void vshufps(const Xmm& xmm, const Operand& op, uint8 imm) { opAVX_X_X_XM(xmm, xmm, op, MM_0F, 0xC6, true, -1); db(imm); } +void vsqrtsd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, MM_0F | PP_F2, 0x51, false, -1); } +void vsqrtsd(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_F2, 0x51, false, -1); } +void vsqrtss(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, MM_0F | 
PP_F3, 0x51, false, -1); } +void vsqrtss(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_F3, 0x51, false, -1); } +void vunpckhpd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, MM_0F | PP_66, 0x15, true, -1); } +void vunpckhpd(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0x15, true, -1); } +void vunpckhps(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, MM_0F, 0x15, true, -1); } +void vunpckhps(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F, 0x15, true, -1); } +void vunpcklpd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, MM_0F | PP_66, 0x14, true, -1); } +void vunpcklpd(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0x14, true, -1); } +void vunpcklps(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, MM_0F, 0x14, true, -1); } +void vunpcklps(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F, 0x14, true, -1); } +void vaeskeygenassist(const Xmm& xm, const Operand& op, uint8 imm) { opAVX_X_XM_IMM(xm, op, MM_0F3A | PP_66, 0xDF, false, 0, imm); } +void vroundpd(const Xmm& xm, const Operand& op, uint8 imm) { opAVX_X_XM_IMM(xm, op, MM_0F3A | PP_66, 0x09, true, 0, imm); } +void vroundps(const Xmm& xm, const Operand& op, uint8 imm) { opAVX_X_XM_IMM(xm, op, MM_0F3A | PP_66, 0x08, true, 0, imm); } +void vpermilpd(const Xmm& xm, const Operand& op, uint8 imm) { opAVX_X_XM_IMM(xm, op, MM_0F3A | PP_66, 0x05, true, 0, imm); } +void vpermilps(const Xmm& xm, const Operand& op, uint8 imm) { opAVX_X_XM_IMM(xm, op, MM_0F3A | PP_66, 0x04, true, 0, imm); } +void vpcmpestri(const Xmm& xm, const Operand& op, uint8 imm) { opAVX_X_XM_IMM(xm, op, MM_0F3A | PP_66, 0x61, false, 0, imm); } +void vpcmpestrm(const Xmm& xm, const Operand& op, uint8 imm) { opAVX_X_XM_IMM(xm, op, MM_0F3A | PP_66, 0x60, false, 0, imm); } +void vpcmpistri(const Xmm& xm, const 
Operand& op, uint8 imm) { opAVX_X_XM_IMM(xm, op, MM_0F3A | PP_66, 0x63, false, 0, imm); } +void vpcmpistrm(const Xmm& xm, const Operand& op, uint8 imm) { opAVX_X_XM_IMM(xm, op, MM_0F3A | PP_66, 0x62, false, 0, imm); } +void vtestps(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F38 | PP_66, 0x0E, true, 0); } +void vtestpd(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F38 | PP_66, 0x0F, true, 0); } +void vcomisd(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F | PP_66, 0x2F, false, -1); } +void vcomiss(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F, 0x2F, false, -1); } +void vcvtdq2ps(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F, 0x5B, true, -1); } +void vcvtps2dq(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F | PP_66, 0x5B, true, -1); } +void vcvttps2dq(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F | PP_F3, 0x5B, true, -1); } +void vmovapd(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F | PP_66, 0x28, true, -1); } +void vmovaps(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F, 0x28, true, -1); } +void vmovddup(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F | PP_F2, 0x12, true, -1); } +void vmovdqa(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F | PP_66, 0x6F, true, -1); } +void vmovdqu(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F | PP_F3, 0x6F, true, -1); } +void vmovshdup(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F | PP_F3, 0x16, true, -1); } +void vmovsldup(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F | PP_F3, 0x12, true, -1); } +void vmovupd(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F | PP_66, 0x10, true, -1); } +void vmovups(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F, 0x10, true, -1); } +void vpabsb(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F38 | 
PP_66, 0x1C, true, -1); } +void vpabsw(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F38 | PP_66, 0x1D, true, -1); } +void vpabsd(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F38 | PP_66, 0x1E, true, -1); } +void vphminposuw(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F38 | PP_66, 0x41, false, -1); } +void vpmovsxbw(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F38 | PP_66, 0x20, true, -1); } +void vpmovsxbd(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F38 | PP_66, 0x21, true, -1); } +void vpmovsxbq(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F38 | PP_66, 0x22, true, -1); } +void vpmovsxwd(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F38 | PP_66, 0x23, true, -1); } +void vpmovsxwq(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F38 | PP_66, 0x24, true, -1); } +void vpmovsxdq(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F38 | PP_66, 0x25, true, -1); } +void vpmovzxbw(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F38 | PP_66, 0x30, true, -1); } +void vpmovzxbd(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F38 | PP_66, 0x31, true, -1); } +void vpmovzxbq(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F38 | PP_66, 0x32, true, -1); } +void vpmovzxwd(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F38 | PP_66, 0x33, true, -1); } +void vpmovzxwq(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F38 | PP_66, 0x34, true, -1); } +void vpmovzxdq(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F38 | PP_66, 0x35, true, -1); } +void vpshufd(const Xmm& xm, const Operand& op, uint8 imm) { opAVX_X_XM_IMM(xm, op, MM_0F | PP_66, 0x70, true, -1, imm); } +void vpshufhw(const Xmm& xm, const Operand& op, uint8 imm) { opAVX_X_XM_IMM(xm, op, MM_0F | PP_F3, 0x70, true, -1, imm); } +void vpshuflw(const Xmm& xm, const Operand& op, uint8 imm) { 
opAVX_X_XM_IMM(xm, op, MM_0F | PP_F2, 0x70, true, -1, imm); } +void vptest(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F38 | PP_66, 0x17, false, -1); } +void vrcpps(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F, 0x53, true, -1); } +void vrsqrtps(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F, 0x52, true, -1); } +void vsqrtpd(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F | PP_66, 0x51, true, -1); } +void vsqrtps(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F, 0x51, true, -1); } +void vucomisd(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F | PP_66, 0x2E, false, -1); } +void vucomiss(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F, 0x2E, false, -1); } +void vmovapd(const Address& addr, const Xmm& xmm) { opAVX_X_XM_IMM(xmm, addr, MM_0F | PP_66, 0x29, true, -1); } +void vmovaps(const Address& addr, const Xmm& xmm) { opAVX_X_XM_IMM(xmm, addr, MM_0F, 0x29, true, -1); } +void vmovdqa(const Address& addr, const Xmm& xmm) { opAVX_X_XM_IMM(xmm, addr, MM_0F | PP_66, 0x7F, true, -1); } +void vmovdqu(const Address& addr, const Xmm& xmm) { opAVX_X_XM_IMM(xmm, addr, MM_0F | PP_F3, 0x7F, true, -1); } +void vmovupd(const Address& addr, const Xmm& xmm) { opAVX_X_XM_IMM(xmm, addr, MM_0F | PP_66, 0x11, true, -1); } +void vmovups(const Address& addr, const Xmm& xmm) { opAVX_X_XM_IMM(xmm, addr, MM_0F, 0x11, true, -1); } +void vaddsubpd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F | PP_66, 0xD0, true, -1); } +void vaddsubps(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F | PP_F2, 0xD0, true, -1); } +void vhaddpd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F | PP_66, 0x7C, true, -1); } +void vhaddps(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, 
op2, MM_0F | PP_F2, 0x7C, true, -1); } +void vhsubpd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F | PP_66, 0x7D, true, -1); } +void vhsubps(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F | PP_F2, 0x7D, true, -1); } +void vaesenc(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F38 | PP_66, 0xDC, false, 0); } +void vaesenclast(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F38 | PP_66, 0xDD, false, 0); } +void vaesdec(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F38 | PP_66, 0xDE, false, 0); } +void vaesdeclast(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F38 | PP_66, 0xDF, false, 0); } +void vmaskmovps(const Xmm& x1, const Xmm& x2, const Address& addr) { opAVX_X_X_XM(x1, x2, addr, MM_0F38 | PP_66, 0x2C, true, 0); } +void vmaskmovps(const Address& addr, const Xmm& x1, const Xmm& x2) { opAVX_X_X_XM(x2, x1, addr, MM_0F38 | PP_66, 0x2E, true, 0); } +void vmaskmovpd(const Xmm& x1, const Xmm& x2, const Address& addr) { opAVX_X_X_XM(x1, x2, addr, MM_0F38 | PP_66, 0x2D, true, 0); } +void vmaskmovpd(const Address& addr, const Xmm& x1, const Xmm& x2) { opAVX_X_X_XM(x2, x1, addr, MM_0F38 | PP_66, 0x2F, true, 0); } +void vpmaskmovd(const Xmm& x1, const Xmm& x2, const Address& addr) { opAVX_X_X_XM(x1, x2, addr, MM_0F38 | PP_66, 0x8C, true, 0); } +void vpmaskmovd(const Address& addr, const Xmm& x1, const Xmm& x2) { opAVX_X_X_XM(x2, x1, addr, MM_0F38 | PP_66, 0x8E, true, 0); } +void vpmaskmovq(const Xmm& x1, const Xmm& x2, const Address& addr) { opAVX_X_X_XM(x1, x2, addr, MM_0F38 | PP_66, 0x8C, true, 1); } +void vpmaskmovq(const Address& addr, const Xmm& x1, const Xmm& x2) { opAVX_X_X_XM(x2, x1, addr, MM_0F38 | PP_66, 0x8E, true, 1); } +void 
vpermd(const Ymm& y1, const Ymm& y2, const Operand& op) { opAVX_X_X_XM(y1, y2, op, MM_0F38 | PP_66, 0x36, true, 0); } +void vpermps(const Ymm& y1, const Ymm& y2, const Operand& op) { opAVX_X_X_XM(y1, y2, op, MM_0F38 | PP_66, 0x16, true, 0); } +void vpermq(const Ymm& y, const Operand& op, uint8 imm) { opAVX_X_XM_IMM(y, op, MM_0F3A | PP_66, 0x00, true, 1, imm); } +void vpermpd(const Ymm& y, const Operand& op, uint8 imm) { opAVX_X_XM_IMM(y, op, MM_0F3A | PP_66, 0x01, true, 1, imm); } +void cmpeqpd(const Xmm& x, const Operand& op) { cmppd(x, op, 0); } +void vcmpeqpd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmppd(x1, x2, op, 0); } +void vcmpeqpd(const Xmm& x, const Operand& op) { vcmppd(x, op, 0); } +void cmpltpd(const Xmm& x, const Operand& op) { cmppd(x, op, 1); } +void vcmpltpd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmppd(x1, x2, op, 1); } +void vcmpltpd(const Xmm& x, const Operand& op) { vcmppd(x, op, 1); } +void cmplepd(const Xmm& x, const Operand& op) { cmppd(x, op, 2); } +void vcmplepd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmppd(x1, x2, op, 2); } +void vcmplepd(const Xmm& x, const Operand& op) { vcmppd(x, op, 2); } +void cmpunordpd(const Xmm& x, const Operand& op) { cmppd(x, op, 3); } +void vcmpunordpd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmppd(x1, x2, op, 3); } +void vcmpunordpd(const Xmm& x, const Operand& op) { vcmppd(x, op, 3); } +void cmpneqpd(const Xmm& x, const Operand& op) { cmppd(x, op, 4); } +void vcmpneqpd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmppd(x1, x2, op, 4); } +void vcmpneqpd(const Xmm& x, const Operand& op) { vcmppd(x, op, 4); } +void cmpnltpd(const Xmm& x, const Operand& op) { cmppd(x, op, 5); } +void vcmpnltpd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmppd(x1, x2, op, 5); } +void vcmpnltpd(const Xmm& x, const Operand& op) { vcmppd(x, op, 5); } +void cmpnlepd(const Xmm& x, const Operand& op) { cmppd(x, op, 6); } +void vcmpnlepd(const Xmm& x1, const Xmm& x2, const 
Operand& op) { vcmppd(x1, x2, op, 6); } +void vcmpnlepd(const Xmm& x, const Operand& op) { vcmppd(x, op, 6); } +void cmpordpd(const Xmm& x, const Operand& op) { cmppd(x, op, 7); } +void vcmpordpd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmppd(x1, x2, op, 7); } +void vcmpordpd(const Xmm& x, const Operand& op) { vcmppd(x, op, 7); } +void vcmpeq_uqpd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmppd(x1, x2, op, 8); } +void vcmpeq_uqpd(const Xmm& x, const Operand& op) { vcmppd(x, op, 8); } +void vcmpngepd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmppd(x1, x2, op, 9); } +void vcmpngepd(const Xmm& x, const Operand& op) { vcmppd(x, op, 9); } +void vcmpngtpd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmppd(x1, x2, op, 10); } +void vcmpngtpd(const Xmm& x, const Operand& op) { vcmppd(x, op, 10); } +void vcmpfalsepd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmppd(x1, x2, op, 11); } +void vcmpfalsepd(const Xmm& x, const Operand& op) { vcmppd(x, op, 11); } +void vcmpneq_oqpd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmppd(x1, x2, op, 12); } +void vcmpneq_oqpd(const Xmm& x, const Operand& op) { vcmppd(x, op, 12); } +void vcmpgepd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmppd(x1, x2, op, 13); } +void vcmpgepd(const Xmm& x, const Operand& op) { vcmppd(x, op, 13); } +void vcmpgtpd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmppd(x1, x2, op, 14); } +void vcmpgtpd(const Xmm& x, const Operand& op) { vcmppd(x, op, 14); } +void vcmptruepd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmppd(x1, x2, op, 15); } +void vcmptruepd(const Xmm& x, const Operand& op) { vcmppd(x, op, 15); } +void vcmpeq_ospd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmppd(x1, x2, op, 16); } +void vcmpeq_ospd(const Xmm& x, const Operand& op) { vcmppd(x, op, 16); } +void vcmplt_oqpd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmppd(x1, x2, op, 17); } +void vcmplt_oqpd(const Xmm& x, const Operand& op) { vcmppd(x, 
op, 17); } +void vcmple_oqpd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmppd(x1, x2, op, 18); } +void vcmple_oqpd(const Xmm& x, const Operand& op) { vcmppd(x, op, 18); } +void vcmpunord_spd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmppd(x1, x2, op, 19); } +void vcmpunord_spd(const Xmm& x, const Operand& op) { vcmppd(x, op, 19); } +void vcmpneq_uspd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmppd(x1, x2, op, 20); } +void vcmpneq_uspd(const Xmm& x, const Operand& op) { vcmppd(x, op, 20); } +void vcmpnlt_uqpd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmppd(x1, x2, op, 21); } +void vcmpnlt_uqpd(const Xmm& x, const Operand& op) { vcmppd(x, op, 21); } +void vcmpnle_uqpd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmppd(x1, x2, op, 22); } +void vcmpnle_uqpd(const Xmm& x, const Operand& op) { vcmppd(x, op, 22); } +void vcmpord_spd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmppd(x1, x2, op, 23); } +void vcmpord_spd(const Xmm& x, const Operand& op) { vcmppd(x, op, 23); } +void vcmpeq_uspd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmppd(x1, x2, op, 24); } +void vcmpeq_uspd(const Xmm& x, const Operand& op) { vcmppd(x, op, 24); } +void vcmpnge_uqpd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmppd(x1, x2, op, 25); } +void vcmpnge_uqpd(const Xmm& x, const Operand& op) { vcmppd(x, op, 25); } +void vcmpngt_uqpd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmppd(x1, x2, op, 26); } +void vcmpngt_uqpd(const Xmm& x, const Operand& op) { vcmppd(x, op, 26); } +void vcmpfalse_ospd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmppd(x1, x2, op, 27); } +void vcmpfalse_ospd(const Xmm& x, const Operand& op) { vcmppd(x, op, 27); } +void vcmpneq_ospd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmppd(x1, x2, op, 28); } +void vcmpneq_ospd(const Xmm& x, const Operand& op) { vcmppd(x, op, 28); } +void vcmpge_oqpd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmppd(x1, x2, op, 29); } +void 
vcmpge_oqpd(const Xmm& x, const Operand& op) { vcmppd(x, op, 29); } +void vcmpgt_oqpd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmppd(x1, x2, op, 30); } +void vcmpgt_oqpd(const Xmm& x, const Operand& op) { vcmppd(x, op, 30); } +void vcmptrue_uspd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmppd(x1, x2, op, 31); } +void vcmptrue_uspd(const Xmm& x, const Operand& op) { vcmppd(x, op, 31); } +void cmpeqps(const Xmm& x, const Operand& op) { cmpps(x, op, 0); } +void vcmpeqps(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpps(x1, x2, op, 0); } +void vcmpeqps(const Xmm& x, const Operand& op) { vcmpps(x, op, 0); } +void cmpltps(const Xmm& x, const Operand& op) { cmpps(x, op, 1); } +void vcmpltps(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpps(x1, x2, op, 1); } +void vcmpltps(const Xmm& x, const Operand& op) { vcmpps(x, op, 1); } +void cmpleps(const Xmm& x, const Operand& op) { cmpps(x, op, 2); } +void vcmpleps(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpps(x1, x2, op, 2); } +void vcmpleps(const Xmm& x, const Operand& op) { vcmpps(x, op, 2); } +void cmpunordps(const Xmm& x, const Operand& op) { cmpps(x, op, 3); } +void vcmpunordps(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpps(x1, x2, op, 3); } +void vcmpunordps(const Xmm& x, const Operand& op) { vcmpps(x, op, 3); } +void cmpneqps(const Xmm& x, const Operand& op) { cmpps(x, op, 4); } +void vcmpneqps(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpps(x1, x2, op, 4); } +void vcmpneqps(const Xmm& x, const Operand& op) { vcmpps(x, op, 4); } +void cmpnltps(const Xmm& x, const Operand& op) { cmpps(x, op, 5); } +void vcmpnltps(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpps(x1, x2, op, 5); } +void vcmpnltps(const Xmm& x, const Operand& op) { vcmpps(x, op, 5); } +void cmpnleps(const Xmm& x, const Operand& op) { cmpps(x, op, 6); } +void vcmpnleps(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpps(x1, x2, op, 6); } +void vcmpnleps(const Xmm& x, const 
Operand& op) { vcmpps(x, op, 6); } +void cmpordps(const Xmm& x, const Operand& op) { cmpps(x, op, 7); } +void vcmpordps(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpps(x1, x2, op, 7); } +void vcmpordps(const Xmm& x, const Operand& op) { vcmpps(x, op, 7); } +void vcmpeq_uqps(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpps(x1, x2, op, 8); } +void vcmpeq_uqps(const Xmm& x, const Operand& op) { vcmpps(x, op, 8); } +void vcmpngeps(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpps(x1, x2, op, 9); } +void vcmpngeps(const Xmm& x, const Operand& op) { vcmpps(x, op, 9); } +void vcmpngtps(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpps(x1, x2, op, 10); } +void vcmpngtps(const Xmm& x, const Operand& op) { vcmpps(x, op, 10); } +void vcmpfalseps(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpps(x1, x2, op, 11); } +void vcmpfalseps(const Xmm& x, const Operand& op) { vcmpps(x, op, 11); } +void vcmpneq_oqps(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpps(x1, x2, op, 12); } +void vcmpneq_oqps(const Xmm& x, const Operand& op) { vcmpps(x, op, 12); } +void vcmpgeps(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpps(x1, x2, op, 13); } +void vcmpgeps(const Xmm& x, const Operand& op) { vcmpps(x, op, 13); } +void vcmpgtps(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpps(x1, x2, op, 14); } +void vcmpgtps(const Xmm& x, const Operand& op) { vcmpps(x, op, 14); } +void vcmptrueps(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpps(x1, x2, op, 15); } +void vcmptrueps(const Xmm& x, const Operand& op) { vcmpps(x, op, 15); } +void vcmpeq_osps(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpps(x1, x2, op, 16); } +void vcmpeq_osps(const Xmm& x, const Operand& op) { vcmpps(x, op, 16); } +void vcmplt_oqps(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpps(x1, x2, op, 17); } +void vcmplt_oqps(const Xmm& x, const Operand& op) { vcmpps(x, op, 17); } +void vcmple_oqps(const Xmm& x1, const Xmm& x2, const Operand& op) 
{ vcmpps(x1, x2, op, 18); } +void vcmple_oqps(const Xmm& x, const Operand& op) { vcmpps(x, op, 18); } +void vcmpunord_sps(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpps(x1, x2, op, 19); } +void vcmpunord_sps(const Xmm& x, const Operand& op) { vcmpps(x, op, 19); } +void vcmpneq_usps(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpps(x1, x2, op, 20); } +void vcmpneq_usps(const Xmm& x, const Operand& op) { vcmpps(x, op, 20); } +void vcmpnlt_uqps(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpps(x1, x2, op, 21); } +void vcmpnlt_uqps(const Xmm& x, const Operand& op) { vcmpps(x, op, 21); } +void vcmpnle_uqps(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpps(x1, x2, op, 22); } +void vcmpnle_uqps(const Xmm& x, const Operand& op) { vcmpps(x, op, 22); } +void vcmpord_sps(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpps(x1, x2, op, 23); } +void vcmpord_sps(const Xmm& x, const Operand& op) { vcmpps(x, op, 23); } +void vcmpeq_usps(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpps(x1, x2, op, 24); } +void vcmpeq_usps(const Xmm& x, const Operand& op) { vcmpps(x, op, 24); } +void vcmpnge_uqps(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpps(x1, x2, op, 25); } +void vcmpnge_uqps(const Xmm& x, const Operand& op) { vcmpps(x, op, 25); } +void vcmpngt_uqps(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpps(x1, x2, op, 26); } +void vcmpngt_uqps(const Xmm& x, const Operand& op) { vcmpps(x, op, 26); } +void vcmpfalse_osps(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpps(x1, x2, op, 27); } +void vcmpfalse_osps(const Xmm& x, const Operand& op) { vcmpps(x, op, 27); } +void vcmpneq_osps(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpps(x1, x2, op, 28); } +void vcmpneq_osps(const Xmm& x, const Operand& op) { vcmpps(x, op, 28); } +void vcmpge_oqps(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpps(x1, x2, op, 29); } +void vcmpge_oqps(const Xmm& x, const Operand& op) { vcmpps(x, op, 29); } +void 
vcmpgt_oqps(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpps(x1, x2, op, 30); } +void vcmpgt_oqps(const Xmm& x, const Operand& op) { vcmpps(x, op, 30); } +void vcmptrue_usps(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpps(x1, x2, op, 31); } +void vcmptrue_usps(const Xmm& x, const Operand& op) { vcmpps(x, op, 31); } +void cmpeqsd(const Xmm& x, const Operand& op) { cmpsd(x, op, 0); } +void vcmpeqsd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpsd(x1, x2, op, 0); } +void vcmpeqsd(const Xmm& x, const Operand& op) { vcmpsd(x, op, 0); } +void cmpltsd(const Xmm& x, const Operand& op) { cmpsd(x, op, 1); } +void vcmpltsd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpsd(x1, x2, op, 1); } +void vcmpltsd(const Xmm& x, const Operand& op) { vcmpsd(x, op, 1); } +void cmplesd(const Xmm& x, const Operand& op) { cmpsd(x, op, 2); } +void vcmplesd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpsd(x1, x2, op, 2); } +void vcmplesd(const Xmm& x, const Operand& op) { vcmpsd(x, op, 2); } +void cmpunordsd(const Xmm& x, const Operand& op) { cmpsd(x, op, 3); } +void vcmpunordsd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpsd(x1, x2, op, 3); } +void vcmpunordsd(const Xmm& x, const Operand& op) { vcmpsd(x, op, 3); } +void cmpneqsd(const Xmm& x, const Operand& op) { cmpsd(x, op, 4); } +void vcmpneqsd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpsd(x1, x2, op, 4); } +void vcmpneqsd(const Xmm& x, const Operand& op) { vcmpsd(x, op, 4); } +void cmpnltsd(const Xmm& x, const Operand& op) { cmpsd(x, op, 5); } +void vcmpnltsd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpsd(x1, x2, op, 5); } +void vcmpnltsd(const Xmm& x, const Operand& op) { vcmpsd(x, op, 5); } +void cmpnlesd(const Xmm& x, const Operand& op) { cmpsd(x, op, 6); } +void vcmpnlesd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpsd(x1, x2, op, 6); } +void vcmpnlesd(const Xmm& x, const Operand& op) { vcmpsd(x, op, 6); } +void cmpordsd(const Xmm& x, const 
Operand& op) { cmpsd(x, op, 7); } +void vcmpordsd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpsd(x1, x2, op, 7); } +void vcmpordsd(const Xmm& x, const Operand& op) { vcmpsd(x, op, 7); } +void vcmpeq_uqsd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpsd(x1, x2, op, 8); } +void vcmpeq_uqsd(const Xmm& x, const Operand& op) { vcmpsd(x, op, 8); } +void vcmpngesd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpsd(x1, x2, op, 9); } +void vcmpngesd(const Xmm& x, const Operand& op) { vcmpsd(x, op, 9); } +void vcmpngtsd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpsd(x1, x2, op, 10); } +void vcmpngtsd(const Xmm& x, const Operand& op) { vcmpsd(x, op, 10); } +void vcmpfalsesd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpsd(x1, x2, op, 11); } +void vcmpfalsesd(const Xmm& x, const Operand& op) { vcmpsd(x, op, 11); } +void vcmpneq_oqsd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpsd(x1, x2, op, 12); } +void vcmpneq_oqsd(const Xmm& x, const Operand& op) { vcmpsd(x, op, 12); } +void vcmpgesd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpsd(x1, x2, op, 13); } +void vcmpgesd(const Xmm& x, const Operand& op) { vcmpsd(x, op, 13); } +void vcmpgtsd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpsd(x1, x2, op, 14); } +void vcmpgtsd(const Xmm& x, const Operand& op) { vcmpsd(x, op, 14); } +void vcmptruesd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpsd(x1, x2, op, 15); } +void vcmptruesd(const Xmm& x, const Operand& op) { vcmpsd(x, op, 15); } +void vcmpeq_ossd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpsd(x1, x2, op, 16); } +void vcmpeq_ossd(const Xmm& x, const Operand& op) { vcmpsd(x, op, 16); } +void vcmplt_oqsd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpsd(x1, x2, op, 17); } +void vcmplt_oqsd(const Xmm& x, const Operand& op) { vcmpsd(x, op, 17); } +void vcmple_oqsd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpsd(x1, x2, op, 18); } +void vcmple_oqsd(const Xmm& x, const 
Operand& op) { vcmpsd(x, op, 18); } +void vcmpunord_ssd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpsd(x1, x2, op, 19); } +void vcmpunord_ssd(const Xmm& x, const Operand& op) { vcmpsd(x, op, 19); } +void vcmpneq_ussd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpsd(x1, x2, op, 20); } +void vcmpneq_ussd(const Xmm& x, const Operand& op) { vcmpsd(x, op, 20); } +void vcmpnlt_uqsd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpsd(x1, x2, op, 21); } +void vcmpnlt_uqsd(const Xmm& x, const Operand& op) { vcmpsd(x, op, 21); } +void vcmpnle_uqsd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpsd(x1, x2, op, 22); } +void vcmpnle_uqsd(const Xmm& x, const Operand& op) { vcmpsd(x, op, 22); } +void vcmpord_ssd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpsd(x1, x2, op, 23); } +void vcmpord_ssd(const Xmm& x, const Operand& op) { vcmpsd(x, op, 23); } +void vcmpeq_ussd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpsd(x1, x2, op, 24); } +void vcmpeq_ussd(const Xmm& x, const Operand& op) { vcmpsd(x, op, 24); } +void vcmpnge_uqsd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpsd(x1, x2, op, 25); } +void vcmpnge_uqsd(const Xmm& x, const Operand& op) { vcmpsd(x, op, 25); } +void vcmpngt_uqsd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpsd(x1, x2, op, 26); } +void vcmpngt_uqsd(const Xmm& x, const Operand& op) { vcmpsd(x, op, 26); } +void vcmpfalse_ossd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpsd(x1, x2, op, 27); } +void vcmpfalse_ossd(const Xmm& x, const Operand& op) { vcmpsd(x, op, 27); } +void vcmpneq_ossd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpsd(x1, x2, op, 28); } +void vcmpneq_ossd(const Xmm& x, const Operand& op) { vcmpsd(x, op, 28); } +void vcmpge_oqsd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpsd(x1, x2, op, 29); } +void vcmpge_oqsd(const Xmm& x, const Operand& op) { vcmpsd(x, op, 29); } +void vcmpgt_oqsd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpsd(x1, 
x2, op, 30); } +void vcmpgt_oqsd(const Xmm& x, const Operand& op) { vcmpsd(x, op, 30); } +void vcmptrue_ussd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpsd(x1, x2, op, 31); } +void vcmptrue_ussd(const Xmm& x, const Operand& op) { vcmpsd(x, op, 31); } +void cmpeqss(const Xmm& x, const Operand& op) { cmpss(x, op, 0); } +void vcmpeqss(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpss(x1, x2, op, 0); } +void vcmpeqss(const Xmm& x, const Operand& op) { vcmpss(x, op, 0); } +void cmpltss(const Xmm& x, const Operand& op) { cmpss(x, op, 1); } +void vcmpltss(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpss(x1, x2, op, 1); } +void vcmpltss(const Xmm& x, const Operand& op) { vcmpss(x, op, 1); } +void cmpless(const Xmm& x, const Operand& op) { cmpss(x, op, 2); } +void vcmpless(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpss(x1, x2, op, 2); } +void vcmpless(const Xmm& x, const Operand& op) { vcmpss(x, op, 2); } +void cmpunordss(const Xmm& x, const Operand& op) { cmpss(x, op, 3); } +void vcmpunordss(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpss(x1, x2, op, 3); } +void vcmpunordss(const Xmm& x, const Operand& op) { vcmpss(x, op, 3); } +void cmpneqss(const Xmm& x, const Operand& op) { cmpss(x, op, 4); } +void vcmpneqss(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpss(x1, x2, op, 4); } +void vcmpneqss(const Xmm& x, const Operand& op) { vcmpss(x, op, 4); } +void cmpnltss(const Xmm& x, const Operand& op) { cmpss(x, op, 5); } +void vcmpnltss(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpss(x1, x2, op, 5); } +void vcmpnltss(const Xmm& x, const Operand& op) { vcmpss(x, op, 5); } +void cmpnless(const Xmm& x, const Operand& op) { cmpss(x, op, 6); } +void vcmpnless(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpss(x1, x2, op, 6); } +void vcmpnless(const Xmm& x, const Operand& op) { vcmpss(x, op, 6); } +void cmpordss(const Xmm& x, const Operand& op) { cmpss(x, op, 7); } +void vcmpordss(const Xmm& x1, const Xmm& x2, 
const Operand& op) { vcmpss(x1, x2, op, 7); } +void vcmpordss(const Xmm& x, const Operand& op) { vcmpss(x, op, 7); } +void vcmpeq_uqss(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpss(x1, x2, op, 8); } +void vcmpeq_uqss(const Xmm& x, const Operand& op) { vcmpss(x, op, 8); } +void vcmpngess(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpss(x1, x2, op, 9); } +void vcmpngess(const Xmm& x, const Operand& op) { vcmpss(x, op, 9); } +void vcmpngtss(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpss(x1, x2, op, 10); } +void vcmpngtss(const Xmm& x, const Operand& op) { vcmpss(x, op, 10); } +void vcmpfalsess(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpss(x1, x2, op, 11); } +void vcmpfalsess(const Xmm& x, const Operand& op) { vcmpss(x, op, 11); } +void vcmpneq_oqss(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpss(x1, x2, op, 12); } +void vcmpneq_oqss(const Xmm& x, const Operand& op) { vcmpss(x, op, 12); } +void vcmpgess(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpss(x1, x2, op, 13); } +void vcmpgess(const Xmm& x, const Operand& op) { vcmpss(x, op, 13); } +void vcmpgtss(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpss(x1, x2, op, 14); } +void vcmpgtss(const Xmm& x, const Operand& op) { vcmpss(x, op, 14); } +void vcmptruess(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpss(x1, x2, op, 15); } +void vcmptruess(const Xmm& x, const Operand& op) { vcmpss(x, op, 15); } +void vcmpeq_osss(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpss(x1, x2, op, 16); } +void vcmpeq_osss(const Xmm& x, const Operand& op) { vcmpss(x, op, 16); } +void vcmplt_oqss(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpss(x1, x2, op, 17); } +void vcmplt_oqss(const Xmm& x, const Operand& op) { vcmpss(x, op, 17); } +void vcmple_oqss(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpss(x1, x2, op, 18); } +void vcmple_oqss(const Xmm& x, const Operand& op) { vcmpss(x, op, 18); } +void vcmpunord_sss(const Xmm& x1, const Xmm& 
x2, const Operand& op) { vcmpss(x1, x2, op, 19); } +void vcmpunord_sss(const Xmm& x, const Operand& op) { vcmpss(x, op, 19); } +void vcmpneq_usss(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpss(x1, x2, op, 20); } +void vcmpneq_usss(const Xmm& x, const Operand& op) { vcmpss(x, op, 20); } +void vcmpnlt_uqss(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpss(x1, x2, op, 21); } +void vcmpnlt_uqss(const Xmm& x, const Operand& op) { vcmpss(x, op, 21); } +void vcmpnle_uqss(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpss(x1, x2, op, 22); } +void vcmpnle_uqss(const Xmm& x, const Operand& op) { vcmpss(x, op, 22); } +void vcmpord_sss(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpss(x1, x2, op, 23); } +void vcmpord_sss(const Xmm& x, const Operand& op) { vcmpss(x, op, 23); } +void vcmpeq_usss(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpss(x1, x2, op, 24); } +void vcmpeq_usss(const Xmm& x, const Operand& op) { vcmpss(x, op, 24); } +void vcmpnge_uqss(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpss(x1, x2, op, 25); } +void vcmpnge_uqss(const Xmm& x, const Operand& op) { vcmpss(x, op, 25); } +void vcmpngt_uqss(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpss(x1, x2, op, 26); } +void vcmpngt_uqss(const Xmm& x, const Operand& op) { vcmpss(x, op, 26); } +void vcmpfalse_osss(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpss(x1, x2, op, 27); } +void vcmpfalse_osss(const Xmm& x, const Operand& op) { vcmpss(x, op, 27); } +void vcmpneq_osss(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpss(x1, x2, op, 28); } +void vcmpneq_osss(const Xmm& x, const Operand& op) { vcmpss(x, op, 28); } +void vcmpge_oqss(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpss(x1, x2, op, 29); } +void vcmpge_oqss(const Xmm& x, const Operand& op) { vcmpss(x, op, 29); } +void vcmpgt_oqss(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpss(x1, x2, op, 30); } +void vcmpgt_oqss(const Xmm& x, const Operand& op) { vcmpss(x, op, 
30); } +void vcmptrue_usss(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpss(x1, x2, op, 31); } +void vcmptrue_usss(const Xmm& x, const Operand& op) { vcmpss(x, op, 31); } +void vmovhpd(const Xmm& x, const Operand& op1, const Operand& op2 = Operand()) { if (!op2.isNone() && !op2.isMEM()) throw Error(ERR_BAD_COMBINATION); opAVX_X_X_XM(x, op1, op2, MM_0F | PP_66, 0x16, false); } +void vmovhpd(const Address& addr, const Xmm& x) { opAVX_X_X_XM(x, xm0, addr, MM_0F | PP_66, 0x17, false); } +void vmovhps(const Xmm& x, const Operand& op1, const Operand& op2 = Operand()) { if (!op2.isNone() && !op2.isMEM()) throw Error(ERR_BAD_COMBINATION); opAVX_X_X_XM(x, op1, op2, MM_0F, 0x16, false); } +void vmovhps(const Address& addr, const Xmm& x) { opAVX_X_X_XM(x, xm0, addr, MM_0F, 0x17, false); } +void vmovlpd(const Xmm& x, const Operand& op1, const Operand& op2 = Operand()) { if (!op2.isNone() && !op2.isMEM()) throw Error(ERR_BAD_COMBINATION); opAVX_X_X_XM(x, op1, op2, MM_0F | PP_66, 0x12, false); } +void vmovlpd(const Address& addr, const Xmm& x) { opAVX_X_X_XM(x, xm0, addr, MM_0F | PP_66, 0x13, false); } +void vmovlps(const Xmm& x, const Operand& op1, const Operand& op2 = Operand()) { if (!op2.isNone() && !op2.isMEM()) throw Error(ERR_BAD_COMBINATION); opAVX_X_X_XM(x, op1, op2, MM_0F, 0x12, false); } +void vmovlps(const Address& addr, const Xmm& x) { opAVX_X_X_XM(x, xm0, addr, MM_0F, 0x13, false); } +void vfmadd132pd(const Xmm& xmm, const Xmm& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F38 | PP_66, 0x98, true, 1); } +void vfmadd213pd(const Xmm& xmm, const Xmm& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F38 | PP_66, 0xA8, true, 1); } +void vfmadd231pd(const Xmm& xmm, const Xmm& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F38 | PP_66, 0xB8, true, 1); } +void vfmadd132ps(const Xmm& xmm, const Xmm& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F38 | PP_66, 0x98, true, 
0); } +void vfmadd213ps(const Xmm& xmm, const Xmm& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F38 | PP_66, 0xA8, true, 0); } +void vfmadd231ps(const Xmm& xmm, const Xmm& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F38 | PP_66, 0xB8, true, 0); } +void vfmadd132sd(const Xmm& xmm, const Xmm& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F38 | PP_66, 0x99, false, 1); } +void vfmadd213sd(const Xmm& xmm, const Xmm& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F38 | PP_66, 0xA9, false, 1); } +void vfmadd231sd(const Xmm& xmm, const Xmm& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F38 | PP_66, 0xB9, false, 1); } +void vfmadd132ss(const Xmm& xmm, const Xmm& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F38 | PP_66, 0x99, false, 0); } +void vfmadd213ss(const Xmm& xmm, const Xmm& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F38 | PP_66, 0xA9, false, 0); } +void vfmadd231ss(const Xmm& xmm, const Xmm& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F38 | PP_66, 0xB9, false, 0); } +void vfmaddsub132pd(const Xmm& xmm, const Xmm& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F38 | PP_66, 0x96, true, 1); } +void vfmaddsub213pd(const Xmm& xmm, const Xmm& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F38 | PP_66, 0xA6, true, 1); } +void vfmaddsub231pd(const Xmm& xmm, const Xmm& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F38 | PP_66, 0xB6, true, 1); } +void vfmaddsub132ps(const Xmm& xmm, const Xmm& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F38 | PP_66, 0x96, true, 0); } +void vfmaddsub213ps(const Xmm& xmm, const Xmm& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F38 | PP_66, 0xA6, true, 0); } +void vfmaddsub231ps(const Xmm& xmm, const Xmm& op1, const 
Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F38 | PP_66, 0xB6, true, 0); } +void vfmsubadd132pd(const Xmm& xmm, const Xmm& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F38 | PP_66, 0x97, true, 1); } +void vfmsubadd213pd(const Xmm& xmm, const Xmm& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F38 | PP_66, 0xA7, true, 1); } +void vfmsubadd231pd(const Xmm& xmm, const Xmm& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F38 | PP_66, 0xB7, true, 1); } +void vfmsubadd132ps(const Xmm& xmm, const Xmm& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F38 | PP_66, 0x97, true, 0); } +void vfmsubadd213ps(const Xmm& xmm, const Xmm& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F38 | PP_66, 0xA7, true, 0); } +void vfmsubadd231ps(const Xmm& xmm, const Xmm& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F38 | PP_66, 0xB7, true, 0); } +void vfmsub132pd(const Xmm& xmm, const Xmm& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F38 | PP_66, 0x9A, true, 1); } +void vfmsub213pd(const Xmm& xmm, const Xmm& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F38 | PP_66, 0xAA, true, 1); } +void vfmsub231pd(const Xmm& xmm, const Xmm& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F38 | PP_66, 0xBA, true, 1); } +void vfmsub132ps(const Xmm& xmm, const Xmm& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F38 | PP_66, 0x9A, true, 0); } +void vfmsub213ps(const Xmm& xmm, const Xmm& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F38 | PP_66, 0xAA, true, 0); } +void vfmsub231ps(const Xmm& xmm, const Xmm& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F38 | PP_66, 0xBA, true, 0); } +void vfmsub132sd(const Xmm& xmm, const Xmm& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F38 | 
PP_66, 0x9B, false, 1); } +void vfmsub213sd(const Xmm& xmm, const Xmm& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F38 | PP_66, 0xAB, false, 1); } +void vfmsub231sd(const Xmm& xmm, const Xmm& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F38 | PP_66, 0xBB, false, 1); } +void vfmsub132ss(const Xmm& xmm, const Xmm& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F38 | PP_66, 0x9B, false, 0); } +void vfmsub213ss(const Xmm& xmm, const Xmm& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F38 | PP_66, 0xAB, false, 0); } +void vfmsub231ss(const Xmm& xmm, const Xmm& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F38 | PP_66, 0xBB, false, 0); } +void vfnmadd132pd(const Xmm& xmm, const Xmm& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F38 | PP_66, 0x9C, true, 1); } +void vfnmadd213pd(const Xmm& xmm, const Xmm& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F38 | PP_66, 0xAC, true, 1); } +void vfnmadd231pd(const Xmm& xmm, const Xmm& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F38 | PP_66, 0xBC, true, 1); } +void vfnmadd132ps(const Xmm& xmm, const Xmm& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F38 | PP_66, 0x9C, true, 0); } +void vfnmadd213ps(const Xmm& xmm, const Xmm& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F38 | PP_66, 0xAC, true, 0); } +void vfnmadd231ps(const Xmm& xmm, const Xmm& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F38 | PP_66, 0xBC, true, 0); } +void vfnmadd132sd(const Xmm& xmm, const Xmm& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F38 | PP_66, 0x9D, false, 1); } +void vfnmadd213sd(const Xmm& xmm, const Xmm& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F38 | PP_66, 0xAD, false, 1); } +void vfnmadd231sd(const Xmm& xmm, const Xmm& 
op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F38 | PP_66, 0xBD, false, 1); } +void vfnmadd132ss(const Xmm& xmm, const Xmm& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F38 | PP_66, 0x9D, false, 0); } +void vfnmadd213ss(const Xmm& xmm, const Xmm& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F38 | PP_66, 0xAD, false, 0); } +void vfnmadd231ss(const Xmm& xmm, const Xmm& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F38 | PP_66, 0xBD, false, 0); } +void vfnmsub132pd(const Xmm& xmm, const Xmm& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F38 | PP_66, 0x9E, true, 1); } +void vfnmsub213pd(const Xmm& xmm, const Xmm& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F38 | PP_66, 0xAE, true, 1); } +void vfnmsub231pd(const Xmm& xmm, const Xmm& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F38 | PP_66, 0xBE, true, 1); } +void vfnmsub132ps(const Xmm& xmm, const Xmm& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F38 | PP_66, 0x9E, true, 0); } +void vfnmsub213ps(const Xmm& xmm, const Xmm& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F38 | PP_66, 0xAE, true, 0); } +void vfnmsub231ps(const Xmm& xmm, const Xmm& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F38 | PP_66, 0xBE, true, 0); } +void vfnmsub132sd(const Xmm& xmm, const Xmm& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F38 | PP_66, 0x9F, false, 1); } +void vfnmsub213sd(const Xmm& xmm, const Xmm& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F38 | PP_66, 0xAF, false, 1); } +void vfnmsub231sd(const Xmm& xmm, const Xmm& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F38 | PP_66, 0xBF, false, 1); } +void vfnmsub132ss(const Xmm& xmm, const Xmm& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, 
MM_0F38 | PP_66, 0x9F, false, 0); } +void vfnmsub213ss(const Xmm& xmm, const Xmm& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F38 | PP_66, 0xAF, false, 0); } +void vfnmsub231ss(const Xmm& xmm, const Xmm& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F38 | PP_66, 0xBF, false, 0); } +void vaesimc(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, MM_0F38 | PP_66, 0xDB, false, 0); } +void vbroadcastf128(const Ymm& y, const Address& addr) { opAVX_X_XM_IMM(y, addr, MM_0F38 | PP_66, 0x1A, true, 0); } +void vbroadcasti128(const Ymm& y, const Address& addr) { opAVX_X_XM_IMM(y, addr, MM_0F38 | PP_66, 0x5A, true, 0); } +void vbroadcastsd(const Ymm& y, const Operand& op) { if (!(op.isXMM() || op.isMEM())) throw Error(ERR_BAD_COMBINATION); opAVX_X_XM_IMM(y, op, MM_0F38 | PP_66, 0x19, true, 0); } +void vbroadcastss(const Xmm& x, const Operand& op) { if (!(op.isXMM() || op.isMEM())) throw Error(ERR_BAD_COMBINATION); opAVX_X_XM_IMM(x, op, MM_0F38 | PP_66, 0x18, true, 0); } +void vpbroadcastb(const Xmm& x, const Operand& op) { if (!(op.isXMM() || op.isMEM())) throw Error(ERR_BAD_COMBINATION); opAVX_X_XM_IMM(x, op, MM_0F38 | PP_66, 0x78, true, 0); } +void vpbroadcastw(const Xmm& x, const Operand& op) { if (!(op.isXMM() || op.isMEM())) throw Error(ERR_BAD_COMBINATION); opAVX_X_XM_IMM(x, op, MM_0F38 | PP_66, 0x79, true, 0); } +void vpbroadcastd(const Xmm& x, const Operand& op) { if (!(op.isXMM() || op.isMEM())) throw Error(ERR_BAD_COMBINATION); opAVX_X_XM_IMM(x, op, MM_0F38 | PP_66, 0x58, true, 0); } +void vpbroadcastq(const Xmm& x, const Operand& op) { if (!(op.isXMM() || op.isMEM())) throw Error(ERR_BAD_COMBINATION); opAVX_X_XM_IMM(x, op, MM_0F38 | PP_66, 0x59, true, 0); } +void vextractf128(const Operand& op, const Ymm& y, uint8 imm) { opAVX_X_X_XMcvt(y, y.isXMM() ? 
xm0 : ym0, op, op.isXMM(), Operand::YMM, MM_0F3A | PP_66, 0x19, true, 0); db(imm); } +void vextracti128(const Operand& op, const Ymm& y, uint8 imm) { opAVX_X_X_XMcvt(y, y.isXMM() ? xm0 : ym0, op, op.isXMM(), Operand::YMM, MM_0F3A | PP_66, 0x39, true, 0); db(imm); } +void vextractps(const Operand& op, const Xmm& x, uint8 imm) { if (!(op.isREG(32) || op.isMEM()) || x.isYMM()) throw Error(ERR_BAD_COMBINATION); opAVX_X_X_XMcvt(x, x.isXMM() ? xm0 : ym0, op, op.isREG(), Operand::XMM, MM_0F3A | PP_66, 0x17, false, 0); db(imm); } +void vinsertf128(const Ymm& y1, const Ymm& y2, const Operand& op, uint8 imm) { opAVX_X_X_XMcvt(y1, y2, op, op.isXMM(), Operand::YMM, MM_0F3A | PP_66, 0x18, true, 0); db(imm); } +void vinserti128(const Ymm& y1, const Ymm& y2, const Operand& op, uint8 imm) { opAVX_X_X_XMcvt(y1, y2, op, op.isXMM(), Operand::YMM, MM_0F3A | PP_66, 0x38, true, 0); db(imm); } +void vperm2f128(const Ymm& y1, const Ymm& y2, const Operand& op, uint8 imm) { opAVX_X_X_XM(y1, y2, op, MM_0F3A | PP_66, 0x06, true, 0); db(imm); } +void vperm2i128(const Ymm& y1, const Ymm& y2, const Operand& op, uint8 imm) { opAVX_X_X_XM(y1, y2, op, MM_0F3A | PP_66, 0x46, true, 0); db(imm); } +void vlddqu(const Xmm& x, const Address& addr) { opAVX_X_X_XM(x, x.isXMM() ? xm0 : ym0, addr, MM_0F | PP_F2, 0xF0, true, 0); } +void vldmxcsr(const Address& addr) { opAVX_X_X_XM(xm2, xm0, addr, MM_0F, 0xAE, false, -1); } +void vstmxcsr(const Address& addr) { opAVX_X_X_XM(xm3, xm0, addr, MM_0F, 0xAE, false, -1); } +void vmaskmovdqu(const Xmm& x1, const Xmm& x2) { opAVX_X_X_XM(x1, xm0, x2, MM_0F | PP_66, 0xF7, false, -1); } +void vpextrb(const Operand& op, const Xmm& x, uint8 imm) { if (!op.isREG(i32e) && !op.isMEM()) throw Error(ERR_BAD_COMBINATION); opAVX_X_X_XMcvt(x, xm0, op, !op.isMEM(), Operand::XMM, MM_0F3A | PP_66, 0x14, false); db(imm); } +void vpextrw(const Reg& r, const Xmm& x, uint8 imm) { opAVX_X_X_XM(Xmm(r.getIdx()), xm0, x, MM_0F | PP_66, 0xC5, false, r.isBit(64) ? 
1 : 0); db(imm); } +void vpextrw(const Address& addr, const Xmm& x, uint8 imm) { opAVX_X_X_XM(x, xm0, addr, MM_0F3A | PP_66, 0x15, false); db(imm); } +void vpextrd(const Operand& op, const Xmm& x, uint8 imm) { if (!op.isREG(32) && !op.isMEM()) throw Error(ERR_BAD_COMBINATION); opAVX_X_X_XMcvt(x, xm0, op, !op.isMEM(), Operand::XMM, MM_0F3A | PP_66, 0x16, false, 0); db(imm); } +void vpinsrb(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { if (!op.isREG(32) && !op.isMEM()) throw Error(ERR_BAD_COMBINATION); opAVX_X_X_XMcvt(x1, x2, op, !op.isMEM(), Operand::XMM, MM_0F3A | PP_66, 0x20, false); db(imm); } +void vpinsrb(const Xmm& x, const Operand& op, uint8 imm) { if (!op.isREG(32) && !op.isMEM()) throw Error(ERR_BAD_COMBINATION); opAVX_X_X_XMcvt(x, x, op, !op.isMEM(), Operand::XMM, MM_0F3A | PP_66, 0x20, false); db(imm); } +void vpinsrw(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { if (!op.isREG(32) && !op.isMEM()) throw Error(ERR_BAD_COMBINATION); opAVX_X_X_XMcvt(x1, x2, op, !op.isMEM(), Operand::XMM, MM_0F | PP_66, 0xC4, false); db(imm); } +void vpinsrw(const Xmm& x, const Operand& op, uint8 imm) { if (!op.isREG(32) && !op.isMEM()) throw Error(ERR_BAD_COMBINATION); opAVX_X_X_XMcvt(x, x, op, !op.isMEM(), Operand::XMM, MM_0F | PP_66, 0xC4, false); db(imm); } +void vpinsrd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { if (!op.isREG(32) && !op.isMEM()) throw Error(ERR_BAD_COMBINATION); opAVX_X_X_XMcvt(x1, x2, op, !op.isMEM(), Operand::XMM, MM_0F3A | PP_66, 0x22, false, 0); db(imm); } +void vpinsrd(const Xmm& x, const Operand& op, uint8 imm) { if (!op.isREG(32) && !op.isMEM()) throw Error(ERR_BAD_COMBINATION); opAVX_X_X_XMcvt(x, x, op, !op.isMEM(), Operand::XMM, MM_0F3A | PP_66, 0x22, false, 0); db(imm); } +void vpmovmskb(const Reg32e& r, const Xmm& x) { bool isYMM= x.isYMM(); opAVX_X_X_XM(isYMM ? Ymm(r.getIdx()) : Xmm(r.getIdx()), isYMM ? 
ym0 : xm0, x, MM_0F | PP_66, 0xD7, true); } +void vpslldq(const Xmm& x1, const Xmm& x2, uint8 imm) { opAVX_X_X_XM(x1.isYMM() ? ym7 : xm7, x1, x2, MM_0F | PP_66, 0x73, true); db(imm); } +void vpslldq(const Xmm& x, uint8 imm) { opAVX_X_X_XM(x.isYMM() ? ym7 : xm7, x, x, MM_0F | PP_66, 0x73, true); db(imm); } +void vpsrldq(const Xmm& x1, const Xmm& x2, uint8 imm) { opAVX_X_X_XM(x1.isYMM() ? ym3 : xm3, x1, x2, MM_0F | PP_66, 0x73, true); db(imm); } +void vpsrldq(const Xmm& x, uint8 imm) { opAVX_X_X_XM(x.isYMM() ? ym3 : xm3, x, x, MM_0F | PP_66, 0x73, true); db(imm); } +void vpsllw(const Xmm& x1, const Xmm& x2, uint8 imm) { opAVX_X_X_XM(x1.isYMM() ? ym6 : xm6, x1, x2, MM_0F | PP_66, 0x71, true); db(imm); } +void vpsllw(const Xmm& x, uint8 imm) { opAVX_X_X_XM(x.isYMM() ? ym6 : xm6, x, x, MM_0F | PP_66, 0x71, true); db(imm); } +void vpslld(const Xmm& x1, const Xmm& x2, uint8 imm) { opAVX_X_X_XM(x1.isYMM() ? ym6 : xm6, x1, x2, MM_0F | PP_66, 0x72, true); db(imm); } +void vpslld(const Xmm& x, uint8 imm) { opAVX_X_X_XM(x.isYMM() ? ym6 : xm6, x, x, MM_0F | PP_66, 0x72, true); db(imm); } +void vpsllq(const Xmm& x1, const Xmm& x2, uint8 imm) { opAVX_X_X_XM(x1.isYMM() ? ym6 : xm6, x1, x2, MM_0F | PP_66, 0x73, true); db(imm); } +void vpsllq(const Xmm& x, uint8 imm) { opAVX_X_X_XM(x.isYMM() ? ym6 : xm6, x, x, MM_0F | PP_66, 0x73, true); db(imm); } +void vpsraw(const Xmm& x1, const Xmm& x2, uint8 imm) { opAVX_X_X_XM(x1.isYMM() ? ym4 : xm4, x1, x2, MM_0F | PP_66, 0x71, true); db(imm); } +void vpsraw(const Xmm& x, uint8 imm) { opAVX_X_X_XM(x.isYMM() ? ym4 : xm4, x, x, MM_0F | PP_66, 0x71, true); db(imm); } +void vpsrad(const Xmm& x1, const Xmm& x2, uint8 imm) { opAVX_X_X_XM(x1.isYMM() ? ym4 : xm4, x1, x2, MM_0F | PP_66, 0x72, true); db(imm); } +void vpsrad(const Xmm& x, uint8 imm) { opAVX_X_X_XM(x.isYMM() ? ym4 : xm4, x, x, MM_0F | PP_66, 0x72, true); db(imm); } +void vpsrlw(const Xmm& x1, const Xmm& x2, uint8 imm) { opAVX_X_X_XM(x1.isYMM() ? 
ym2 : xm2, x1, x2, MM_0F | PP_66, 0x71, true); db(imm); } +void vpsrlw(const Xmm& x, uint8 imm) { opAVX_X_X_XM(x.isYMM() ? ym2 : xm2, x, x, MM_0F | PP_66, 0x71, true); db(imm); } +void vpsrld(const Xmm& x1, const Xmm& x2, uint8 imm) { opAVX_X_X_XM(x1.isYMM() ? ym2 : xm2, x1, x2, MM_0F | PP_66, 0x72, true); db(imm); } +void vpsrld(const Xmm& x, uint8 imm) { opAVX_X_X_XM(x.isYMM() ? ym2 : xm2, x, x, MM_0F | PP_66, 0x72, true); db(imm); } +void vpsrlq(const Xmm& x1, const Xmm& x2, uint8 imm) { opAVX_X_X_XM(x1.isYMM() ? ym2 : xm2, x1, x2, MM_0F | PP_66, 0x73, true); db(imm); } +void vpsrlq(const Xmm& x, uint8 imm) { opAVX_X_X_XM(x.isYMM() ? ym2 : xm2, x, x, MM_0F | PP_66, 0x73, true); db(imm); } +void vblendvpd(const Xmm& x1, const Xmm& x2, const Operand& op, const Xmm& x4) { opAVX_X_X_XM(x1, x2, op, MM_0F3A | PP_66, 0x4B, true); db(x4.getIdx() << 4); } +void vblendvpd(const Xmm& x1, const Operand& op, const Xmm& x4) { opAVX_X_X_XM(x1, x1, op, MM_0F3A | PP_66, 0x4B, true); db(x4.getIdx() << 4); } +void vblendvps(const Xmm& x1, const Xmm& x2, const Operand& op, const Xmm& x4) { opAVX_X_X_XM(x1, x2, op, MM_0F3A | PP_66, 0x4A, true); db(x4.getIdx() << 4); } +void vblendvps(const Xmm& x1, const Operand& op, const Xmm& x4) { opAVX_X_X_XM(x1, x1, op, MM_0F3A | PP_66, 0x4A, true); db(x4.getIdx() << 4); } +void vpblendvb(const Xmm& x1, const Xmm& x2, const Operand& op, const Xmm& x4) { opAVX_X_X_XM(x1, x2, op, MM_0F3A | PP_66, 0x4C, false); db(x4.getIdx() << 4); } +void vpblendvb(const Xmm& x1, const Operand& op, const Xmm& x4) { opAVX_X_X_XM(x1, x1, op, MM_0F3A | PP_66, 0x4C, false); db(x4.getIdx() << 4); } +void vmovd(const Xmm& x, const Reg32& reg) { opAVX_X_X_XM(x, xm0, Xmm(reg.getIdx()), MM_0F | PP_66, 0x6E, false, 0); } +void vmovd(const Xmm& x, const Address& addr) { opAVX_X_X_XM(x, xm0, addr, MM_0F | PP_66, 0x6E, false, 0); } +void vmovd(const Reg32& reg, const Xmm& x) { opAVX_X_X_XM(x, xm0, Xmm(reg.getIdx()), MM_0F | PP_66, 0x7E, false, 0); } +void vmovd(const 
Address& addr, const Xmm& x) { opAVX_X_X_XM(x, xm0, addr, MM_0F | PP_66, 0x7E, false, 0); } +void vmovq(const Xmm& x, const Address& addr) { opAVX_X_X_XM(x, xm0, addr, MM_0F | PP_F3, 0x7E, false, -1); } +void vmovq(const Address& addr, const Xmm& x) { opAVX_X_X_XM(x, xm0, addr, MM_0F | PP_66, 0xD6, false, -1); } +void vmovq(const Xmm& x1, const Xmm& x2) { opAVX_X_X_XM(x1, xm0, x2, MM_0F | PP_F3, 0x7E, false, -1); } +void vmovhlps(const Xmm& x1, const Xmm& x2, const Operand& op = Operand()) { if (!op.isNone() && !op.isXMM()) throw Error(ERR_BAD_COMBINATION); opAVX_X_X_XM(x1, x2, op, MM_0F, 0x12, false); } +void vmovlhps(const Xmm& x1, const Xmm& x2, const Operand& op = Operand()) { if (!op.isNone() && !op.isXMM()) throw Error(ERR_BAD_COMBINATION); opAVX_X_X_XM(x1, x2, op, MM_0F, 0x16, false); } +void vmovmskpd(const Reg& r, const Xmm& x) { if (!r.isBit(i32e)) throw Error(ERR_BAD_COMBINATION); opAVX_X_X_XM(x.isXMM() ? Xmm(r.getIdx()) : Ymm(r.getIdx()), x.isXMM() ? xm0 : ym0, x, MM_0F | PP_66, 0x50, true, 0); } +void vmovmskps(const Reg& r, const Xmm& x) { if (!r.isBit(i32e)) throw Error(ERR_BAD_COMBINATION); opAVX_X_X_XM(x.isXMM() ? Xmm(r.getIdx()) : Ymm(r.getIdx()), x.isXMM() ? xm0 : ym0, x, MM_0F, 0x50, true, 0); } +void vmovntdq(const Address& addr, const Xmm& x) { opAVX_X_X_XM(x, x.isXMM() ? xm0 : ym0, addr, MM_0F | PP_66, 0xE7, true); } +void vmovntpd(const Address& addr, const Xmm& x) { opAVX_X_X_XM(x, x.isXMM() ? xm0 : ym0, addr, MM_0F | PP_66, 0x2B, true); } +void vmovntps(const Address& addr, const Xmm& x) { opAVX_X_X_XM(x, x.isXMM() ? xm0 : ym0, addr, MM_0F, 0x2B, true); } +void vmovntdqa(const Xmm& x, const Address& addr) { opAVX_X_X_XM(x, x.isXMM() ? 
xm0 : ymm0, addr, MM_0F38 | PP_66, 0x2A, true); } +void vmovsd(const Xmm& x1, const Xmm& x2, const Operand& op = Operand()) { if (!op.isNone() && !op.isXMM()) throw Error(ERR_BAD_COMBINATION); opAVX_X_X_XM(x1, x2, op, MM_0F | PP_F2, 0x10, false); } +void vmovsd(const Xmm& x, const Address& addr) { opAVX_X_X_XM(x, xm0, addr, MM_0F | PP_F2, 0x10, false); } +void vmovsd(const Address& addr, const Xmm& x) { opAVX_X_X_XM(x, xm0, addr, MM_0F | PP_F2, 0x11, false); } +void vmovss(const Xmm& x1, const Xmm& x2, const Operand& op = Operand()) { if (!op.isNone() && !op.isXMM()) throw Error(ERR_BAD_COMBINATION); opAVX_X_X_XM(x1, x2, op, MM_0F | PP_F3, 0x10, false); } +void vmovss(const Xmm& x, const Address& addr) { opAVX_X_X_XM(x, xm0, addr, MM_0F | PP_F3, 0x10, false); } +void vmovss(const Address& addr, const Xmm& x) { opAVX_X_X_XM(x, xm0, addr, MM_0F | PP_F3, 0x11, false); } +void vcvtss2si(const Reg32& r, const Operand& op) { opAVX_X_X_XM(Xmm(r.getIdx()), xm0, op, MM_0F | PP_F3, 0x2D, false, 0); } +void vcvttss2si(const Reg32& r, const Operand& op) { opAVX_X_X_XM(Xmm(r.getIdx()), xm0, op, MM_0F | PP_F3, 0x2C, false, 0); } +void vcvtsd2si(const Reg32& r, const Operand& op) { opAVX_X_X_XM(Xmm(r.getIdx()), xm0, op, MM_0F | PP_F2, 0x2D, false, 0); } +void vcvttsd2si(const Reg32& r, const Operand& op) { opAVX_X_X_XM(Xmm(r.getIdx()), xm0, op, MM_0F | PP_F2, 0x2C, false, 0); } +void vcvtsi2ss(const Xmm& x, const Operand& op1, const Operand& op2 = Operand()) { if (!op2.isNone() && !(op2.isREG(i32e) || op2.isMEM())) throw Error(ERR_BAD_COMBINATION); opAVX_X_X_XMcvt(x, op1, op2, op2.isREG(), Operand::XMM, MM_0F | PP_F3, 0x2A, false, (op1.isMEM() || op2.isMEM()) ? -1 : (op1.isREG(32) || op2.isREG(32)) ? 
0 : 1); } +void vcvtsi2sd(const Xmm& x, const Operand& op1, const Operand& op2 = Operand()) { if (!op2.isNone() && !(op2.isREG(i32e) || op2.isMEM())) throw Error(ERR_BAD_COMBINATION); opAVX_X_X_XMcvt(x, op1, op2, op2.isREG(), Operand::XMM, MM_0F | PP_F2, 0x2A, false, (op1.isMEM() || op2.isMEM()) ? -1 : (op1.isREG(32) || op2.isREG(32)) ? 0 : 1); } +void vcvtps2pd(const Xmm& x, const Operand& op) { if (!op.isMEM() && !op.isXMM()) throw Error(ERR_BAD_COMBINATION); opAVX_X_X_XMcvt(x, x.isXMM() ? xm0 : ym0, op, !op.isMEM(), x.isXMM() ? Operand::XMM : Operand::YMM, MM_0F, 0x5A, true); } +void vcvtdq2pd(const Xmm& x, const Operand& op) { if (!op.isMEM() && !op.isXMM()) throw Error(ERR_BAD_COMBINATION); opAVX_X_X_XMcvt(x, x.isXMM() ? xm0 : ym0, op, !op.isMEM(), x.isXMM() ? Operand::XMM : Operand::YMM, MM_0F | PP_F3, 0xE6, true); } +void vcvtpd2ps(const Xmm& x, const Operand& op) { if (x.isYMM()) throw Error(ERR_BAD_COMBINATION); opAVX_X_X_XM(op.isYMM() ? Ymm(x.getIdx()) : x, op.isYMM() ? ym0 : xm0, op, MM_0F | PP_66, 0x5A, true); } +void vcvtpd2dq(const Xmm& x, const Operand& op) { if (x.isYMM()) throw Error(ERR_BAD_COMBINATION); opAVX_X_X_XM(op.isYMM() ? Ymm(x.getIdx()) : x, op.isYMM() ? ym0 : xm0, op, MM_0F | PP_F2, 0xE6, true); } +void vcvttpd2dq(const Xmm& x, const Operand& op) { if (x.isYMM()) throw Error(ERR_BAD_COMBINATION); opAVX_X_X_XM(op.isYMM() ? Ymm(x.getIdx()) : x, op.isYMM() ? 
ym0 : xm0, op, MM_0F | PP_66, 0xE6, true); } +void vcvtph2ps(const Xmm& x, const Operand& op) { if (!op.isMEM() && !op.isXMM()) throw Error(ERR_BAD_COMBINATION); opVex(x, NULL, &op, MM_0F38 | PP_66, 0x13, 0); } +void vcvtps2ph(const Operand& op, const Xmm& x, uint8 imm) { if (!op.isMEM() && !op.isXMM()) throw Error(ERR_BAD_COMBINATION); opVex(x, NULL, &op, MM_0F3A | PP_66, 0x1d, 0); db(imm); } +#ifdef XBYAK64 +void vmovq(const Xmm& x, const Reg64& reg) { opAVX_X_X_XM(x, xm0, Xmm(reg.getIdx()), MM_0F | PP_66, 0x6E, false, 1); } +void vmovq(const Reg64& reg, const Xmm& x) { opAVX_X_X_XM(x, xm0, Xmm(reg.getIdx()), MM_0F | PP_66, 0x7E, false, 1); } +void vpextrq(const Operand& op, const Xmm& x, uint8 imm) { if (!op.isREG(64) && !op.isMEM()) throw Error(ERR_BAD_COMBINATION); opAVX_X_X_XMcvt(x, xm0, op, !op.isMEM(), Operand::XMM, MM_0F3A | PP_66, 0x16, false, 1); db(imm); } +void vpinsrq(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { if (!op.isREG(64) && !op.isMEM()) throw Error(ERR_BAD_COMBINATION); opAVX_X_X_XMcvt(x1, x2, op, !op.isMEM(), Operand::XMM, MM_0F3A | PP_66, 0x22, false, 1); db(imm); } +void vpinsrq(const Xmm& x, const Operand& op, uint8 imm) { if (!op.isREG(64) && !op.isMEM()) throw Error(ERR_BAD_COMBINATION); opAVX_X_X_XMcvt(x, x, op, !op.isMEM(), Operand::XMM, MM_0F3A | PP_66, 0x22, false, 1); db(imm); } +void vcvtss2si(const Reg64& r, const Operand& op) { opAVX_X_X_XM(Xmm(r.getIdx()), xm0, op, MM_0F | PP_F3, 0x2D, false, 1); } +void vcvttss2si(const Reg64& r, const Operand& op) { opAVX_X_X_XM(Xmm(r.getIdx()), xm0, op, MM_0F | PP_F3, 0x2C, false, 1); } +void vcvtsd2si(const Reg64& r, const Operand& op) { opAVX_X_X_XM(Xmm(r.getIdx()), xm0, op, MM_0F | PP_F2, 0x2D, false, 1); } +void vcvttsd2si(const Reg64& r, const Operand& op) { opAVX_X_X_XM(Xmm(r.getIdx()), xm0, op, MM_0F | PP_F2, 0x2C, false, 1); } +#endif +void andn(const Reg32e& r1, const Reg32e& r2, const Operand& op) { opGpr(r1, r2, op, MM_0F38, 0xf2, true); } +void mulx(const Reg32e& 
r1, const Reg32e& r2, const Operand& op) { opGpr(r1, r2, op, MM_0F38 | PP_F2, 0xf6, true); } +void pdep(const Reg32e& r1, const Reg32e& r2, const Operand& op) { opGpr(r1, r2, op, MM_0F38 | PP_F2, 0xf5, true); } +void pext(const Reg32e& r1, const Reg32e& r2, const Operand& op) { opGpr(r1, r2, op, MM_0F38 | PP_F3, 0xf5, true); } +void bextr(const Reg32e& r1, const Operand& op, const Reg32e& r2) { opGpr(r1, op, r2, MM_0F38, 0xf7, false); } +void bzhi(const Reg32e& r1, const Operand& op, const Reg32e& r2) { opGpr(r1, op, r2, MM_0F38, 0xf5, false); } +void sarx(const Reg32e& r1, const Operand& op, const Reg32e& r2) { opGpr(r1, op, r2, MM_0F38 | PP_F3, 0xf7, false); } +void shlx(const Reg32e& r1, const Operand& op, const Reg32e& r2) { opGpr(r1, op, r2, MM_0F38 | PP_66, 0xf7, false); } +void shrx(const Reg32e& r1, const Operand& op, const Reg32e& r2) { opGpr(r1, op, r2, MM_0F38 | PP_F2, 0xf7, false); } +void blsi(const Reg32e& r, const Operand& op) { opGpr(Reg32e(3, r.getBit()), op, r, MM_0F38, 0xf3, false); } +void blsmsk(const Reg32e& r, const Operand& op) { opGpr(Reg32e(2, r.getBit()), op, r, MM_0F38, 0xf3, false); } +void blsr(const Reg32e& r, const Operand& op) { opGpr(Reg32e(1, r.getBit()), op, r, MM_0F38, 0xf3, false); } +void vgatherdpd(const Xmm& x1, const Address& addr, const Xmm& x2) { opGather(x1, addr, x2, MM_0F38 | PP_66, 0x92, 1, 0); } +void vgatherqpd(const Xmm& x1, const Address& addr, const Xmm& x2) { opGather(x1, addr, x2, MM_0F38 | PP_66, 0x93, 1, 1); } +void vgatherdps(const Xmm& x1, const Address& addr, const Xmm& x2) { opGather(x1, addr, x2, MM_0F38 | PP_66, 0x92, 0, 1); } +void vgatherqps(const Xmm& x1, const Address& addr, const Xmm& x2) { opGather(x1, addr, x2, MM_0F38 | PP_66, 0x93, 0, 2); } +void vpgatherdd(const Xmm& x1, const Address& addr, const Xmm& x2) { opGather(x1, addr, x2, MM_0F38 | PP_66, 0x90, 0, 1); } +void vpgatherqd(const Xmm& x1, const Address& addr, const Xmm& x2) { opGather(x1, addr, x2, MM_0F38 | PP_66, 0x91, 0, 2); } +void 
vpgatherdq(const Xmm& x1, const Address& addr, const Xmm& x2) { opGather(x1, addr, x2, MM_0F38 | PP_66, 0x90, 1, 0); }
+void vpgatherqq(const Xmm& x1, const Address& addr, const Xmm& x2) { opGather(x1, addr, x2, MM_0F38 | PP_66, 0x91, 1, 1); }
diff --git a/src/cpu/mips3/x64/xbyak/xbyak_util.h b/src/cpu/mips3/x64/xbyak/xbyak_util.h
new file mode 100644
index 000000000..7f3e336ac
--- /dev/null
+++ b/src/cpu/mips3/x64/xbyak/xbyak_util.h
@@ -0,0 +1,529 @@
+#ifndef XBYAK_XBYAK_UTIL_H_
+#define XBYAK_XBYAK_UTIL_H_
+
+/**
+	utility class and functions for Xbyak
+	Xbyak::util::Clock ; rdtsc timer
+	Xbyak::util::Cpu ; detect CPU
+	@note this header is UNDER CONSTRUCTION!
+*/
+#include "xbyak/xbyak.h"
+
+#ifdef _MSC_VER
+	#if (_MSC_VER < 1400) && defined(XBYAK32) // no compiler intrinsic here: hand-written __cpuid(data, eaxIn)
+		static inline __declspec(naked) void __cpuid(int[4], int)
+		{
+			__asm {
+				push	ebx
+				push	esi
+				mov		eax, dword ptr [esp + 4 * 2 + 8] // eaxIn
+				cpuid
+				mov		esi, dword ptr [esp + 4 * 2 + 4] // data
+				mov		dword ptr [esi], eax
+				mov		dword ptr [esi + 4], ebx
+				mov		dword ptr [esi + 8], ecx
+				mov		dword ptr [esi + 12], edx
+				pop		esi
+				pop		ebx
+				ret
+			}
+		}
+	#else
+		#include <intrin.h> // for __cpuid
+	#endif
+#else
+	#ifndef __GNUC_PREREQ
+		#define __GNUC_PREREQ(major, minor) ((((__GNUC__) << 16) + (__GNUC_MINOR__)) >= (((major) << 16) + (minor)))
+	#endif
+	#if __GNUC_PREREQ(4, 3) && !defined(__APPLE__)
+		#include <cpuid.h> // supplies the __cpuid / __cpuid_count macros used by Cpu
+	#else
+		#if defined(__APPLE__) && defined(XBYAK32) // avoid err : can't find a register in class `BREG' while reloading `asm'; EBX is saved/restored by hand and its CPUID result is handed back through ESI
+			#define __cpuid(eaxIn, a, b, c, d) __asm__ __volatile__("pushl %%ebx\ncpuid\nmovl %%ebx, %%esi\npopl %%ebx" : "=a"(a), "=S"(b), "=c"(c), "=d"(d) : "0"(eaxIn))
+			#define __cpuid_count(eaxIn, ecxIn, a, b, c, d) __asm__ __volatile__("pushl %%ebx\ncpuid\nmovl %%ebx, %%esi\npopl %%ebx" : "=a"(a), "=S"(b), "=c"(c), "=d"(d) : "0"(eaxIn), "2"(ecxIn))
+		#else
+			#define __cpuid(eaxIn, a, b, c, d) __asm__ __volatile__("cpuid\n" : "=a"(a), "=b"(b), "=c"(c), "=d"(d) : "0"(eaxIn))
+			#define
__cpuid_count(eaxIn, ecxIn, a, b, c, d) __asm__ __volatile__("cpuid\n" : "=a"(a), "=b"(b), "=c"(c), "=d"(d) : "0"(eaxIn), "2"(ecxIn))
+		#endif
+	#endif
+#endif
+
+#ifdef _MSC_VER
+extern "C" unsigned __int64 __xgetbv(int);
+#endif
+
+namespace Xbyak { namespace util {
+
+/**
+	CPU detection class
+*/
+class Cpu {
+	uint64 type_;
+	unsigned int get32bitAsBE(const char *x) const // packs x[0..3] into one word with x[0] in the low byte
+	{
+		return x[0] | (x[1] << 8) | (x[2] << 16) | (x[3] << 24);
+	}
+	unsigned int mask(int n) const // value with the low n bits set
+	{
+		return (1U << n) - 1;
+	}
+	void setFamily() // splits EAX of CPUID leaf 1 into the stepping/model/family members
+	{
+		unsigned int data[4];
+		getCpuid(1, data);
+		stepping = data[0] & mask(4);
+		model = (data[0] >> 4) & mask(4);
+		family = (data[0] >> 8) & mask(4);
+		// type = (data[0] >> 12) & mask(2);
+		extModel = (data[0] >> 16) & mask(4);
+		extFamily = (data[0] >> 20) & mask(8);
+		if (family == 0x0f) {
+			displayFamily = family + extFamily;
+		} else {
+			displayFamily = family;
+		}
+		if (family == 6 || family == 0x0f) {
+			displayModel = (extModel << 4) + model;
+		} else {
+			displayModel = model;
+		}
+	}
+public:
+	int model;
+	int family;
+	int stepping;
+	int extModel;
+	int extFamily;
+	int displayFamily; // family + extFamily when family == 0x0f, otherwise family
+	int displayModel; // (extModel << 4) + model when family is 6 or 0x0f, otherwise model
+	static inline void getCpuid(unsigned int eaxIn, unsigned int data[4]) // CPUID with EAX=eaxIn; data[0..3] receive EAX, EBX, ECX, EDX
+	{
+#ifdef _MSC_VER
+		__cpuid(reinterpret_cast<int*>(data), eaxIn); // the MSVC-side __cpuid takes an int[4] buffer
+#else
+		__cpuid(eaxIn, data[0], data[1], data[2], data[3]);
+#endif
+	}
+	static inline void getCpuidEx(unsigned int eaxIn, unsigned int ecxIn, unsigned int data[4]) // as getCpuid, additionally loading ECX with ecxIn
+	{
+#ifdef _MSC_VER
+		__cpuidex(reinterpret_cast<int*>(data), eaxIn, ecxIn);
+#else
+		__cpuid_count(eaxIn, ecxIn, data[0], data[1], data[2], data[3]);
+#endif
+	}
+	static inline uint64 getXfeature() // result of xgetbv with ECX=0, returned as EDX:EAX
+	{
+#ifdef _MSC_VER
+		return __xgetbv(0);
+#else
+		unsigned int eax, edx;
+		// xgetbv is not supported on gcc 4.2, so the instruction is emitted as raw bytes
+//		__asm__ volatile("xgetbv" : "=a"(eax), "=d"(edx) : "c"(0));
+		__asm__ volatile(".byte 0x0f, 0x01, 0xd0" : "=a"(eax), "=d"(edx) : "c"(0));
+		return ((uint64)edx << 32) | eax;
+#endif
+	}
+ typedef uint64 Type; + static const Type NONE = 0; + static const Type tMMX = 1 << 0; + static const Type tMMX2 = 1 << 1; + static const Type tCMOV = 1 << 2; + static const Type tSSE = 1 << 3; + static const Type tSSE2 = 1 << 4; + static const Type tSSE3 = 1 << 5; + static const Type tSSSE3 = 1 << 6; + static const Type tSSE41 = 1 << 7; + static const Type tSSE42 = 1 << 8; + static const Type tPOPCNT = 1 << 9; + static const Type tAESNI = 1 << 10; + static const Type tSSE5 = 1 << 11; + static const Type tOSXSAVE = 1 << 12; + static const Type tPCLMULQDQ = 1 << 13; + static const Type tAVX = 1 << 14; + static const Type tFMA = 1 << 15; + + static const Type t3DN = 1 << 16; + static const Type tE3DN = 1 << 17; + static const Type tSSE4a = 1 << 18; + static const Type tRDTSCP = 1 << 19; + static const Type tAVX2 = 1 << 20; + static const Type tBMI1 = 1 << 21; // andn, bextr, blsi, blsmsk, blsr, tzcnt + static const Type tBMI2 = 1 << 22; // bzhi, mulx, pdep, pext, rorx, sarx, shlx, shrx + static const Type tLZCNT = 1 << 23; + + static const Type tINTEL = 1 << 24; + static const Type tAMD = 1 << 25; + + static const Type tENHANCED_REP = 1 << 26; // enhanced rep movsb/stosb + static const Type tRDRAND = 1 << 27; + static const Type tADX = 1 << 28; // adcx, adox + static const Type tRDSEED = 1 << 29; // rdseed + static const Type tSMAP = 1 << 30; // stac + static const Type tHLE = uint64(1) << 31; // xacquire, xrelease, xtest + static const Type tRTM = uint64(1) << 32; // xbegin, xend, xabort + + Cpu() + : type_(NONE) + { + unsigned int data[4]; + getCpuid(0, data); + const unsigned int maxNum = data[0]; + static const char intel[] = "ntel"; + static const char amd[] = "cAMD"; + if (data[2] == get32bitAsBE(amd)) { + type_ |= tAMD; + getCpuid(0x80000001, data); + if (data[3] & (1U << 31)) type_ |= t3DN; + if (data[3] & (1U << 15)) type_ |= tCMOV; + if (data[3] & (1U << 30)) type_ |= tE3DN; + if (data[3] & (1U << 22)) type_ |= tMMX2; + if (data[3] & (1U << 27)) type_ |= 
tRDTSCP; + } + if (data[2] == get32bitAsBE(intel)) { + type_ |= tINTEL; + getCpuid(0x80000001, data); + if (data[3] & (1U << 27)) type_ |= tRDTSCP; + if (data[2] & (1U << 5)) type_ |= tLZCNT; + } + getCpuid(1, data); + if (data[2] & (1U << 0)) type_ |= tSSE3; + if (data[2] & (1U << 9)) type_ |= tSSSE3; + if (data[2] & (1U << 19)) type_ |= tSSE41; + if (data[2] & (1U << 20)) type_ |= tSSE42; + if (data[2] & (1U << 23)) type_ |= tPOPCNT; + if (data[2] & (1U << 25)) type_ |= tAESNI; + if (data[2] & (1U << 1)) type_ |= tPCLMULQDQ; + if (data[2] & (1U << 27)) type_ |= tOSXSAVE; + if (data[2] & (1U << 30)) type_ |= tRDRAND; + + if (data[3] & (1U << 15)) type_ |= tCMOV; + if (data[3] & (1U << 23)) type_ |= tMMX; + if (data[3] & (1U << 25)) type_ |= tMMX2 | tSSE; + if (data[3] & (1U << 26)) type_ |= tSSE2; + + if (type_ & tOSXSAVE) { + // check XFEATURE_ENABLED_MASK[2:1] = '11b' + uint64 bv = getXfeature(); + if ((bv & 6) == 6) { + if (data[2] & (1U << 28)) type_ |= tAVX; + if (data[2] & (1U << 12)) type_ |= tFMA; + } + } + if (maxNum >= 7) { + getCpuidEx(7, 0, data); + if (type_ & tAVX && data[1] & 0x20) type_ |= tAVX2; + if (data[1] & (1U << 3)) type_ |= tBMI1; + if (data[1] & (1U << 8)) type_ |= tBMI2; + if (data[1] & (1U << 9)) type_ |= tENHANCED_REP; + if (data[1] & (1U << 18)) type_ |= tRDSEED; + if (data[1] & (1U << 19)) type_ |= tADX; + if (data[1] & (1U << 20)) type_ |= tSMAP; + if (data[1] & (1U << 4)) type_ |= tHLE; + if (data[1] & (1U << 11)) type_ |= tRTM; + } + setFamily(); + } + void putFamily() + { + printf("family=%d, model=%X, stepping=%d, extFamily=%d, extModel=%X\n", + family, model, stepping, extFamily, extModel); + printf("display:family=%X, model=%X\n", displayFamily, displayModel); + } + bool has(Type type) const + { + return (type & type_) != 0; + } +}; + +class Clock { +public: + static inline uint64 getRdtsc() + { +#ifdef _MSC_VER + return __rdtsc(); +#else + unsigned int eax, edx; + __asm__ volatile("rdtsc" : "=a"(eax), "=d"(edx)); + return 
((uint64)edx << 32) | eax; +#endif + } + Clock() + : clock_(0) + , count_(0) + { + } + void begin() + { + clock_ -= getRdtsc(); + } + void end() + { + clock_ += getRdtsc(); + count_++; + } + int getCount() const { return count_; } + uint64 getClock() const { return clock_; } + void clear() { count_ = 0; clock_ = 0; } +private: + uint64 clock_; + int count_; +}; + +#ifdef XBYAK64 +const int UseRCX = 1 << 6; +const int UseRDX = 1 << 7; + +class Pack { + static const size_t maxTblNum = 10; + const Xbyak::Reg64 *tbl_[maxTblNum]; + size_t n_; +public: + Pack() : n_(0) {} + Pack(const Xbyak::Reg64 *tbl, size_t n) { init(tbl, n); } + Pack(const Pack& rhs) + : n_(rhs.n_) + { + if (n_ > maxTblNum) throw Error(ERR_INTERNAL); + for (size_t i = 0; i < n_; i++) tbl_[i] = rhs.tbl_[i]; + } + Pack(const Xbyak::Reg64& t0) + { n_ = 1; tbl_[0] = &t0; } + Pack(const Xbyak::Reg64& t1, const Xbyak::Reg64& t0) + { n_ = 2; tbl_[0] = &t0; tbl_[1] = &t1; } + Pack(const Xbyak::Reg64& t2, const Xbyak::Reg64& t1, const Xbyak::Reg64& t0) + { n_ = 3; tbl_[0] = &t0; tbl_[1] = &t1; tbl_[2] = &t2; } + Pack(const Xbyak::Reg64& t3, const Xbyak::Reg64& t2, const Xbyak::Reg64& t1, const Xbyak::Reg64& t0) + { n_ = 4; tbl_[0] = &t0; tbl_[1] = &t1; tbl_[2] = &t2; tbl_[3] = &t3; } + Pack(const Xbyak::Reg64& t4, const Xbyak::Reg64& t3, const Xbyak::Reg64& t2, const Xbyak::Reg64& t1, const Xbyak::Reg64& t0) + { n_ = 5; tbl_[0] = &t0; tbl_[1] = &t1; tbl_[2] = &t2; tbl_[3] = &t3; tbl_[4] = &t4; } + Pack(const Xbyak::Reg64& t5, const Xbyak::Reg64& t4, const Xbyak::Reg64& t3, const Xbyak::Reg64& t2, const Xbyak::Reg64& t1, const Xbyak::Reg64& t0) + { n_ = 6; tbl_[0] = &t0; tbl_[1] = &t1; tbl_[2] = &t2; tbl_[3] = &t3; tbl_[4] = &t4; tbl_[5] = &t5; } + Pack(const Xbyak::Reg64& t6, const Xbyak::Reg64& t5, const Xbyak::Reg64& t4, const Xbyak::Reg64& t3, const Xbyak::Reg64& t2, const Xbyak::Reg64& t1, const Xbyak::Reg64& t0) + { n_ = 7; tbl_[0] = &t0; tbl_[1] = &t1; tbl_[2] = &t2; tbl_[3] = &t3; tbl_[4] = &t4; 
tbl_[5] = &t5; tbl_[6] = &t6; } + Pack(const Xbyak::Reg64& t7, const Xbyak::Reg64& t6, const Xbyak::Reg64& t5, const Xbyak::Reg64& t4, const Xbyak::Reg64& t3, const Xbyak::Reg64& t2, const Xbyak::Reg64& t1, const Xbyak::Reg64& t0) + { n_ = 8; tbl_[0] = &t0; tbl_[1] = &t1; tbl_[2] = &t2; tbl_[3] = &t3; tbl_[4] = &t4; tbl_[5] = &t5; tbl_[6] = &t6; tbl_[7] = &t7; } + Pack(const Xbyak::Reg64& t8, const Xbyak::Reg64& t7, const Xbyak::Reg64& t6, const Xbyak::Reg64& t5, const Xbyak::Reg64& t4, const Xbyak::Reg64& t3, const Xbyak::Reg64& t2, const Xbyak::Reg64& t1, const Xbyak::Reg64& t0) + { n_ = 9; tbl_[0] = &t0; tbl_[1] = &t1; tbl_[2] = &t2; tbl_[3] = &t3; tbl_[4] = &t4; tbl_[5] = &t5; tbl_[6] = &t6; tbl_[7] = &t7; tbl_[8] = &t8; } + Pack(const Xbyak::Reg64& t9, const Xbyak::Reg64& t8, const Xbyak::Reg64& t7, const Xbyak::Reg64& t6, const Xbyak::Reg64& t5, const Xbyak::Reg64& t4, const Xbyak::Reg64& t3, const Xbyak::Reg64& t2, const Xbyak::Reg64& t1, const Xbyak::Reg64& t0) + { n_ = 10; tbl_[0] = &t0; tbl_[1] = &t1; tbl_[2] = &t2; tbl_[3] = &t3; tbl_[4] = &t4; tbl_[5] = &t5; tbl_[6] = &t6; tbl_[7] = &t7; tbl_[8] = &t8; tbl_[9] = &t9; } + Pack& append(const Xbyak::Reg64& t) + { + if (n_ == 10) { + fprintf(stderr, "ERR Pack::can't append\n"); + throw Error(ERR_BAD_PARAMETER); + } + tbl_[n_++] = &t; + return *this; + } + void init(const Xbyak::Reg64 *tbl, size_t n) + { + if (n > maxTblNum) { + fprintf(stderr, "ERR Pack::init bad n=%d\n", (int)n); + throw Error(ERR_BAD_PARAMETER); + } + n_ = n; + for (size_t i = 0; i < n; i++) { + tbl_[i] = &tbl[i]; + } + } + const Xbyak::Reg64& operator[](size_t n) const + { + if (n >= n_) { + fprintf(stderr, "ERR Pack bad n=%d\n", (int)n); + throw Error(ERR_BAD_PARAMETER); + } + return *tbl_[n]; + } + size_t size() const { return n_; } + /* + get tbl[pos, pos + num) + */ + Pack sub(size_t pos, size_t num = size_t(-1)) const + { + if (num == size_t(-1)) num = n_ - pos; + if (pos + num > n_) { + fprintf(stderr, "ERR Pack::sub bad pos=%d, 
num=%d\n", (int)pos, (int)num); + throw Error(ERR_BAD_PARAMETER); + } + Pack pack; + pack.n_ = num; + for (size_t i = 0; i < num; i++) { + pack.tbl_[i] = tbl_[pos + i]; + } + return pack; + } + void put() const + { + for (size_t i = 0; i < n_; i++) { + printf("%s ", tbl_[i]->toString()); + } + printf("\n"); + } +}; + +class StackFrame { +#ifdef XBYAK64_WIN + static const int noSaveNum = 6; + static const int rcxPos = 0; + static const int rdxPos = 1; +#else + static const int noSaveNum = 8; + static const int rcxPos = 3; + static const int rdxPos = 2; +#endif + Xbyak::CodeGenerator *code_; + int pNum_; + int tNum_; + bool useRcx_; + bool useRdx_; + int saveNum_; + int P_; + bool makeEpilog_; + Xbyak::Reg64 pTbl_[4]; + Xbyak::Reg64 tTbl_[10]; + Pack p_; + Pack t_; + StackFrame(const StackFrame&); + void operator=(const StackFrame&); +public: + const Pack& p; + const Pack& t; + /* + make stack frame + @param sf [in] this + @param pNum [in] num of function parameter(0 <= pNum <= 4) + @param tNum [in] num of temporary register(0 <= tNum <= 10, with UseRCX, UseRDX) + @param stackSizeByte [in] local stack size + @param makeEpilog [in] automatically call close() if true + + you can use + rax + gp0, ..., gp(pNum - 1) + gt0, ..., gt(tNum-1) + rcx if tNum & UseRCX + rdx if tNum & UseRDX + rsp[0..stackSizeByte - 1] + */ + StackFrame(Xbyak::CodeGenerator *code, int pNum, int tNum = 0, int stackSizeByte = 0, bool makeEpilog = true) + : code_(code) + , pNum_(pNum) + , tNum_(tNum & ~(UseRCX | UseRDX)) + , useRcx_((tNum & UseRCX) != 0) + , useRdx_((tNum & UseRDX) != 0) + , saveNum_(0) + , P_(0) + , makeEpilog_(makeEpilog) + , p(p_) + , t(t_) + { + using namespace Xbyak; + if (pNum < 0 || pNum > 4) throw Error(ERR_BAD_PNUM); + const int allRegNum = pNum + tNum_ + (useRcx_ ? 1 : 0) + (useRdx_ ? 
1 : 0); + if (allRegNum < pNum || allRegNum > 14) throw Error(ERR_BAD_TNUM); + const Reg64& _rsp = code->rsp; + const AddressFrame& _ptr = code->ptr; + saveNum_ = (std::max)(0, allRegNum - noSaveNum); + const int *tbl = getOrderTbl() + noSaveNum; + P_ = saveNum_ + (stackSizeByte + 7) / 8; + if (P_ > 0 && (P_ & 1) == 0) P_++; // here (rsp % 16) == 8, then increment P_ for 16 byte alignment + P_ *= 8; + if (P_ > 0) code->sub(_rsp, P_); +#ifdef XBYAK64_WIN + for (int i = 0; i < (std::min)(saveNum_, 4); i++) { + code->mov(_ptr [_rsp + P_ + (i + 1) * 8], Reg64(tbl[i])); + } + for (int i = 4; i < saveNum_; i++) { + code->mov(_ptr [_rsp + P_ - 8 * (saveNum_ - i)], Reg64(tbl[i])); + } +#else + for (int i = 0; i < saveNum_; i++) { + code->mov(_ptr [_rsp + P_ - 8 * (saveNum_ - i)], Reg64(tbl[i])); + } +#endif + int pos = 0; + for (int i = 0; i < pNum; i++) { + pTbl_[i] = Xbyak::Reg64(getRegIdx(pos)); + } + for (int i = 0; i < tNum_; i++) { + tTbl_[i] = Xbyak::Reg64(getRegIdx(pos)); + } + if (useRcx_ && rcxPos < pNum) code_->mov(code_->r10, code_->rcx); + if (useRdx_ && rdxPos < pNum) code_->mov(code_->r11, code_->rdx); + p_.init(pTbl_, pNum); + t_.init(tTbl_, tNum_); + } + /* + make epilog manually + @param callRet [in] call ret() if true + */ + void close(bool callRet = true) + { + using namespace Xbyak; + const Reg64& _rsp = code_->rsp; + const AddressFrame& _ptr = code_->ptr; + const int *tbl = getOrderTbl() + noSaveNum; +#ifdef XBYAK64_WIN + for (int i = 0; i < (std::min)(saveNum_, 4); i++) { + code_->mov(Reg64(tbl[i]), _ptr [_rsp + P_ + (i + 1) * 8]); + } + for (int i = 4; i < saveNum_; i++) { + code_->mov(Reg64(tbl[i]), _ptr [_rsp + P_ - 8 * (saveNum_ - i)]); + } +#else + for (int i = 0; i < saveNum_; i++) { + code_->mov(Reg64(tbl[i]), _ptr [_rsp + P_ - 8 * (saveNum_ - i)]); + } +#endif + if (P_ > 0) code_->add(_rsp, P_); + + if (callRet) code_->ret(); + } + ~StackFrame() + { + if (!makeEpilog_) return; + try { + close(); + } catch (std::exception& e) { + 
printf("ERR:StackFrame %s\n", e.what()); + exit(1); + } catch (...) { + printf("ERR:StackFrame otherwise\n"); + exit(1); + } + } +private: + const int *getOrderTbl() const + { + using namespace Xbyak; + static const int tbl[] = { +#ifdef XBYAK64_WIN + Operand::RCX, Operand::RDX, Operand::R8, Operand::R9, Operand::R10, Operand::R11, Operand::RDI, Operand::RSI, +#else + Operand::RDI, Operand::RSI, Operand::RDX, Operand::RCX, Operand::R8, Operand::R9, Operand::R10, Operand::R11, +#endif + Operand::RBX, Operand::RBP, Operand::R12, Operand::R13, Operand::R14, Operand::R15 + }; + return &tbl[0]; + } + int getRegIdx(int& pos) const + { + assert(pos < 14); + using namespace Xbyak; + const int *tbl = getOrderTbl(); + int r = tbl[pos++]; + if (useRcx_) { + if (r == Operand::RCX) { return Operand::R10; } + if (r == Operand::R10) { r = tbl[pos++]; } + } + if (useRdx_) { + if (r == Operand::RDX) { return Operand::R11; } + if (r == Operand::R11) { return tbl[pos++]; } + } + return r; + } +}; +#endif + +} } // end of util +#endif diff --git a/src/cpu/mips3_intf.cpp b/src/cpu/mips3_intf.cpp index f9a784652..d6c28f96a 100644 --- a/src/cpu/mips3_intf.cpp +++ b/src/cpu/mips3_intf.cpp @@ -3,6 +3,10 @@ #include "burnint.h" #include +#ifdef MIPS3_X64_DRC +#include "mips3/x64/mips3_x64.h" +#endif + #define ADDR_BITS 32 #define PAGE_SIZE 0x1000 #define PAGE_SHIFT 12 @@ -27,9 +31,13 @@ struct Mips3MemoryMap }; -static mips::mips3 *g_mips; -static Mips3MemoryMap *g_mmap; +static mips::mips3 *g_mips = nullptr; +static Mips3MemoryMap *g_mmap = nullptr; +static bool g_useRecompiler = false; +#ifdef MIPS3_X64_DRC +static mips::mips3_x64 *g_mips_x64 = nullptr; +#endif static unsigned char DefReadByte(unsigned int a) { return 0; } static unsigned short DefReadHalf(unsigned int a) { return 0; } @@ -65,11 +73,23 @@ int Mips3Init() g_mips = new mips::mips3(); g_mmap = new Mips3MemoryMap(); +#ifdef MIPS3_X64_DRC + g_mips_x64 = new mips::mips3_x64(g_mips); +#endif + ResetMemoryMap(); } +int 
Mips3UseRecompiler(bool use) // records which backend Mips3Run() should dispatch to; always returns 0
+{
+	g_useRecompiler = use;
+	return 0; // declared int: flowing off the end without a value is undefined behaviour
+}
+
 int Mips3Exit()
 {
+#ifdef MIPS3_X64_DRC
+	delete g_mips_x64;
+	g_mips_x64 = nullptr; // reset like g_mips below so a repeated exit cannot double-delete
+#endif
 	delete g_mips;
 	delete g_mmap;
 	g_mips = nullptr;
@@ -85,8 +105,18 @@ void Mips3Reset()
 
 int Mips3Run(int cycles)
 {
+#ifdef MIPS3_X64_DRC
+	if (g_mips) {
+		if (g_useRecompiler && g_mips_x64) { // recompiler both requested and constructed
+			g_mips_x64->run(cycles);
+		} else { // otherwise run the plain mips3 core
+			g_mips->run(cycles);
+		}
+	}
+#else
 	if (g_mips)
 		g_mips->run(cycles);
+#endif
 	return 0;
 }
 
diff --git a/src/cpu/mips3_intf.h b/src/cpu/mips3_intf.h
index 9a81b6b9b..09d0f0cef 100644
--- a/src/cpu/mips3_intf.h
+++ b/src/cpu/mips3_intf.h
@@ -16,6 +16,7 @@ typedef unsigned long long (*pMips3ReadDoubleHandler)(unsigned int a);
 typedef void (*pMips3WriteDoubleHandler)(unsigned int a, unsigned long long d);
 
 int Mips3Init();
+int Mips3UseRecompiler(bool use);
 int Mips3Exit();
 void Mips3Reset();
 int Mips3Run(int cycles);