diff --git a/pcsx2/Gif_Unit.h b/pcsx2/Gif_Unit.h index 7bc1ac4419..d19dde051c 100644 --- a/pcsx2/Gif_Unit.h +++ b/pcsx2/Gif_Unit.h @@ -100,10 +100,18 @@ struct Gif_Tag { void analyzeTag() { hasAD = false; u32 t = tag.REGS[0]; - for(u32 i = 0; i < nRegs; i++) { - if (i == 8) t = tag.REGS[1]; + u32 i = 0; + u32 j = std::min(nRegs, 8); + for(; i < j; i++) { regs[i] = t & 0xf; - hasAD |= (regs[i] == GIF_REG_A_D); + hasAD |= (regs[i] == GIF_REG_A_D); + t >>= 4; + } + t = tag.REGS[1]; + j = nRegs; + for(; i < j; i++) { + regs[i] = t & 0xf; + hasAD |= (regs[i] == GIF_REG_A_D); t >>= 4; } } diff --git a/plugins/GSdx/GSClut.cpp b/plugins/GSdx/GSClut.cpp index 6c76c56d9f..2526df48d9 100644 --- a/plugins/GSdx/GSClut.cpp +++ b/plugins/GSdx/GSClut.cpp @@ -126,21 +126,26 @@ void GSClut::Write(const GIFRegTEX0& TEX0, const GIFRegTEXCLUT& TEXCLUT) (this->*m_wc[TEX0.CSM][TEX0.CPSM][TEX0.PSM])(TEX0, TEXCLUT); // Mirror write to other half of buffer to simulate wrapping memory + int offset = (TEX0.CSA & (TEX0.CPSM < PSM_PSMCT16 ? 15 : 31)) * 16; - if (TEX0.PSM == PSM_PSMT8 || TEX0.PSM == PSM_PSMT8H) + + if(TEX0.PSM == PSM_PSMT8 || TEX0.PSM == PSM_PSMT8H) { int size = TEX0.CPSM < PSM_PSMCT16 ? 512 : 256; - memcpy(m_clut + 512 + offset, m_clut + offset, sizeof *m_clut * min(size, 512 - offset)); - memcpy(m_clut, m_clut + 512, sizeof *m_clut * max(0, size + offset - 512)); + memcpy(m_clut + 512 + offset, m_clut + offset, sizeof(*m_clut) * min(size, 512 - offset)); + memcpy(m_clut, m_clut + 512, sizeof(*m_clut) * max(0, size + offset - 512)); } else { int size = 16; - memcpy(m_clut + 512 + offset, m_clut + offset, sizeof *m_clut * size); - if (TEX0.CPSM < PSM_PSMCT16) - memcpy(m_clut + 512 + 256 + offset, m_clut + 256 + offset, sizeof *m_clut * size); + memcpy(m_clut + 512 + offset, m_clut + offset, sizeof(*m_clut) * size); + + if(TEX0.CPSM < PSM_PSMCT16) + { + memcpy(m_clut + 512 + 256 + offset, m_clut + 256 + offset, sizeof(*m_clut) * size); + } } } @@ -289,7 +294,7 @@ void GSClut::Read32(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) { case PSM_PSMT8: case PSM_PSMT8H: - clut += (TEX0.CSA & 15) << 4; + clut += (TEX0.CSA & 15) << 4; // disney golf title screen ReadCLUT_T32_I8(clut, m_buff32); break; case PSM_PSMT4: diff --git a/plugins/GSdx/GSDrawScanline.cpp b/plugins/GSdx/GSDrawScanline.cpp index 99b25dc80a..952c02ada2 100644 --- a/plugins/GSdx/GSDrawScanline.cpp +++ b/plugins/GSdx/GSDrawScanline.cpp @@ -958,6 +958,15 @@ void GSDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexS rb = m_global.frb.lerp16<0>(rb, fog); ga = m_global.fga.lerp16<0>(ga, fog).mix16(ga); + + /* + fog = fog.srl16(7); + + GSVector4i ifog = GSVector4i::x00ff().sub16(fog); + + rb = rb.mul16l(fog).add16(m_global.frb.mul16l(ifog)).srl16(8); + ga = ga.mul16l(fog).add16(m_global.fga.mul16l(ifog)).srl16(8).mix16(ga); + */ } // ReadFrame @@ -1204,12 +1213,6 @@ void GSDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexS if(sel.fwrite) { - if(sel.colclamp == 0) - { - rb &= GSVector4i::x00ff(); - ga &= GSVector4i::x00ff(); - } - if(sel.fpsm == 2 && sel.dthe) { int y = (top & 3) << 1; @@ -1218,6 +1221,12 @@ void GSDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexS ga = ga.add16(m_global.dimx[1 + y]); } + if(sel.colclamp == 0) + { + rb &= GSVector4i::x00ff(); + ga &= GSVector4i::x00ff(); + } + GSVector4i fs = rb.upl16(ga).pu16(rb.uph16(ga)); if(sel.fba && sel.fpsm != 1) diff --git a/plugins/GSdx/GSDrawScanlineCodeGenerator.x86.avx.cpp b/plugins/GSdx/GSDrawScanlineCodeGenerator.x86.avx.cpp index 3bb55c58f1..8fa7ec3afe 100644 --- a/plugins/GSdx/GSDrawScanlineCodeGenerator.x86.avx.cpp +++ b/plugins/GSdx/GSDrawScanlineCodeGenerator.x86.avx.cpp @@ -2610,6 +2610,16 @@ void GSDrawScanlineCodeGenerator::WriteFrame() return; } + if(m_sel.fpsm == 2 && m_sel.dthe) + { + mov(eax, ptr[esp + _top]); + and(eax, 3); + shl(eax, 5); + mov(ebp, ptr[&m_local.gd->dimx]); + vpaddw(xmm5, ptr[ebp + eax + sizeof(GSVector4i) * 0]); + vpaddw(xmm6, ptr[ebp + eax + sizeof(GSVector4i) * 1]); + } + if(m_sel.colclamp == 0) { // c[0] &= 0x00ff00ff; @@ -2621,16 +2631,6 @@ void GSDrawScanlineCodeGenerator::WriteFrame() vpand(xmm6, xmm7); } - if(m_sel.fpsm == 2 && m_sel.dthe) - { - mov(eax, ptr[esp + _top]); - and(eax, 3); - shl(eax, 5); - mov(ebp, ptr[&m_local.gd->dimx]); - vpaddw(xmm5, ptr[ebp + eax + sizeof(GSVector4i) * 0]); - vpaddw(xmm6, ptr[ebp + eax + sizeof(GSVector4i) * 1]); - } - // GSVector4i fs = c[0].upl16(c[1]).pu16(c[0].uph16(c[1])); vpunpckhwd(xmm7, xmm5, xmm6); diff --git a/plugins/GSdx/GSDrawScanlineCodeGenerator.x86.cpp b/plugins/GSdx/GSDrawScanlineCodeGenerator.x86.cpp index c19b4b4bff..9d15a89762 100644 --- a/plugins/GSdx/GSDrawScanlineCodeGenerator.x86.cpp +++ b/plugins/GSdx/GSDrawScanlineCodeGenerator.x86.cpp @@ -2745,6 +2745,16 @@ void GSDrawScanlineCodeGenerator::WriteFrame() return; } + if(m_sel.fpsm == 2 && m_sel.dthe) + { + mov(eax, ptr[esp + _top]); + and(eax, 3); + shl(eax, 5); + mov(ebp, ptr[&m_local.gd->dimx]); + paddw(xmm5, ptr[ebp + eax + sizeof(GSVector4i) * 0]); + paddw(xmm6, ptr[ebp + eax + sizeof(GSVector4i) * 1]); + } + if(m_sel.colclamp == 0) { // c[0] &= 0x000000ff; @@ -2756,16 +2766,6 @@ void GSDrawScanlineCodeGenerator::WriteFrame() pand(xmm6, xmm7); } - if(m_sel.fpsm == 2 && m_sel.dthe) - { - mov(eax, ptr[esp + _top]); - and(eax, 3); - shl(eax, 5); - mov(ebp, ptr[&m_local.gd->dimx]); - paddw(xmm5, ptr[ebp + eax + sizeof(GSVector4i) * 0]); - paddw(xmm6, ptr[ebp + eax + sizeof(GSVector4i) * 1]); - } - // GSVector4i fs = c[0].upl16(c[1]).pu16(c[0].uph16(c[1])); movdqa(xmm7, xmm5); diff --git a/plugins/GSdx/GSLocalMemory.cpp b/plugins/GSdx/GSLocalMemory.cpp index dac52f6d95..b9b9a3c457 100644 --- a/plugins/GSdx/GSLocalMemory.cpp +++ b/plugins/GSdx/GSLocalMemory.cpp @@ -1224,7 +1224,7 @@ void GSLocalMemory::WriteImageX(int& tx, int& ty, const uint8* src, int len, GIF WritePixel32(addr + offset[x], *pd); } - if(x == ex) {x = sx; y++;} + if(x >= ex) {x = sx; y++;} } break; @@ -1244,7 +1244,7 @@ void GSLocalMemory::WriteImageX(int& tx, int& ty, const uint8* src, int len, GIF WritePixel24(addr + offset[x], *(uint32*)pb); } - if(x == ex) {x = sx; y++;} + if(x >= ex) {x = sx; y++;} } break; @@ -1266,7 +1266,7 @@ void GSLocalMemory::WriteImageX(int& tx, int& ty, const uint8* src, int len, GIF WritePixel16(addr + offset[x], *pw); } - if(x == ex) {x = sx; y++;} + if(x >= ex) {x = sx; y++;} } break; @@ -1283,7 +1283,7 @@ void GSLocalMemory::WriteImageX(int& tx, int& ty, const uint8* src, int len, GIF WritePixel8(addr + offset[x], *pb); } - if(x == ex) {x = sx; y++;} + if(x >= ex) {x = sx; y++;} } break; @@ -1301,7 +1301,7 @@ void GSLocalMemory::WriteImageX(int& tx, int& ty, const uint8* src, int len, GIF WritePixel4(addr + offset[x + 1], *pb >> 4); } - if(x == ex) {x = sx; y++;} + if(x >= ex) {x = sx; y++;} } break; @@ -1318,7 +1318,7 @@ void GSLocalMemory::WriteImageX(int& tx, int& ty, const uint8* src, int len, GIF WritePixel8H(addr + offset[x], *pb); } - if(x == ex) {x = sx; y++;} + if(x >= ex) {x = sx; y++;} } break; @@ -1336,7 +1336,7 @@ void GSLocalMemory::WriteImageX(int& tx, int& ty, const uint8* src, int len, GIF WritePixel4HL(addr + offset[x + 1], *pb >> 4); } - if(x == ex) {x = sx; y++;} + if(x >= ex) {x = sx; y++;} } break; @@ -1354,7 +1354,7 @@ void GSLocalMemory::WriteImageX(int& tx, int& ty, const uint8* src, int len, GIF WritePixel4HH(addr + offset[x + 1], *pb >> 4); } - if(x == ex) {x = sx; y++;} + if(x >= ex) {x = sx; y++;} } break; diff --git a/plugins/GSdx/GSRendererDX.cpp b/plugins/GSdx/GSRendererDX.cpp index 36713b6ee6..0a956cbd00 100644 --- a/plugins/GSdx/GSRendererDX.cpp +++ b/plugins/GSdx/GSRendererDX.cpp @@ -31,12 +31,11 @@ GSRendererDX::GSRendererDX(GSTextureCache* tc, const GSVector2& pixelcenter) m_fba = !!theApp.GetConfig("fba", 1); UserHacks_AlphaHack = !!theApp.GetConfig("UserHacks_AlphaHack", 0) && !!theApp.GetConfig("UserHacks", 0); - UserHacks_WildHack = !!theApp.GetConfig("UserHacks", 0) ? theApp.GetConfig("UserHacks_WildHack", 0) : 0; UserHacks_AlphaStencil = !!theApp.GetConfig("UserHacks_AlphaStencil", 0) && !!theApp.GetConfig("UserHacks", 0); UserHacks_TCOffset = !!theApp.GetConfig("UserHacks", 0) ? theApp.GetConfig("UserHacks_TCOffset", 0) : 0; - UserHacks_TCO_x = (UserHacks_TCOffset & 0xFFFF) / -1000.0f; - UserHacks_TCO_y = ((UserHacks_TCOffset >> 16) & 0xFFFF) / -1000.0f; + UserHacks_TCO_x = (UserHacks_TCOffset & 0xFFFF) / -1000.0f; + UserHacks_TCO_y = ((UserHacks_TCOffset >> 16) & 0xFFFF) / -1000.0f; } GSRendererDX::~GSRendererDX() diff --git a/plugins/GSdx/GSRendererDX.h b/plugins/GSdx/GSRendererDX.h index 5325433ee4..a9071b8597 100644 --- a/plugins/GSdx/GSRendererDX.h +++ b/plugins/GSdx/GSRendererDX.h @@ -37,7 +37,6 @@ protected: virtual void SetupIA() = 0; virtual void UpdateFBA(GSTexture* rt) {} - unsigned int UserHacks_WildHack; unsigned int UserHacks_TCOffset; float UserHacks_TCO_x, UserHacks_TCO_y; diff --git a/plugins/GSdx/GSRendererDX11.cpp b/plugins/GSdx/GSRendererDX11.cpp index cd44665eb2..4eeb93734b 100644 --- a/plugins/GSdx/GSRendererDX11.cpp +++ b/plugins/GSdx/GSRendererDX11.cpp @@ -51,8 +51,10 @@ void GSRendererDX11::SetupIA() { GSVertex* RESTRICT d = (GSVertex*)ptr; - for(unsigned int i = 0; i < m_vertex.next; i++, d++) - if(PRIM->TME && PRIM->FST) d->UV &= 0x3FEF3FEF; + for(unsigned int i = 0; i < m_vertex.next; i++) + { + if(PRIM->TME && PRIM->FST) d[i].UV &= 0x3FEF3FEF; + } } dev->IAUnmapVertexBuffer(); diff --git a/plugins/GSdx/GSRendererDX9.cpp b/plugins/GSdx/GSRendererDX9.cpp index c56218bef4..5ea645a9fb 100644 --- a/plugins/GSdx/GSRendererDX9.cpp +++ b/plugins/GSdx/GSRendererDX9.cpp @@ -205,7 +205,9 @@ void GSRendererDX9::SetupIA() //printf("GSDX: %08X | D3D9(%d) %s\n", s->UV & 0x3FEF3FEF, m_vertex.next, i == 0 ? "*" : ""); } else + { t = GSVector4(GSVector4i::load(s->UV).upl16()); + } } else { diff --git a/plugins/GSdx/GSRendererHW.cpp b/plugins/GSdx/GSRendererHW.cpp index ebda232835..2c91741623 100644 --- a/plugins/GSdx/GSRendererHW.cpp +++ b/plugins/GSdx/GSRendererHW.cpp @@ -202,6 +202,11 @@ void GSRendererHW::Draw() if(PRIM->TME) { + if(GSLocalMemory::m_psm[context->TEX0.PSM].pal > 0) + { + m_mem.m_clut.Read32(context->TEX0, env.TEXA); + } + GSVector4i r; GetTextureMinMax(r, context->TEX0, context->CLAMP, m_vt.IsLinear()); diff --git a/plugins/GSdx/GSState.cpp b/plugins/GSdx/GSState.cpp index c1047e8146..92cc3881b0 100644 --- a/plugins/GSdx/GSState.cpp +++ b/plugins/GSdx/GSState.cpp @@ -47,6 +47,16 @@ GSState::GSState() { m_nativeres = !!theApp.GetConfig("nativeres", 1); + s_n = 0; + s_dump = !!theApp.GetConfig("dump", 0); + s_save = !!theApp.GetConfig("save", 0); + s_savez = !!theApp.GetConfig("savez", 0); + s_saven = theApp.GetConfig("saven", 0); + + UserHacks_AggressiveCRC = !!theApp.GetConfig("UserHacks", 0) ? theApp.GetConfig("UserHacks_AggressiveCRC", 0) : 0; + UserHacks_DisableCrcHacks = !!theApp.GetConfig("UserHacks", 0) ? theApp.GetConfig( "UserHacks_DisableCrcHacks", 0 ) : 0; + UserHacks_WildHack = !!theApp.GetConfig("UserHacks", 0) ? theApp.GetConfig("UserHacks_WildHack", 0) : 0; + memset(&m_v, 0, sizeof(m_v)); memset(&m_vertex, 0, sizeof(m_vertex)); memset(&m_index, 0, sizeof(m_index)); @@ -112,15 +122,6 @@ GSState::GSState() Reset(); ResetHandlers(); - - s_n = 0; - s_dump = !!theApp.GetConfig("dump", 0); - s_save = !!theApp.GetConfig("save", 0); - s_savez = !!theApp.GetConfig("savez", 0); - s_saven = theApp.GetConfig("saven", 0); - - userHacks_AggressiveCRC = !!theApp.GetConfig("UserHacks", 0) ? theApp.GetConfig("UserHacks_AggressiveCRC", 0) : 0; - userHacks_DisableCrcHacks = !!theApp.GetConfig("UserHacks", 0) ? theApp.GetConfig( "UserHacks_DisableCrcHacks", 0 ) : 0; } GSState::~GSState() @@ -243,7 +244,7 @@ void GSState::ResetHandlers() m_fpGIFPackedRegHandlers[GIF_REG_PRIM] = (GIFPackedRegHandler)(GIFRegHandler)&GSState::GIFRegHandlerPRIM; m_fpGIFPackedRegHandlers[GIF_REG_RGBA] = &GSState::GIFPackedRegHandlerRGBA; m_fpGIFPackedRegHandlers[GIF_REG_STQ] = &GSState::GIFPackedRegHandlerSTQ; - m_fpGIFPackedRegHandlers[GIF_REG_UV] = &GSState::GIFPackedRegHandlerUV; + m_fpGIFPackedRegHandlers[GIF_REG_UV] = !UserHacks_WildHack ? &GSState::GIFPackedRegHandlerUV : &GSState::GIFPackedRegHandlerUV_Hack; m_fpGIFPackedRegHandlers[GIF_REG_TEX0_1] = (GIFPackedRegHandler)(GIFRegHandler)&GSState::GIFRegHandlerTEX0<0>; m_fpGIFPackedRegHandlers[GIF_REG_TEX0_2] = (GIFPackedRegHandler)(GIFRegHandler)&GSState::GIFRegHandlerTEX0<1>; m_fpGIFPackedRegHandlers[GIF_REG_CLAMP_1] = (GIFPackedRegHandler)(GIFRegHandler)&GSState::GIFRegHandlerCLAMP<0>; @@ -281,7 +282,7 @@ void GSState::ResetHandlers() m_fpGIFRegHandlers[GIF_A_D_REG_PRIM] = &GSState::GIFRegHandlerPRIM; m_fpGIFRegHandlers[GIF_A_D_REG_RGBAQ] = &GSState::GIFRegHandlerRGBAQ; m_fpGIFRegHandlers[GIF_A_D_REG_ST] = &GSState::GIFRegHandlerST; - m_fpGIFRegHandlers[GIF_A_D_REG_UV] = &GSState::GIFRegHandlerUV; + m_fpGIFRegHandlers[GIF_A_D_REG_UV] = !UserHacks_WildHack ? &GSState::GIFRegHandlerUV : &GSState::GIFRegHandlerUV_Hack; m_fpGIFRegHandlers[GIF_A_D_REG_TEX0_1] = &GSState::GIFRegHandlerTEX0<0>; m_fpGIFRegHandlers[GIF_A_D_REG_TEX0_2] = &GSState::GIFRegHandlerTEX0<1>; m_fpGIFRegHandlers[GIF_A_D_REG_CLAMP_1] = &GSState::GIFRegHandlerCLAMP<0>; @@ -492,6 +493,13 @@ void GSState::GIFPackedRegHandlerUV(const GIFPackedReg* RESTRICT r) { GSVector4i v = GSVector4i::loadl(r) & GSVector4i::x00003fff(); + m_v.UV = (uint32)GSVector4i::store(v.ps32(v)); +} + +void GSState::GIFPackedRegHandlerUV_Hack(const GIFPackedReg* RESTRICT r) +{ + GSVector4i v = GSVector4i::loadl(r) & GSVector4i::x00003fff(); + m_v.UV = (uint32)GSVector4i::store(v.ps32(v)); isPackedUV_HackFlag = true; @@ -682,6 +690,11 @@ void GSState::GIFRegHandlerST(const GIFReg* RESTRICT r) } void GSState::GIFRegHandlerUV(const GIFReg* RESTRICT r) +{ + m_v.UV = r->UV.u32[0] & 0x3fff3fff; +} + +void GSState::GIFRegHandlerUV_Hack(const GIFReg* RESTRICT r) { m_v.UV = r->UV.u32[0] & 0x3fff3fff; @@ -1179,6 +1192,8 @@ template void GSState::GIFRegHandlerZBUF(const GIFReg* RESTRICT r) { // during startup all regs are cleared to 0 (by the bios or something), so we mask z until this register becomes valid // edit: breaks Grandia Xtreme and sounds like a bad idea generally. What was the intend? + // edit2: should be set only before any serious drawing happens, grandia extreme nulls out this register throughout the whole game, + // I already forgot what it fixed, that game never masked the zbuffer, but assumed it was set by default //ZBUF.ZMSK = 1; } @@ -2229,7 +2244,7 @@ void GSState::SetGameCRC(uint32 crc, int options) { m_crc = crc; m_options = options; - m_game = CRC::Lookup(userHacks_DisableCrcHacks ? 0 : crc); + m_game = CRC::Lookup(UserHacks_DisableCrcHacks ? 0 : crc); } // @@ -2589,7 +2604,7 @@ void GSState::GetTextureMinMax(GSVector4i& r, const GIFRegTEX0& TEX0, const GIFR mask = (uu.upl32(vv) == uu.uph32(vv)).mask(); } - uv = uv.rintersect(vr - GSVector4i(0,0,1,1)); + uv = uv.rintersect(tr); switch(wms) { @@ -2604,8 +2619,13 @@ void GSState::GetTextureMinMax(GSVector4i& r, const GIFRegTEX0& TEX0, const GIFR break; case CLAMP_CLAMP: case CLAMP_REGION_CLAMP: - if(vr.x < uv.x) vr.x = uv.x; - if(vr.z > uv.z + 1) vr.z = uv.z + 1; + if(vr.x > uv.z) vr.z = vr.x + 1; + else if(vr.z < uv.x) vr.x = vr.z - 1; + else + { + if(vr.x < uv.x) vr.x = uv.x; + if(vr.z > uv.z + 1) vr.z = uv.z + 1; + } break; case CLAMP_REGION_REPEAT: break; @@ -2625,8 +2645,13 @@ void GSState::GetTextureMinMax(GSVector4i& r, const GIFRegTEX0& TEX0, const GIFR break; case CLAMP_CLAMP: case CLAMP_REGION_CLAMP: - if(vr.y < uv.y) vr.y = uv.y; - if(vr.w > uv.w + 1) vr.w = uv.w + 1; + if(vr.y > uv.w) vr.w = vr.y + 1; + else if(vr.w < uv.y) vr.y = vr.w - 1; + else + { + if(vr.y < uv.y) vr.y = uv.y; + if(vr.w > uv.w + 1) vr.w = uv.w + 1; + } break; case CLAMP_REGION_REPEAT: break; @@ -2682,7 +2707,6 @@ void GSState::GetAlphaMinMax() a.w = max(env.TEXA.TA0, env.TEXA.TA1); break; case 3: - m_mem.m_clut.Read32(context->TEX0, env.TEXA); m_mem.m_clut.GetAlphaMinMax32(a.y, a.w); break; default: @@ -5355,7 +5379,7 @@ bool GSState::IsBadFrame(int& skip, int UserHacks_SkipDraw) GetSkipCount gsc = map[m_game.title]; g_crc_region = m_game.region; - g_aggressive = userHacks_AggressiveCRC; + g_aggressive = UserHacks_AggressiveCRC; #ifdef ENABLE_DYNAMIC_CRC_HACK bool res=false; if(IsInvokedDynamicCrcHack(fi, skip, g_crc_region, res, m_crc)){ if( !res ) return false; } else diff --git a/plugins/GSdx/GSState.h b/plugins/GSdx/GSState.h index b5338a98a0..e359a20440 100644 --- a/plugins/GSdx/GSState.h +++ b/plugins/GSdx/GSState.h @@ -48,6 +48,7 @@ class GSState : public GSAlignedClass<32> void GIFPackedRegHandlerRGBA(const GIFPackedReg* RESTRICT r); void GIFPackedRegHandlerSTQ(const GIFPackedReg* RESTRICT r); void GIFPackedRegHandlerUV(const GIFPackedReg* RESTRICT r); + void GIFPackedRegHandlerUV_Hack(const GIFPackedReg* RESTRICT r); template void GIFPackedRegHandlerXYZF2(const GIFPackedReg* RESTRICT r); template void GIFPackedRegHandlerXYZ2(const GIFPackedReg* RESTRICT r); void GIFPackedRegHandlerFOG(const GIFPackedReg* RESTRICT r); @@ -77,6 +78,7 @@ class GSState : public GSAlignedClass<32> void GIFRegHandlerRGBAQ(const GIFReg* RESTRICT r); void GIFRegHandlerST(const GIFReg* RESTRICT r); void GIFRegHandlerUV(const GIFReg* RESTRICT r); + void GIFRegHandlerUV_Hack(const GIFReg* RESTRICT r); template void GIFRegHandlerXYZF2(const GIFReg* RESTRICT r); template void GIFRegHandlerXYZ2(const GIFReg* RESTRICT r); template void GIFRegHandlerTEX0(const GIFReg* RESTRICT r); @@ -138,8 +140,11 @@ class GSState : public GSAlignedClass<32> protected: bool IsBadFrame(int& skip, int UserHacks_SkipDraw); - int userHacks_AggressiveCRC; - int userHacks_DisableCrcHacks; + + int UserHacks_AggressiveCRC; + int UserHacks_DisableCrcHacks; + int UserHacks_WildHack; + bool isPackedUV_HackFlag; GSVertex m_v; float m_q; @@ -196,8 +201,6 @@ public: GSDump m_dump; bool m_nativeres; - bool isPackedUV_HackFlag; - int s_n; bool s_dump; bool s_save; diff --git a/plugins/GSdx/vsprops/common.props b/plugins/GSdx/vsprops/common.props index aa3b2ffff8..7ac9d55c40 100644 --- a/plugins/GSdx/vsprops/common.props +++ b/plugins/GSdx/vsprops/common.props @@ -14,7 +14,7 @@ Level4 ProgramDatabase 4996;4995;4324;4100;4101;4201;4556;%(DisableSpecificWarnings) - $(DXSDK_DIR)include;$(VTUNE_AMPLIFIER_XE_2011_DIR)include;%(AdditionalIncludeDirectories) + $(DXSDK_DIR)include;$(VTUNE_AMPLIFIER_XE_2013_DIR)include;%(AdditionalIncludeDirectories) true @@ -23,7 +23,7 @@ true Windows false - $(VTUNE_AMPLIFIER_XE_2011_DIR)lib32;%(AdditionalLibraryDirectories) + $(VTUNE_AMPLIFIER_XE_2013_DIR)lib32;%(AdditionalLibraryDirectories) .\postBuild.cmd "$(TargetPath)" "$(TargetName)" $(TargetExt) $(PcsxSubsection) diff --git a/plugins/GSdx/xbyak/xbyak.h b/plugins/GSdx/xbyak/xbyak.h index 82433373e7..74ed5e55e8 100644 --- a/plugins/GSdx/xbyak/xbyak.h +++ b/plugins/GSdx/xbyak/xbyak.h @@ -1,29 +1,51 @@ +#pragma once #ifndef XBYAK_XBYAK_H_ #define XBYAK_XBYAK_H_ /*! @file xbyak.h @brief Xbyak ; JIT assembler for x86(IA32)/x64 by C++ @author herumi - @version $Revision: 1.239 $ - @url http://homepage1.nifty.com/herumi/soft/xbyak.html - @date $Date: 2011/02/07 06:09:35 $ + @url https://github.com/herumi/xbyak, http://homepage1.nifty.com/herumi/soft/xbyak_e.html @note modified new BSD license - http://www.opensource.org/licenses/bsd-license.php + http://opensource.org/licenses/BSD-3-Clause */ +#ifndef XBYAK_NO_OP_NAMES + #if not +0 // trick to detect whether 'not' is operator or not + #error "use -fno-operator-names option if you want to use and(), or(), xor(), not() as function names, Or define XBYAK_NO_OP_NAMES and use and_(), or_(), xor_(), not_()." + #endif +#endif #include // for debug print #include -#include +#include #include #include +#if (__cplusplus >= 201103) || (_MSC_VER >= 1500) || defined(__GXX_EXPERIMENTAL_CXX0X__) + #include + #if defined(_MSC_VER) && (_MSC_VER < 1600) + #define XBYAK_USE_TR1_UNORDERED_MAP + #else + #define XBYAK_USE_UNORDERED_MAP + #endif +#elif (__GNUC__ >= 4 && __GNUC_MINOR__ >= 5) || (__clang_major__ >= 3) + #include + #define XBYAK_USE_TR1_UNORDERED_MAP +#else + #include +#endif #ifdef _WIN32 #include + #include #elif defined(__GNUC__) #include #include + #include +#endif +#if !defined(_MSC_VER) || (_MSC_VER >= 1600) + #include #endif -#ifdef __x86_64__ +#if defined(__x86_64__) && !defined(__MINGW64__) #define XBYAK64_GCC #elif defined(_WIN64) #define XBYAK64_WIN @@ -42,12 +64,6 @@ #pragma warning(disable : 4786) /* identifier is too long */ #pragma warning(disable : 4503) /* name is too long */ #pragma warning(disable : 4127) /* constant expresison */ - #if (_MSC_VER <= 1200) - #ifndef for - #define for if(0);else for - #pragma warning(disable : 4127) /* condition is constant(for "if" trick) */ - #endif - #endif #endif namespace Xbyak { @@ -56,7 +72,7 @@ namespace Xbyak { enum { DEFAULT_MAX_CODE_SIZE = 4096, - VERSION = 0x2991, /* 0xABCD = A.BC(D) */ + VERSION = 0x4000 /* 0xABCD = A.BC(D) */ }; /* #ifndef MIE_INTEGER_TYPE_DEFINED @@ -65,8 +81,8 @@ enum { typedef unsigned __int64 uint64; typedef __int64 sint64; #else - typedef unsigned long long uint64; - typedef long long sint64; + typedef uint64_t uint64; + typedef int64_t sint64; #endif typedef unsigned int uint32; typedef unsigned short uint16; @@ -107,10 +123,16 @@ enum Error { ERR_BAD_ST_COMBINATION, ERR_OVER_LOCAL_LABEL, ERR_UNDER_LOCAL_LABEL, + ERR_CANT_ALLOC, + ERR_ONLY_T_NEAR_IS_SUPPORTED_IN_AUTO_GROW, + ERR_BAD_PROTECT_MODE, + ERR_BAD_PNUM, + ERR_BAD_TNUM, + ERR_BAD_VSIB_ADDRESSING, ERR_INTERNAL }; -static inline const char *ConvertErrorToString(Error err) +inline const char *ConvertErrorToString(Error err) { static const char errTbl[][40] = { "none", @@ -135,37 +157,87 @@ static inline const char *ConvertErrorToString(Error err) "bad st combination", "over local label", "under local label", + "can't alloc", + "T_SHORT is not supported in AutoGrow", + "bad protect mode", + "bad pNum", + "bad tNum", + "bad vsib addressing", "internal error", }; if (err < 0 || err > ERR_INTERNAL) return 0; return errTbl[err]; } +inline void *AlignedMalloc(size_t size, size_t alignment) +{ +#ifdef __MINGW32__ + return __mingw_aligned_malloc(size, alignment); +#elif defined(_WIN32) + return _aligned_malloc(size, alignment); +#else + void *p; + int ret = posix_memalign(&p, alignment, size); + return (ret == 0) ? p : 0; +#endif +} + +inline void AlignedFree(void *p) +{ +#ifdef __MINGW32__ + __mingw_aligned_free(p); +#elif defined(_MSC_VER) + _aligned_free(p); +#else + free(p); +#endif +} + +template +inline const To CastTo(From p) throw() +{ + return (const To)(size_t)(p); +} namespace inner { enum { debug = 1 }; +static const size_t ALIGN_PAGE_SIZE = 4096; -static inline uint32 GetPtrDist(const void *p1, const void *p2) +inline bool IsInDisp8(uint32 x) { return 0xFFFFFF80 <= x || x <= 0x7F; } +inline bool IsInInt32(uint64 x) { return ~uint64(0x7fffffffu) <= x || x <= 0x7FFFFFFFU; } + +inline uint32 VerifyInInt32(uint64 x) { - uint64 diff = static_cast(p1) - static_cast(p2); #ifdef XBYAK64 - if (0x7FFFFFFFULL < diff && diff < 0xFFFFFFFF80000000ULL) throw ERR_OFFSET_IS_TOO_BIG; + if (!IsInInt32(x)) throw ERR_OFFSET_IS_TOO_BIG; #endif - return static_cast(diff); + return static_cast(x); } -static inline bool IsInDisp8(uint32 x) { return 0xFFFFFF80 <= x || x <= 0x7F; } -static inline bool IsInInt32(uint64 x) { return 0xFFFFFFFF80000000ULL <= x || x <= 0x7FFFFFFFU; } +enum LabelMode { + LasIs, // as is + Labs, // absolute + LaddTop // (addr + top) for mov(reg, label) with AutoGrow +}; -} +} // inner + +/* + custom allocator +*/ +struct Allocator { + virtual uint8 *alloc(size_t size) { return reinterpret_cast(AlignedMalloc(size, inner::ALIGN_PAGE_SIZE)); } + virtual void free(uint8 *p) { AlignedFree(p); } + virtual ~Allocator() {} + /* override to return false if you call protect() manually */ + virtual bool useProtect() const { return true; } +}; class Operand { private: - const uint8 idx_; - const uint8 kind_; - const uint8 bit_; - const uint8 ext8bit_; // 1 if spl/bpl/sil/dil, otherwise 0 - void operator=(Operand&); + uint8 idx_; // 0..15, MSB = 1 if spl/bpl/sil/dil + uint8 kind_; + uint16 bit_; public: enum Kind { NONE = 0, @@ -189,17 +261,16 @@ public: AX = 0, CX, DX, BX, SP, BP, SI, DI, AL = 0, CL, DL, BL, AH, CH, DH, BH }; - Operand() : idx_(0), kind_(0), bit_(0), ext8bit_(0) { } - Operand(int idx, Kind kind, int bit, int ext8bit = 0) - : idx_(static_cast(idx)) + Operand() : idx_(0), kind_(0), bit_(0) { } + Operand(int idx, Kind kind, int bit, bool ext8bit = 0) + : idx_(static_cast(idx | (ext8bit ? 0x80 : 0))) , kind_(static_cast(kind)) - , bit_(static_cast(bit)) - , ext8bit_(static_cast(ext8bit)) + , bit_(static_cast(bit)) { assert((bit_ & (bit_ - 1)) == 0); // bit must be power of two } Kind getKind() const { return static_cast(kind_); } - int getIdx() const { return idx_; } + int getIdx() const { return idx_ & 15; } bool isNone() const { return kind_ == 0; } bool isMMX() const { return is(MMX); } bool isXMM() const { return is(XMM); } @@ -207,7 +278,7 @@ public: bool isREG(int bit = 0) const { return is(REG, bit); } bool isMEM(int bit = 0) const { return is(MEM, bit); } bool isFPU() const { return is(FPU); } - bool isExt8bit() const { return ext8bit_ != 0; } + bool isExt8bit() const { return (idx_ & 0x80) != 0; } // any bit is accetable if bit == 0 bool is(int kind, uint32 bit = 0) const { @@ -217,10 +288,11 @@ public: uint32 getBit() const { return bit_; } const char *toString() const { + const int idx = getIdx(); if (kind_ == REG) { - if (ext8bit_) { + if (isExt8bit()) { static const char tbl[4][4] = { "spl", "bpl", "sil", "dil" }; - return tbl[idx_ - 4]; + return tbl[idx - 4]; } static const char tbl[4][16][5] = { { "al", "cl", "dl", "bl", "ah", "ch", "dh", "bh", "r8b", "r9b", "r10b", "r11b", "r12b", "r13b", "r14b", "r15b" }, @@ -228,30 +300,31 @@ public: { "eax", "ecx", "edx", "ebx", "esp", "ebp", "esi", "edi", "r8d", "r9d", "r10d", "r11d", "r12d", "r13d", "r14d", "r15d" }, { "rax", "rcx", "rdx", "rbx", "rsp", "rbp", "rsi", "rdi", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15" }, }; - return tbl[bit_ == 8 ? 0 : bit_ == 16 ? 1 : bit_ == 32 ? 2 : 3][idx_]; + return tbl[bit_ == 8 ? 0 : bit_ == 16 ? 1 : bit_ == 32 ? 2 : 3][idx]; } else if (isYMM()) { static const char tbl[16][5] = { "ym0", "ym1", "ym2", "ym3", "ym4", "ym5", "ym6", "ym7", "ym8", "ym9", "ym10", "ym11", "ym12", "ym13", "ym14", "ym15" }; - return tbl[idx_]; + return tbl[idx]; } else if (isXMM()) { static const char tbl[16][5] = { "xm0", "xm1", "xm2", "xm3", "xm4", "xm5", "xm6", "xm7", "xm8", "xm9", "xm10", "xm11", "xm12", "xm13", "xm14", "xm15" }; - return tbl[idx_]; + return tbl[idx]; } else if (isMMX()) { static const char tbl[8][4] = { "mm0", "mm1", "mm2", "mm3", "mm4", "mm5", "mm6", "mm7" }; - return tbl[idx_]; + return tbl[idx]; } else if (isFPU()) { static const char tbl[8][4] = { "st0", "st1", "st2", "st3", "st4", "st5", "st6", "st7" }; - return tbl[idx_]; + return tbl[idx]; } throw ERR_INTERNAL; } + bool operator==(const Operand& rhs) const { return idx_ == rhs.idx_ && kind_ == rhs.kind_ && bit_ == rhs.bit_; } + bool operator!=(const Operand& rhs) const { return !operator==(rhs); } }; class Reg : public Operand { - void operator=(const Reg&); bool hasRex() const { return isExt8bit() | isREG(64) | isExtIdx(); } public: Reg() { } - Reg(int idx, Kind kind, int bit = 0, int ext8bit = 0) : Operand(idx, kind, bit, ext8bit) { } + Reg(int idx, Kind kind, int bit = 0, bool ext8bit = false) : Operand(idx, kind, bit, ext8bit) { } Reg changeBit(int bit) const { return Reg(getIdx(), getKind(), bit, isExt8bit()); } bool isExtIdx() const { return getIdx() > 7; } uint8 getRex(const Reg& base = Reg()) const @@ -260,49 +333,37 @@ public: } }; -class Reg8 : public Reg { - void operator=(const Reg8&); -public: - explicit Reg8(int idx, int ext8bit = 0) : Reg(idx, Operand::REG, 8, ext8bit) { } +struct Reg8 : public Reg { + explicit Reg8(int idx = 0, bool ext8bit = false) : Reg(idx, Operand::REG, 8, ext8bit) { } }; -class Reg16 : public Reg { - void operator=(const Reg16&); -public: - explicit Reg16(int idx) : Reg(idx, Operand::REG, 16) { } +struct Reg16 : public Reg { + explicit Reg16(int idx = 0) : Reg(idx, Operand::REG, 16) { } }; -class Mmx : public Reg { - void operator=(const Mmx&); -public: - explicit Mmx(int idx, Kind kind = Operand::MMX, int bit = 64) : Reg(idx, kind, bit) { } +struct Mmx : public Reg { + explicit Mmx(int idx = 0, Kind kind = Operand::MMX, int bit = 64) : Reg(idx, kind, bit) { } }; -class Xmm : public Mmx { - void operator=(const Xmm&); -public: - explicit Xmm(int idx, Kind kind = Operand::XMM, int bit = 128) : Mmx(idx, kind, bit) { } +struct Xmm : public Mmx { + explicit Xmm(int idx = 0, Kind kind = Operand::XMM, int bit = 128) : Mmx(idx, kind, bit) { } }; -class Ymm : public Xmm { - void operator=(const Ymm&); -public: - explicit Ymm(int idx) : Xmm(idx, Operand::YMM, 256) { } +struct Ymm : public Xmm { + explicit Ymm(int idx = 0) : Xmm(idx, Operand::YMM, 256) { } }; -class Fpu : public Reg { - void operator=(const Fpu&); -public: - explicit Fpu(int idx) : Reg(idx, Operand::FPU, 32) { } +struct Fpu : public Reg { + explicit Fpu(int idx = 0) : Reg(idx, Operand::FPU, 32) { } }; // register for addressing(32bit or 64bit) class Reg32e : public Reg { public: // [base_(this) + index_ * scale_ + disp_] - const Reg index_; - const int scale_; // 0(index is none), 1, 2, 4, 8 - const uint32 disp_; + Reg index_; + int scale_; // 0(index is none), 1, 2, 4, 8 + uint32 disp_; private: friend class Address; friend Reg32e operator+(const Reg32e& a, const Reg32e& b) @@ -335,11 +396,14 @@ private: { return Reg32e(r, r.index_, r.scale_, r.disp_ + disp); } + friend Reg32e operator+(unsigned int disp, const Reg32e& r) + { + return operator+(r, disp); + } friend Reg32e operator-(const Reg32e& r, unsigned int disp) { return operator+(r, -static_cast(disp)); } - void operator=(const Reg32e&); public: explicit Reg32e(int idx, int bit) : Reg(idx, REG, bit) @@ -348,7 +412,7 @@ public: , disp_(0) { } - Reg32e(const Reg& base, const Reg& index, int scale, unsigned int disp) + Reg32e(const Reg& base, const Reg& index, int scale, unsigned int disp, bool allowUseEspIndex = false) : Reg(base) , index_(index) , scale_(scale) @@ -356,7 +420,7 @@ public: { if (scale != 0 && scale != 1 && scale != 2 && scale != 4 && scale != 8) throw ERR_BAD_SCALE; if (!base.isNone() && !index.isNone() && base.getBit() != index.getBit()) throw ERR_BAD_COMBINATION; - if (index.getIdx() == Operand::ESP) throw ERR_ESP_CANT_BE_INDEX; + if (!allowUseEspIndex && index.getIdx() == Operand::ESP) throw ERR_ESP_CANT_BE_INDEX; } Reg32e optimize() const // select smaller size { @@ -367,18 +431,19 @@ public: } return *this; } + bool operator==(const Reg32e& rhs) const + { + if (getIdx() == rhs.getIdx() && index_.getIdx() == rhs.getIdx() && scale_ == rhs.scale_ && disp_ == rhs.disp_) return true; + return false; + } }; struct Reg32 : public Reg32e { - explicit Reg32(int idx) : Reg32e(idx, 32) {} -private: - void operator=(const Reg32&); + explicit Reg32(int idx = 0) : Reg32e(idx, 32) {} }; #ifdef XBYAK64 struct Reg64 : public Reg32e { - explicit Reg64(int idx) : Reg32e(idx, 64) {} -private: - void operator=(const Reg64&); + explicit Reg64(int idx = 0) : Reg32e(idx, 64) {} }; struct RegRip { uint32 disp_; @@ -392,46 +457,165 @@ struct RegRip { }; #endif +// QQQ:need to refactor +struct Vsib { + // [index_ * scale_ + base_ + disp_] + uint8 indexIdx_; // xmm reg idx + uint8 scale_; // 0(none), 1, 2, 4, 8 + uint8 baseIdx_; // base reg idx + uint8 baseBit_; // 0(none), 32, 64 + uint32 disp_; + bool isYMM_; // idx is YMM +public: + static inline void verifyScale(int scale) + { + if (scale != 1 && scale != 2 && scale != 4 && scale != 8) throw ERR_BAD_SCALE; + } + int getIndexIdx() const { return indexIdx_; } + int getScale() const { return scale_; } + int getBaseIdx() const { return baseIdx_; } + int getBaseBit() const { return baseBit_; } + bool isYMM() const { return isYMM_; } + uint32 getDisp() const { return disp_; } + Vsib(int indexIdx, int scale, bool isYMM, int baseIdx = 0, int baseBit = 0, uint32 disp = 0) + : indexIdx_((uint8)indexIdx) + , scale_((uint8)scale) + , baseIdx_((uint8)baseIdx) + , baseBit_((uint8)baseBit) + , disp_(disp) + , isYMM_(isYMM) + { + } +}; +inline Vsib operator*(const Xmm& x, int scale) +{ + Vsib::verifyScale(scale); + return Vsib(x.getIdx(), scale, x.isYMM()); +} +inline Vsib operator+(const Xmm& x, uint32 disp) +{ + return Vsib(x.getIdx(), 1, x.isYMM(), 0, 0, disp); +} +inline Vsib operator+(const Xmm& x, const Reg32e& r) +{ + if (!r.index_.isNone()) throw ERR_BAD_COMBINATION; + return Vsib(x.getIdx(), 1, x.isYMM(), r.getIdx(), r.getBit(), r.disp_); +} +inline Vsib operator+(const Vsib& vs, uint32 disp) +{ + Vsib ret(vs); + ret.disp_ += disp; + return ret; +} +inline Vsib operator+(const Vsib& vs, const Reg32e& r) +{ + if (vs.getBaseBit() || !r.index_.isNone()) throw ERR_BAD_COMBINATION; + Vsib ret(vs); + ret.baseIdx_ = (uint8)r.getIdx(); + ret.baseBit_ = (uint8)r.getBit(); + ret.disp_ += r.disp_; + return ret; +} +inline Vsib operator+(uint32 disp, const Xmm& x) { return x + disp; } +inline Vsib operator+(uint32 disp, const Vsib& vs) { return vs + disp; } +inline Vsib operator+(const Reg32e& r, const Xmm& x) { return x + r; } +inline Vsib operator+(const Reg32e& r, const Vsib& vs) { return vs + r; } + +// 2nd parameter for constructor of CodeArray(maxSize, userPtr, alloc) +void *const AutoGrow = (void*)1; + class CodeArray { enum { - ALIGN_PAGE_SIZE = 4096, MAX_FIXED_BUF_SIZE = 8 }; enum Type { FIXED_BUF, // use buf_(non alignment, non protect) USER_BUF, // use userPtr(non alignment, non protect) - ALLOC_BUF // use new(alignment, protect) + ALLOC_BUF, // use new(alignment, protect) + AUTO_GROW // automatically move and grow memory if necessary }; void operator=(const CodeArray&); - Type type_; - uint8 *const allocPtr_; // for ALLOC_BUF + bool isAllocType() const { return type_ == ALLOC_BUF || type_ == AUTO_GROW; } + Type getType(size_t maxSize, void *userPtr) const + { + if (userPtr == AutoGrow) return AUTO_GROW; + if (userPtr) return USER_BUF; + if (maxSize <= MAX_FIXED_BUF_SIZE) return FIXED_BUF; + return ALLOC_BUF; + } + struct AddrInfo { + size_t codeOffset; // position to write + size_t jmpAddr; // value to write + int jmpSize; // size of jmpAddr + inner::LabelMode mode; + AddrInfo(size_t _codeOffset, size_t _jmpAddr, int _jmpSize, inner::LabelMode _mode) + : codeOffset(_codeOffset), jmpAddr(_jmpAddr), jmpSize(_jmpSize), mode(_mode) {} + uint64 getVal(const uint8 *top) const + { + uint64 disp = (mode == inner::LaddTop) ? jmpAddr + size_t(top) : (mode == inner::LasIs) ? jmpAddr : jmpAddr - size_t(top); + if (jmpSize == 4) disp = inner::VerifyInInt32(disp); + return disp; + } + }; + typedef std::list AddrInfoList; + AddrInfoList addrInfoList_; + const Type type_; + Allocator defaultAllocator_; + Allocator *alloc_; uint8 buf_[MAX_FIXED_BUF_SIZE]; // for FIXED_BUF protected: - const size_t maxSize_; - uint8 *const top_; + size_t maxSize_; + uint8 *top_; size_t size_; + + /* + allocate new memory and copy old data to the new area + */ + void growMemory() + { + const size_t newSize = (std::max)(DEFAULT_MAX_CODE_SIZE, maxSize_ * 2); + uint8 *newTop = alloc_->alloc(newSize); + if (newTop == 0) throw ERR_CANT_ALLOC; + for (size_t i = 0; i < size_; i++) newTop[i] = top_[i]; + alloc_->free(top_); + top_ = newTop; + maxSize_ = newSize; + } + /* + calc jmp address for AutoGrow mode + */ + void calcJmpAddress() + { + for (AddrInfoList::const_iterator i = addrInfoList_.begin(), ie = addrInfoList_.end(); i != ie; ++i) { + uint64 disp = i->getVal(top_); + rewrite(i->codeOffset, disp, i->jmpSize); + } + if (alloc_->useProtect() && !protect(top_, size_, true)) throw ERR_CANT_PROTECT; + } public: - CodeArray(size_t maxSize = MAX_FIXED_BUF_SIZE, void *userPtr = 0) - : type_(userPtr ? USER_BUF : maxSize <= MAX_FIXED_BUF_SIZE ? FIXED_BUF : ALLOC_BUF) - , allocPtr_(type_ == ALLOC_BUF ? new uint8[maxSize + ALIGN_PAGE_SIZE] : 0) + CodeArray(size_t maxSize = MAX_FIXED_BUF_SIZE, void *userPtr = 0, Allocator *allocator = 0) + : type_(getType(maxSize, userPtr)) + , alloc_(allocator ? allocator : &defaultAllocator_) , maxSize_(maxSize) - , top_(type_ == ALLOC_BUF ? getAlignedAddress(allocPtr_, ALIGN_PAGE_SIZE) : type_ == USER_BUF ? reinterpret_cast(userPtr) : buf_) + , top_(isAllocType() ? alloc_->alloc((std::max)(maxSize, 1)) : type_ == USER_BUF ? reinterpret_cast(userPtr) : buf_) , size_(0) { - if (type_ == ALLOC_BUF && !protect(top_, maxSize, true)) { + if (maxSize_ > 0 && top_ == 0) throw ERR_CANT_ALLOC; + if ((type_ == ALLOC_BUF && alloc_->useProtect()) && !protect(top_, maxSize, true)) { + alloc_->free(top_); throw ERR_CANT_PROTECT; } } virtual ~CodeArray() { - if (type_ == ALLOC_BUF) { - protect(top_, maxSize_, false); - delete[] allocPtr_; + if (isAllocType()) { + if (alloc_->useProtect()) protect(top_, maxSize_, false); + alloc_->free(top_); } } CodeArray(const CodeArray& rhs) : type_(rhs.type_) - , allocPtr_(0) + , defaultAllocator_(rhs.defaultAllocator_) , maxSize_(rhs.maxSize_) , top_(buf_) , size_(rhs.size_) @@ -439,9 +623,20 @@ public: if (type_ != FIXED_BUF) throw ERR_CODE_ISNOT_COPYABLE; for (size_t i = 0; i < size_; i++) top_[i] = rhs.top_[i]; } + void resetSize() + { + size_ = 0; + addrInfoList_.clear(); + } void db(int code) { - if (size_ >= maxSize_) throw ERR_CODE_IS_TOO_BIG; + if (size_ >= maxSize_) { + if (type_ == AUTO_GROW) { + growMemory(); + } else { + throw ERR_CODE_IS_TOO_BIG; + } + } top_[size_++] = static_cast(code); } void db(const uint8 *code, int codeSize) @@ -456,8 +651,17 @@ public: void dw(uint32 code) { db(code, 2); } void dd(uint32 code) { db(code, 4); } const uint8 *getCode() const { return top_; } + template + const F getCode() const { return CastTo(top_); } const uint8 *getCurr() const { return &top_[size_]; } + template + const F getCurr() const { return CastTo(&top_[size_]); } size_t getSize() const { return size_; } + void setSize(size_t size) + { + if (size >= maxSize_) throw ERR_OFFSET_IS_TOO_BIG; + size_ = size; + } void dump() const { const uint8 *p = getCode(); @@ -481,17 +685,24 @@ public: } } /* - @param data [in] address of jmp data + @param offset [in] offset from top @param disp [in] offset from the next of jmp @param size [in] write size(1, 2, 4, 8) */ - void rewrite(uint8 *data, uint64 disp, size_t size) + void rewrite(size_t offset, uint64 disp, size_t size) { + assert(offset < maxSize_); if (size != 1 && size != 2 && size != 4 && size != 8) throw ERR_BAD_PARAMETER; + uint8 *const data = top_ + offset; for (size_t i = 0; i < size; i++) { data[i] = static_cast(disp >> (i * 8)); } } + void save(size_t offset, size_t val, int size, inner::LabelMode mode) + { + addrInfoList_.push_back(AddrInfo(offset, val, size, mode)); + } + bool isAutoGrow() const { return type_ == AUTO_GROW; } void updateRegField(uint8 regIdx) const { *top_ = (*top_ & B11000111) | ((regIdx << 3) & B00111000); @@ -533,60 +744,43 @@ public: class Address : public Operand, public CodeArray { void operator=(const Address&); uint64 disp_; + uint8 rex_; bool isOnlyDisp_; bool is64bitDisp_; - uint8 rex_; -public: + mutable bool isVsib_; + bool isYMM_; + void verify() const { if (isVsib_) throw ERR_BAD_VSIB_ADDRESSING; } const bool is32bit_; - Address(uint32 sizeBit, bool isOnlyDisp, uint64 disp, bool is32bit, bool is64bitDisp = false) +public: + Address(uint32 sizeBit, bool isOnlyDisp, uint64 disp, bool is32bit, bool is64bitDisp = false, bool isVsib = false, bool isYMM = false) : Operand(0, MEM, sizeBit) , CodeArray(6) // 6 = 1(ModRM) + 1(SIB) + 4(disp) , disp_(disp) + , rex_(0) , isOnlyDisp_(isOnlyDisp) , is64bitDisp_(is64bitDisp) - , rex_(0) + , isVsib_(isVsib) + , isYMM_(isYMM) , is32bit_(is32bit) { } - bool isOnlyDisp() const { return isOnlyDisp_; } // for mov eax - uint64 getDisp() const { return disp_; } - uint8 getRex() const { return rex_; } - bool is64bitDisp() const { return is64bitDisp_; } // for moffset + void setVsib(bool isVsib) const { isVsib_ = isVsib; } + bool isVsib() const { return isVsib_; } + bool isYMM() const { return isYMM_; } + bool is32bit() const { verify(); return is32bit_; } + bool isOnlyDisp() const { verify(); return isOnlyDisp_; } // for mov eax + uint64 getDisp() const { verify(); return disp_; } + uint8 getRex() const { verify(); return rex_; } + bool is64bitDisp() const { verify(); return is64bitDisp_; } // for moffset void setRex(uint8 rex) { rex_ = rex; } }; class AddressFrame { private: void operator=(const AddressFrame&); -public: - const uint32 bit_; - explicit AddressFrame(uint32 bit) : bit_(bit) { } - Address operator[](const void *disp) const + Address makeAddress(const Reg32e& r, bool isVsib, bool isYMM) const { - size_t adr = reinterpret_cast(disp); -#ifdef XBYAK64 - if (adr > 0xFFFFFFFFU) throw ERR_OFFSET_IS_TOO_BIG; -#endif - Reg32e r(Reg(), Reg(), 0, static_cast(adr)); - return operator[](r); - } -#ifdef XBYAK64 - Address operator[](uint64 disp) const - { - return Address(64, true, disp, false, true); - } - Address operator[](const RegRip& addr) const - { - Address frame(64, true, addr.disp_, false); - frame.db(B00000101); - frame.dd(addr.disp_); - return frame; - } -#endif - Address operator[](const Reg32e& in) const - { - const Reg32e& r = in.optimize(); - Address frame(bit_, (r.isNone() && r.index_.isNone()), r.disp_, r.isBit(32) || r.index_.isBit(32)); + Address frame(bit_, (r.isNone() && r.index_.isNone()), r.disp_, r.isBit(32) || r.index_.isBit(32), false, isVsib, isYMM); enum { mod00 = 0, mod01 = 1, mod10 = 2 }; @@ -622,11 +816,60 @@ public: frame.setRex(rex); return frame; } +public: + const uint32 bit_; + explicit AddressFrame(uint32 bit) : bit_(bit) { } + Address operator[](const void *disp) const + { + size_t adr = reinterpret_cast(disp); +#ifdef XBYAK64 + if (adr > 0xFFFFFFFFU) throw ERR_OFFSET_IS_TOO_BIG; +#endif + Reg32e r(Reg(), Reg(), 0, static_cast(adr)); + return operator[](r); + } +#ifdef XBYAK64 + Address operator[](uint64 disp) const + { + return Address(64, true, disp, false, true); + } + Address operator[](const RegRip& addr) const + { + Address frame(bit_, true, addr.disp_, false); + frame.db(B00000101); + frame.dd(addr.disp_); + return frame; + } +#endif + Address operator[](const Reg32e& in) const + { + return makeAddress(in.optimize(), false, false); + } + Address operator[](const Vsib& vs) const + { + if (vs.getBaseBit() == 0) { +#ifdef XBYAK64 + const int bit = 64; +#else + const int bit = 32; +#endif + const Reg32e r(Reg(), Reg32e(vs.getIndexIdx(), bit), vs.getScale(), vs.getDisp(), true); + return makeAddress(r, true, vs.isYMM()); + } else { + const Reg32e r(Reg32e(vs.getBaseIdx(), vs.getBaseBit()), Reg32e(vs.getIndexIdx(), vs.getBaseBit()), vs.getScale(), vs.getDisp(), true); + return makeAddress(r, true, vs.isYMM()); + } + } + Address operator[](const Xmm& x) const + { + return operator[](x + 0); + } }; struct JmpLabel { - uint8 *endOfJmp; /* end address of jmp */ - bool isShort; + size_t endOfJmp; /* offset from top to the end address of jmp */ + int jmpSize; + inner::LabelMode mode; }; class Label { @@ -639,8 +882,18 @@ class Label { int stackPos_; int usedCount_; int localCount_; // for .*** - typedef std::map DefinedList; - typedef std::multimap UndefinedList; +public: +private: +#ifdef XBYAK_USE_UNORDERED_MAP + typedef std::unordered_map DefinedList; + typedef std::unordered_multimap UndefinedList; +#elif defined(XBYAK_USE_TR1_UNORDERED_MAP) + typedef std::tr1::unordered_map DefinedList; + typedef std::tr1::unordered_multimap UndefinedList; +#else + typedef std::map DefinedList; + typedef std::multimap UndefinedList; +#endif DefinedList definedList_; UndefinedList undefinedList_; @@ -671,6 +924,16 @@ public: , localCount_(0) { } + void reset() + { + base_ = 0; + anonymousCount_ = 0; + stackPos_ = 1; + usedCount_ = 0; + localCount_ = 0; + definedList_.clear(); + undefinedList_.clear(); + } void enterLocal() { if (stackPos_ == maxStack) throw ERR_OVER_LOCAL_LABEL; @@ -682,7 +945,7 @@ public: localCount_ = stack_[--stackPos_ - 1]; } void set(CodeArray *base) { base_ = base; } - void define(const char *label, const uint8 *address) + void define(const char *label, size_t addrOffset, const uint8 *addr) { std::string newLabel(label); if (newLabel == "@@") { @@ -692,7 +955,7 @@ public: } label = newLabel.c_str(); // add label - DefinedList::value_type item(label, address); + DefinedList::value_type item(label, addrOffset); std::pair ret = definedList_.insert(item); if (!ret.second) throw ERR_LABEL_IS_REDEFINED; // search undefined label @@ -700,22 +963,34 @@ public: UndefinedList::iterator itr = undefinedList_.find(label); if (itr == undefinedList_.end()) break; const JmpLabel *jmp = &itr->second; - uint32 disp = inner::GetPtrDist(address, jmp->endOfJmp); - if (jmp->isShort && !inner::IsInDisp8(disp)) throw ERR_LABEL_IS_TOO_FAR; - size_t jmpSize = jmp->isShort ? 1 : 4; - uint8 *data = jmp->endOfJmp - jmpSize; - base_->rewrite(data, disp, jmpSize); + const size_t offset = jmp->endOfJmp - jmp->jmpSize; + size_t disp; + if (jmp->mode == inner::LaddTop) { + disp = addrOffset; + } else if (jmp->mode == inner::Labs) { + disp = size_t(addr); + } else { + disp = addrOffset - jmp->endOfJmp; + if (jmp->jmpSize <= 4) disp = inner::VerifyInInt32(disp); + if (jmp->jmpSize == 1 && !inner::IsInDisp8((uint32)disp)) throw ERR_LABEL_IS_TOO_FAR; + } + if (base_->isAutoGrow()) { + base_->save(offset, disp, jmp->jmpSize, jmp->mode); + } else { + base_->rewrite(offset, disp, jmp->jmpSize); + } undefinedList_.erase(itr); } } - const uint8 *getAddress(const char *label) const + bool getOffset(size_t *offset, const char *label) const { std::string newLabel = convertLabel(label); DefinedList::const_iterator itr = definedList_.find(newLabel); if (itr != definedList_.end()) { - return itr->second; + *offset = itr->second; + return true; } else { - return 0; + return false; } } void addUndefinedLabel(const char *label, const JmpLabel& jmp) @@ -735,12 +1010,8 @@ public: static inline std::string toStr(int num) { char buf[16]; -#ifdef _WIN32 - #if _MSC_VER < 1400 - _snprintf - #else - _snprintf_s - #endif +#ifdef _MSC_VER + _snprintf_s #else snprintf #endif @@ -801,7 +1072,7 @@ private: if (p1->isMEM()) throw ERR_BAD_COMBINATION; if (p2->isMEM()) { const Address& addr = static_cast(*p2); - if (BIT == 64 && addr.is32bit_) db(0x67); + if (BIT == 64 && addr.is32bit()) db(0x67); rex = addr.getRex() | static_cast(*p1).getRex(); } else { // ModRM(reg, base); @@ -849,52 +1120,52 @@ private: addr.updateRegField(static_cast(reg.getIdx())); db(addr.getCode(), static_cast(addr.getSize())); } - void opJmp(const char *label, LabelType type, uint8 shortCode, uint8 longCode, uint8 longPref) + void makeJmp(uint32 disp, LabelType type, uint8 shortCode, uint8 longCode, uint8 longPref) { - const uint8 *address = label_.getAddress(label); - if (address) { /* label exists */ - opJmp(address, type, shortCode, longCode, longPref); - } else { - const int shortHeaderSize = 1; - const int shortJmpSize = shortHeaderSize + 1; /* +1 means 8-bit displacement */ - const int longHeaderSize = longPref ? 2 : 1; - const int longJmpSize = longHeaderSize + 4; /* +4 means 32-bit displacement */ - uint8 *top = const_cast(getCurr()); - bool isShort = (type != T_NEAR); - JmpLabel jmp; - jmp.endOfJmp = top + (isShort ? shortJmpSize : longJmpSize); - jmp.isShort = isShort; - if (isShort) { - db(shortCode); - db(0); - } else { - if (longPref) db(longPref); - db(longCode); - dd(0); - } - label_.addUndefinedLabel(label, jmp); - } - } - void opJmp(const void *addr, LabelType type, uint8 shortCode, uint8 longCode, uint8 longPref) - { - const int shortHeaderSize = 1; - const int shortJmpSize = shortHeaderSize + 1; /* +1 means 8-bit displacement */ + const int shortJmpSize = 2; const int longHeaderSize = longPref ? 2 : 1; - const int longJmpSize = longHeaderSize + 4; /* +4 means 32-bit displacement */ - - uint8 *top = const_cast(getCurr()); - uint32 disp = inner::GetPtrDist(addr, top); + const int longJmpSize = longHeaderSize + 4; if (type != T_NEAR && inner::IsInDisp8(disp - shortJmpSize)) { - db(shortCode); - db(0); - rewrite(top + shortHeaderSize, disp - shortJmpSize, 1); + db(shortCode); db(disp - shortJmpSize); } else { if (type == T_SHORT) throw ERR_LABEL_IS_TOO_FAR; if (longPref) db(longPref); + db(longCode); dd(disp - longJmpSize); + } + } + void opJmp(const char *label, LabelType type, uint8 shortCode, uint8 longCode, uint8 longPref) + { + if (isAutoGrow() && size_ + 16 >= maxSize_) growMemory(); /* avoid splitting code of jmp */ + size_t offset = 0; + if (label_.getOffset(&offset, label)) { /* label exists */ + makeJmp(inner::VerifyInInt32(offset - size_), type, shortCode, longCode, longPref); + } else { + JmpLabel jmp; + if (type == T_NEAR) { + jmp.jmpSize = 4; + if (longPref) db(longPref); + db(longCode); dd(0); + } else { + jmp.jmpSize = 1; + db(shortCode); db(0); + } + jmp.mode = inner::LasIs; + jmp.endOfJmp = size_; + label_.addUndefinedLabel(label, jmp); + } + } + void opJmpAbs(const void *addr, LabelType type, uint8 shortCode, uint8 longCode) + { + if (isAutoGrow()) { + if (type != T_NEAR) throw ERR_ONLY_T_NEAR_IS_SUPPORTED_IN_AUTO_GROW; + if (size_ + 16 >= maxSize_) growMemory(); db(longCode); dd(0); - rewrite(top + longHeaderSize, disp - longJmpSize, 4); + save(size_ - 4, size_t(addr) - size_, 4, inner::Labs); + } else { + makeJmp(inner::VerifyInInt32(reinterpret_cast(addr) - getCurr()), type, shortCode, longCode, 0); } + } /* preCode is for SSSE3/SSE4 */ void opGen(const Operand& reg, const Operand& op, int code, int pref, bool isValid(const Operand&, const Operand&), int imm8 = NONE, int preCode = NONE) @@ -991,8 +1262,9 @@ private: { verifyMemHasSize(op); uint32 immBit = inner::IsInDisp8(imm) ? 8 : isInDisp16(imm) ? 16 : 32; + if (op.isBit(8)) immBit = 8; if (op.getBit() < immBit) throw ERR_IMM_IS_TOO_BIG; - if (op.isREG(32|64) && immBit == 16) immBit = 32; /* don't use MEM16 if 32/64bit mode */ + if (op.isBit(32|64) && immBit == 16) immBit = 32; /* don't use MEM16 if 32/64bit mode */ if (op.isREG() && op.getIdx() == 0 && (op.getBit() == immBit || (op.isBit(64) && immBit == 32))) { // rax, eax, ax, al rex(op); db(code | 4 | (immBit == 8 ? 0 : 1)); @@ -1036,17 +1308,11 @@ private: } void opMovxx(const Reg& reg, const Operand& op, uint8 code) { + if (op.isBit(32)) throw ERR_BAD_COMBINATION; int w = op.isBit(16); bool cond = reg.isREG() && (reg.getBit() > op.getBit()); opModRM(reg, op, cond && op.isREG(), cond && op.isMEM(), 0x0F, code | w); } - #ifdef XBYAK64 - void opMovsxd(const Reg& reg, const Operand& op) - { - bool cond = reg.isREG() && (reg.getBit() > op.getBit()); - opModRM(reg, op, cond && op.isREG(), cond && op.isMEM(), 0x63); - } - #endif void opFpuMem(const Address& addr, uint8 m16, uint8 m32, uint8 m64, uint8 ext, uint8 m64ext) { if (addr.is64bitDisp()) throw ERR_CANT_USE_64BIT_DISP; @@ -1059,7 +1325,6 @@ private: addr.updateRegField(ext); db(addr.getCode(), static_cast(addr.getSize())); } - // like yasm not nasm // use code1 if reg1 == st0 // use code2 if reg1 != st0 && reg2 == st0 void opFpuFpu(const Fpu& reg1, const Fpu& reg2, uint32 code1, uint32 code2) @@ -1073,6 +1338,101 @@ private: { db(code1); db(code2 | reg.getIdx()); } + void opVex(const Reg& r, const Operand *p1, const Operand *p2, int type, int code, int w) + { + bool x, b; + if (p2->isMEM()) { + const Address& addr = static_cast(*p2); + uint8 rex = addr.getRex(); + x = (rex & 2) != 0; + b = (rex & 1) != 0; + if (BIT == 64 && addr.is32bit()) db(0x67); + if (BIT == 64 && w == -1) w = (rex & 4) ? 1 : 0; + } else { + x = false; + b = static_cast(*p2).isExtIdx(); + } + if (w == -1) w = 0; + vex(r.isExtIdx(), p1->getIdx(), r.isYMM(), type, x, b, w); + db(code); + if (p2->isMEM()) { + const Address& addr = static_cast(*p2); + addr.updateRegField(static_cast(r.getIdx())); + db(addr.getCode(), static_cast(addr.getSize())); + } else { + db(getModRM(3, r.getIdx(), p2->getIdx())); + } + } + // (r, r, r/m) if isR_R_RM + // (r, r/m, r) + void opGpr(const Reg32e& r, const Operand& op1, const Operand& op2, int type, uint8 code, bool isR_R_RM) + { + const Operand *p1 = &op1; + const Operand *p2 = &op2; + if (!isR_R_RM) std::swap(p1, p2); + const unsigned int bit = r.getBit(); + if (p1->getBit() != bit || (p2->isREG() && p2->getBit() != bit)) throw ERR_BAD_COMBINATION; + int w = bit == 64; + opVex(r, p1, p2, type, code, w); + } + void opAVX_X_X_XM(const Xmm& x1, const Operand& op1, const Operand& op2, int type, int code0, bool supportYMM, int w = -1) + { + const Xmm *x2; + const Operand *op; + if (op2.isNone()) { + x2 = &x1; + op = &op1; + } else { + if (!(op1.isXMM() || (supportYMM && op1.isYMM()))) throw ERR_BAD_COMBINATION; + x2 = static_cast(&op1); + op = &op2; + } + // (x1, x2, op) + if (!((x1.isXMM() && x2->isXMM()) || (supportYMM && x1.isYMM() && x2->isYMM()))) throw ERR_BAD_COMBINATION; + opVex(x1, x2, op, type, code0, w); + } + // if cvt then return pointer to Xmm(idx) (or Ymm(idx)), otherwise return op + void opAVX_X_X_XMcvt(const Xmm& x1, const Operand& op1, const Operand& op2, bool cvt, Operand::Kind kind, int type, int code0, bool supportYMM, int w = -1) + { + // use static_cast to avoid calling unintentional copy constructor on gcc + opAVX_X_X_XM(x1, op1, cvt ? kind == Operand::XMM ? static_cast(Xmm(op2.getIdx())) : static_cast(Ymm(op2.getIdx())) : op2, type, code0, supportYMM, w); + } + // support (x, x/m, imm), (y, y/m, imm) + void opAVX_X_XM_IMM(const Xmm& x, const Operand& op, int type, int code, bool supportYMM, int w = -1, int imm = NONE) + { + opAVX_X_X_XM(x, x.isXMM() ? xm0 : ym0, op, type, code, supportYMM, w); if (imm != NONE) db((uint8)imm); + } + // QQQ:need to refactor + void opSp1(const Reg& reg, const Operand& op, uint8 pref, uint8 code0, uint8 code1) + { + if (reg.isBit(8)) throw ERR_BAD_SIZE_OF_REGISTER; + bool is16bit = reg.isREG(16) && (op.isREG(16) || op.isMEM()); + if (!is16bit && !(reg.isREG(i32e) && (op.isREG(reg.getBit()) || op.isMEM()))) throw ERR_BAD_COMBINATION; + if (is16bit) db(0x66); + db(pref); opModRM(reg.changeBit(i32e == 32 ? 32 : reg.getBit()), op, op.isREG(), true, code0, code1); + } + void opGather(const Xmm& x1, const Address& addr, const Xmm& x2, int type, uint8 code, int w, int mode) + { + if (!addr.isVsib()) throw ERR_BAD_VSIB_ADDRESSING; + const int y_vx_y = 0; + const int y_vy_y = 1; +// const int x_vy_x = 2; + const bool isAddrYMM = addr.isYMM(); + if (!x1.isXMM() || isAddrYMM || !x2.isXMM()) { + bool isOK = false; + if (mode == y_vx_y) { + isOK = x1.isYMM() && !isAddrYMM && x2.isYMM(); + } else if (mode == y_vy_y) { + isOK = x1.isYMM() && isAddrYMM && x2.isYMM(); + } else { // x_vy_x + isOK = !x1.isYMM() && isAddrYMM && !x2.isYMM(); + } + if (!isOK) throw ERR_BAD_VSIB_ADDRESSING; + } + addr.setVsib(false); + opAVX_X_X_XM(isAddrYMM ? Ymm(x1.getIdx()) : x1, isAddrYMM ? Ymm(x2.getIdx()) : x2, addr, type, code, true, w); + addr.setVsib(true); + } public: unsigned int getVersion() const { return VERSION; } using CodeArray::db; @@ -1086,8 +1446,6 @@ public: const Reg8 al, cl, dl, bl, ah, ch, dh, bh; const AddressFrame ptr, byte, word, dword, qword; const Fpu st0, st1, st2, st3, st4, st5, st6, st7; - const Xmm* xmTbl[16]; - const Ymm* ymTbl[16]; #ifdef XBYAK64 const Reg64 rax, rcx, rdx, rbx, rsp, rbp, rsi, rdi, r8, r9, r10, r11, r12, r13, r14, r15; const Reg32 r8d, r9d, r10d, r11d, r12d, r13d, r14d, r15d; @@ -1100,10 +1458,9 @@ public: const Ymm &ym8, &ym9, &ym10, &ym11, &ym12, &ym13, &ym14, &ym15; const RegRip rip; #endif - void L(const char *label) { - label_.define(label, getCurr()); + label_.define(label, getSize(), getCurr()); } void inLocalLabel() { label_.enterLocal(); } void outLocalLabel() { label_.leaveLocal(); } @@ -1113,7 +1470,7 @@ public: } void jmp(const void *addr, LabelType type = T_AUTO) { - opJmp(addr, type, B11101011, B11101001, 0); + opJmpAbs(addr, type, B11101011, B11101001); } void jmp(const Operand& op) { @@ -1229,18 +1586,18 @@ public: } void mov(const Operand& op, #ifdef XBYAK64 - uint64 + uint64 imm, bool opti = true #else - uint32 + uint32 imm, bool = true #endif - imm) + ) { verifyMemHasSize(op); if (op.isREG()) { rex(op); int code, size; #ifdef XBYAK64 - if (op.isBit(64) && inner::IsInInt32(imm)) { + if (opti && op.isBit(64) && inner::IsInInt32(imm)) { db(B11000111); code = B11000000; size = 4; @@ -1261,6 +1618,42 @@ public: throw ERR_BAD_COMBINATION; } } + void mov( +#ifdef XBYAK64 + const Reg64& reg, +#else + const Reg32& reg, +#endif + const char *label) + { + if (label == 0) { + mov(reg, 0, true); + return; + } + const int jmpSize = (int)sizeof(size_t); +#ifdef XBYAK64 + const size_t dummyAddr = (size_t(0x11223344) << 32) | 55667788; +#else + const size_t dummyAddr = 0x12345678; +#endif + if (isAutoGrow() && size_ + 16 >= maxSize_) growMemory(); + size_t offset = 0; + if (label_.getOffset(&offset, label)) { + if (isAutoGrow()) { + mov(reg, dummyAddr); + save(size_ - jmpSize, offset, jmpSize, inner::LaddTop); + } else { + mov(reg, size_t(top_) + offset, false); // not to optimize 32-bit imm + } + return; + } + mov(reg, dummyAddr); + JmpLabel jmp; + jmp.endOfJmp = size_; + jmp.jmpSize = jmpSize; + jmp.mode = isAutoGrow() ? inner::LaddTop : inner::Labs; + label_.addUndefinedLabel(label, jmp); + } void cmpxchg8b(const Address& addr) { opModM(addr, Reg32(1), 0x0F, B11000111); } #ifdef XBYAK64 void cmpxchg16b(const Address& addr) { opModM(addr, Reg64(1), 0x0F, B11000111); } @@ -1292,7 +1685,7 @@ public: } void call(const void *addr) { - opJmp(addr, T_NEAR, 0, B11101000, 0); + opJmpAbs(addr, T_NEAR, 0, B11101000); } // special case void movd(const Address& addr, const Mmx& mmx) @@ -1354,6 +1747,11 @@ public: if (!op.isREG(64) && !op.isMEM()) throw ERR_BAD_COMBINATION; opGen(Reg64(xmm.getIdx()), op, 0x22, 0x66, 0, imm, B00111010); // force to 64bit } + void movsxd(const Reg64& reg, const Operand& op) + { + if (!op.isBit(32)) throw ERR_BAD_COMBINATION; + opModRM(reg, op, op.isREG(), op.isMEM(), 0x63); + } #endif // MMX2 : pextrw : reg, mmx/xmm, imm // SSE4 : pextrw, pextrb, pextrd, extractps : reg/mem, mmx/xmm, imm @@ -1393,84 +1791,24 @@ public: if (!mmx.isMMX()) throw ERR_BAD_COMBINATION; opModM(addr, mmx, 0x0F, B11100111); } - void popcnt(const Reg& reg, const Operand& op) - { - bool is16bit = reg.isREG(16) && (op.isREG(16) || op.isMEM()); - if (!is16bit && !(reg.isREG(i32e) && (op.isREG(i32e) || op.isMEM()))) throw ERR_BAD_COMBINATION; - if (is16bit) db(0x66); - db(0xF3); opModRM(reg.changeBit(i32e == 32 ? 32 : reg.getBit()), op, op.isREG(), true, 0x0F, 0xB8); - } void crc32(const Reg32e& reg, const Operand& op) { if (reg.isBit(32) && op.isBit(16)) db(0x66); db(0xF2); opModRM(reg, op, op.isREG(), op.isMEM(), 0x0F, 0x38, 0xF0 | (op.isBit(8) ? 0 : 1)); } - void vextractps(const Operand& op, const Xmm& xmm, uint8 imm) - { - if (!(op.isREG(32) || op.isMEM()) || xmm.isYMM()) throw ERR_BAD_COMBINATION; - opAVX_X_XM_IMM(xmm, cvtReg(op, op.isREG(), Operand::XMM), MM_0F3A | PP_66, 0x17, false, 0, imm); - } - // support (x, x, x/m), (y, y, y/m) - void opAVX_X_X_XM(const Xmm& xm1, const Operand& op1, const Operand& op2, int type, int code0, bool supportYMM, int w = -1) - { - const Xmm *xm2; - const Operand *op; - if (op2.isNone()) { - xm2 = &xm1; - op = &op1; - } else { - if (!(op1.isXMM() || (supportYMM && op1.isYMM()))) throw ERR_BAD_COMBINATION; - xm2 = static_cast(&op1); - op = &op2; - } - // (xm1, xm2, op) - if (!((xm1.isXMM() && xm2->isXMM()) || (supportYMM && xm1.isYMM() && xm2->isYMM()))) throw ERR_BAD_COMBINATION; - bool x, b; - if (op->isMEM()) { - const Address& addr = *static_cast(op); - uint8 rex = addr.getRex(); - x = (rex & 2) != 0; - b = (rex & 1) != 0; - if (BIT == 64 && addr.is32bit_) db(0x67); - if (BIT == 64 && w == -1) w = (rex & 4) ? 1 : 0; - } else { - x = false; - b = static_cast(op)->isExtIdx(); - } - if (w == -1) w = 0; - vex(xm1.isExtIdx(), xm2->getIdx(), xm1.isYMM(), type, x, b, w); - db(code0); - if (op->isMEM()) { - const Address& addr = *static_cast(op); - addr.updateRegField(static_cast(xm1.getIdx())); - db(addr.getCode(), static_cast(addr.getSize())); - } else { - db(getModRM(3, xm1.getIdx(), op->getIdx())); - } - } - // if cvt then return pointer to Xmm(idx) (or Ymm(idx)), otherwise return op - const Operand& cvtReg(const Operand& op, bool cvt, Operand::Kind kind) const - { - if (!cvt) return op; - return (kind == Operand::XMM) ? *xmTbl[op.getIdx()] : *ymTbl[op.getIdx()]; - } - // support (x, x/m, imm), (y, y/m, imm) - void opAVX_X_XM_IMM(const Xmm& xmm, const Operand& op, int type, int code, bool supportYMM, int w = -1, int imm = NONE) - { - opAVX_X_X_XM(xmm, xmm.isXMM() ? xm0 : ym0, op, type, code, supportYMM, w); if (imm != NONE) db((uint8)imm); - } + void rdrand(const Reg& r) { if (r.isBit(8)) throw ERR_BAD_SIZE_OF_REGISTER; opModR(Reg(6, Operand::REG, r.getBit()), r, 0x0f, 0xc7); } + void rorx(const Reg32e& r, const Operand& op, uint8 imm) { opGpr(r, op, Reg32e(0, r.getBit()), MM_0F3A | PP_F2, 0xF0, false); db(imm); } enum { NONE = 256 }; -public: - CodeGenerator(size_t maxSize = DEFAULT_MAX_CODE_SIZE, void *userPtr = 0) - : CodeArray(maxSize, userPtr) + CodeGenerator(size_t maxSize = DEFAULT_MAX_CODE_SIZE, void *userPtr = 0, Allocator *allocator = 0) + : CodeArray(maxSize, userPtr, allocator) , mm0(0), mm1(1), mm2(2), mm3(3), mm4(4), mm5(5), mm6(6), mm7(7) , xmm0(0), xmm1(1), xmm2(2), xmm3(3), xmm4(4), xmm5(5), xmm6(6), xmm7(7) , ymm0(0), ymm1(1), ymm2(2), ymm3(3), ymm4(4), ymm5(5), ymm6(6), ymm7(7) , xm0(xmm0), xm1(xmm1), xm2(xmm2), xm3(xmm3), xm4(xmm4), xm5(xmm5), xm6(xmm6), xm7(xmm7) // for my convenience , ym0(ymm0), ym1(ymm1), ym2(ymm2), ym3(ymm3), ym4(ymm4), ym5(ymm5), ym6(ymm6), ym7(ymm7) // for my convenience , eax(Operand::EAX), ecx(Operand::ECX), edx(Operand::EDX), ebx(Operand::EBX), esp(Operand::ESP), ebp(Operand::EBP), esi(Operand::ESI), edi(Operand::EDI) - , ax(Operand::EAX), cx(Operand::ECX), dx(Operand::EDX), bx(Operand::EBX), sp(Operand::ESP), bp(Operand::EBP), si(Operand::ESI), di(Operand::EDI) + , ax(Operand::AX), cx(Operand::CX), dx(Operand::DX), bx(Operand::BX), sp(Operand::SP), bp(Operand::BP), si(Operand::SI), di(Operand::DI) , al(Operand::AL), cl(Operand::CL), dl(Operand::DL), bl(Operand::BL), ah(Operand::AH), ch(Operand::CH), dh(Operand::DH), bh(Operand::BH) , ptr(0), byte(8), word(16), dword(32), qword(64) , st0(0), st1(1), st2(2), st3(3), st4(4), st5(5), st6(6), st7(7) @@ -1479,7 +1817,7 @@ public: , r8d(Operand::R8D), r9d(Operand::R9D), r10d(Operand::R10D), r11d(Operand::R11D), r12d(Operand::R12D), r13d(Operand::R13D), r14d(Operand::R14D), r15d(Operand::R15D) , r8w(Operand::R8W), r9w(Operand::R9W), r10w(Operand::R10W), r11w(Operand::R11W), r12w(Operand::R12W), r13w(Operand::R13W), r14w(Operand::R14W), r15w(Operand::R15W) , r8b(Operand::R8B), r9b(Operand::R9B), r10b(Operand::R10B), r11b(Operand::R11B), r12b(Operand::R12B), r13b(Operand::R13B), r14b(Operand::R14B), r15b(Operand::R15B) - , spl(Operand::SPL, 1), bpl(Operand::BPL, 1), sil(Operand::SIL, 1), dil(Operand::DIL, 1) + , spl(Operand::SPL, true), bpl(Operand::BPL, true), sil(Operand::SIL, true), dil(Operand::DIL, true) , xmm8(8), xmm9(9), xmm10(10), xmm11(11), xmm12(12), xmm13(13), xmm14(14), xmm15(15) , ymm8(8), ymm9(9), ymm10(10), ymm11(11), ymm12(12), ymm13(13), ymm14(14), ymm15(15) , xm8(xmm8), xm9(xmm9), xm10(xmm10), xm11(xmm11), xm12(xmm12), xm13(xmm13), xm14(xmm14), xm15(xmm15) // for my convenience @@ -1487,25 +1825,22 @@ public: , rip() #endif { - xmTbl[0] = &xm0; xmTbl[1] = &xm1; xmTbl[2] = &xm2; xmTbl[3] = &xm3; - xmTbl[4] = &xm4; xmTbl[5] = &xm5; xmTbl[6] = &xm6; xmTbl[7] = &xm7; - ymTbl[0] = &ym0; ymTbl[1] = &ym1; ymTbl[2] = &ym2; ymTbl[3] = &ym3; - ymTbl[4] = &ym4; ymTbl[5] = &ym5; ymTbl[6] = &ym6; ymTbl[7] = &ym7; -#ifdef XBYAK64 - xmTbl[8] = &xm8; xmTbl[9] = &xm9; xmTbl[10] = &xm10; xmTbl[11] = &xm11; - xmTbl[12] = &xm12; xmTbl[13] = &xm13; xmTbl[14] = &xm14; xmTbl[15] = &xm15; - ymTbl[8] = &ym8; ymTbl[9] = &ym9; ymTbl[10] = &ym10; ymTbl[11] = &ym11; - ymTbl[12] = &ym12; ymTbl[13] = &ym13; ymTbl[14] = &ym14; ymTbl[15] = &ym15; -#endif - + label_.set(this); + } + void reset() + { + resetSize(); + label_.reset(); label_.set(this); } bool hasUndefinedLabel() const { return label_.hasUndefinedLabel(); } - const uint8 *getCode() const + /* + call ready() to complete generating code on AutoGrow + */ + void ready() { - assert(!hasUndefinedLabel()); -// if (hasUndefinedLabel()) throw ERR_LABEL_IS_NOT_FOUND; - return top_; + if (hasUndefinedLabel()) throw ERR_LABEL_IS_NOT_FOUND; + calcJmpAddress(); } #ifdef XBYAK_TEST void dump(bool doClear = true) @@ -1519,7 +1854,9 @@ public: #include "xbyak_mnemonic.h" void align(int x = 16) { - if (x != 4 && x != 8 && x != 16 && x != 32) throw ERR_BAD_ALIGN; + if (x == 1) return; + if (x < 1 || (x & (x - 1))) throw ERR_BAD_ALIGN; + if (isAutoGrow() && x > (int)inner::ALIGN_PAGE_SIZE) fprintf(stderr, "warning:autoGrow mode does not support %d align\n", x); while (size_t(getCurr()) % x) { nop(); } @@ -1527,6 +1864,26 @@ public: #endif }; +namespace util { +static const Mmx mm0(0), mm1(1), mm2(2), mm3(3), mm4(4), mm5(5), mm6(6), mm7(7); +static const Xmm xmm0(0), xmm1(1), xmm2(2), xmm3(3), xmm4(4), xmm5(5), xmm6(6), xmm7(7); +static const Ymm ymm0(0), ymm1(1), ymm2(2), ymm3(3), ymm4(4), ymm5(5), ymm6(6), ymm7(7); +static const Reg32 eax(Operand::EAX), ecx(Operand::ECX), edx(Operand::EDX), ebx(Operand::EBX), esp(Operand::ESP), ebp(Operand::EBP), esi(Operand::ESI), edi(Operand::EDI); +static const Reg16 ax(Operand::AX), cx(Operand::CX), dx(Operand::DX), bx(Operand::BX), sp(Operand::SP), bp(Operand::BP), si(Operand::SI), di(Operand::DI); +static const Reg8 al(Operand::AL), cl(Operand::CL), dl(Operand::DL), bl(Operand::BL), ah(Operand::AH), ch(Operand::CH), dh(Operand::DH), bh(Operand::BH); +static const AddressFrame ptr(0), byte(8), word(16), dword(32), qword(64); +static const Fpu st0(0), st1(1), st2(2), st3(3), st4(4), st5(5), st6(6), st7(7); +#ifdef XBYAK64 +static const Reg64 rax(Operand::RAX), rcx(Operand::RCX), rdx(Operand::RDX), rbx(Operand::RBX), rsp(Operand::RSP), rbp(Operand::RBP), rsi(Operand::RSI), rdi(Operand::RDI), r8(Operand::R8), r9(Operand::R9), r10(Operand::R10), r11(Operand::R11), r12(Operand::R12), r13(Operand::R13), r14(Operand::R14), r15(Operand::R15); +static const Reg32 r8d(Operand::R8D), r9d(Operand::R9D), r10d(Operand::R10D), r11d(Operand::R11D), r12d(Operand::R12D), r13d(Operand::R13D), r14d(Operand::R14D), r15d(Operand::R15D); +static const Reg16 r8w(Operand::R8W), r9w(Operand::R9W), r10w(Operand::R10W), r11w(Operand::R11W), r12w(Operand::R12W), r13w(Operand::R13W), r14w(Operand::R14W), r15w(Operand::R15W); +static const Reg8 r8b(Operand::R8B), r9b(Operand::R9B), r10b(Operand::R10B), r11b(Operand::R11B), r12b(Operand::R12B), r13b(Operand::R13B), r14b(Operand::R14B), r15b(Operand::R15B), spl(Operand::SPL, 1), bpl(Operand::BPL, 1), sil(Operand::SIL, 1), dil(Operand::DIL, 1); +static const Xmm xmm8(8), xmm9(9), xmm10(10), xmm11(11), xmm12(12), xmm13(13), xmm14(14), xmm15(15); +static const Ymm ymm8(8), ymm9(9), ymm10(10), ymm11(11), ymm12(12), ymm13(13), ymm14(14), ymm15(15); +static const RegRip rip; +#endif +} // util + #ifdef _MSC_VER #pragma warning(pop) #endif diff --git a/plugins/GSdx/xbyak/xbyak_mnemonic.h b/plugins/GSdx/xbyak/xbyak_mnemonic.h index 459ce6e369..bcd81b7629 100644 --- a/plugins/GSdx/xbyak/xbyak_mnemonic.h +++ b/plugins/GSdx/xbyak/xbyak_mnemonic.h @@ -1,4 +1,4 @@ -const char *getVersionString() const { return "2.991"; } +const char *getVersionString() const { return "4.00"; } void packssdw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x6B); } void packsswb(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x63); } void packuswb(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x67); } @@ -272,8 +272,16 @@ void setnle(const Operand& op) { opR_ModM(op, 8, 0, 0x0F, B10010000 | 15); } void cmovg(const Reg32e& reg, const Operand& op) { opModRM(reg, op, op.isREG(i32e), op.isMEM(), 0x0F, B01000000 | 15); } void jg(const char *label, LabelType type = T_AUTO) { opJmp(label, type, 0x7F, 0x8F, 0x0F); } void setg(const Operand& op) { opR_ModM(op, 8, 0, 0x0F, B10010000 | 15); } +#ifdef XBYAK32 +void jcxz(const char *label) { db(0x67); opJmp(label, T_SHORT, 0xe3, 0, 0); } +void jecxz(const char *label) { opJmp(label, T_SHORT, 0xe3, 0, 0); } +#else +void jecxz(const char *label) { db(0x67); opJmp(label, T_SHORT, 0xe3, 0, 0); } +void jrcxz(const char *label) { opJmp(label, T_SHORT, 0xe3, 0, 0); } +#endif #ifdef XBYAK64 void cdqe() { db(0x48); db(0x98); } +void cqo() { db(0x48); db(0x99); } #else void aaa() { db(0x37); } void aad() { db(0xD5); db(0x0A); } @@ -315,7 +323,9 @@ void rdmsr() { db(0x0F); db(0x32); } void rdpmc() { db(0x0F); db(0x33); } void rdtsc() { db(0x0F); db(0x31); } void rdtscp() { db(0x0F); db(0x01); db(0xF9); } +void ud2() { db(0x0F); db(0x0B); } void wait() { db(0x9B); } +void fwait() { db(0x9B); } void wbinvd() { db(0x0F); db(0x09); } void wrmsr() { db(0x0F); db(0x30); } void xlatb() { db(0xD7); } @@ -336,6 +346,8 @@ void fdecstp() { db(0xD9); db(0xF6); } void fdivp() { db(0xDE); db(0xF9); } void fdivrp() { db(0xDE); db(0xF1); } void fincstp() { db(0xD9); db(0xF7); } +void finit() { db(0x9B); db(0xDB); db(0xE3); } +void fninit() { db(0xDB); db(0xE3); } void fld1() { db(0xD9); db(0xE8); } void fldl2t() { db(0xD9); db(0xE9); } void fldl2e() { db(0xD9); db(0xEA); } @@ -369,18 +381,30 @@ void adc(const Operand& op1, const Operand& op2) { opRM_RM(op1, op2, 0x10); } void adc(const Operand& op, uint32 imm) { opRM_I(op, imm, 0x10, 2); } void add(const Operand& op1, const Operand& op2) { opRM_RM(op1, op2, 0x00); } void add(const Operand& op, uint32 imm) { opRM_I(op, imm, 0x00, 0); } +void and_(const Operand& op1, const Operand& op2) { opRM_RM(op1, op2, 0x20); } +void and_(const Operand& op, uint32 imm) { opRM_I(op, imm, 0x20, 4); } +#ifndef XBYAK_NO_OP_NAMES void and(const Operand& op1, const Operand& op2) { opRM_RM(op1, op2, 0x20); } void and(const Operand& op, uint32 imm) { opRM_I(op, imm, 0x20, 4); } +#endif void cmp(const Operand& op1, const Operand& op2) { opRM_RM(op1, op2, 0x38); } void cmp(const Operand& op, uint32 imm) { opRM_I(op, imm, 0x38, 7); } +void or_(const Operand& op1, const Operand& op2) { opRM_RM(op1, op2, 0x08); } +void or_(const Operand& op, uint32 imm) { opRM_I(op, imm, 0x08, 1); } +#ifndef XBYAK_NO_OP_NAMES void or(const Operand& op1, const Operand& op2) { opRM_RM(op1, op2, 0x08); } void or(const Operand& op, uint32 imm) { opRM_I(op, imm, 0x08, 1); } +#endif void sbb(const Operand& op1, const Operand& op2) { opRM_RM(op1, op2, 0x18); } void sbb(const Operand& op, uint32 imm) { opRM_I(op, imm, 0x18, 3); } void sub(const Operand& op1, const Operand& op2) { opRM_RM(op1, op2, 0x28); } void sub(const Operand& op, uint32 imm) { opRM_I(op, imm, 0x28, 5); } +void xor_(const Operand& op1, const Operand& op2) { opRM_RM(op1, op2, 0x30); } +void xor_(const Operand& op, uint32 imm) { opRM_I(op, imm, 0x30, 6); } +#ifndef XBYAK_NO_OP_NAMES void xor(const Operand& op1, const Operand& op2) { opRM_RM(op1, op2, 0x30); } void xor(const Operand& op, uint32 imm) { opRM_I(op, imm, 0x30, 6); } +#endif void dec(const Operand& op) { opIncDec(op, 0x48, 1); } void inc(const Operand& op) { opIncDec(op, 0x40, 0); } void div(const Operand& op) { opR_ModM(op, 0, 6, 0xF6); } @@ -388,7 +412,10 @@ void idiv(const Operand& op) { opR_ModM(op, 0, 7, 0xF6); } void imul(const Operand& op) { opR_ModM(op, 0, 5, 0xF6); } void mul(const Operand& op) { opR_ModM(op, 0, 4, 0xF6); } void neg(const Operand& op) { opR_ModM(op, 0, 3, 0xF6); } +void not_(const Operand& op) { opR_ModM(op, 0, 2, 0xF6); } +#ifndef XBYAK_NO_OP_NAMES void not(const Operand& op) { opR_ModM(op, 0, 2, 0xF6); } +#endif void rcl(const Operand& op, int imm) { opShift(op, imm, 2); } void rcl(const Operand& op, const Reg8& cl) { opShift(op, cl, 2); } void rcr(const Operand& op, int imm) { opShift(op, imm, 3); } @@ -411,6 +438,9 @@ void shrd(const Operand& op, const Reg& reg, uint8 imm) { opShxd(op, reg, imm, 0 void shrd(const Operand& op, const Reg& reg, const Reg8& cl) { opShxd(op, reg, 0, 0xAC, &cl); } void bsf(const Reg®, const Operand& op) { opModRM(reg, op, op.isREG(16 | i32e), op.isMEM(), 0x0F, 0xBC); } void bsr(const Reg®, const Operand& op) { opModRM(reg, op, op.isREG(16 | i32e), op.isMEM(), 0x0F, 0xBD); } +void popcnt(const Reg®, const Operand& op) { opSp1(reg, op, 0xF3, 0x0F, 0xB8); } +void tzcnt(const Reg®, const Operand& op) { opSp1(reg, op, 0xF3, 0x0F, 0xBC); } +void lzcnt(const Reg®, const Operand& op) { opSp1(reg, op, 0xF3, 0x0F, 0xBD); } void pshufb(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x00, 0x66, NONE, 0x38); } void phaddw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x01, 0x66, NONE, 0x38); } void phaddd(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x02, 0x66, NONE, 0x38); } @@ -478,9 +508,15 @@ void pcmpistrm(const Xmm& xmm, const Operand& op, int imm) { opGen(xmm, op, 0x62 void pcmpistri(const Xmm& xmm, const Operand& op, int imm) { opGen(xmm, op, 0x63, 0x66, isXMM_XMMorMEM, static_cast(imm), 0x3A); } void pclmulqdq(const Xmm& xmm, const Operand& op, int imm) { opGen(xmm, op, 0x44, 0x66, isXMM_XMMorMEM, static_cast(imm), 0x3A); } void aeskeygenassist(const Xmm& xmm, const Operand& op, int imm) { opGen(xmm, op, 0xDF, 0x66, isXMM_XMMorMEM, static_cast(imm), 0x3A); } +void pclmullqlqdq(const Xmm& xmm, const Operand& op) { pclmulqdq(xmm, op, 0x00); } +void pclmulhqlqdq(const Xmm& xmm, const Operand& op) { pclmulqdq(xmm, op, 0x01); } +void pclmullqhdq(const Xmm& xmm, const Operand& op) { pclmulqdq(xmm, op, 0x10); } +void pclmulhqhdq(const Xmm& xmm, const Operand& op) { pclmulqdq(xmm, op, 0x11); } void ldmxcsr(const Address& addr) { opModM(addr, Reg32(2), 0x0F, 0xAE); } void stmxcsr(const Address& addr) { opModM(addr, Reg32(3), 0x0F, 0xAE); } void clflush(const Address& addr) { opModM(addr, Reg32(7), 0x0F, 0xAE); } +void fldcw(const Address& addr) { opModM(addr, Reg32(5), 0xD9, 0x100); } +void fstcw(const Address& addr) { db(0x9B); opModM(addr, Reg32(7), 0xD9, NONE); } void movntpd(const Address& addr, const Xmm& reg) { opModM(addr, Reg16(reg.getIdx()), 0x0F, 0x2B); } void movntdq(const Address& addr, const Xmm& reg) { opModM(addr, Reg16(reg.getIdx()), 0x0F, 0xE7); } void movsx(const Reg& reg, const Operand& op) { opMovxx(reg, op, 0xBE); } @@ -509,29 +545,53 @@ void fisub(const Address& addr) { opFpuMem(addr, 0xDE, 0xDA, 0x00, 4, 0); } void fsubr(const Address& addr) { opFpuMem(addr, 0x00, 0xD8, 0xDC, 5, 0); } void fisubr(const Address& addr) { opFpuMem(addr, 0xDE, 0xDA, 0x00, 5, 0); } void fadd(const Fpu& reg1, const Fpu& reg2) { opFpuFpu(reg1, reg2, 0xD8C0, 0xDCC0); } +void fadd(const Fpu& reg1) { opFpuFpu(st0, reg1, 0xD8C0, 0xDCC0); } void faddp(const Fpu& reg1, const Fpu& reg2) { opFpuFpu(reg1, reg2, 0x0000, 0xDEC0); } +void faddp(const Fpu& reg1) { opFpuFpu(reg1, st0, 0x0000, 0xDEC0); } void fcmovb(const Fpu& reg1, const Fpu& reg2) { opFpuFpu(reg1, reg2, 0xDAC0, 0x00C0); } +void fcmovb(const Fpu& reg1) { opFpuFpu(st0, reg1, 0xDAC0, 0x00C0); } void fcmove(const Fpu& reg1, const Fpu& reg2) { opFpuFpu(reg1, reg2, 0xDAC8, 0x00C8); } +void fcmove(const Fpu& reg1) { opFpuFpu(st0, reg1, 0xDAC8, 0x00C8); } void fcmovbe(const Fpu& reg1, const Fpu& reg2) { opFpuFpu(reg1, reg2, 0xDAD0, 0x00D0); } +void fcmovbe(const Fpu& reg1) { opFpuFpu(st0, reg1, 0xDAD0, 0x00D0); } void fcmovu(const Fpu& reg1, const Fpu& reg2) { opFpuFpu(reg1, reg2, 0xDAD8, 0x00D8); } +void fcmovu(const Fpu& reg1) { opFpuFpu(st0, reg1, 0xDAD8, 0x00D8); } void fcmovnb(const Fpu& reg1, const Fpu& reg2) { opFpuFpu(reg1, reg2, 0xDBC0, 0x00C0); } +void fcmovnb(const Fpu& reg1) { opFpuFpu(st0, reg1, 0xDBC0, 0x00C0); } void fcmovne(const Fpu& reg1, const Fpu& reg2) { opFpuFpu(reg1, reg2, 0xDBC8, 0x00C8); } +void fcmovne(const Fpu& reg1) { opFpuFpu(st0, reg1, 0xDBC8, 0x00C8); } void fcmovnbe(const Fpu& reg1, const Fpu& reg2) { opFpuFpu(reg1, reg2, 0xDBD0, 0x00D0); } +void fcmovnbe(const Fpu& reg1) { opFpuFpu(st0, reg1, 0xDBD0, 0x00D0); } void fcmovnu(const Fpu& reg1, const Fpu& reg2) { opFpuFpu(reg1, reg2, 0xDBD8, 0x00D8); } +void fcmovnu(const Fpu& reg1) { opFpuFpu(st0, reg1, 0xDBD8, 0x00D8); } void fcomi(const Fpu& reg1, const Fpu& reg2) { opFpuFpu(reg1, reg2, 0xDBF0, 0x00F0); } +void fcomi(const Fpu& reg1) { opFpuFpu(st0, reg1, 0xDBF0, 0x00F0); } void fcomip(const Fpu& reg1, const Fpu& reg2) { opFpuFpu(reg1, reg2, 0xDFF0, 0x00F0); } +void fcomip(const Fpu& reg1) { opFpuFpu(st0, reg1, 0xDFF0, 0x00F0); } void fucomi(const Fpu& reg1, const Fpu& reg2) { opFpuFpu(reg1, reg2, 0xDBE8, 0x00E8); } +void fucomi(const Fpu& reg1) { opFpuFpu(st0, reg1, 0xDBE8, 0x00E8); } void fucomip(const Fpu& reg1, const Fpu& reg2) { opFpuFpu(reg1, reg2, 0xDFE8, 0x00E8); } +void fucomip(const Fpu& reg1) { opFpuFpu(st0, reg1, 0xDFE8, 0x00E8); } void fdiv(const Fpu& reg1, const Fpu& reg2) { opFpuFpu(reg1, reg2, 0xD8F0, 0xDCF8); } +void fdiv(const Fpu& reg1) { opFpuFpu(st0, reg1, 0xD8F0, 0xDCF8); } void fdivp(const Fpu& reg1, const Fpu& reg2) { opFpuFpu(reg1, reg2, 0x0000, 0xDEF8); } +void fdivp(const Fpu& reg1) { opFpuFpu(reg1, st0, 0x0000, 0xDEF8); } void fdivr(const Fpu& reg1, const Fpu& reg2) { opFpuFpu(reg1, reg2, 0xD8F8, 0xDCF0); } +void fdivr(const Fpu& reg1) { opFpuFpu(st0, reg1, 0xD8F8, 0xDCF0); } void fdivrp(const Fpu& reg1, const Fpu& reg2) { opFpuFpu(reg1, reg2, 0x0000, 0xDEF0); } +void fdivrp(const Fpu& reg1) { opFpuFpu(reg1, st0, 0x0000, 0xDEF0); } void fmul(const Fpu& reg1, const Fpu& reg2) { opFpuFpu(reg1, reg2, 0xD8C8, 0xDCC8); } +void fmul(const Fpu& reg1) { opFpuFpu(st0, reg1, 0xD8C8, 0xDCC8); } void fmulp(const Fpu& reg1, const Fpu& reg2) { opFpuFpu(reg1, reg2, 0x0000, 0xDEC8); } +void fmulp(const Fpu& reg1) { opFpuFpu(reg1, st0, 0x0000, 0xDEC8); } void fsub(const Fpu& reg1, const Fpu& reg2) { opFpuFpu(reg1, reg2, 0xD8E0, 0xDCE8); } +void fsub(const Fpu& reg1) { opFpuFpu(st0, reg1, 0xD8E0, 0xDCE8); } void fsubp(const Fpu& reg1, const Fpu& reg2) { opFpuFpu(reg1, reg2, 0x0000, 0xDEE8); } +void fsubp(const Fpu& reg1) { opFpuFpu(reg1, st0, 0x0000, 0xDEE8); } void fsubr(const Fpu& reg1, const Fpu& reg2) { opFpuFpu(reg1, reg2, 0xD8E8, 0xDCE0); } +void fsubr(const Fpu& reg1) { opFpuFpu(st0, reg1, 0xD8E8, 0xDCE0); } void fsubrp(const Fpu& reg1, const Fpu& reg2) { opFpuFpu(reg1, reg2, 0x0000, 0xDEE0); } +void fsubrp(const Fpu& reg1) { opFpuFpu(reg1, st0, 0x0000, 0xDEE0); } void fcom(const Fpu& reg) { opFpu(reg, 0xD8, 0xD0); } void fcomp(const Fpu& reg) { opFpu(reg, 0xD8, 0xD8); } void ffree(const Fpu& reg) { opFpu(reg, 0xDD, 0xC0); } @@ -581,10 +641,12 @@ void vdppd(const Xmm& xm1, const Xmm& xm2, const Operand& op, uint8 imm) { opAVX void vdppd(const Xmm& xmm, const Operand& op, uint8 imm) { opAVX_X_X_XM(xmm, xmm, op, MM_0F3A | PP_66, 0x41, false, 0); db(imm); } void vdpps(const Xmm& xm1, const Xmm& xm2, const Operand& op, uint8 imm) { opAVX_X_X_XM(xm1, xm2, op, MM_0F3A | PP_66, 0x40, true, 0); db(imm); } void vdpps(const Xmm& xmm, const Operand& op, uint8 imm) { opAVX_X_X_XM(xmm, xmm, op, MM_0F3A | PP_66, 0x40, true, 0); db(imm); } -void vmpsadbw(const Xmm& xm1, const Xmm& xm2, const Operand& op, uint8 imm) { opAVX_X_X_XM(xm1, xm2, op, MM_0F3A | PP_66, 0x42, false, 0); db(imm); } -void vmpsadbw(const Xmm& xmm, const Operand& op, uint8 imm) { opAVX_X_X_XM(xmm, xmm, op, MM_0F3A | PP_66, 0x42, false, 0); db(imm); } -void vpblendw(const Xmm& xm1, const Xmm& xm2, const Operand& op, uint8 imm) { opAVX_X_X_XM(xm1, xm2, op, MM_0F3A | PP_66, 0x0E, false, 0); db(imm); } -void vpblendw(const Xmm& xmm, const Operand& op, uint8 imm) { opAVX_X_X_XM(xmm, xmm, op, MM_0F3A | PP_66, 0x0E, false, 0); db(imm); } +void vmpsadbw(const Xmm& xm1, const Xmm& xm2, const Operand& op, uint8 imm) { opAVX_X_X_XM(xm1, xm2, op, MM_0F3A | PP_66, 0x42, true, 0); db(imm); } +void vmpsadbw(const Xmm& xmm, const Operand& op, uint8 imm) { opAVX_X_X_XM(xmm, xmm, op, MM_0F3A | PP_66, 0x42, true, 0); db(imm); } +void vpblendw(const Xmm& xm1, const Xmm& xm2, const Operand& op, uint8 imm) { opAVX_X_X_XM(xm1, xm2, op, MM_0F3A | PP_66, 0x0E, true, 0); db(imm); } +void vpblendw(const Xmm& xmm, const Operand& op, uint8 imm) { opAVX_X_X_XM(xmm, xmm, op, MM_0F3A | PP_66, 0x0E, true, 0); db(imm); } +void vpblendd(const Xmm& xm1, const Xmm& xm2, const Operand& op, uint8 imm) { opAVX_X_X_XM(xm1, xm2, op, MM_0F3A | PP_66, 0x02, true, 0); db(imm); } +void vpblendd(const Xmm& xmm, const Operand& op, uint8 imm) { opAVX_X_X_XM(xmm, xmm, op, MM_0F3A | PP_66, 0x02, true, 0); db(imm); } void vroundsd(const Xmm& xm1, const Xmm& xm2, const Operand& op, uint8 imm) { opAVX_X_X_XM(xm1, xm2, op, MM_0F3A | PP_66, 0x0B, false, 0); db(imm); } void vroundsd(const Xmm& xmm, const Operand& op, uint8 imm) { opAVX_X_X_XM(xmm, xmm, op, MM_0F3A | PP_66, 0x0B, false, 0); db(imm); } void vroundss(const Xmm& xm1, const Xmm& xm2, const Operand& op, uint8 imm) { opAVX_X_X_XM(xm1, xm2, op, MM_0F3A | PP_66, 0x0A, false, 0); db(imm); } @@ -593,6 +655,11 @@ void vpclmulqdq(const Xmm& xm1, const Xmm& xm2, const Operand& op, uint8 imm) { void vpclmulqdq(const Xmm& xmm, const Operand& op, uint8 imm) { opAVX_X_X_XM(xmm, xmm, op, MM_0F3A | PP_66, 0x44, false, 0); db(imm); } void vpermilps(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F38 | PP_66, 0x0C, true, 0); } void vpermilpd(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F38 | PP_66, 0x0D, true, 0); } +void vpsllvd(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F38 | PP_66, 0x47, true, 0); } +void vpsllvq(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F38 | PP_66, 0x47, true, 1); } +void vpsravd(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F38 | PP_66, 0x46, true, 0); } +void vpsrlvd(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F38 | PP_66, 0x45, true, 0); } +void vpsrlvq(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F38 | PP_66, 0x45, true, 1); } void vcmppd(const Xmm& xm1, const Xmm& xm2, const Operand& op, uint8 imm) { opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0xC2, true, -1); db(imm); } void vcmppd(const Xmm& xmm, const Operand& op, uint8 imm) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xC2, true, -1); db(imm); } void vcmpps(const Xmm& xm1, const Xmm& xm2, const Operand& op, uint8 imm) { opAVX_X_X_XM(xm1, xm2, op, MM_0F, 0xC2, true, -1); db(imm); } @@ -607,121 +674,121 @@ void vcvtss2sd(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM void vcvtss2sd(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_F3, 0x5A, false, -1); } void vinsertps(const Xmm& xm1, const Xmm& xm2, const Operand& op, uint8 imm) { opAVX_X_X_XM(xm1, xm2, op, MM_0F3A | PP_66, 0x21, false, 0); db(imm); } void vinsertps(const Xmm& xmm, const Operand& op, uint8 imm) { opAVX_X_X_XM(xmm, xmm, op, MM_0F3A | PP_66, 0x21, false, 0); db(imm); } -void vpacksswb(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0x63, false, -1); } -void vpacksswb(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0x63, false, -1); } -void vpackssdw(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0x6B, false, -1); } -void vpackssdw(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0x6B, false, -1); } -void vpackuswb(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0x67, false, -1); } -void vpackuswb(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0x67, false, -1); } -void vpackusdw(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F38 | PP_66, 0x2B, false, -1); } -void vpackusdw(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F38 | PP_66, 0x2B, false, -1); } -void vpaddb(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0xFC, false, -1); } -void vpaddb(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xFC, false, -1); } -void vpaddw(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0xFD, false, -1); } -void vpaddw(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xFD, false, -1); } -void vpaddd(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0xFE, false, -1); } -void vpaddd(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xFE, false, -1); } -void vpaddq(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0xD4, false, -1); } -void vpaddq(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xD4, false, -1); } -void vpaddsb(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0xEC, false, -1); } -void vpaddsb(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xEC, false, -1); } -void vpaddsw(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0xED, false, -1); } -void vpaddsw(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xED, false, -1); } -void vpaddusb(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0xDC, false, -1); } -void vpaddusb(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xDC, false, -1); } -void vpaddusw(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0xDD, false, -1); } -void vpaddusw(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xDD, false, -1); } -void vpalignr(const Xmm& xm1, const Xmm& xm2, const Operand& op, uint8 imm) { opAVX_X_X_XM(xm1, xm2, op, MM_0F3A | PP_66, 0x0F, false, -1); db(imm); } -void vpalignr(const Xmm& xmm, const Operand& op, uint8 imm) { opAVX_X_X_XM(xmm, xmm, op, MM_0F3A | PP_66, 0x0F, false, -1); db(imm); } -void vpand(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0xDB, false, -1); } -void vpand(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xDB, false, -1); } -void vpandn(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0xDF, false, -1); } -void vpandn(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xDF, false, -1); } -void vpavgb(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0xE0, false, -1); } -void vpavgb(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xE0, false, -1); } -void vpavgw(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0xE3, false, -1); } -void vpavgw(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xE3, false, -1); } -void vpcmpeqb(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0x74, false, -1); } -void vpcmpeqb(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0x74, false, -1); } -void vpcmpeqw(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0x75, false, -1); } -void vpcmpeqw(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0x75, false, -1); } -void vpcmpeqd(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0x76, false, -1); } -void vpcmpeqd(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0x76, false, -1); } -void vpcmpeqq(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F38 | PP_66, 0x29, false, -1); } -void vpcmpeqq(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F38 | PP_66, 0x29, false, -1); } -void vpcmpgtb(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0x64, false, -1); } -void vpcmpgtb(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0x64, false, -1); } -void vpcmpgtw(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0x65, false, -1); } -void vpcmpgtw(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0x65, false, -1); } -void vpcmpgtd(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0x66, false, -1); } -void vpcmpgtd(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0x66, false, -1); } -void vpcmpgtq(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F38 | PP_66, 0x37, false, -1); } -void vpcmpgtq(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F38 | PP_66, 0x37, false, -1); } -void vphaddw(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F38 | PP_66, 0x01, false, -1); } -void vphaddw(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F38 | PP_66, 0x01, false, -1); } -void vphaddd(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F38 | PP_66, 0x02, false, -1); } -void vphaddd(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F38 | PP_66, 0x02, false, -1); } -void vphaddsw(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F38 | PP_66, 0x03, false, -1); } -void vphaddsw(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F38 | PP_66, 0x03, false, -1); } -void vphsubw(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F38 | PP_66, 0x05, false, -1); } -void vphsubw(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F38 | PP_66, 0x05, false, -1); } -void vphsubd(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F38 | PP_66, 0x06, false, -1); } -void vphsubd(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F38 | PP_66, 0x06, false, -1); } -void vphsubsw(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F38 | PP_66, 0x07, false, -1); } -void vphsubsw(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F38 | PP_66, 0x07, false, -1); } -void vpmaddwd(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0xF5, false, -1); } -void vpmaddwd(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xF5, false, -1); } -void vpmaddubsw(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F38 | PP_66, 0x04, false, -1); } -void vpmaddubsw(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F38 | PP_66, 0x04, false, -1); } -void vpmaxsb(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F38 | PP_66, 0x3C, false, -1); } -void vpmaxsb(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F38 | PP_66, 0x3C, false, -1); } -void vpmaxsw(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0xEE, false, -1); } -void vpmaxsw(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xEE, false, -1); } -void vpmaxsd(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F38 | PP_66, 0x3D, false, -1); } -void vpmaxsd(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F38 | PP_66, 0x3D, false, -1); } -void vpmaxub(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0xDE, false, -1); } -void vpmaxub(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xDE, false, -1); } -void vpmaxuw(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F38 | PP_66, 0x3E, false, -1); } -void vpmaxuw(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F38 | PP_66, 0x3E, false, -1); } -void vpmaxud(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F38 | PP_66, 0x3F, false, -1); } -void vpmaxud(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F38 | PP_66, 0x3F, false, -1); } -void vpminsb(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F38 | PP_66, 0x38, false, -1); } -void vpminsb(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F38 | PP_66, 0x38, false, -1); } -void vpminsw(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0xEA, false, -1); } -void vpminsw(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xEA, false, -1); } -void vpminsd(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F38 | PP_66, 0x39, false, -1); } -void vpminsd(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F38 | PP_66, 0x39, false, -1); } -void vpminub(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0xDA, false, -1); } -void vpminub(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xDA, false, -1); } -void vpminuw(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F38 | PP_66, 0x3A, false, -1); } -void vpminuw(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F38 | PP_66, 0x3A, false, -1); } -void vpminud(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F38 | PP_66, 0x3B, false, -1); } -void vpminud(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F38 | PP_66, 0x3B, false, -1); } -void vpmulhuw(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0xE4, false, -1); } -void vpmulhuw(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xE4, false, -1); } -void vpmulhrsw(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F38 | PP_66, 0x0B, false, -1); } -void vpmulhrsw(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F38 | PP_66, 0x0B, false, -1); } -void vpmulhw(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0xE5, false, -1); } -void vpmulhw(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xE5, false, -1); } -void vpmullw(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0xD5, false, -1); } -void vpmullw(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xD5, false, -1); } -void vpmulld(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F38 | PP_66, 0x40, false, -1); } -void vpmulld(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F38 | PP_66, 0x40, false, -1); } +void vpacksswb(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0x63, true, -1); } +void vpacksswb(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0x63, true, -1); } +void vpackssdw(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0x6B, true, -1); } +void vpackssdw(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0x6B, true, -1); } +void vpackuswb(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0x67, true, -1); } +void vpackuswb(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0x67, true, -1); } +void vpackusdw(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F38 | PP_66, 0x2B, true, -1); } +void vpackusdw(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F38 | PP_66, 0x2B, true, -1); } +void vpaddb(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0xFC, true, -1); } +void vpaddb(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xFC, true, -1); } +void vpaddw(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0xFD, true, -1); } +void vpaddw(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xFD, true, -1); } +void vpaddd(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0xFE, true, -1); } +void vpaddd(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xFE, true, -1); } +void vpaddq(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0xD4, true, -1); } +void vpaddq(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xD4, true, -1); } +void vpaddsb(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0xEC, true, -1); } +void vpaddsb(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xEC, true, -1); } +void vpaddsw(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0xED, true, -1); } +void vpaddsw(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xED, true, -1); } +void vpaddusb(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0xDC, true, -1); } +void vpaddusb(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xDC, true, -1); } +void vpaddusw(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0xDD, true, -1); } +void vpaddusw(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xDD, true, -1); } +void vpalignr(const Xmm& xm1, const Xmm& xm2, const Operand& op, uint8 imm) { opAVX_X_X_XM(xm1, xm2, op, MM_0F3A | PP_66, 0x0F, true, -1); db(imm); } +void vpalignr(const Xmm& xmm, const Operand& op, uint8 imm) { opAVX_X_X_XM(xmm, xmm, op, MM_0F3A | PP_66, 0x0F, true, -1); db(imm); } +void vpand(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0xDB, true, -1); } +void vpand(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xDB, true, -1); } +void vpandn(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0xDF, true, -1); } +void vpandn(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xDF, true, -1); } +void vpavgb(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0xE0, true, -1); } +void vpavgb(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xE0, true, -1); } +void vpavgw(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0xE3, true, -1); } +void vpavgw(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xE3, true, -1); } +void vpcmpeqb(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0x74, true, -1); } +void vpcmpeqb(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0x74, true, -1); } +void vpcmpeqw(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0x75, true, -1); } +void vpcmpeqw(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0x75, true, -1); } +void vpcmpeqd(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0x76, true, -1); } +void vpcmpeqd(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0x76, true, -1); } +void vpcmpeqq(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F38 | PP_66, 0x29, true, -1); } +void vpcmpeqq(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F38 | PP_66, 0x29, true, -1); } +void vpcmpgtb(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0x64, true, -1); } +void vpcmpgtb(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0x64, true, -1); } +void vpcmpgtw(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0x65, true, -1); } +void vpcmpgtw(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0x65, true, -1); } +void vpcmpgtd(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0x66, true, -1); } +void vpcmpgtd(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0x66, true, -1); } +void vpcmpgtq(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F38 | PP_66, 0x37, true, -1); } +void vpcmpgtq(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F38 | PP_66, 0x37, true, -1); } +void vphaddw(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F38 | PP_66, 0x01, true, -1); } +void vphaddw(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F38 | PP_66, 0x01, true, -1); } +void vphaddd(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F38 | PP_66, 0x02, true, -1); } +void vphaddd(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F38 | PP_66, 0x02, true, -1); } +void vphaddsw(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F38 | PP_66, 0x03, true, -1); } +void vphaddsw(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F38 | PP_66, 0x03, true, -1); } +void vphsubw(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F38 | PP_66, 0x05, true, -1); } +void vphsubw(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F38 | PP_66, 0x05, true, -1); } +void vphsubd(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F38 | PP_66, 0x06, true, -1); } +void vphsubd(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F38 | PP_66, 0x06, true, -1); } +void vphsubsw(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F38 | PP_66, 0x07, true, -1); } +void vphsubsw(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F38 | PP_66, 0x07, true, -1); } +void vpmaddwd(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0xF5, true, -1); } +void vpmaddwd(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xF5, true, -1); } +void vpmaddubsw(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F38 | PP_66, 0x04, true, -1); } +void vpmaddubsw(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F38 | PP_66, 0x04, true, -1); } +void vpmaxsb(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F38 | PP_66, 0x3C, true, -1); } +void vpmaxsb(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F38 | PP_66, 0x3C, true, -1); } +void vpmaxsw(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0xEE, true, -1); } +void vpmaxsw(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xEE, true, -1); } +void vpmaxsd(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F38 | PP_66, 0x3D, true, -1); } +void vpmaxsd(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F38 | PP_66, 0x3D, true, -1); } +void vpmaxub(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0xDE, true, -1); } +void vpmaxub(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xDE, true, -1); } +void vpmaxuw(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F38 | PP_66, 0x3E, true, -1); } +void vpmaxuw(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F38 | PP_66, 0x3E, true, -1); } +void vpmaxud(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F38 | PP_66, 0x3F, true, -1); } +void vpmaxud(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F38 | PP_66, 0x3F, true, -1); } +void vpminsb(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F38 | PP_66, 0x38, true, -1); } +void vpminsb(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F38 | PP_66, 0x38, true, -1); } +void vpminsw(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0xEA, true, -1); } +void vpminsw(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xEA, true, -1); } +void vpminsd(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F38 | PP_66, 0x39, true, -1); } +void vpminsd(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F38 | PP_66, 0x39, true, -1); } +void vpminub(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0xDA, true, -1); } +void vpminub(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xDA, true, -1); } +void vpminuw(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F38 | PP_66, 0x3A, true, -1); } +void vpminuw(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F38 | PP_66, 0x3A, true, -1); } +void vpminud(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F38 | PP_66, 0x3B, true, -1); } +void vpminud(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F38 | PP_66, 0x3B, true, -1); } +void vpmulhuw(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0xE4, true, -1); } +void vpmulhuw(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xE4, true, -1); } +void vpmulhrsw(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F38 | PP_66, 0x0B, true, -1); } +void vpmulhrsw(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F38 | PP_66, 0x0B, true, -1); } +void vpmulhw(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0xE5, true, -1); } +void vpmulhw(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xE5, true, -1); } +void vpmullw(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0xD5, true, -1); } +void vpmullw(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xD5, true, -1); } +void vpmulld(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F38 | PP_66, 0x40, true, -1); } +void vpmulld(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F38 | PP_66, 0x40, true, -1); } void vpmuludq(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0xF4, false, -1); } void vpmuludq(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xF4, false, -1); } -void vpmuldq(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F38 | PP_66, 0x28, false, -1); } -void vpmuldq(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F38 | PP_66, 0x28, false, -1); } -void vpor(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0xEB, false, -1); } -void vpor(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xEB, false, -1); } -void vpsadbw(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0xF6, false, -1); } -void vpsadbw(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xF6, false, -1); } -void vpshufb(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F38 | PP_66, 0x00, false, -1); } -void vpsignb(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F38 | PP_66, 0x08, false, -1); } -void vpsignb(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F38 | PP_66, 0x08, false, -1); } -void vpsignw(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F38 | PP_66, 0x09, false, -1); } -void vpsignw(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F38 | PP_66, 0x09, false, -1); } -void vpsignd(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F38 | PP_66, 0x0A, false, -1); } -void vpsignd(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F38 | PP_66, 0x0A, false, -1); } +void vpmuldq(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F38 | PP_66, 0x28, true, -1); } +void vpmuldq(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F38 | PP_66, 0x28, true, -1); } +void vpor(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0xEB, true, -1); } +void vpor(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xEB, true, -1); } +void vpsadbw(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0xF6, true, -1); } +void vpsadbw(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xF6, true, -1); } +void vpshufb(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F38 | PP_66, 0x00, true, -1); } +void vpsignb(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F38 | PP_66, 0x08, true, -1); } +void vpsignb(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F38 | PP_66, 0x08, true, -1); } +void vpsignw(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F38 | PP_66, 0x09, true, -1); } +void vpsignw(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F38 | PP_66, 0x09, true, -1); } +void vpsignd(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F38 | PP_66, 0x0A, true, -1); } +void vpsignd(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F38 | PP_66, 0x0A, true, -1); } void vpsllw(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0xF1, false, -1); } void vpsllw(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xF1, false, -1); } void vpslld(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0xF2, false, -1); } @@ -738,40 +805,40 @@ void vpsrld(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm void vpsrld(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xD2, false, -1); } void vpsrlq(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0xD3, false, -1); } void vpsrlq(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xD3, false, -1); } -void vpsubb(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0xF8, false, -1); } -void vpsubb(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xF8, false, -1); } -void vpsubw(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0xF9, false, -1); } -void vpsubw(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xF9, false, -1); } -void vpsubd(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0xFA, false, -1); } -void vpsubd(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xFA, false, -1); } -void vpsubq(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0xFB, false, -1); } -void vpsubq(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xFB, false, -1); } -void vpsubsb(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0xE8, false, -1); } -void vpsubsb(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xE8, false, -1); } -void vpsubsw(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0xE9, false, -1); } -void vpsubsw(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xE9, false, -1); } -void vpsubusb(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0xD8, false, -1); } -void vpsubusb(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xD8, false, -1); } -void vpsubusw(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0xD9, false, -1); } -void vpsubusw(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xD9, false, -1); } -void vpunpckhbw(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0x68, false, -1); } -void vpunpckhbw(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0x68, false, -1); } -void vpunpckhwd(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0x69, false, -1); } -void vpunpckhwd(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0x69, false, -1); } -void vpunpckhdq(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0x6A, false, -1); } -void vpunpckhdq(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0x6A, false, -1); } -void vpunpckhqdq(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0x6D, false, -1); } -void vpunpckhqdq(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0x6D, false, -1); } -void vpunpcklbw(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0x60, false, -1); } -void vpunpcklbw(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0x60, false, -1); } -void vpunpcklwd(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0x61, false, -1); } -void vpunpcklwd(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0x61, false, -1); } -void vpunpckldq(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0x62, false, -1); } -void vpunpckldq(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0x62, false, -1); } -void vpunpcklqdq(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0x6C, false, -1); } -void vpunpcklqdq(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0x6C, false, -1); } -void vpxor(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0xEF, false, -1); } -void vpxor(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xEF, false, -1); } +void vpsubb(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0xF8, true, -1); } +void vpsubb(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xF8, true, -1); } +void vpsubw(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0xF9, true, -1); } +void vpsubw(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xF9, true, -1); } +void vpsubd(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0xFA, true, -1); } +void vpsubd(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xFA, true, -1); } +void vpsubq(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0xFB, true, -1); } +void vpsubq(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xFB, true, -1); } +void vpsubsb(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0xE8, true, -1); } +void vpsubsb(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xE8, true, -1); } +void vpsubsw(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0xE9, true, -1); } +void vpsubsw(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xE9, true, -1); } +void vpsubusb(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0xD8, true, -1); } +void vpsubusb(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xD8, true, -1); } +void vpsubusw(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0xD9, true, -1); } +void vpsubusw(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xD9, true, -1); } +void vpunpckhbw(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0x68, true, -1); } +void vpunpckhbw(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0x68, true, -1); } +void vpunpckhwd(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0x69, true, -1); } +void vpunpckhwd(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0x69, true, -1); } +void vpunpckhdq(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0x6A, true, -1); } +void vpunpckhdq(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0x6A, true, -1); } +void vpunpckhqdq(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0x6D, true, -1); } +void vpunpckhqdq(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0x6D, true, -1); } +void vpunpcklbw(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0x60, true, -1); } +void vpunpcklbw(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0x60, true, -1); } +void vpunpcklwd(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0x61, true, -1); } +void vpunpcklwd(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0x61, true, -1); } +void vpunpckldq(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0x62, true, -1); } +void vpunpckldq(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0x62, true, -1); } +void vpunpcklqdq(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0x6C, true, -1); } +void vpunpcklqdq(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0x6C, true, -1); } +void vpxor(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_66, 0xEF, true, -1); } +void vpxor(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_66, 0xEF, true, -1); } void vrcpss(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_F3, 0x53, false, -1); } void vrcpss(const Xmm& xmm, const Operand& op) { opAVX_X_X_XM(xmm, xmm, op, MM_0F | PP_F3, 0x53, false, -1); } void vrsqrtss(const Xmm& xm1, const Xmm& xm2, const Operand& op) { opAVX_X_X_XM(xm1, xm2, op, MM_0F | PP_F3, 0x52, false, -1); } @@ -817,25 +884,25 @@ void vmovshdup(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F void vmovsldup(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F | PP_F3, 0x12, true, -1); } void vmovupd(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F | PP_66, 0x10, true, -1); } void vmovups(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F, 0x10, true, -1); } -void vpabsb(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F38 | PP_66, 0x1C, false, -1); } -void vpabsw(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F38 | PP_66, 0x1D, false, -1); } -void vpabsd(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F38 | PP_66, 0x1E, false, -1); } +void vpabsb(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F38 | PP_66, 0x1C, true, -1); } +void vpabsw(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F38 | PP_66, 0x1D, true, -1); } +void vpabsd(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F38 | PP_66, 0x1E, true, -1); } void vphminposuw(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F38 | PP_66, 0x41, false, -1); } -void vpmovsxbw(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F38 | PP_66, 0x20, false, -1); } -void vpmovsxbd(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F38 | PP_66, 0x21, false, -1); } -void vpmovsxbq(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F38 | PP_66, 0x22, false, -1); } -void vpmovsxwd(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F38 | PP_66, 0x23, false, -1); } -void vpmovsxwq(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F38 | PP_66, 0x24, false, -1); } -void vpmovsxdq(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F38 | PP_66, 0x25, false, -1); } -void vpmovzxbw(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F38 | PP_66, 0x30, false, -1); } -void vpmovzxbd(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F38 | PP_66, 0x31, false, -1); } -void vpmovzxbq(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F38 | PP_66, 0x32, false, -1); } -void vpmovzxwd(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F38 | PP_66, 0x33, false, -1); } -void vpmovzxwq(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F38 | PP_66, 0x34, false, -1); } -void vpmovzxdq(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F38 | PP_66, 0x35, false, -1); } -void vpshufd(const Xmm& xm, const Operand& op, uint8 imm) { opAVX_X_XM_IMM(xm, op, MM_0F | PP_66, 0x70, false, -1, imm); } -void vpshufhw(const Xmm& xm, const Operand& op, uint8 imm) { opAVX_X_XM_IMM(xm, op, MM_0F | PP_F3, 0x70, false, -1, imm); } -void vpshuflw(const Xmm& xm, const Operand& op, uint8 imm) { opAVX_X_XM_IMM(xm, op, MM_0F | PP_F2, 0x70, false, -1, imm); } +void vpmovsxbw(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F38 | PP_66, 0x20, true, -1); } +void vpmovsxbd(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F38 | PP_66, 0x21, true, -1); } +void vpmovsxbq(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F38 | PP_66, 0x22, true, -1); } +void vpmovsxwd(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F38 | PP_66, 0x23, true, -1); } +void vpmovsxwq(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F38 | PP_66, 0x24, true, -1); } +void vpmovsxdq(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F38 | PP_66, 0x25, true, -1); } +void vpmovzxbw(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F38 | PP_66, 0x30, true, -1); } +void vpmovzxbd(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F38 | PP_66, 0x31, true, -1); } +void vpmovzxbq(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F38 | PP_66, 0x32, true, -1); } +void vpmovzxwd(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F38 | PP_66, 0x33, true, -1); } +void vpmovzxwq(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F38 | PP_66, 0x34, true, -1); } +void vpmovzxdq(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F38 | PP_66, 0x35, true, -1); } +void vpshufd(const Xmm& xm, const Operand& op, uint8 imm) { opAVX_X_XM_IMM(xm, op, MM_0F | PP_66, 0x70, true, -1, imm); } +void vpshufhw(const Xmm& xm, const Operand& op, uint8 imm) { opAVX_X_XM_IMM(xm, op, MM_0F | PP_F3, 0x70, true, -1, imm); } +void vpshuflw(const Xmm& xm, const Operand& op, uint8 imm) { opAVX_X_XM_IMM(xm, op, MM_0F | PP_F2, 0x70, true, -1, imm); } void vptest(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F38 | PP_66, 0x17, false, -1); } void vrcpps(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F, 0x53, true, -1); } void vrsqrtps(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, MM_0F, 0x52, true, -1); } @@ -863,6 +930,302 @@ void vmaskmovps(const Xmm& xm1, const Xmm& xm2, const Address& addr) { opAVX_X_X void vmaskmovps(const Address& addr, const Xmm& xm1, const Xmm& xm2) { opAVX_X_X_XM(xm2, xm1, addr, MM_0F38 | PP_66, 0x2E, true, 0); } void vmaskmovpd(const Xmm& xm1, const Xmm& xm2, const Address& addr) { opAVX_X_X_XM(xm1, xm2, addr, MM_0F38 | PP_66, 0x2D, true, 0); } void vmaskmovpd(const Address& addr, const Xmm& xm1, const Xmm& xm2) { opAVX_X_X_XM(xm2, xm1, addr, MM_0F38 | PP_66, 0x2F, true, 0); } +void vpmaskmovd(const Xmm& xm1, const Xmm& xm2, const Address& addr) { opAVX_X_X_XM(xm1, xm2, addr, MM_0F38 | PP_66, 0x8C, true, 0); } +void vpmaskmovd(const Address& addr, const Xmm& xm1, const Xmm& xm2) { opAVX_X_X_XM(xm2, xm1, addr, MM_0F38 | PP_66, 0x8E, true, 0); } +void vpmaskmovq(const Xmm& xm1, const Xmm& xm2, const Address& addr) { opAVX_X_X_XM(xm1, xm2, addr, MM_0F38 | PP_66, 0x8C, true, 1); } +void vpmaskmovq(const Address& addr, const Xmm& xm1, const Xmm& xm2) { opAVX_X_X_XM(xm2, xm1, addr, MM_0F38 | PP_66, 0x8E, true, 1); } +void vpermd(const Ymm& y1, const Ymm& y2, const Operand& op) { opAVX_X_X_XM(y1, y2, op, MM_0F38 | PP_66, 0x36, true, 0); } +void vpermps(const Ymm& y1, const Ymm& y2, const Operand& op) { opAVX_X_X_XM(y1, y2, op, MM_0F38 | PP_66, 0x16, true, 0); } +void vpermq(const Ymm& y, const Operand& op, uint8 imm) { opAVX_X_XM_IMM(y, op, MM_0F3A | PP_66, 0x00, true, 1, imm); } +void vpermpd(const Ymm& y, const Operand& op, uint8 imm) { opAVX_X_XM_IMM(y, op, MM_0F3A | PP_66, 0x01, true, 1, imm); } +void cmpeqpd(const Xmm& x, const Operand& op) { cmppd(x, op, 0); } +void vcmpeqpd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmppd(x1, x2, op, 0); } +void vcmpeqpd(const Xmm& x, const Operand& op) { vcmppd(x, op, 0); } +void cmpltpd(const Xmm& x, const Operand& op) { cmppd(x, op, 1); } +void vcmpltpd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmppd(x1, x2, op, 1); } +void vcmpltpd(const Xmm& x, const Operand& op) { vcmppd(x, op, 1); } +void cmplepd(const Xmm& x, const Operand& op) { cmppd(x, op, 2); } +void vcmplepd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmppd(x1, x2, op, 2); } +void vcmplepd(const Xmm& x, const Operand& op) { vcmppd(x, op, 2); } +void cmpunordpd(const Xmm& x, const Operand& op) { cmppd(x, op, 3); } +void vcmpunordpd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmppd(x1, x2, op, 3); } +void vcmpunordpd(const Xmm& x, const Operand& op) { vcmppd(x, op, 3); } +void cmpneqpd(const Xmm& x, const Operand& op) { cmppd(x, op, 4); } +void vcmpneqpd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmppd(x1, x2, op, 4); } +void vcmpneqpd(const Xmm& x, const Operand& op) { vcmppd(x, op, 4); } +void cmpnltpd(const Xmm& x, const Operand& op) { cmppd(x, op, 5); } +void vcmpnltpd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmppd(x1, x2, op, 5); } +void vcmpnltpd(const Xmm& x, const Operand& op) { vcmppd(x, op, 5); } +void cmpnlepd(const Xmm& x, const Operand& op) { cmppd(x, op, 6); } +void vcmpnlepd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmppd(x1, x2, op, 6); } +void vcmpnlepd(const Xmm& x, const Operand& op) { vcmppd(x, op, 6); } +void cmpordpd(const Xmm& x, const Operand& op) { cmppd(x, op, 7); } +void vcmpordpd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmppd(x1, x2, op, 7); } +void vcmpordpd(const Xmm& x, const Operand& op) { vcmppd(x, op, 7); } +void vcmpeq_uqpd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmppd(x1, x2, op, 8); } +void vcmpeq_uqpd(const Xmm& x, const Operand& op) { vcmppd(x, op, 8); } +void vcmpngepd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmppd(x1, x2, op, 9); } +void vcmpngepd(const Xmm& x, const Operand& op) { vcmppd(x, op, 9); } +void vcmpngtpd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmppd(x1, x2, op, 10); } +void vcmpngtpd(const Xmm& x, const Operand& op) { vcmppd(x, op, 10); } +void vcmpfalsepd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmppd(x1, x2, op, 11); } +void vcmpfalsepd(const Xmm& x, const Operand& op) { vcmppd(x, op, 11); } +void vcmpneq_oqpd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmppd(x1, x2, op, 12); } +void vcmpneq_oqpd(const Xmm& x, const Operand& op) { vcmppd(x, op, 12); } +void vcmpgepd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmppd(x1, x2, op, 13); } +void vcmpgepd(const Xmm& x, const Operand& op) { vcmppd(x, op, 13); } +void vcmpgtpd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmppd(x1, x2, op, 14); } +void vcmpgtpd(const Xmm& x, const Operand& op) { vcmppd(x, op, 14); } +void vcmptruepd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmppd(x1, x2, op, 15); } +void vcmptruepd(const Xmm& x, const Operand& op) { vcmppd(x, op, 15); } +void vcmpeq_ospd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmppd(x1, x2, op, 16); } +void vcmpeq_ospd(const Xmm& x, const Operand& op) { vcmppd(x, op, 16); } +void vcmplt_oqpd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmppd(x1, x2, op, 17); } +void vcmplt_oqpd(const Xmm& x, const Operand& op) { vcmppd(x, op, 17); } +void vcmple_oqpd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmppd(x1, x2, op, 18); } +void vcmple_oqpd(const Xmm& x, const Operand& op) { vcmppd(x, op, 18); } +void vcmpunord_spd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmppd(x1, x2, op, 19); } +void vcmpunord_spd(const Xmm& x, const Operand& op) { vcmppd(x, op, 19); } +void vcmpneq_uspd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmppd(x1, x2, op, 20); } +void vcmpneq_uspd(const Xmm& x, const Operand& op) { vcmppd(x, op, 20); } +void vcmpnlt_uqpd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmppd(x1, x2, op, 21); } +void vcmpnlt_uqpd(const Xmm& x, const Operand& op) { vcmppd(x, op, 21); } +void vcmpnle_uqpd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmppd(x1, x2, op, 22); } +void vcmpnle_uqpd(const Xmm& x, const Operand& op) { vcmppd(x, op, 22); } +void vcmpord_spd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmppd(x1, x2, op, 23); } +void vcmpord_spd(const Xmm& x, const Operand& op) { vcmppd(x, op, 23); } +void vcmpeq_uspd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmppd(x1, x2, op, 24); } +void vcmpeq_uspd(const Xmm& x, const Operand& op) { vcmppd(x, op, 24); } +void vcmpnge_uqpd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmppd(x1, x2, op, 25); } +void vcmpnge_uqpd(const Xmm& x, const Operand& op) { vcmppd(x, op, 25); } +void vcmpngt_uqpd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmppd(x1, x2, op, 26); } +void vcmpngt_uqpd(const Xmm& x, const Operand& op) { vcmppd(x, op, 26); } +void vcmpfalse_ospd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmppd(x1, x2, op, 27); } +void vcmpfalse_ospd(const Xmm& x, const Operand& op) { vcmppd(x, op, 27); } +void vcmpneq_ospd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmppd(x1, x2, op, 28); } +void vcmpneq_ospd(const Xmm& x, const Operand& op) { vcmppd(x, op, 28); } +void vcmpge_oqpd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmppd(x1, x2, op, 29); } +void vcmpge_oqpd(const Xmm& x, const Operand& op) { vcmppd(x, op, 29); } +void vcmpgt_oqpd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmppd(x1, x2, op, 30); } +void vcmpgt_oqpd(const Xmm& x, const Operand& op) { vcmppd(x, op, 30); } +void vcmptrue_uspd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmppd(x1, x2, op, 31); } +void vcmptrue_uspd(const Xmm& x, const Operand& op) { vcmppd(x, op, 31); } +void cmpeqps(const Xmm& x, const Operand& op) { cmpps(x, op, 0); } +void vcmpeqps(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpps(x1, x2, op, 0); } +void vcmpeqps(const Xmm& x, const Operand& op) { vcmpps(x, op, 0); } +void cmpltps(const Xmm& x, const Operand& op) { cmpps(x, op, 1); } +void vcmpltps(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpps(x1, x2, op, 1); } +void vcmpltps(const Xmm& x, const Operand& op) { vcmpps(x, op, 1); } +void cmpleps(const Xmm& x, const Operand& op) { cmpps(x, op, 2); } +void vcmpleps(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpps(x1, x2, op, 2); } +void vcmpleps(const Xmm& x, const Operand& op) { vcmpps(x, op, 2); } +void cmpunordps(const Xmm& x, const Operand& op) { cmpps(x, op, 3); } +void vcmpunordps(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpps(x1, x2, op, 3); } +void vcmpunordps(const Xmm& x, const Operand& op) { vcmpps(x, op, 3); } +void cmpneqps(const Xmm& x, const Operand& op) { cmpps(x, op, 4); } +void vcmpneqps(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpps(x1, x2, op, 4); } +void vcmpneqps(const Xmm& x, const Operand& op) { vcmpps(x, op, 4); } +void cmpnltps(const Xmm& x, const Operand& op) { cmpps(x, op, 5); } +void vcmpnltps(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpps(x1, x2, op, 5); } +void vcmpnltps(const Xmm& x, const Operand& op) { vcmpps(x, op, 5); } +void cmpnleps(const Xmm& x, const Operand& op) { cmpps(x, op, 6); } +void vcmpnleps(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpps(x1, x2, op, 6); } +void vcmpnleps(const Xmm& x, const Operand& op) { vcmpps(x, op, 6); } +void cmpordps(const Xmm& x, const Operand& op) { cmpps(x, op, 7); } +void vcmpordps(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpps(x1, x2, op, 7); } +void vcmpordps(const Xmm& x, const Operand& op) { vcmpps(x, op, 7); } +void vcmpeq_uqps(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpps(x1, x2, op, 8); } +void vcmpeq_uqps(const Xmm& x, const Operand& op) { vcmpps(x, op, 8); } +void vcmpngeps(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpps(x1, x2, op, 9); } +void vcmpngeps(const Xmm& x, const Operand& op) { vcmpps(x, op, 9); } +void vcmpngtps(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpps(x1, x2, op, 10); } +void vcmpngtps(const Xmm& x, const Operand& op) { vcmpps(x, op, 10); } +void vcmpfalseps(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpps(x1, x2, op, 11); } +void vcmpfalseps(const Xmm& x, const Operand& op) { vcmpps(x, op, 11); } +void vcmpneq_oqps(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpps(x1, x2, op, 12); } +void vcmpneq_oqps(const Xmm& x, const Operand& op) { vcmpps(x, op, 12); } +void vcmpgeps(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpps(x1, x2, op, 13); } +void vcmpgeps(const Xmm& x, const Operand& op) { vcmpps(x, op, 13); } +void vcmpgtps(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpps(x1, x2, op, 14); } +void vcmpgtps(const Xmm& x, const Operand& op) { vcmpps(x, op, 14); } +void vcmptrueps(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpps(x1, x2, op, 15); } +void vcmptrueps(const Xmm& x, const Operand& op) { vcmpps(x, op, 15); } +void vcmpeq_osps(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpps(x1, x2, op, 16); } +void vcmpeq_osps(const Xmm& x, const Operand& op) { vcmpps(x, op, 16); } +void vcmplt_oqps(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpps(x1, x2, op, 17); } +void vcmplt_oqps(const Xmm& x, const Operand& op) { vcmpps(x, op, 17); } +void vcmple_oqps(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpps(x1, x2, op, 18); } +void vcmple_oqps(const Xmm& x, const Operand& op) { vcmpps(x, op, 18); } +void vcmpunord_sps(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpps(x1, x2, op, 19); } +void vcmpunord_sps(const Xmm& x, const Operand& op) { vcmpps(x, op, 19); } +void vcmpneq_usps(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpps(x1, x2, op, 20); } +void vcmpneq_usps(const Xmm& x, const Operand& op) { vcmpps(x, op, 20); } +void vcmpnlt_uqps(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpps(x1, x2, op, 21); } +void vcmpnlt_uqps(const Xmm& x, const Operand& op) { vcmpps(x, op, 21); } +void vcmpnle_uqps(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpps(x1, x2, op, 22); } +void vcmpnle_uqps(const Xmm& x, const Operand& op) { vcmpps(x, op, 22); } +void vcmpord_sps(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpps(x1, x2, op, 23); } +void vcmpord_sps(const Xmm& x, const Operand& op) { vcmpps(x, op, 23); } +void vcmpeq_usps(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpps(x1, x2, op, 24); } +void vcmpeq_usps(const Xmm& x, const Operand& op) { vcmpps(x, op, 24); } +void vcmpnge_uqps(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpps(x1, x2, op, 25); } +void vcmpnge_uqps(const Xmm& x, const Operand& op) { vcmpps(x, op, 25); } +void vcmpngt_uqps(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpps(x1, x2, op, 26); } +void vcmpngt_uqps(const Xmm& x, const Operand& op) { vcmpps(x, op, 26); } +void vcmpfalse_osps(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpps(x1, x2, op, 27); } +void vcmpfalse_osps(const Xmm& x, const Operand& op) { vcmpps(x, op, 27); } +void vcmpneq_osps(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpps(x1, x2, op, 28); } +void vcmpneq_osps(const Xmm& x, const Operand& op) { vcmpps(x, op, 28); } +void vcmpge_oqps(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpps(x1, x2, op, 29); } +void vcmpge_oqps(const Xmm& x, const Operand& op) { vcmpps(x, op, 29); } +void vcmpgt_oqps(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpps(x1, x2, op, 30); } +void vcmpgt_oqps(const Xmm& x, const Operand& op) { vcmpps(x, op, 30); } +void vcmptrue_usps(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpps(x1, x2, op, 31); } +void vcmptrue_usps(const Xmm& x, const Operand& op) { vcmpps(x, op, 31); } +void cmpeqsd(const Xmm& x, const Operand& op) { cmpsd(x, op, 0); } +void vcmpeqsd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpsd(x1, x2, op, 0); } +void vcmpeqsd(const Xmm& x, const Operand& op) { vcmpsd(x, op, 0); } +void cmpltsd(const Xmm& x, const Operand& op) { cmpsd(x, op, 1); } +void vcmpltsd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpsd(x1, x2, op, 1); } +void vcmpltsd(const Xmm& x, const Operand& op) { vcmpsd(x, op, 1); } +void cmplesd(const Xmm& x, const Operand& op) { cmpsd(x, op, 2); } +void vcmplesd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpsd(x1, x2, op, 2); } +void vcmplesd(const Xmm& x, const Operand& op) { vcmpsd(x, op, 2); } +void cmpunordsd(const Xmm& x, const Operand& op) { cmpsd(x, op, 3); } +void vcmpunordsd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpsd(x1, x2, op, 3); } +void vcmpunordsd(const Xmm& x, const Operand& op) { vcmpsd(x, op, 3); } +void cmpneqsd(const Xmm& x, const Operand& op) { cmpsd(x, op, 4); } +void vcmpneqsd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpsd(x1, x2, op, 4); } +void vcmpneqsd(const Xmm& x, const Operand& op) { vcmpsd(x, op, 4); } +void cmpnltsd(const Xmm& x, const Operand& op) { cmpsd(x, op, 5); } +void vcmpnltsd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpsd(x1, x2, op, 5); } +void vcmpnltsd(const Xmm& x, const Operand& op) { vcmpsd(x, op, 5); } +void cmpnlesd(const Xmm& x, const Operand& op) { cmpsd(x, op, 6); } +void vcmpnlesd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpsd(x1, x2, op, 6); } +void vcmpnlesd(const Xmm& x, const Operand& op) { vcmpsd(x, op, 6); } +void cmpordsd(const Xmm& x, const Operand& op) { cmpsd(x, op, 7); } +void vcmpordsd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpsd(x1, x2, op, 7); } +void vcmpordsd(const Xmm& x, const Operand& op) { vcmpsd(x, op, 7); } +void vcmpeq_uqsd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpsd(x1, x2, op, 8); } +void vcmpeq_uqsd(const Xmm& x, const Operand& op) { vcmpsd(x, op, 8); } +void vcmpngesd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpsd(x1, x2, op, 9); } +void vcmpngesd(const Xmm& x, const Operand& op) { vcmpsd(x, op, 9); } +void vcmpngtsd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpsd(x1, x2, op, 10); } +void vcmpngtsd(const Xmm& x, const Operand& op) { vcmpsd(x, op, 10); } +void vcmpfalsesd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpsd(x1, x2, op, 11); } +void vcmpfalsesd(const Xmm& x, const Operand& op) { vcmpsd(x, op, 11); } +void vcmpneq_oqsd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpsd(x1, x2, op, 12); } +void vcmpneq_oqsd(const Xmm& x, const Operand& op) { vcmpsd(x, op, 12); } +void vcmpgesd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpsd(x1, x2, op, 13); } +void vcmpgesd(const Xmm& x, const Operand& op) { vcmpsd(x, op, 13); } +void vcmpgtsd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpsd(x1, x2, op, 14); } +void vcmpgtsd(const Xmm& x, const Operand& op) { vcmpsd(x, op, 14); } +void vcmptruesd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpsd(x1, x2, op, 15); } +void vcmptruesd(const Xmm& x, const Operand& op) { vcmpsd(x, op, 15); } +void vcmpeq_ossd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpsd(x1, x2, op, 16); } +void vcmpeq_ossd(const Xmm& x, const Operand& op) { vcmpsd(x, op, 16); } +void vcmplt_oqsd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpsd(x1, x2, op, 17); } +void vcmplt_oqsd(const Xmm& x, const Operand& op) { vcmpsd(x, op, 17); } +void vcmple_oqsd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpsd(x1, x2, op, 18); } +void vcmple_oqsd(const Xmm& x, const Operand& op) { vcmpsd(x, op, 18); } +void vcmpunord_ssd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpsd(x1, x2, op, 19); } +void vcmpunord_ssd(const Xmm& x, const Operand& op) { vcmpsd(x, op, 19); } +void vcmpneq_ussd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpsd(x1, x2, op, 20); } +void vcmpneq_ussd(const Xmm& x, const Operand& op) { vcmpsd(x, op, 20); } +void vcmpnlt_uqsd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpsd(x1, x2, op, 21); } +void vcmpnlt_uqsd(const Xmm& x, const Operand& op) { vcmpsd(x, op, 21); } +void vcmpnle_uqsd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpsd(x1, x2, op, 22); } +void vcmpnle_uqsd(const Xmm& x, const Operand& op) { vcmpsd(x, op, 22); } +void vcmpord_ssd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpsd(x1, x2, op, 23); } +void vcmpord_ssd(const Xmm& x, const Operand& op) { vcmpsd(x, op, 23); } +void vcmpeq_ussd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpsd(x1, x2, op, 24); } +void vcmpeq_ussd(const Xmm& x, const Operand& op) { vcmpsd(x, op, 24); } +void vcmpnge_uqsd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpsd(x1, x2, op, 25); } +void vcmpnge_uqsd(const Xmm& x, const Operand& op) { vcmpsd(x, op, 25); } +void vcmpngt_uqsd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpsd(x1, x2, op, 26); } +void vcmpngt_uqsd(const Xmm& x, const Operand& op) { vcmpsd(x, op, 26); } +void vcmpfalse_ossd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpsd(x1, x2, op, 27); } +void vcmpfalse_ossd(const Xmm& x, const Operand& op) { vcmpsd(x, op, 27); } +void vcmpneq_ossd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpsd(x1, x2, op, 28); } +void vcmpneq_ossd(const Xmm& x, const Operand& op) { vcmpsd(x, op, 28); } +void vcmpge_oqsd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpsd(x1, x2, op, 29); } +void vcmpge_oqsd(const Xmm& x, const Operand& op) { vcmpsd(x, op, 29); } +void vcmpgt_oqsd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpsd(x1, x2, op, 30); } +void vcmpgt_oqsd(const Xmm& x, const Operand& op) { vcmpsd(x, op, 30); } +void vcmptrue_ussd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpsd(x1, x2, op, 31); } +void vcmptrue_ussd(const Xmm& x, const Operand& op) { vcmpsd(x, op, 31); } +void cmpeqss(const Xmm& x, const Operand& op) { cmpss(x, op, 0); } +void vcmpeqss(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpss(x1, x2, op, 0); } +void vcmpeqss(const Xmm& x, const Operand& op) { vcmpss(x, op, 0); } +void cmpltss(const Xmm& x, const Operand& op) { cmpss(x, op, 1); } +void vcmpltss(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpss(x1, x2, op, 1); } +void vcmpltss(const Xmm& x, const Operand& op) { vcmpss(x, op, 1); } +void cmpless(const Xmm& x, const Operand& op) { cmpss(x, op, 2); } +void vcmpless(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpss(x1, x2, op, 2); } +void vcmpless(const Xmm& x, const Operand& op) { vcmpss(x, op, 2); } +void cmpunordss(const Xmm& x, const Operand& op) { cmpss(x, op, 3); } +void vcmpunordss(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpss(x1, x2, op, 3); } +void vcmpunordss(const Xmm& x, const Operand& op) { vcmpss(x, op, 3); } +void cmpneqss(const Xmm& x, const Operand& op) { cmpss(x, op, 4); } +void vcmpneqss(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpss(x1, x2, op, 4); } +void vcmpneqss(const Xmm& x, const Operand& op) { vcmpss(x, op, 4); } +void cmpnltss(const Xmm& x, const Operand& op) { cmpss(x, op, 5); } +void vcmpnltss(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpss(x1, x2, op, 5); } +void vcmpnltss(const Xmm& x, const Operand& op) { vcmpss(x, op, 5); } +void cmpnless(const Xmm& x, const Operand& op) { cmpss(x, op, 6); } +void vcmpnless(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpss(x1, x2, op, 6); } +void vcmpnless(const Xmm& x, const Operand& op) { vcmpss(x, op, 6); } +void cmpordss(const Xmm& x, const Operand& op) { cmpss(x, op, 7); } +void vcmpordss(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpss(x1, x2, op, 7); } +void vcmpordss(const Xmm& x, const Operand& op) { vcmpss(x, op, 7); } +void vcmpeq_uqss(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpss(x1, x2, op, 8); } +void vcmpeq_uqss(const Xmm& x, const Operand& op) { vcmpss(x, op, 8); } +void vcmpngess(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpss(x1, x2, op, 9); } +void vcmpngess(const Xmm& x, const Operand& op) { vcmpss(x, op, 9); } +void vcmpngtss(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpss(x1, x2, op, 10); } +void vcmpngtss(const Xmm& x, const Operand& op) { vcmpss(x, op, 10); } +void vcmpfalsess(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpss(x1, x2, op, 11); } +void vcmpfalsess(const Xmm& x, const Operand& op) { vcmpss(x, op, 11); } +void vcmpneq_oqss(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpss(x1, x2, op, 12); } +void vcmpneq_oqss(const Xmm& x, const Operand& op) { vcmpss(x, op, 12); } +void vcmpgess(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpss(x1, x2, op, 13); } +void vcmpgess(const Xmm& x, const Operand& op) { vcmpss(x, op, 13); } +void vcmpgtss(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpss(x1, x2, op, 14); } +void vcmpgtss(const Xmm& x, const Operand& op) { vcmpss(x, op, 14); } +void vcmptruess(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpss(x1, x2, op, 15); } +void vcmptruess(const Xmm& x, const Operand& op) { vcmpss(x, op, 15); } +void vcmpeq_osss(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpss(x1, x2, op, 16); } +void vcmpeq_osss(const Xmm& x, const Operand& op) { vcmpss(x, op, 16); } +void vcmplt_oqss(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpss(x1, x2, op, 17); } +void vcmplt_oqss(const Xmm& x, const Operand& op) { vcmpss(x, op, 17); } +void vcmple_oqss(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpss(x1, x2, op, 18); } +void vcmple_oqss(const Xmm& x, const Operand& op) { vcmpss(x, op, 18); } +void vcmpunord_sss(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpss(x1, x2, op, 19); } +void vcmpunord_sss(const Xmm& x, const Operand& op) { vcmpss(x, op, 19); } +void vcmpneq_usss(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpss(x1, x2, op, 20); } +void vcmpneq_usss(const Xmm& x, const Operand& op) { vcmpss(x, op, 20); } +void vcmpnlt_uqss(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpss(x1, x2, op, 21); } +void vcmpnlt_uqss(const Xmm& x, const Operand& op) { vcmpss(x, op, 21); } +void vcmpnle_uqss(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpss(x1, x2, op, 22); } +void vcmpnle_uqss(const Xmm& x, const Operand& op) { vcmpss(x, op, 22); } +void vcmpord_sss(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpss(x1, x2, op, 23); } +void vcmpord_sss(const Xmm& x, const Operand& op) { vcmpss(x, op, 23); } +void vcmpeq_usss(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpss(x1, x2, op, 24); } +void vcmpeq_usss(const Xmm& x, const Operand& op) { vcmpss(x, op, 24); } +void vcmpnge_uqss(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpss(x1, x2, op, 25); } +void vcmpnge_uqss(const Xmm& x, const Operand& op) { vcmpss(x, op, 25); } +void vcmpngt_uqss(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpss(x1, x2, op, 26); } +void vcmpngt_uqss(const Xmm& x, const Operand& op) { vcmpss(x, op, 26); } +void vcmpfalse_osss(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpss(x1, x2, op, 27); } +void vcmpfalse_osss(const Xmm& x, const Operand& op) { vcmpss(x, op, 27); } +void vcmpneq_osss(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpss(x1, x2, op, 28); } +void vcmpneq_osss(const Xmm& x, const Operand& op) { vcmpss(x, op, 28); } +void vcmpge_oqss(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpss(x1, x2, op, 29); } +void vcmpge_oqss(const Xmm& x, const Operand& op) { vcmpss(x, op, 29); } +void vcmpgt_oqss(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpss(x1, x2, op, 30); } +void vcmpgt_oqss(const Xmm& x, const Operand& op) { vcmpss(x, op, 30); } +void vcmptrue_usss(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpss(x1, x2, op, 31); } +void vcmptrue_usss(const Xmm& x, const Operand& op) { vcmpss(x, op, 31); } void vmovhpd(const Xmm& x, const Operand& op1, const Operand& op2 = Operand()) { if (!op2.isNone() && !op2.isMEM()) throw ERR_BAD_COMBINATION; opAVX_X_X_XM(x, op1, op2, MM_0F | PP_66, 0x16, false); } void vmovhpd(const Address& addr, const Xmm& x) { opAVX_X_X_XM(x, xm0, addr, MM_0F | PP_66, 0x17, false); } void vmovhps(const Xmm& x, const Operand& op1, const Operand& op2 = Operand()) { if (!op2.isNone() && !op2.isMEM()) throw ERR_BAD_COMBINATION; opAVX_X_X_XM(x, op1, op2, MM_0F, 0x16, false); } @@ -933,46 +1296,51 @@ void vfnmsub213ss(const Xmm& xmm, const Xmm& op1, const Operand& op2 = Operand() void vfnmsub231ss(const Xmm& xmm, const Xmm& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, MM_0F38 | PP_66, 0xBF, false, 0); } void vaesimc(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, MM_0F38 | PP_66, 0xDB, false, 0); } void vbroadcastf128(const Ymm& y, const Address& addr) { opAVX_X_XM_IMM(y, addr, MM_0F38 | PP_66, 0x1A, true, 0); } -void vbroadcastsd(const Ymm& y, const Address& addr) { opAVX_X_XM_IMM(y, addr, MM_0F38 | PP_66, 0x19, true, 0); } -void vbroadcastss(const Xmm& x, const Address& addr) { opAVX_X_XM_IMM(x, addr, MM_0F38 | PP_66, 0x18, true, 0); } -void vextractf128(const Operand& op, const Ymm& y, uint8 imm) { opAVX_X_XM_IMM(y, cvtReg(op, op.isXMM(), Operand::YMM), MM_0F3A | PP_66, 0x19, true, 0, imm); } -void vinsertf128(const Ymm& y1, const Ymm& y2, const Operand& op, uint8 imm) { opAVX_X_X_XM(y1, y2, cvtReg(op, op.isXMM(), Operand::YMM), MM_0F3A | PP_66, 0x18, true, 0); db(imm); } +void vbroadcasti128(const Ymm& y, const Address& addr) { opAVX_X_XM_IMM(y, addr, MM_0F38 | PP_66, 0x5A, true, 0); } +void vbroadcastsd(const Ymm& y, const Operand& op) { if (!(op.isXMM() || op.isMEM())) throw ERR_BAD_COMBINATION; opAVX_X_XM_IMM(y, op, MM_0F38 | PP_66, 0x19, true, 0); } +void vbroadcastss(const Xmm& x, const Operand& op) { if (!(op.isXMM() || op.isMEM())) throw ERR_BAD_COMBINATION; opAVX_X_XM_IMM(x, op, MM_0F38 | PP_66, 0x18, true, 0); } +void vextractf128(const Operand& op, const Ymm& y, uint8 imm) { opAVX_X_X_XMcvt(y, y.isXMM() ? xm0 : ym0, op, op.isXMM(), Operand::YMM, MM_0F3A | PP_66, 0x19, true, 0); db(imm); } +void vextracti128(const Operand& op, const Ymm& y, uint8 imm) { opAVX_X_X_XMcvt(y, y.isXMM() ? xm0 : ym0, op, op.isXMM(), Operand::YMM, MM_0F3A | PP_66, 0x39, true, 0); db(imm); } +void vextractps(const Operand& op, const Xmm& x, uint8 imm) { if (!(op.isREG(32) || op.isMEM()) || x.isYMM()) throw ERR_BAD_COMBINATION; opAVX_X_X_XMcvt(x, x.isXMM() ? xm0 : ym0, op, op.isREG(), Operand::XMM, MM_0F3A | PP_66, 0x17, false, 0); db(imm); } +void vinsertf128(const Ymm& y1, const Ymm& y2, const Operand& op, uint8 imm) { opAVX_X_X_XMcvt(y1, y2, op, op.isXMM(), Operand::YMM, MM_0F3A | PP_66, 0x18, true, 0); db(imm); } +void vinserti128(const Ymm& y1, const Ymm& y2, const Operand& op, uint8 imm) { opAVX_X_X_XMcvt(y1, y2, op, op.isXMM(), Operand::YMM, MM_0F3A | PP_66, 0x38, true, 0); db(imm); } void vperm2f128(const Ymm& y1, const Ymm& y2, const Operand& op, uint8 imm) { opAVX_X_X_XM(y1, y2, op, MM_0F3A | PP_66, 0x06, true, 0); db(imm); } +void vperm2i128(const Ymm& y1, const Ymm& y2, const Operand& op, uint8 imm) { opAVX_X_X_XM(y1, y2, op, MM_0F3A | PP_66, 0x46, true, 0); db(imm); } void vlddqu(const Xmm& x, const Address& addr) { opAVX_X_X_XM(x, x.isXMM() ? xm0 : ym0, addr, MM_0F | PP_F2, 0xF0, true, 0); } void vldmxcsr(const Address& addr) { opAVX_X_X_XM(xm2, xm0, addr, MM_0F, 0xAE, false, -1); } void vstmxcsr(const Address& addr) { opAVX_X_X_XM(xm3, xm0, addr, MM_0F, 0xAE, false, -1); } void vmaskmovdqu(const Xmm& x1, const Xmm& x2) { opAVX_X_X_XM(x1, xm0, x2, MM_0F | PP_66, 0xF7, false, -1); } -void vpextrb(const Operand& op, const Xmm& x, uint8 imm) { if (!op.isREG(i32e) && !op.isMEM()) throw ERR_BAD_COMBINATION; opAVX_X_X_XM(x, xm0, cvtReg(op, !op.isMEM(), Operand::XMM), MM_0F3A | PP_66, 0x14, false); db(imm); } +void vpextrb(const Operand& op, const Xmm& x, uint8 imm) { if (!op.isREG(i32e) && !op.isMEM()) throw ERR_BAD_COMBINATION; opAVX_X_X_XMcvt(x, xm0, op, !op.isMEM(), Operand::XMM, MM_0F3A | PP_66, 0x14, false); db(imm); } void vpextrw(const Reg& r, const Xmm& x, uint8 imm) { opAVX_X_X_XM(Xmm(r.getIdx()), xm0, x, MM_0F | PP_66, 0xC5, false, r.isBit(64) ? 1 : 0); db(imm); } void vpextrw(const Address& addr, const Xmm& x, uint8 imm) { opAVX_X_X_XM(x, xm0, addr, MM_0F3A | PP_66, 0x15, false); db(imm); } -void vpextrd(const Operand& op, const Xmm& x, uint8 imm) { if (!op.isREG(32) && !op.isMEM()) throw ERR_BAD_COMBINATION; opAVX_X_X_XM(x, xm0, cvtReg(op, !op.isMEM(), Operand::XMM), MM_0F3A | PP_66, 0x16, false, 0); db(imm); } -void vpinsrb(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { if (!op.isREG(32) && !op.isMEM()) throw ERR_BAD_COMBINATION; opAVX_X_X_XM(x1, x2, cvtReg(op, !op.isMEM(), Operand::XMM), MM_0F3A | PP_66, 0x20, false); db(imm); } -void vpinsrb(const Xmm& x, const Operand& op, uint8 imm) { if (!op.isREG(32) && !op.isMEM()) throw ERR_BAD_COMBINATION; opAVX_X_X_XM(x, x, cvtReg(op, !op.isMEM(), Operand::XMM), MM_0F3A | PP_66, 0x20, false); db(imm); } -void vpinsrw(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { if (!op.isREG(32) && !op.isMEM()) throw ERR_BAD_COMBINATION; opAVX_X_X_XM(x1, x2, cvtReg(op, !op.isMEM(), Operand::XMM), MM_0F | PP_66, 0xC4, false); db(imm); } -void vpinsrw(const Xmm& x, const Operand& op, uint8 imm) { if (!op.isREG(32) && !op.isMEM()) throw ERR_BAD_COMBINATION; opAVX_X_X_XM(x, x, cvtReg(op, !op.isMEM(), Operand::XMM), MM_0F | PP_66, 0xC4, false); db(imm); } -void vpinsrd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { if (!op.isREG(32) && !op.isMEM()) throw ERR_BAD_COMBINATION; opAVX_X_X_XM(x1, x2, cvtReg(op, !op.isMEM(), Operand::XMM), MM_0F3A | PP_66, 0x22, false, 0); db(imm); } -void vpinsrd(const Xmm& x, const Operand& op, uint8 imm) { if (!op.isREG(32) && !op.isMEM()) throw ERR_BAD_COMBINATION; opAVX_X_X_XM(x, x, cvtReg(op, !op.isMEM(), Operand::XMM), MM_0F3A | PP_66, 0x22, false, 0); db(imm); } -void vpmovmskb(const Reg32e& r, const Xmm& x) { opAVX_X_X_XM(Xmm(r.getIdx()), xm0, x, MM_0F | PP_66, 0xD7, false); } -void vpslldq(const Xmm& x1, const Xmm& x2, uint8 imm) { opAVX_X_X_XM(xm7, x1, x2, MM_0F | PP_66, 0x73, false); db(imm); } -void vpslldq(const Xmm& x, uint8 imm) { opAVX_X_X_XM(xm7, x, x, MM_0F | PP_66, 0x73, false); db(imm); } -void vpsrldq(const Xmm& x1, const Xmm& x2, uint8 imm) { opAVX_X_X_XM(xm3, x1, x2, MM_0F | PP_66, 0x73, false); db(imm); } -void vpsrldq(const Xmm& x, uint8 imm) { opAVX_X_X_XM(xm3, x, x, MM_0F | PP_66, 0x73, false); db(imm); } -void vpsllw(const Xmm& x1, const Xmm& x2, uint8 imm) { opAVX_X_X_XM(xm6, x1, x2, MM_0F | PP_66, 0x71, false); db(imm); } -void vpsllw(const Xmm& x, uint8 imm) { opAVX_X_X_XM(xm6, x, x, MM_0F | PP_66, 0x71, false); db(imm); } -void vpslld(const Xmm& x1, const Xmm& x2, uint8 imm) { opAVX_X_X_XM(xm6, x1, x2, MM_0F | PP_66, 0x72, false); db(imm); } -void vpslld(const Xmm& x, uint8 imm) { opAVX_X_X_XM(xm6, x, x, MM_0F | PP_66, 0x72, false); db(imm); } -void vpsllq(const Xmm& x1, const Xmm& x2, uint8 imm) { opAVX_X_X_XM(xm6, x1, x2, MM_0F | PP_66, 0x73, false); db(imm); } -void vpsllq(const Xmm& x, uint8 imm) { opAVX_X_X_XM(xm6, x, x, MM_0F | PP_66, 0x73, false); db(imm); } -void vpsraw(const Xmm& x1, const Xmm& x2, uint8 imm) { opAVX_X_X_XM(xm4, x1, x2, MM_0F | PP_66, 0x71, false); db(imm); } -void vpsraw(const Xmm& x, uint8 imm) { opAVX_X_X_XM(xm4, x, x, MM_0F | PP_66, 0x71, false); db(imm); } -void vpsrad(const Xmm& x1, const Xmm& x2, uint8 imm) { opAVX_X_X_XM(xm4, x1, x2, MM_0F | PP_66, 0x72, false); db(imm); } -void vpsrad(const Xmm& x, uint8 imm) { opAVX_X_X_XM(xm4, x, x, MM_0F | PP_66, 0x72, false); db(imm); } -void vpsrlw(const Xmm& x1, const Xmm& x2, uint8 imm) { opAVX_X_X_XM(xm2, x1, x2, MM_0F | PP_66, 0x71, false); db(imm); } -void vpsrlw(const Xmm& x, uint8 imm) { opAVX_X_X_XM(xm2, x, x, MM_0F | PP_66, 0x71, false); db(imm); } -void vpsrld(const Xmm& x1, const Xmm& x2, uint8 imm) { opAVX_X_X_XM(xm2, x1, x2, MM_0F | PP_66, 0x72, false); db(imm); } -void vpsrld(const Xmm& x, uint8 imm) { opAVX_X_X_XM(xm2, x, x, MM_0F | PP_66, 0x72, false); db(imm); } -void vpsrlq(const Xmm& x1, const Xmm& x2, uint8 imm) { opAVX_X_X_XM(xm2, x1, x2, MM_0F | PP_66, 0x73, false); db(imm); } -void vpsrlq(const Xmm& x, uint8 imm) { opAVX_X_X_XM(xm2, x, x, MM_0F | PP_66, 0x73, false); db(imm); } +void vpextrd(const Operand& op, const Xmm& x, uint8 imm) { if (!op.isREG(32) && !op.isMEM()) throw ERR_BAD_COMBINATION; opAVX_X_X_XMcvt(x, xm0, op, !op.isMEM(), Operand::XMM, MM_0F3A | PP_66, 0x16, false, 0); db(imm); } +void vpinsrb(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { if (!op.isREG(32) && !op.isMEM()) throw ERR_BAD_COMBINATION; opAVX_X_X_XMcvt(x1, x2, op, !op.isMEM(), Operand::XMM, MM_0F3A | PP_66, 0x20, false); db(imm); } +void vpinsrb(const Xmm& x, const Operand& op, uint8 imm) { if (!op.isREG(32) && !op.isMEM()) throw ERR_BAD_COMBINATION; opAVX_X_X_XMcvt(x, x, op, !op.isMEM(), Operand::XMM, MM_0F3A | PP_66, 0x20, false); db(imm); } +void vpinsrw(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { if (!op.isREG(32) && !op.isMEM()) throw ERR_BAD_COMBINATION; opAVX_X_X_XMcvt(x1, x2, op, !op.isMEM(), Operand::XMM, MM_0F | PP_66, 0xC4, false); db(imm); } +void vpinsrw(const Xmm& x, const Operand& op, uint8 imm) { if (!op.isREG(32) && !op.isMEM()) throw ERR_BAD_COMBINATION; opAVX_X_X_XMcvt(x, x, op, !op.isMEM(), Operand::XMM, MM_0F | PP_66, 0xC4, false); db(imm); } +void vpinsrd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { if (!op.isREG(32) && !op.isMEM()) throw ERR_BAD_COMBINATION; opAVX_X_X_XMcvt(x1, x2, op, !op.isMEM(), Operand::XMM, MM_0F3A | PP_66, 0x22, false, 0); db(imm); } +void vpinsrd(const Xmm& x, const Operand& op, uint8 imm) { if (!op.isREG(32) && !op.isMEM()) throw ERR_BAD_COMBINATION; opAVX_X_X_XMcvt(x, x, op, !op.isMEM(), Operand::XMM, MM_0F3A | PP_66, 0x22, false, 0); db(imm); } +void vpmovmskb(const Reg32e& r, const Xmm& x) { bool isYMM= x.isYMM(); opAVX_X_X_XM(isYMM ? Ymm(r.getIdx()) : Xmm(r.getIdx()), isYMM ? ym0 : xm0, x, MM_0F | PP_66, 0xD7, true); } +void vpslldq(const Xmm& x1, const Xmm& x2, uint8 imm) { opAVX_X_X_XM(x1.isYMM() ? ym7 : xm7, x1, x2, MM_0F | PP_66, 0x73, true); db(imm); } +void vpslldq(const Xmm& x, uint8 imm) { opAVX_X_X_XM(x.isYMM() ? ym7 : xm7, x, x, MM_0F | PP_66, 0x73, true); db(imm); } +void vpsrldq(const Xmm& x1, const Xmm& x2, uint8 imm) { opAVX_X_X_XM(x1.isYMM() ? ym3 : xm3, x1, x2, MM_0F | PP_66, 0x73, true); db(imm); } +void vpsrldq(const Xmm& x, uint8 imm) { opAVX_X_X_XM(x.isYMM() ? ym3 : xm3, x, x, MM_0F | PP_66, 0x73, true); db(imm); } +void vpsllw(const Xmm& x1, const Xmm& x2, uint8 imm) { opAVX_X_X_XM(x1.isYMM() ? ym6 : xm6, x1, x2, MM_0F | PP_66, 0x71, true); db(imm); } +void vpsllw(const Xmm& x, uint8 imm) { opAVX_X_X_XM(x.isYMM() ? ym6 : xm6, x, x, MM_0F | PP_66, 0x71, true); db(imm); } +void vpslld(const Xmm& x1, const Xmm& x2, uint8 imm) { opAVX_X_X_XM(x1.isYMM() ? ym6 : xm6, x1, x2, MM_0F | PP_66, 0x72, true); db(imm); } +void vpslld(const Xmm& x, uint8 imm) { opAVX_X_X_XM(x.isYMM() ? ym6 : xm6, x, x, MM_0F | PP_66, 0x72, true); db(imm); } +void vpsllq(const Xmm& x1, const Xmm& x2, uint8 imm) { opAVX_X_X_XM(x1.isYMM() ? ym6 : xm6, x1, x2, MM_0F | PP_66, 0x73, true); db(imm); } +void vpsllq(const Xmm& x, uint8 imm) { opAVX_X_X_XM(x.isYMM() ? ym6 : xm6, x, x, MM_0F | PP_66, 0x73, true); db(imm); } +void vpsraw(const Xmm& x1, const Xmm& x2, uint8 imm) { opAVX_X_X_XM(x1.isYMM() ? ym4 : xm4, x1, x2, MM_0F | PP_66, 0x71, true); db(imm); } +void vpsraw(const Xmm& x, uint8 imm) { opAVX_X_X_XM(x.isYMM() ? ym4 : xm4, x, x, MM_0F | PP_66, 0x71, true); db(imm); } +void vpsrad(const Xmm& x1, const Xmm& x2, uint8 imm) { opAVX_X_X_XM(x1.isYMM() ? ym4 : xm4, x1, x2, MM_0F | PP_66, 0x72, true); db(imm); } +void vpsrad(const Xmm& x, uint8 imm) { opAVX_X_X_XM(x.isYMM() ? ym4 : xm4, x, x, MM_0F | PP_66, 0x72, true); db(imm); } +void vpsrlw(const Xmm& x1, const Xmm& x2, uint8 imm) { opAVX_X_X_XM(x1.isYMM() ? ym2 : xm2, x1, x2, MM_0F | PP_66, 0x71, true); db(imm); } +void vpsrlw(const Xmm& x, uint8 imm) { opAVX_X_X_XM(x.isYMM() ? ym2 : xm2, x, x, MM_0F | PP_66, 0x71, true); db(imm); } +void vpsrld(const Xmm& x1, const Xmm& x2, uint8 imm) { opAVX_X_X_XM(x1.isYMM() ? ym2 : xm2, x1, x2, MM_0F | PP_66, 0x72, true); db(imm); } +void vpsrld(const Xmm& x, uint8 imm) { opAVX_X_X_XM(x.isYMM() ? ym2 : xm2, x, x, MM_0F | PP_66, 0x72, true); db(imm); } +void vpsrlq(const Xmm& x1, const Xmm& x2, uint8 imm) { opAVX_X_X_XM(x1.isYMM() ? ym2 : xm2, x1, x2, MM_0F | PP_66, 0x73, true); db(imm); } +void vpsrlq(const Xmm& x, uint8 imm) { opAVX_X_X_XM(x.isYMM() ? ym2 : xm2, x, x, MM_0F | PP_66, 0x73, true); db(imm); } void vblendvpd(const Xmm& x1, const Xmm& x2, const Operand& op, const Xmm& x4) { opAVX_X_X_XM(x1, x2, op, MM_0F3A | PP_66, 0x4B, true); db(x4.getIdx() << 4); } void vblendvpd(const Xmm& x1, const Operand& op, const Xmm& x4) { opAVX_X_X_XM(x1, x1, op, MM_0F3A | PP_66, 0x4B, true); db(x4.getIdx() << 4); } void vblendvps(const Xmm& x1, const Xmm& x2, const Operand& op, const Xmm& x4) { opAVX_X_X_XM(x1, x2, op, MM_0F3A | PP_66, 0x4A, true); db(x4.getIdx() << 4); } @@ -983,6 +1351,9 @@ void vmovd(const Xmm& x, const Reg32& reg) { opAVX_X_X_XM(x, xm0, Xmm(reg.getIdx void vmovd(const Xmm& x, const Address& addr) { opAVX_X_X_XM(x, xm0, addr, MM_0F | PP_66, 0x6E, false, 0); } void vmovd(const Reg32& reg, const Xmm& x) { opAVX_X_X_XM(x, xm0, Xmm(reg.getIdx()), MM_0F | PP_66, 0x7E, false, 0); } void vmovd(const Address& addr, const Xmm& x) { opAVX_X_X_XM(x, xm0, addr, MM_0F | PP_66, 0x7E, false, 0); } +void vmovq(const Xmm& x, const Address& addr) { opAVX_X_X_XM(x, xm0, addr, MM_0F | PP_F3, 0x7E, false, -1); } +void vmovq(const Address& addr, const Xmm& x) { opAVX_X_X_XM(x, xm0, addr, MM_0F | PP_66, 0xD6, false, -1); } +void vmovq(const Xmm& x1, const Xmm& x2) { opAVX_X_X_XM(x1, xm0, x2, MM_0F | PP_F3, 0x7E, false, -1); } void vmovhlps(const Xmm& x1, const Xmm& x2, const Operand& op = Operand()) { if (!op.isNone() && !op.isXMM()) throw ERR_BAD_COMBINATION; opAVX_X_X_XM(x1, x2, op, MM_0F, 0x12, false); } void vmovlhps(const Xmm& x1, const Xmm& x2, const Operand& op = Operand()) { if (!op.isNone() && !op.isXMM()) throw ERR_BAD_COMBINATION; opAVX_X_X_XM(x1, x2, op, MM_0F, 0x16, false); } void vmovmskpd(const Reg& r, const Xmm& x) { if (!r.isBit(i32e)) throw ERR_BAD_COMBINATION; opAVX_X_X_XM(x.isXMM() ? Xmm(r.getIdx()) : Ymm(r.getIdx()), x.isXMM() ? xm0 : ym0, x, MM_0F | PP_66, 0x50, true, 0); } @@ -990,7 +1361,7 @@ void vmovmskps(const Reg& r, const Xmm& x) { if (!r.isBit(i32e)) throw ERR_BAD_C void vmovntdq(const Address& addr, const Xmm& x) { opAVX_X_X_XM(x, x.isXMM() ? xm0 : ym0, addr, MM_0F | PP_66, 0xE7, true); } void vmovntpd(const Address& addr, const Xmm& x) { opAVX_X_X_XM(x, x.isXMM() ? xm0 : ym0, addr, MM_0F | PP_66, 0x2B, true); } void vmovntps(const Address& addr, const Xmm& x) { opAVX_X_X_XM(x, x.isXMM() ? xm0 : ym0, addr, MM_0F, 0x2B, true); } -void vmovntdqa(const Xmm& x, const Address& addr) { opAVX_X_X_XM(x, xm0, addr, MM_0F38 | PP_66, 0x2A, false); } +void vmovntdqa(const Xmm& x, const Address& addr) { opAVX_X_X_XM(x, x.isXMM() ? xm0 : ymm0, addr, MM_0F38 | PP_66, 0x2A, true); } void vmovsd(const Xmm& x1, const Xmm& x2, const Operand& op = Operand()) { if (!op.isNone() && !op.isXMM()) throw ERR_BAD_COMBINATION; opAVX_X_X_XM(x1, x2, op, MM_0F | PP_F2, 0x10, false); } void vmovsd(const Xmm& x, const Address& addr) { opAVX_X_X_XM(x, xm0, addr, MM_0F | PP_F2, 0x10, false); } void vmovsd(const Address& addr, const Xmm& x) { opAVX_X_X_XM(x, xm0, addr, MM_0F | PP_F2, 0x11, false); } @@ -1001,25 +1372,41 @@ void vcvtss2si(const Reg32& r, const Operand& op) { opAVX_X_X_XM(Xmm(r.getIdx()) void vcvttss2si(const Reg32& r, const Operand& op) { opAVX_X_X_XM(Xmm(r.getIdx()), xm0, op, MM_0F | PP_F3, 0x2C, false, 0); } void vcvtsd2si(const Reg32& r, const Operand& op) { opAVX_X_X_XM(Xmm(r.getIdx()), xm0, op, MM_0F | PP_F2, 0x2D, false, 0); } void vcvttsd2si(const Reg32& r, const Operand& op) { opAVX_X_X_XM(Xmm(r.getIdx()), xm0, op, MM_0F | PP_F2, 0x2C, false, 0); } -void vcvtsi2ss(const Xmm& x, const Operand& op1, const Operand& op2 = Operand()) { if (!op2.isNone() && !(op2.isREG(i32e) || op2.isMEM())) throw ERR_BAD_COMBINATION; opAVX_X_X_XM(x, op1, cvtReg(op2, op2.isREG(), Operand::XMM), MM_0F | PP_F3, 0x2A, false, (op1.isMEM() || op2.isMEM()) ? -1 : (op1.isREG(32) || op2.isREG(32)) ? 0 : 1); } -void vcvtsi2sd(const Xmm& x, const Operand& op1, const Operand& op2 = Operand()) { if (!op2.isNone() && !(op2.isREG(i32e) || op2.isMEM())) throw ERR_BAD_COMBINATION; opAVX_X_X_XM(x, op1, cvtReg(op2, op2.isREG(), Operand::XMM), MM_0F | PP_F2, 0x2A, false, (op1.isMEM() || op2.isMEM()) ? -1 : (op1.isREG(32) || op2.isREG(32)) ? 0 : 1); } -void vcvtps2pd(const Xmm& x, const Operand& op) { if (!op.isMEM() && !op.isXMM()) throw ERR_BAD_COMBINATION; opAVX_X_X_XM(x, x.isXMM() ? xm0 : ym0, cvtReg(op, !op.isMEM(), x.isXMM() ? Operand::XMM : Operand::YMM), MM_0F, 0x5A, true); } -void vcvtdq2pd(const Xmm& x, const Operand& op) { if (!op.isMEM() && !op.isXMM()) throw ERR_BAD_COMBINATION; opAVX_X_X_XM(x, x.isXMM() ? xm0 : ym0, cvtReg(op, !op.isMEM(), x.isXMM() ? Operand::XMM : Operand::YMM), MM_0F | PP_F3, 0xE6, true); } +void vcvtsi2ss(const Xmm& x, const Operand& op1, const Operand& op2 = Operand()) { if (!op2.isNone() && !(op2.isREG(i32e) || op2.isMEM())) throw ERR_BAD_COMBINATION; opAVX_X_X_XMcvt(x, op1, op2, op2.isREG(), Operand::XMM, MM_0F | PP_F3, 0x2A, false, (op1.isMEM() || op2.isMEM()) ? -1 : (op1.isREG(32) || op2.isREG(32)) ? 0 : 1); } +void vcvtsi2sd(const Xmm& x, const Operand& op1, const Operand& op2 = Operand()) { if (!op2.isNone() && !(op2.isREG(i32e) || op2.isMEM())) throw ERR_BAD_COMBINATION; opAVX_X_X_XMcvt(x, op1, op2, op2.isREG(), Operand::XMM, MM_0F | PP_F2, 0x2A, false, (op1.isMEM() || op2.isMEM()) ? -1 : (op1.isREG(32) || op2.isREG(32)) ? 0 : 1); } +void vcvtps2pd(const Xmm& x, const Operand& op) { if (!op.isMEM() && !op.isXMM()) throw ERR_BAD_COMBINATION; opAVX_X_X_XMcvt(x, x.isXMM() ? xm0 : ym0, op, !op.isMEM(), x.isXMM() ? Operand::XMM : Operand::YMM, MM_0F, 0x5A, true); } +void vcvtdq2pd(const Xmm& x, const Operand& op) { if (!op.isMEM() && !op.isXMM()) throw ERR_BAD_COMBINATION; opAVX_X_X_XMcvt(x, x.isXMM() ? xm0 : ym0, op, !op.isMEM(), x.isXMM() ? Operand::XMM : Operand::YMM, MM_0F | PP_F3, 0xE6, true); } void vcvtpd2ps(const Xmm& x, const Operand& op) { if (x.isYMM()) throw ERR_BAD_COMBINATION; opAVX_X_X_XM(op.isYMM() ? Ymm(x.getIdx()) : x, op.isYMM() ? ym0 : xm0, op, MM_0F | PP_66, 0x5A, true); } void vcvtpd2dq(const Xmm& x, const Operand& op) { if (x.isYMM()) throw ERR_BAD_COMBINATION; opAVX_X_X_XM(op.isYMM() ? Ymm(x.getIdx()) : x, op.isYMM() ? ym0 : xm0, op, MM_0F | PP_F2, 0xE6, true); } void vcvttpd2dq(const Xmm& x, const Operand& op) { if (x.isYMM()) throw ERR_BAD_COMBINATION; opAVX_X_X_XM(op.isYMM() ? Ymm(x.getIdx()) : x, op.isYMM() ? ym0 : xm0, op, MM_0F | PP_66, 0xE6, true); } -void vmovq(const Xmm& x, const Address& addr) { opAVX_X_X_XM(x, xm0, addr, MM_0F | PP_F3, 0x7E, false, -1); } -void vmovq(const Address& addr, const Xmm& x) { opAVX_X_X_XM(x, xm0, addr, MM_0F | PP_66, 0xD6, false, -1); } #ifdef XBYAK64 void vmovq(const Xmm& x, const Reg64& reg) { opAVX_X_X_XM(x, xm0, Xmm(reg.getIdx()), MM_0F | PP_66, 0x6E, false, 1); } void vmovq(const Reg64& reg, const Xmm& x) { opAVX_X_X_XM(x, xm0, Xmm(reg.getIdx()), MM_0F | PP_66, 0x7E, false, 1); } -void vmovq(const Xmm& x1, const Xmm& x2) { opAVX_X_X_XM(x1, xm0, x2, MM_0F | PP_F3, 0x7E, false, -1); } -void vpextrq(const Operand& op, const Xmm& x, uint8 imm) { if (!op.isREG(64) && !op.isMEM()) throw ERR_BAD_COMBINATION; opAVX_X_X_XM(x, xm0, cvtReg(op, !op.isMEM(), Operand::XMM), MM_0F3A | PP_66, 0x16, false, 1); db(imm); } -void vpinsrq(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { if (!op.isREG(64) && !op.isMEM()) throw ERR_BAD_COMBINATION; opAVX_X_X_XM(x1, x2, cvtReg(op, !op.isMEM(), Operand::XMM), MM_0F3A | PP_66, 0x22, false, 1); db(imm); } -void vpinsrq(const Xmm& x, const Operand& op, uint8 imm) { if (!op.isREG(64) && !op.isMEM()) throw ERR_BAD_COMBINATION; opAVX_X_X_XM(x, x, cvtReg(op, !op.isMEM(), Operand::XMM), MM_0F3A | PP_66, 0x22, false, 1); db(imm); } +void vpextrq(const Operand& op, const Xmm& x, uint8 imm) { if (!op.isREG(64) && !op.isMEM()) throw ERR_BAD_COMBINATION; opAVX_X_X_XMcvt(x, xm0, op, !op.isMEM(), Operand::XMM, MM_0F3A | PP_66, 0x16, false, 1); db(imm); } +void vpinsrq(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { if (!op.isREG(64) && !op.isMEM()) throw ERR_BAD_COMBINATION; opAVX_X_X_XMcvt(x1, x2, op, !op.isMEM(), Operand::XMM, MM_0F3A | PP_66, 0x22, false, 1); db(imm); } +void vpinsrq(const Xmm& x, const Operand& op, uint8 imm) { if (!op.isREG(64) && !op.isMEM()) throw ERR_BAD_COMBINATION; opAVX_X_X_XMcvt(x, x, op, !op.isMEM(), Operand::XMM, MM_0F3A | PP_66, 0x22, false, 1); db(imm); } void vcvtss2si(const Reg64& r, const Operand& op) { opAVX_X_X_XM(Xmm(r.getIdx()), xm0, op, MM_0F | PP_F3, 0x2D, false, 1); } void vcvttss2si(const Reg64& r, const Operand& op) { opAVX_X_X_XM(Xmm(r.getIdx()), xm0, op, MM_0F | PP_F3, 0x2C, false, 1); } void vcvtsd2si(const Reg64& r, const Operand& op) { opAVX_X_X_XM(Xmm(r.getIdx()), xm0, op, MM_0F | PP_F2, 0x2D, false, 1); } void vcvttsd2si(const Reg64& r, const Operand& op) { opAVX_X_X_XM(Xmm(r.getIdx()), xm0, op, MM_0F | PP_F2, 0x2C, false, 1); } -void movsxd(const Reg64& reg, const Operand& op) { opMovsxd(reg, op); } #endif +void andn(const Reg32e& r1, const Reg32e& r2, const Operand& op) { opGpr(r1, r2, op, MM_0F38, 0xf2, true); } +void mulx(const Reg32e& r1, const Reg32e& r2, const Operand& op) { opGpr(r1, r2, op, MM_0F38 | PP_F2, 0xf6, true); } +void pdep(const Reg32e& r1, const Reg32e& r2, const Operand& op) { opGpr(r1, r2, op, MM_0F38 | PP_F2, 0xf5, true); } +void pext(const Reg32e& r1, const Reg32e& r2, const Operand& op) { opGpr(r1, r2, op, MM_0F38 | PP_F3, 0xf5, true); } +void bextr(const Reg32e& r1, const Operand& op, const Reg32e& r2) { opGpr(r1, op, r2, MM_0F38, 0xf7, false); } +void bzhi(const Reg32e& r1, const Operand& op, const Reg32e& r2) { opGpr(r1, op, r2, MM_0F38, 0xf5, false); } +void sarx(const Reg32e& r1, const Operand& op, const Reg32e& r2) { opGpr(r1, op, r2, MM_0F38 | PP_F3, 0xf7, false); } +void shlx(const Reg32e& r1, const Operand& op, const Reg32e& r2) { opGpr(r1, op, r2, MM_0F38 | PP_66, 0xf7, false); } +void shrx(const Reg32e& r1, const Operand& op, const Reg32e& r2) { opGpr(r1, op, r2, MM_0F38 | PP_F2, 0xf7, false); } +void blsi(const Reg32e& r, const Operand& op) { opGpr(Reg32e(3, r.getBit()), op, r, MM_0F38, 0xf3, false); } +void blsmsk(const Reg32e& r, const Operand& op) { opGpr(Reg32e(2, r.getBit()), op, r, MM_0F38, 0xf3, false); } +void blsr(const Reg32e& r, const Operand& op) { opGpr(Reg32e(1, r.getBit()), op, r, MM_0F38, 0xf3, false); } +void vgatherdpd(const Xmm& x1, const Address& addr, const Xmm& x2) { opGather(x1, addr, x2, MM_0F38 | PP_66, 0x92, 1, 0); } +void vgatherqpd(const Xmm& x1, const Address& addr, const Xmm& x2) { opGather(x1, addr, x2, MM_0F38 | PP_66, 0x93, 1, 1); } +void vgatherdps(const Xmm& x1, const Address& addr, const Xmm& x2) { opGather(x1, addr, x2, MM_0F38 | PP_66, 0x92, 0, 1); } +void vgatherqps(const Xmm& x1, const Address& addr, const Xmm& x2) { opGather(x1, addr, x2, MM_0F38 | PP_66, 0x93, 0, 2); } +void vpgatherdd(const Xmm& x1, const Address& addr, const Xmm& x2) { opGather(x1, addr, x2, MM_0F38 | PP_66, 0x90, 0, 1); } +void vpgatherqd(const Xmm& x1, const Address& addr, const Xmm& x2) { opGather(x1, addr, x2, MM_0F38 | PP_66, 0x91, 0, 2); } +void vpgatherdq(const Xmm& x1, const Address& addr, const Xmm& x2) { opGather(x1, addr, x2, MM_0F38 | PP_66, 0x90, 1, 0); } +void vpgatherqq(const Xmm& x1, const Address& addr, const Xmm& x2) { opGather(x1, addr, x2, MM_0F38 | PP_66, 0x91, 1, 1); } diff --git a/plugins/GSdx/xbyak/xbyak_util.h b/plugins/GSdx/xbyak/xbyak_util.h index 8f757636b8..7d386caf96 100644 --- a/plugins/GSdx/xbyak/xbyak_util.h +++ b/plugins/GSdx/xbyak/xbyak_util.h @@ -3,6 +3,8 @@ /** utility class and functions for Xbyak + Xbyak::util::Clock ; rdtsc timer + Xbyak::util::Cpu ; detect CPU @note this header is UNDER CONSTRUCTION! */ #include "xbyak.h" @@ -31,15 +33,17 @@ #endif #else #ifndef __GNUC_PREREQ - #define __GNUC_PREREQ(major, minor) (((major) << 16) + (minor)) + #define __GNUC_PREREQ(major, minor) ((((__GNUC__) << 16) + (__GNUC_MINOR__)) >= (((major) << 16) + (minor))) #endif #if __GNUC_PREREQ(4, 3) && !defined(__APPLE__) #include #else #if defined(__APPLE__) && defined(XBYAK32) // avoid err : can't find a register in class `BREG' while reloading `asm' #define __cpuid(eaxIn, a, b, c, d) __asm__ __volatile__("pushl %%ebx\ncpuid\nmovl %%ebp, %%esi\npopl %%ebx" : "=a"(a), "=S"(b), "=c"(c), "=d"(d) : "0"(eaxIn)) + #define __cpuid_count(eaxIn, ecxIn, a, b, c, d) __asm__ __volatile__("pushl %%ebx\ncpuid\nmovl %%ebp, %%esi\npopl %%ebx" : "=a"(a), "=S"(b), "=c"(c), "=d"(d) : "0"(eaxIn), "2"(ecxIn)) #else #define __cpuid(eaxIn, a, b, c, d) __asm__ __volatile__("cpuid\n" : "=a"(a), "=b"(b), "=c"(c), "=d"(d) : "0"(eaxIn)) + #define __cpuid_count(eaxIn, ecxIn, a, b, c, d) __asm__ __volatile__("cpuid\n" : "=a"(a), "=b"(b), "=c"(c), "=d"(d) : "0"(eaxIn), "2"(ecxIn)) #endif #endif #endif @@ -59,13 +63,53 @@ class Cpu { { return x[0] | (x[1] << 8) | (x[2] << 16) | (x[3] << 24); } + unsigned int mask(int n) const + { + return (1U << n) - 1; + } + void setFamily() + { + unsigned int data[4]; + getCpuid(1, data); + stepping = data[0] & mask(4); + model = (data[0] >> 4) & mask(4); + family = (data[0] >> 8) & mask(4); + // type = (data[0] >> 12) & mask(2); + extModel = (data[0] >> 16) & mask(4); + extFamily = (data[0] >> 20) & mask(8); + if (family == 0x0f) { + displayFamily = family + extFamily; + } else { + displayFamily = family; + } + if (family == 6 || family == 0x0f) { + displayModel = (extModel << 4) + model; + } else { + displayModel = model; + } + } public: + int model; + int family; + int stepping; + int extModel; + int extFamily; + int displayFamily; // family + extFamily + int displayModel; // model + extModel static inline void getCpuid(unsigned int eaxIn, unsigned int data[4]) { #ifdef _WIN32 __cpuid(reinterpret_cast(data), eaxIn); #else __cpuid(eaxIn, data[0], data[1], data[2], data[3]); +#endif + } + static inline void getCpuidEx(unsigned int eaxIn, unsigned int ecxIn, unsigned int data[4]) + { +#ifdef _WIN32 + __cpuidex(reinterpret_cast(data), eaxIn, ecxIn); +#else + __cpuid_count(eaxIn, ecxIn, data[0], data[1], data[2], data[3]); #endif } static inline uint64 getXfeature() @@ -74,7 +118,9 @@ public: return __xgetbv(0); #else unsigned int eax, edx; - __asm__ volatile("xgetbv" : "=a"(eax), "=d"(edx) : "c"(0)); + // xgetvb is not support on gcc 4.2 +// __asm__ volatile("xgetbv" : "=a"(eax), "=d"(edx) : "c"(0)); + __asm__ volatile(".byte 0x0f, 0x01, 0xd0" : "=a"(eax), "=d"(edx) : "c"(0)); return ((uint64)edx << 32) | eax; #endif } @@ -92,7 +138,7 @@ public: tPOPCNT = 1 << 9, tAESNI = 1 << 10, tSSE5 = 1 << 11, - tOSXSACE = 1 << 12, + tOSXSAVE = 1 << 12, tPCLMULQDQ = 1 << 13, tAVX = 1 << 14, tFMA = 1 << 15, @@ -101,6 +147,10 @@ public: tE3DN = 1 << 17, tSSE4a = 1 << 18, tRDTSCP = 1 << 19, + tAVX2 = 1 << 20, + tGPR1 = 1 << 21, // andn, bextr, blsi, blsmk, blsr, tzcnt + tGPR2 = 1 << 22, // bzhi, mulx, pdep, pext, rorx, sarx, shlx, shrx + tLZCNT = 1 << 23, tINTEL = 1 << 24, tAMD = 1 << 25 @@ -125,6 +175,7 @@ public: type_ |= tINTEL; getCpuid(0x80000001, data); if (data[3] & (1U << 27)) type_ |= tRDTSCP; + if (data[2] & (1U << 5)) type_ |= tLZCNT; } getCpuid(1, data); if (data[2] & (1U << 0)) type_ |= tSSE3; @@ -134,9 +185,14 @@ public: if (data[2] & (1U << 23)) type_ |= tPOPCNT; if (data[2] & (1U << 25)) type_ |= tAESNI; if (data[2] & (1U << 1)) type_ |= tPCLMULQDQ; - if (data[2] & (1U << 27)) type_ |= tOSXSACE; + if (data[2] & (1U << 27)) type_ |= tOSXSAVE; - if (type_ & tOSXSACE) { + if (data[3] & (1U << 15)) type_ |= tCMOV; + if (data[3] & (1U << 23)) type_ |= tMMX; + if (data[3] & (1U << 25)) type_ |= tMMX2 | tSSE; + if (data[3] & (1U << 26)) type_ |= tSSE2; + + if (type_ & tOSXSAVE) { // check XFEATURE_ENABLED_MASK[2:1] = '11b' uint64 bv = getXfeature(); if ((bv & 6) == 6) { @@ -144,11 +200,17 @@ public: if (data[2] & (1U << 12)) type_ |= tFMA; } } - - if (data[3] & (1U << 15)) type_ |= tCMOV; - if (data[3] & (1U << 23)) type_ |= tMMX; - if (data[3] & (1U << 25)) type_ |= tMMX2 | tSSE; - if (data[3] & (1U << 26)) type_ |= tSSE2; + getCpuidEx(7, 0, data); + if (type_ & tAVX && data[1] & 0x20) type_ |= tAVX2; + if (data[1] & (1U << 3)) type_ |= tGPR1; + if (data[1] & (1U << 8)) type_ |= tGPR2; + setFamily(); + } + void putFamily() + { + printf("family=%d, model=%X, stepping=%d, extFamily=%d, extModel=%X\n", + family, model, stepping, extFamily, extModel); + printf("display:family=%X, model=%X\n", displayFamily, displayModel); } bool has(Type type) const { @@ -190,70 +252,251 @@ private: int count_; }; -#ifdef XBYAK32 +#ifdef XBYAK64 +const int UseRCX = 1 << 6; +const int UseRDX = 1 << 7; -namespace local { -#ifdef _WIN32 - #define XBYAK_LOCAL_DEFINE_SET_EIP_TO_REG(x) static inline __declspec(naked) void set_eip_to_ ## x() { \ - __asm { mov x, dword ptr [esp] } __asm { ret } \ +class Pack { + static const size_t maxTblNum = 10; + const Xbyak::Reg64 *tbl_[maxTblNum]; + size_t n_; +public: + Pack() : n_(0) {} + Pack(const Xbyak::Reg64 *tbl, size_t n) { init(tbl, n); } + Pack(const Pack& rhs) + : n_(rhs.n_) + { + for (size_t i = 0; i < n_; i++) tbl_[i] = rhs.tbl_[i]; } + Pack(const Xbyak::Reg64& t0) + { n_ = 1; tbl_[0] = &t0; } + Pack(const Xbyak::Reg64& t1, const Xbyak::Reg64& t0) + { n_ = 2; tbl_[0] = &t0; tbl_[1] = &t1; } + Pack(const Xbyak::Reg64& t2, const Xbyak::Reg64& t1, const Xbyak::Reg64& t0) + { n_ = 3; tbl_[0] = &t0; tbl_[1] = &t1; tbl_[2] = &t2; } + Pack(const Xbyak::Reg64& t3, const Xbyak::Reg64& t2, const Xbyak::Reg64& t1, const Xbyak::Reg64& t0) + { n_ = 4; tbl_[0] = &t0; tbl_[1] = &t1; tbl_[2] = &t2; tbl_[3] = &t3; } + Pack(const Xbyak::Reg64& t4, const Xbyak::Reg64& t3, const Xbyak::Reg64& t2, const Xbyak::Reg64& t1, const Xbyak::Reg64& t0) + { n_ = 5; tbl_[0] = &t0; tbl_[1] = &t1; tbl_[2] = &t2; tbl_[3] = &t3; tbl_[4] = &t4; } + Pack(const Xbyak::Reg64& t5, const Xbyak::Reg64& t4, const Xbyak::Reg64& t3, const Xbyak::Reg64& t2, const Xbyak::Reg64& t1, const Xbyak::Reg64& t0) + { n_ = 6; tbl_[0] = &t0; tbl_[1] = &t1; tbl_[2] = &t2; tbl_[3] = &t3; tbl_[4] = &t4; tbl_[5] = &t5; } + Pack(const Xbyak::Reg64& t6, const Xbyak::Reg64& t5, const Xbyak::Reg64& t4, const Xbyak::Reg64& t3, const Xbyak::Reg64& t2, const Xbyak::Reg64& t1, const Xbyak::Reg64& t0) + { n_ = 7; tbl_[0] = &t0; tbl_[1] = &t1; tbl_[2] = &t2; tbl_[3] = &t3; tbl_[4] = &t4; tbl_[5] = &t5; tbl_[6] = &t6; } + Pack(const Xbyak::Reg64& t7, const Xbyak::Reg64& t6, const Xbyak::Reg64& t5, const Xbyak::Reg64& t4, const Xbyak::Reg64& t3, const Xbyak::Reg64& t2, const Xbyak::Reg64& t1, const Xbyak::Reg64& t0) + { n_ = 8; tbl_[0] = &t0; tbl_[1] = &t1; tbl_[2] = &t2; tbl_[3] = &t3; tbl_[4] = &t4; tbl_[5] = &t5; tbl_[6] = &t6; tbl_[7] = &t7; } + Pack(const Xbyak::Reg64& t8, const Xbyak::Reg64& t7, const Xbyak::Reg64& t6, const Xbyak::Reg64& t5, const Xbyak::Reg64& t4, const Xbyak::Reg64& t3, const Xbyak::Reg64& t2, const Xbyak::Reg64& t1, const Xbyak::Reg64& t0) + { n_ = 9; tbl_[0] = &t0; tbl_[1] = &t1; tbl_[2] = &t2; tbl_[3] = &t3; tbl_[4] = &t4; tbl_[5] = &t5; tbl_[6] = &t6; tbl_[7] = &t7; tbl_[8] = &t8; } + Pack(const Xbyak::Reg64& t9, const Xbyak::Reg64& t8, const Xbyak::Reg64& t7, const Xbyak::Reg64& t6, const Xbyak::Reg64& t5, const Xbyak::Reg64& t4, const Xbyak::Reg64& t3, const Xbyak::Reg64& t2, const Xbyak::Reg64& t1, const Xbyak::Reg64& t0) + { n_ = 10; tbl_[0] = &t0; tbl_[1] = &t1; tbl_[2] = &t2; tbl_[3] = &t3; tbl_[4] = &t4; tbl_[5] = &t5; tbl_[6] = &t6; tbl_[7] = &t7; tbl_[8] = &t8; tbl_[9] = &t9; } + Pack& append(const Xbyak::Reg64& t) + { + if (n_ == 10) { + fprintf(stderr, "ERR Pack::can't append\n"); + throw ERR_BAD_PARAMETER; + } + tbl_[n_++] = &t; + return *this; + } + void init(const Xbyak::Reg64 *tbl, size_t n) + { + if (n > maxTblNum) { + fprintf(stderr, "ERR Pack::init bad n=%d\n", (int)n); + throw ERR_BAD_PARAMETER; + } + n_ = n; + for (size_t i = 0; i < n; i++) { + tbl_[i] = &tbl[i]; + } + } + const Xbyak::Reg64& operator[](size_t n) const + { + if (n >= n_) { + fprintf(stderr, "ERR Pack bad n=%d\n", (int)n); + throw ERR_BAD_PARAMETER; + } + return *tbl_[n]; + } + size_t size() const { return n_; } + /* + get tbl[pos, pos + num) + */ + Pack sub(size_t pos, size_t num = size_t(-1)) const + { + if (num == size_t(-1)) num = n_ - pos; + if (pos + num > n_) { + fprintf(stderr, "ERR Pack::sub bad pos=%d, num=%d\n", (int)pos, (int)num); + throw ERR_BAD_PARAMETER; + } + Pack pack; + pack.n_ = num; + for (size_t i = 0; i < num; i++) { + pack.tbl_[i] = tbl_[pos + i]; + } + return pack; + } +}; + +class StackFrame { +#ifdef XBYAK64_WIN + static const int noSaveNum = 6; + static const int rcxPos = 0; + static const int rdxPos = 1; #else - #define XBYAK_LOCAL_DEFINE_SET_EIP_TO_REG(x) static inline void set_eip_to_ ## x() { \ - __asm__ volatile("movl (%esp), %" #x); \ - } + static const int noSaveNum = 8; + static const int rcxPos = 3; + static const int rdxPos = 2; #endif + Xbyak::CodeGenerator *code_; + int pNum_; + int tNum_; + bool useRcx_; + bool useRdx_; + int saveNum_; + int P_; + bool makeEpilog_; + Xbyak::Reg64 pTbl_[4]; + Xbyak::Reg64 tTbl_[10]; + Pack p_; + Pack t_; + StackFrame(const StackFrame&); + void operator=(const StackFrame&); +public: + const Pack& p; + const Pack& t; + /* + make stack frame + @param sf [in] this + @param pNum [in] num of function parameter(0 <= pNum <= 4) + @param tNum [in] num of temporary register(0 <= tNum <= 10, with UseRCX, UseRDX) + @param stackSizeByte [in] local stack size + @param makeEpilog [in] automatically call close() if true -XBYAK_LOCAL_DEFINE_SET_EIP_TO_REG(eax) -XBYAK_LOCAL_DEFINE_SET_EIP_TO_REG(ecx) -XBYAK_LOCAL_DEFINE_SET_EIP_TO_REG(edx) -XBYAK_LOCAL_DEFINE_SET_EIP_TO_REG(ebx) -XBYAK_LOCAL_DEFINE_SET_EIP_TO_REG(esi) -XBYAK_LOCAL_DEFINE_SET_EIP_TO_REG(edi) -XBYAK_LOCAL_DEFINE_SET_EIP_TO_REG(ebp) - -#undef XBYAK_LOCAL_DEFINE_SET_EIP_TO_REG -} // end of local - -/** - get eip to out register - @note out is not esp -*/ -template -void setEipTo(T *self, const Xbyak::Reg32& out) -{ -#if 0 - self->call("@f"); -self->L("@@"); - self->pop(out); + you can use + rax + gp0, ..., gp(pNum - 1) + gt0, ..., gt(tNum-1) + rcx if tNum & UseRCX + rdx if tNum & UseRDX + rsp[0..stackSizeByte - 1] + */ + StackFrame(Xbyak::CodeGenerator *code, int pNum, int tNum = 0, int stackSizeByte = 0, bool makeEpilog = true) + : code_(code) + , pNum_(pNum) + , tNum_(tNum & ~(UseRCX | UseRDX)) + , useRcx_((tNum & UseRCX) != 0) + , useRdx_((tNum & UseRDX) != 0) + , saveNum_(0) + , P_(0) + , makeEpilog_(makeEpilog) + , p(p_) + , t(t_) + { + using namespace Xbyak; + if (pNum < 0 || pNum > 4) throw ERR_BAD_PNUM; + const int allRegNum = pNum + tNum_ + (useRcx_ ? 1 : 0) + (useRdx_ ? 1 : 0); + if (allRegNum < pNum || allRegNum > 14) throw ERR_BAD_TNUM; + const Reg64& rsp = code->rsp; + const AddressFrame& ptr = code->ptr; + saveNum_ = (std::max)(0, allRegNum - noSaveNum); + const int *tbl = getOrderTbl() + noSaveNum; + P_ = saveNum_ + (stackSizeByte + 7) / 8; + if (P_ > 0 && (P_ & 1) == 0) P_++; // here (rsp % 16) == 8, then increment P_ for 16 byte alignment + P_ *= 8; + if (P_ > 0) code->sub(rsp, P_); +#ifdef XBYAK64_WIN + for (int i = 0; i < (std::min)(saveNum_, 4); i++) { + code->mov(ptr [rsp + P_ + (i + 1) * 8], Reg64(tbl[i])); + } + for (int i = 4; i < saveNum_; i++) { + code->mov(ptr [rsp + P_ - 8 * (saveNum_ - i)], Reg64(tbl[i])); + } #else - int idx = out.getIdx(); - switch (idx) { - case Xbyak::Operand::EAX: - self->call((void*)local::set_eip_to_eax); - break; - case Xbyak::Operand::ECX: - self->call((void*)local::set_eip_to_ecx); - break; - case Xbyak::Operand::EDX: - self->call((void*)local::set_eip_to_edx); - break; - case Xbyak::Operand::EBX: - self->call((void*)local::set_eip_to_ebx); - break; - case Xbyak::Operand::ESI: - self->call((void*)local::set_eip_to_esi); - break; - case Xbyak::Operand::EDI: - self->call((void*)local::set_eip_to_edi); - break; - case Xbyak::Operand::EBP: - self->call((void*)local::set_eip_to_ebp); - break; - default: - assert(0); - } + for (int i = 0; i < saveNum_; i++) { + code->mov(ptr [rsp + P_ - 8 * (saveNum_ - i)], Reg64(tbl[i])); + } #endif -} + int pos = 0; + for (int i = 0; i < pNum; i++) { + pTbl_[i] = Xbyak::Reg64(getRegIdx(pos)); + } + for (int i = 0; i < tNum_; i++) { + tTbl_[i] = Xbyak::Reg64(getRegIdx(pos)); + } + if (useRcx_ && rcxPos < pNum) code_->mov(code_->r10, code_->rcx); + if (useRdx_ && rdxPos < pNum) code_->mov(code_->r11, code_->rdx); + p_.init(pTbl_, pNum); + t_.init(tTbl_, tNum_); + } + /* + make epilog manually + @param callRet [in] call ret() if true + */ + void close(bool callRet = true) + { + using namespace Xbyak; + const Reg64& rsp = code_->rsp; + const AddressFrame& ptr = code_->ptr; + const int *tbl = getOrderTbl() + noSaveNum; +#ifdef XBYAK64_WIN + for (int i = 0; i < (std::min)(saveNum_, 4); i++) { + code_->mov(Reg64(tbl[i]), ptr [rsp + P_ + (i + 1) * 8]); + } + for (int i = 4; i < saveNum_; i++) { + code_->mov(Reg64(tbl[i]), ptr [rsp + P_ - 8 * (saveNum_ - i)]); + } +#else + for (int i = 0; i < saveNum_; i++) { + code_->mov(Reg64(tbl[i]), ptr [rsp + P_ - 8 * (saveNum_ - i)]); + } +#endif + if (P_ > 0) code_->add(rsp, P_); + + if (callRet) code_->ret(); + } + ~StackFrame() + { + if (!makeEpilog_) return; + try { + close(); + } catch (Xbyak::Error e) { + printf("ERR:StackFrame %s\n", ConvertErrorToString(e)); + exit(1); + } catch (...) { + printf("ERR:StackFrame otherwise\n"); + exit(1); + } + } +private: + const int *getOrderTbl() const + { + using namespace Xbyak; + static const int tbl[] = { +#ifdef XBYAK64_WIN + Operand::RCX, Operand::RDX, Operand::R8, Operand::R9, Operand::R10, Operand::R11, Operand::RDI, Operand::RSI, +#else + Operand::RDI, Operand::RSI, Operand::RDX, Operand::RCX, Operand::R8, Operand::R9, Operand::R10, Operand::R11, +#endif + Operand::RBX, Operand::RBP, Operand::R12, Operand::R13, Operand::R14, Operand::R15 + }; + return &tbl[0]; + } + int getRegIdx(int& pos) const + { + assert(pos < 14); + using namespace Xbyak; + const int *tbl = getOrderTbl(); + int r = tbl[pos++]; + if (useRcx_) { + if (r == Operand::RCX) { return Operand::R10; } + if (r == Operand::R10) { r = tbl[pos++]; } + } + if (useRdx_) { + if (r == Operand::RDX) { return Operand::R11; } + if (r == Operand::R11) { return tbl[pos++]; } + } + return r; + } +}; #endif } } // end of util