GSdx software renderer speed-up, using xbyak to JIT compile a few things, more to follow.

git-svn-id: http://pcsx2.googlecode.com/svn/trunk@469 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
gabest11 2009-02-11 07:05:44 +00:00
parent 5249b67a56
commit 9ee9d817c4
19 changed files with 4628 additions and 251 deletions

View File

@ -871,11 +871,11 @@ GPUDrawScanline::GPUDrawScanlineMap::GPUDrawScanlineMap()
#endif
}
IDrawScanline::DrawScanlinePtr GPUDrawScanline::GPUDrawScanlineMap::GetDefaultFunction(DWORD dw)
IDrawScanline::DrawScanlinePtr GPUDrawScanline::GPUDrawScanlineMap::GetDefaultFunction(DWORD key)
{
GPUScanlineSelector sel;
sel.dw = dw;
sel.key = key;
return m_default[sel];
}
@ -899,11 +899,11 @@ GPUDrawScanline::GPUSetupPrimMap::GPUSetupPrimMap()
InitSP_SPRITE(1);
}
IDrawScanline::SetupPrimPtr GPUDrawScanline::GPUSetupPrimMap::GetDefaultFunction(DWORD dw)
IDrawScanline::SetupPrimPtr GPUDrawScanline::GPUSetupPrimMap::GetDefaultFunction(DWORD key)
{
DWORD sprite = (dw >> 0) & 1;
DWORD tme = (dw >> 1) & 1;
DWORD iip = (dw >> 2) & 1;
DWORD sprite = (key >> 0) & 1;
DWORD tme = (key >> 1) & 1;
DWORD iip = (key >> 2) & 1;
return m_default[sprite][tme][iip];
}

View File

@ -50,9 +50,9 @@ union GPUScanlineSelector
DWORD tfx:2; // 5
};
DWORD dw;
DWORD key;
operator DWORD() {return dw & 0xff;}
operator DWORD() {return key & 0xff;}
};
__declspec(align(16)) struct GPUScanlineEnvironment
@ -87,28 +87,28 @@ class GPUDrawScanline : public GSAlignedClass<16>, public IDrawScanline
//
class GPUDrawScanlineMap : public GSFunctionMap<DrawScanlinePtr>
class GPUDrawScanlineMap : public GSFunctionMap<DWORD, DrawScanlinePtr>
{
DrawScanlinePtr m_default[256];
public:
GPUDrawScanlineMap();
DrawScanlinePtr GetDefaultFunction(DWORD dw);
DrawScanlinePtr GetDefaultFunction(DWORD key);
};
GPUDrawScanlineMap m_ds;
//
class GPUSetupPrimMap : public GSFunctionMap<SetupPrimPtr>
class GPUSetupPrimMap : public GSFunctionMap<DWORD, SetupPrimPtr>
{
SetupPrimPtr m_default[2][2][2];
public:
GPUSetupPrimMap();
SetupPrimPtr GetDefaultFunction(DWORD dw);
SetupPrimPtr GetDefaultFunction(DWORD key);
};
GPUSetupPrimMap m_sp;

View File

@ -125,7 +125,7 @@ protected:
GPUScanlineParam p;
p.sel.dw = 0;
p.sel.key = 0;
p.sel.iip = env.PRIM.IIP;
p.sel.me = env.STATUS.ME;
p.sel.abe = env.PRIM.ABE;

View File

@ -567,41 +567,38 @@ REG64_(GIFReg, COLCLAMP)
REG_END
REG64_(GIFReg, DIMX)
UINT32 DM00:3;
UINT32 _PAD00:1;
UINT32 DM01:3;
UINT32 _PAD01:1;
UINT32 DM02:3;
UINT32 _PAD02:1;
UINT32 DM03:3;
UINT32 _PAD03:1;
UINT32 DM10:3;
UINT32 _PAD10:1;
UINT32 DM11:3;
UINT32 _PAD11:1;
UINT32 DM12:3;
UINT32 _PAD12:1;
UINT32 DM13:3;
UINT32 _PAD13:1;
UINT32 DM20:3;
UINT32 _PAD20:1;
UINT32 DM21:3;
UINT32 _PAD21:1;
UINT32 DM22:3;
UINT32 _PAD22:1;
UINT32 DM23:3;
UINT32 _PAD23:1;
UINT32 DM30:3;
UINT32 _PAD30:1;
UINT32 DM31:3;
UINT32 _PAD31:1;
UINT32 DM32:3;
UINT32 _PAD32:1;
UINT32 DM33:3;
UINT32 _PAD33:1;
INT32 DM00:3;
INT32 _PAD00:1;
INT32 DM01:3;
INT32 _PAD01:1;
INT32 DM02:3;
INT32 _PAD02:1;
INT32 DM03:3;
INT32 _PAD03:1;
INT32 DM10:3;
INT32 _PAD10:1;
INT32 DM11:3;
INT32 _PAD11:1;
INT32 DM12:3;
INT32 _PAD12:1;
INT32 DM13:3;
INT32 _PAD13:1;
INT32 DM20:3;
INT32 _PAD20:1;
INT32 DM21:3;
INT32 _PAD21:1;
INT32 DM22:3;
INT32 _PAD22:1;
INT32 DM23:3;
INT32 _PAD23:1;
INT32 DM30:3;
INT32 _PAD30:1;
INT32 DM31:3;
INT32 _PAD31:1;
INT32 DM32:3;
INT32 _PAD32:1;
INT32 DM33:3;
INT32 _PAD33:1;
REG_END
REG64_(GIFReg, DTHE)

View File

@ -32,6 +32,12 @@ GSDrawScanline::GSDrawScanline(GSState* state, int id)
// Destroys every code generator object stored as a value in m_dscg.
// The map entries themselves are left in place; only the pointed-to
// generators are deleted here.
GSDrawScanline::~GSDrawScanline()
{
	for(POSITION cursor = m_dscg.GetHeadPosition(); cursor; )
	{
		// GetNextValue returns the current value and advances the cursor.
		delete m_dscg.GetNextValue(cursor);
	}
}
void GSDrawScanline::BeginDraw(const GSRasterizerData* data, Functions* f)
@ -60,6 +66,7 @@ void GSDrawScanline::BeginDraw(const GSRasterizerData* data, Functions* f)
m_env.afix2 = m_env.afix.yywwlh().sll16(7);
m_env.frb = GSVector4i((int)env.FOGCOL.ai32[0] & 0x00ff00ff);
m_env.fga = GSVector4i((int)(env.FOGCOL.ai32[0] >> 8) & 0x00ff00ff);
m_env.dimx = env.dimx;
if(m_env.sel.fpsm == 1)
{
@ -163,7 +170,7 @@ void GSDrawScanline::BeginDraw(const GSRasterizerData* data, Functions* f)
//
f->sl = m_ds.Lookup(m_env.sel);
f->sl = (DrawScanlinePtr)&GSDrawScanline::DrawScanline; // m_ds.Lookup(m_env.sel);
//
@ -186,16 +193,31 @@ void GSDrawScanline::BeginDraw(const GSRasterizerData* data, Functions* f)
}
f->sp = m_sp.Lookup(sel);
//
GSDrawScanlineCodeGenerator* cg = NULL;
if(!m_dscg.Lookup(m_env.sel, cg))
{
cg = new GSDrawScanlineCodeGenerator(m_env);
m_dscg.SetAt(m_env.sel, cg);
}
m_dsf = (DrawScanlineStaticPtr)cg->getCode();
}
void GSDrawScanline::EndDraw(const GSRasterizerStats& stats)
{
m_ds.UpdateStats(stats, m_state->m_perfmon.GetFrame());
// m_ds.UpdateStats(stats, m_state->m_perfmon.GetFrame());
}
template<DWORD zbe, DWORD fge, DWORD tme, DWORD fst, DWORD iip>
void GSDrawScanline::SetupPrim(const GSVertexSW* vertices, const GSVertexSW& dscan)
{
// TODO: clean up unused parts
// p
GSVector4 p = dscan.p;
@ -311,47 +333,28 @@ void GSDrawScanline::SetupPrim(const GSVertexSW* vertices, const GSVertexSW& dsc
}
else
{
GSVector4i rgba = GSVector4i(vertices[0].c);
GSVector4i c = GSVector4i(vertices[0].c);
GSVector4i rbga = rgba.upl16(rgba.zwxy());
c = c.upl16(c.zwxy());
if(tme == 0)
{
rbga = rbga.srl16(7);
DWORD abe = m_env.sel.abe & 0x3f; // a, b, c
DWORD abea = m_env.sel.abea;
DWORD abeb = m_env.sel.abeb;
DWORD abec = m_env.sel.abec;
DWORD abed = m_env.sel.abed;
if(fge == 0 && abe != 0x3f && !(abe & 0x15) && abea != abeb) // 0x15 = 010101b => a, b, c != 1
{
GSVector4i c[4];
c[0] = rbga;
c[1] = rgba.zzzzh().zzzz();
c[2] = GSVector4i::zero();
c[3] = m_env.afix2;
GSVector4i cc = GSVector4i::lerp16<1>(c[abea], c[abeb], c[abec + 1]);
if(abed == 0)
{
cc = cc.add16(c[0]);
}
m_env.c2.rb = cc.xxxx();
m_env.c2.ga = cc.zzzz().mix16(c[1].srl16(7));
}
c = c.srl16(7);
}
m_env.c.rb = rbga.xxxx();
m_env.c.ga = rbga.zzzz();
m_env.c.rb = c.xxxx();
m_env.c.ga = c.zzzz();
}
}
// Member-function entry point for drawing one scanline.
// It only forwards its arguments to m_dsf, the static function pointer that
// BeginDraw obtains from the code generator's getCode().
// NOTE(review): m_dsf must have been set by BeginDraw before this is called;
// there is no null check here.
void GSDrawScanline::DrawScanline(int top, int left, int right, const GSVertexSW& v)
{
// TODO: call this directly from rasterizer
m_dsf(top, left, right, v);
}
/*
GSVector4i GSDrawScanline::Wrap(const GSVector4i& t)
{
GSVector4i clamp = t.sat_i16(m_env.t.min, m_env.t.max);
@ -1133,7 +1136,7 @@ void GSDrawScanline::DrawScanlineEx(int top, int left, int right, const GSVertex
GSVector4i rb, ga;
if(tfx == TFX_NONE && fge == 0 && abea != 1 && abeb != 1 && abec != 1 && abea != abeb)
if(!iip && tfx == TFX_NONE && !fge && abea != 1 && abeb != 1 && abec != 1 && abea != abeb)
{
c[0] = m_env.c2.rb;
c[1] = m_env.c2.ga;
@ -1236,15 +1239,9 @@ void GSDrawScanline::DrawScanlineEx(int top, int left, int right, const GSVertex
}
}
}
*/
void GSDrawScanline::DrawSolidRect(const GSVector4i& r, const GSVertexSW& v)
{
/*
static FILE* s_fp = NULL;
if(!s_fp) s_fp = fopen("c:\\log2.txt", "w");
__int64 start = __rdtsc();
int size = (r.z - r.x) * (r.w - r.y);
*/
ASSERT(r.y >= 0);
ASSERT(r.w >= 0);
@ -1318,10 +1315,6 @@ int size = (r.z - r.x) * (r.w - r.y);
}
}
}
/*
__int64 stop = __rdtsc();
fprintf(s_fp, "%I64d => %I64d = %I64d (%d,%d - %d,%d) %d\n", start, stop, stop - start, r.x, r.y, r.z, r.w, size);
*/
}
template<class T, bool masked>
@ -1396,7 +1389,7 @@ void GSDrawScanline::FillBlock(const GSVector4i* row, int* col, const GSVector4i
}
}
}
/*
//
GSDrawScanline::GSDrawScanlineMap::GSDrawScanlineMap()
@ -2736,11 +2729,11 @@ GSDrawScanline::GSDrawScanlineMap::GSDrawScanlineMap()
#endif
}
IDrawScanline::DrawScanlinePtr GSDrawScanline::GSDrawScanlineMap::GetDefaultFunction(DWORD dw)
IDrawScanline::DrawScanlinePtr GSDrawScanline::GSDrawScanlineMap::GetDefaultFunction(DWORD key)
{
GSScanlineSelector sel;
sel.dw = dw;
sel.key = key;
return m_default[sel.fpsm][sel.zpsm][sel.ztst][sel.iip];
}
@ -2755,19 +2748,19 @@ void GSDrawScanline::GSDrawScanlineMap::PrintStats()
while(pos)
{
DWORD dw;
DWORD key;
ActivePtr* p;
m_map_active.GetNextAssoc(pos, dw, p);
m_map_active.GetNextAssoc(pos, key, p);
if(m_map.Lookup(dw))
if(m_map.Lookup(key))
{
continue;
}
GSScanlineSelector sel;
sel.dw = dw;
sel.key = key;
if(p->frames > 30 && !sel.IsSolidRect())
{
@ -2775,7 +2768,7 @@ void GSDrawScanline::GSDrawScanlineMap::PrintStats()
if(tpf >= 500)
{
_ftprintf(fp, _T("InitDS_Sel(0x%08x); // %6.2f%%\n"), sel.dw, (float)tpf / 100);
_ftprintf(fp, _T("InitDS_Sel(0x%08x); // %6.2f%%\n"), (DWORD)sel, (float)tpf / 100);
}
}
}
@ -2785,7 +2778,7 @@ void GSDrawScanline::GSDrawScanlineMap::PrintStats()
}
//
*/
GSDrawScanline::GSSetupPrimMap::GSSetupPrimMap()
{
#define InitSP_IIP(zbe, fge, tme, fst, iip) \
@ -2811,13 +2804,13 @@ GSDrawScanline::GSSetupPrimMap::GSSetupPrimMap()
InitSP_ZBE(1);
}
IDrawScanline::SetupPrimPtr GSDrawScanline::GSSetupPrimMap::GetDefaultFunction(DWORD dw)
IDrawScanline::SetupPrimPtr GSDrawScanline::GSSetupPrimMap::GetDefaultFunction(DWORD key)
{
DWORD zbe = (dw >> 0) & 1;
DWORD fge = (dw >> 1) & 1;
DWORD tme = (dw >> 2) & 1;
DWORD fst = (dw >> 3) & 1;
DWORD iip = (dw >> 4) & 1;
DWORD zbe = (key >> 0) & 1;
DWORD fge = (key >> 1) & 1;
DWORD tme = (key >> 2) & 1;
DWORD fst = (key >> 3) & 1;
DWORD iip = (key >> 4) & 1;
return m_default[zbe][fge][tme][fst][iip];
}
@ -2831,7 +2824,7 @@ const GSVector4 GSDrawScanline::m_shift[4] =
GSVector4(-2.0f, -1.0f, 0.0f, 1.0f),
GSVector4(-3.0f, -2.0f, -1.0f, 0.0f),
};
/*
const GSVector4i GSDrawScanline::m_test[8] =
{
GSVector4i::zero(),
@ -2843,3 +2836,4 @@ const GSVector4i GSDrawScanline::m_test[8] =
GSVector4i(0x00000000, 0x00000000, 0x00000000, 0xffffffff),
GSVector4i::zero(),
};
*/

View File

@ -23,150 +23,61 @@
#include "GSState.h"
#include "GSRasterizer.h"
#include "GSScanlineEnvironment.h"
#include "GSDrawScanlineCodeGenerator.h"
#include "GSAlignedClass.h"
union GSScanlineSelector
{
struct
{
DWORD fpsm:2; // 0
DWORD zpsm:2; // 2
DWORD ztst:2; // 4 (0: off, 1: write, 2: test (ge), 3: test (g))
DWORD atst:3; // 6
DWORD afail:2; // 9
DWORD iip:1; // 11
DWORD tfx:3; // 12
DWORD tcc:1; // 15
DWORD fst:1; // 16
DWORD ltf:1; // 17
DWORD tlu:1; // 18
DWORD fge:1; // 19
DWORD date:1; // 20
DWORD abea:2; // 21
DWORD abeb:2; // 23
DWORD abec:2; // 25
DWORD abed:2; // 27
DWORD pabe:1; // 29
DWORD rfb:1; // 30
DWORD sprite:1; // 31
};
struct
{
DWORD _pad1:21;
DWORD abe:8;
DWORD _pad2:3;
};
DWORD dw;
operator DWORD() {return dw;}
bool IsSolidRect()
{
return sprite
&& iip == 0
&& tfx == TFX_NONE
&& abe == 255
&& ztst <= 1
&& atst <= 1
&& date == 0
&& fge == 0;
}
};
__declspec(align(16)) struct GSScanlineEnvironment
{
GSScanlineSelector sel;
void* vm;
const void* tex;
const DWORD* clut;
DWORD tw;
GSVector4i* fbr;
GSVector4i* zbr;
int** fbc;
int** zbc;
GSVector2i* fzbr;
GSVector2i* fzbc;
GSVector4i fm, zm;
struct {GSVector4i min, max, mask;} t; // [u] x 4 [v] x 4
GSVector4i datm;
GSVector4i colclamp;
GSVector4i fba;
GSVector4i aref;
GSVector4i afix, afix2;
GSVector4i frb, fga;
struct {GSVector4 z, s, t, q; GSVector4i rb, ga, f, si, ti, _pad[3];} d[4];
struct {GSVector4 z, stq; GSVector4i c, f, st;} d4;
struct {GSVector4i rb, ga;} c;
struct {GSVector4i z, f;} p;
struct {GSVector4i rb, ga;} c2;
};
__declspec(align(16)) struct GSScanlineParam
{
GSScanlineSelector sel;
void* vm;
const void* tex;
const DWORD* clut;
DWORD tw;
GSLocalMemory::Offset* fbo;
GSLocalMemory::Offset* zbo;
GSLocalMemory::Offset4* fzbo;
DWORD fm, zm;
};
class GSDrawScanline : public GSAlignedClass<16>, public IDrawScanline
{
GSScanlineEnvironment m_env;
static const GSVector4 m_shift[4];
static const GSVector4i m_test[8];
/* static const GSVector4i m_test[8];
//
class GSDrawScanlineMap : public GSFunctionMap<DrawScanlinePtr>
class GSDrawScanlineMap : public GSFunctionMap<DWORD, DrawScanlinePtr>
{
DrawScanlinePtr m_default[4][4][4][2];
public:
GSDrawScanlineMap();
DrawScanlinePtr GetDefaultFunction(DWORD dw);
DrawScanlinePtr GetDefaultFunction(DWORD key);
void PrintStats();
};
GSDrawScanlineMap m_ds;
*/
//
class GSSetupPrimMap : public GSFunctionMap<SetupPrimPtr>
class GSSetupPrimMap : public GSFunctionMap<DWORD, SetupPrimPtr>
{
SetupPrimPtr m_default[2][2][2][2][2];
public:
GSSetupPrimMap();
SetupPrimPtr GetDefaultFunction(DWORD dw);
SetupPrimPtr GetDefaultFunction(DWORD key);
};
GSSetupPrimMap m_sp;
//
template<DWORD zbe, DWORD fge, DWORD tme, DWORD fst, DWORD iip>
void SetupPrim(const GSVertexSW* vertices, const GSVertexSW& dscan);
//
CRBMap<UINT64, GSDrawScanlineCodeGenerator*> m_dscg;
DrawScanlineStaticPtr m_dsf;
void DrawScanline(int top, int left, int right, const GSVertexSW& v);
/*
//
__forceinline GSVector4i Wrap(const GSVector4i& t);
__forceinline void SampleTexture(DWORD ltf, DWORD tlu, const GSVector4i& u, const GSVector4i& v, GSVector4i* c);
@ -187,7 +98,7 @@ class GSDrawScanline : public GSAlignedClass<16>, public IDrawScanline
template<DWORD sel>
void DrawScanlineEx(int top, int left, int right, const GSVertexSW& v);
*/
//
void DrawSolidRect(const GSVector4i& r, const GSVertexSW& v);
@ -213,5 +124,5 @@ public:
void BeginDraw(const GSRasterizerData* data, Functions* f);
void EndDraw(const GSRasterizerStats& stats);
void PrintStats() {m_ds.PrintStats();}
void PrintStats() {/*m_ds.PrintStats();*/}
};

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,74 @@
/*
* Copyright (C) 2007-2009 Gabest
* http://www.gabest.org
*
* This Program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2, or (at your option)
* any later version.
*
* This Program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with GNU Make; see the file COPYING. If not, write to
* the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
* http://www.gnu.org/copyleft/gpl.html
*
*/
#pragma once
#include "GSScanlineEnvironment.h"
#include "xbyak/xbyak.h"
#include "xbyak/xbyak_util.h"
using namespace Xbyak;
// Xbyak-based code generator for the scanline drawing routine.
// One instance is constructed from a GSScanlineEnvironment; callers obtain the
// generated routine through the inherited CodeGenerator interface.
// Only declarations live in this header; the private helpers below are
// presumably the individual stages emitted by Generate() - confirm against
// the implementation file.
class GSDrawScanlineCodeGenerator : public CodeGenerator
{
// Declared private and not defined in this header: the class holds a
// reference member (m_env), so a compiler-generated assignment is not possible.
void operator = (const GSDrawScanlineCodeGenerator&);
// Static table of eight vectors; defined outside this header.
static const GSVector4i m_test[8];
util::Cpu m_cpu;
// Environment this generator was constructed with (held by reference, so the
// referenced object must outlive the generator).
GSScanlineEnvironment& m_env;
// Pipeline-stage emitters (names suggest per-pixel stages - TODO confirm order
// and semantics in the .cpp).
void Generate();
void Init(int params);
void Step();
void TestZ(const Xmm& temp1, const Xmm& temp2);
void SampleTexture();
void AlphaTFX();
void TestAlpha();
void ColorTFX();
void Fog();
void ReadFrame();
void TestDestAlpha();
void WriteZBuf();
void AlphaBlend();
void WriteFrame(int params);
// Memory access helpers operating on explicit registers.
void ReadPixel(const Xmm& dst, const Reg32& addr);
void WritePixel(const Xmm& src, const Xmm& temp, const Reg32& addr, uint8 i, int psm);
void ReadTexel(const Xmm& dst, const Xmm& addr, const Reg32& base, const Xmm& temp1, const Xmm& temp2);
void ReadTexel(const Xmm& dst, const Xmm& addr, const Reg32& base, uint8 i);
void Wrap(const Xmm& uv, const Xmm& temp);
// Small arithmetic/blend building blocks; the names mirror GSVector4i
// operations used elsewhere in the renderer (e.g. lerp16, mix16).
template<int shift> void modulate16(const Xmm& a, const Operand& f);
template<int shift> void lerp16(const Xmm& a, const Xmm& b, const Xmm& f);
void mix16(const Xmm& a, const Xmm& b, const Xmm& temp);
void clamp16(const Xmm& a, const Xmm& temp);
void alltrue(const Xmm& a, const Reg32& temp, LPCTSTR label);
void blend8(const Xmm& a, const Xmm& b);
void blend(const Xmm& a, const Xmm& b, const Xmm& mask);
void blend8r(const Xmm& b, const Xmm& a);
void blendr(const Xmm& b, const Xmm& a, const Xmm& mask);
public:
// Binds the generator to env (stored by reference).
GSDrawScanlineCodeGenerator(GSScanlineEnvironment& env);
};

View File

@ -71,6 +71,22 @@ public:
CTXT[0].Reset();
CTXT[1].Reset();
memset(dimx, 0, sizeof(dimx));
}
GSVector4i dimx[8];
// Rebuilds the dimx[] lookup vectors from the 4x4 DIMX register matrix.
//
// For matrix row r (DMr0..DMr3):
//   dimx[2 * r + 1] holds the four row entries interleaved with zeros,
//   dimx[2 * r]     is that same vector passed through xxzzlh().
//
// Each even entry must be derived from the odd entry of the SAME row. The
// previous code shuffled dimx[2] and dimx[4] from themselves, so rows 1 and 2
// never picked up the register values. Statements are also terminated with
// ';' rather than chained with the comma operator.
void UpdateDIMX()
{
	// row 0
	dimx[1] = GSVector4i(DIMX.DM00, 0, DIMX.DM01, 0, DIMX.DM02, 0, DIMX.DM03, 0);
	dimx[0] = dimx[1].xxzzlh();

	// row 1
	dimx[3] = GSVector4i(DIMX.DM10, 0, DIMX.DM11, 0, DIMX.DM12, 0, DIMX.DM13, 0);
	dimx[2] = dimx[3].xxzzlh();

	// row 2
	dimx[5] = GSVector4i(DIMX.DM20, 0, DIMX.DM21, 0, DIMX.DM22, 0, DIMX.DM23, 0);
	dimx[4] = dimx[5].xxzzlh();

	// row 3
	dimx[7] = GSVector4i(DIMX.DM30, 0, DIMX.DM31, 0, DIMX.DM32, 0, DIMX.DM33, 0);
	dimx[6] = dimx[7].xxzzlh();
}
};

View File

@ -40,21 +40,21 @@ struct GSRasterizerStats
}
};
template<class T> class GSFunctionMap
template<class KEY, class VALUE> class GSFunctionMap
{
protected:
struct ActivePtr
{
UINT64 frame, frames;
__int64 ticks, pixels;
T f;
VALUE f;
};
CRBMap<DWORD, T> m_map;
CRBMap<DWORD, ActivePtr*> m_map_active;
CRBMap<KEY, VALUE> m_map;
CRBMap<KEY, ActivePtr*> m_map_active;
ActivePtr* m_active;
virtual T GetDefaultFunction(DWORD sel) = 0;
virtual VALUE GetDefaultFunction(KEY key) = 0;
public:
GSFunctionMap()
@ -74,18 +74,18 @@ public:
m_map_active.RemoveAll();
}
void SetAt(DWORD sel, T f)
void SetAt(KEY key, VALUE f)
{
m_map.SetAt(sel, f);
m_map.SetAt(key, f);
}
T Lookup(DWORD sel)
VALUE Lookup(KEY key)
{
m_active = NULL;
if(!m_map_active.Lookup(sel, m_active))
if(!m_map_active.Lookup(key, m_active))
{
CRBMap<DWORD, T>::CPair* pair = m_map.Lookup(sel);
CRBMap<KEY, VALUE>::CPair* pair = m_map.Lookup(key);
ActivePtr* p = new ActivePtr();
@ -93,9 +93,9 @@ public:
p->frame = (UINT64)-1;
p->f = pair ? pair->m_value : GetDefaultFunction(sel);
p->f = pair ? pair->m_value : GetDefaultFunction(key);
m_map_active.SetAt(sel, p);
m_map_active.SetAt(key, p);
m_active = p;
}
@ -138,10 +138,10 @@ public:
while(pos)
{
DWORD sel;
KEY key;
ActivePtr* p;
m_map_active.GetNextAssoc(pos, sel, p);
m_map_active.GetNextAssoc(pos, key, p);
if(p->frames > 0)
{
@ -150,7 +150,7 @@ public:
__int64 ppf = p->frames > 0 ? p->pixels / p->frames : 0;
printf("[%08x]%c %6.2f%% | %5.2f%% | f %4I64d | p %10I64d | tpp %4I64d | tpf %9I64d | ppf %7I64d\n",
sel, !m_map.Lookup(sel) ? '*' : ' ',
key, !m_map.Lookup(key) ? '*' : ' ',
(float)(tpf * 10000 / 50000000) / 100,
(float)(tpf * 10000 / ttpf) / 100,
p->frames, p->pixels,

View File

@ -54,6 +54,7 @@ public:
typedef void (IDrawScanline::*DrawScanlinePtr)(int top, int left, int right, const GSVertexSW& v);
typedef void (IDrawScanline::*DrawSolidRectPtr)(const GSVector4i& r, const GSVertexSW& v);
typedef void (IDrawScanline::*SetupPrimPtr)(const GSVertexSW* vertices, const GSVertexSW& dscan);
typedef void (*DrawScanlineStaticPtr)(int top, int left, int right, const GSVertexSW& v);
struct Functions
{

View File

@ -256,7 +256,7 @@ protected:
p.zbo = m_mem.GetOffset(context->ZBUF.Block(), context->FRAME.FBW, context->ZBUF.PSM);
p.fzbo = m_mem.GetOffset4(context->FRAME, context->ZBUF);
p.sel.dw = 0;
p.sel.key = 0;
p.sel.fpsm = 3;
p.sel.zpsm = 3;
@ -291,6 +291,9 @@ protected:
bool fwrite = p.fm != 0xffffffff;
bool ftest = p.sel.atst != ATST_ALWAYS || context->TEST.DATE && context->FRAME.PSM != PSM_PSMCT24;
p.sel.fwrite = fwrite;
p.sel.ftest = ftest;
if(fwrite || ftest)
{
p.sel.fpsm = GSUtil::EncodePSM(context->FRAME.PSM);
@ -307,6 +310,8 @@ protected:
p.sel.fst = PRIM->FST;
p.sel.ltf = context->TEX1.IsLinear();
p.sel.tlu = GSLocalMemory::m_psm[context->TEX0.PSM].pal > 0;
p.sel.wms = ((context->CLAMP.WMS + 1) >> 1) & 1;
p.sel.wmt = ((context->CLAMP.WMT + 1) >> 1) & 1;
if(p.sel.iip == 0 && p.sel.tfx == TFX_MODULATE && p.sel.tcc)
{
@ -466,11 +471,18 @@ protected:
{
p.sel.rfb = 1;
}
p.sel.colclamp = env.COLCLAMP.CLAMP;
p.sel.fba = context->FBA.FBA;
p.sel.dthe = env.DTHE.DTHE;
}
bool zwrite = p.zm != 0xffffffff;
bool ztest = context->TEST.ZTE && context->TEST.ZTST > 1;
p.sel.zwrite = zwrite;
p.sel.ztest = ztest;
if(zwrite || ztest)
{
p.sel.zpsm = GSUtil::EncodePSM(context->ZBUF.PSM);

View File

@ -0,0 +1,144 @@
/*
* Copyright (C) 2007-2009 Gabest
* http://www.gabest.org
*
* This Program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2, or (at your option)
* any later version.
*
* This Program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with GNU Make; see the file COPYING. If not, write to
* the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
* http://www.gnu.org/copyleft/gpl.html
*
*/
#pragma once
#include "GSLocalMemory.h"
#include "GSVector.h"
// Packed description of the scanline pipeline state.
// The same storage can be viewed as individual bit fields, as grouped fields
// (abe / fb / zb), as two 32-bit halves (lo / hi) or as one 64-bit key.
// The trailing number on each field is its starting bit offset; these offsets
// assume the compiler packs consecutive DWORD bit fields without gaps and
// continues into the next DWORD after bit 31 - TODO confirm for non-MSVC builds.
union GSScanlineSelector
{
// View 1: individual fields.
struct
{
DWORD fpsm:2; // 0
DWORD zpsm:2; // 2
DWORD ztst:2; // 4 (0: off, 1: write, 2: test (ge), 3: test (g))
DWORD atst:3; // 6
DWORD afail:2; // 9
DWORD iip:1; // 11
DWORD tfx:3; // 12
DWORD tcc:1; // 15
DWORD fst:1; // 16
DWORD ltf:1; // 17
DWORD tlu:1; // 18
DWORD fge:1; // 19
DWORD date:1; // 20
DWORD abea:2; // 21
DWORD abeb:2; // 23
DWORD abec:2; // 25
DWORD abed:2; // 27
DWORD pabe:1; // 29
DWORD rfb:1; // 30
DWORD sprite:1; // 31
DWORD fwrite:1; // 32
DWORD ftest:1; // 33
DWORD zwrite:1; // 34
DWORD ztest:1; // 35
DWORD wms:1; // 36 (0: repeat, 1: clamp)
DWORD wmt:1; // 37
DWORD colclamp:1; // 38
DWORD fba:1; // 39
DWORD dthe:1; // 40
};
// View 2: grouped fields. abe overlays abea..abed (bits 21-28),
// fb overlays fwrite/ftest (bits 32-33), zb overlays zwrite/ztest (bits 34-35).
struct
{
DWORD _pad1:21;
DWORD abe:8;
DWORD _pad2:3;
DWORD fb:2;
DWORD zb:2;
};
// View 3: the two 32-bit halves of the key.
struct
{
DWORD lo;
DWORD hi;
};
// View 4: the whole selector as a single 64-bit value.
UINT64 key;
// Converts to the low 32 bits only; fields at bit 32 and above are dropped.
operator DWORD() {return lo;}
// Converts to the full 64-bit key.
operator UINT64() {return key;}
// Returns true when the selector describes a sprite with no gouraud shading
// (iip == 0), tfx == TFX_NONE, all four abe sub-fields equal to 3 (abe == 255),
// ztst and atst both <= 1, and both date and fge disabled.
bool IsSolidRect()
{
return sprite
&& iip == 0
&& tfx == TFX_NONE
&& abe == 255
&& ztst <= 1
&& atst <= 1
&& date == 0
&& fge == 0;
}
};
// Per-draw parameters handed from the renderer to the scanline drawer.
// Declared with 16-byte alignment.
__declspec(align(16)) struct GSScanlineParam
{
// Pipeline state selector for this draw.
GSScanlineSelector sel;
// Raw pointers to memory, texture data and palette; ownership is not
// expressed here - presumably owned by the caller, TODO confirm.
void* vm;
const void* tex;
const DWORD* clut;
DWORD tw;
// Offset tables for the frame buffer, the z buffer and the combined
// frame/z layout (the renderer fills zbo and fzbo from GetOffset / GetOffset4).
GSLocalMemory::Offset* fbo;
GSLocalMemory::Offset* zbo;
GSLocalMemory::Offset4* fzbo;
// Frame and z masks; the renderer treats a value of 0xffffffff as
// "nothing is written" when computing fwrite / zwrite.
DWORD fm, zm;
};
// Working state used while drawing scanlines, declared with 16-byte alignment.
// NOTE(review): generated code is constructed from a reference to this struct,
// so it presumably addresses members by offset - treat the member order and
// sizes as part of the contract and confirm before reordering anything.
__declspec(align(16)) struct GSScanlineEnvironment
{
// Pipeline state selector for the current draw.
GSScanlineSelector sel;
// Raw memory / texture / palette pointers (not owned by this struct as far as
// this header shows).
void* vm;
const void* tex;
const DWORD* clut;
DWORD tw;
// Row / column address tables for the frame and z buffers
// (naming suggests r = row, c = column - TODO confirm).
GSVector4i* fbr;
GSVector4i* zbr;
int** fbc;
int** zbc;
GSVector2i* fzbr;
GSVector2i* fzbc;
// Pointer to the dither vectors; BeginDraw assigns it from the drawing
// environment's dimx array.
GSVector4i* dimx;
// Frame and z write masks expanded to vectors.
GSVector4i fm, zm;
struct {GSVector4i min, max, mask;} t; // [u] x 4 [v] x 4
GSVector4i datm;
GSVector4i colclamp;
GSVector4i fba;
GSVector4i aref;
// afix2 is derived from afix in BeginDraw (afix.yywwlh().sll16(7)).
GSVector4i afix, afix2;
// Fog colour split into two vectors; BeginDraw fills them from FOGCOL
// masked with 0x00ff00ff (frb from the value as is, fga from the value >> 8).
GSVector4i frb, fga;
// Per-step deltas, four entries. Each entry is 9 data vectors plus 7 padding
// vectors, i.e. 16 vectors per entry (256 bytes if each vector is 16 bytes).
struct {GSVector4 z, s, t, q; GSVector4i rb, ga, f, si, ti, _pad[7];} d[4];
struct {GSVector4 z, stq; GSVector4i c, f, st;} d4;
struct {GSVector4i rb, ga;} c;
struct {GSVector4i z, f;} p;
// Scratch slots; presumably spill space for the generated code - TODO confirm.
struct {GSVector4i z, f, s, t, q, rb, ga, zs, zd, uf, vf;} temp;
};

View File

@ -798,12 +798,21 @@ template<int i> void GSState::GIFRegHandlerALPHA(GIFReg* r)
// Handles a write to the DIMX register.
// When the incoming value differs from the stored one, pending work is flushed
// before the register is overwritten, and the derived dimx table is rebuilt
// afterwards. The register itself is stored unconditionally.
void GSState::GIFRegHandlerDIMX(GIFReg* r)
{
	const bool changed = !(m_env.DIMX == (GSVector4i)r->DIMX).alltrue();

	if(changed)
	{
		Flush();
	}

	m_env.DIMX = (GSVector4i)r->DIMX;

	if(changed)
	{
		m_env.UpdateDIMX();
	}
}
void GSState::GIFRegHandlerDTHE(GIFReg* r)
@ -1573,6 +1582,8 @@ int GSState::Defrost(const GSFreezeData* fd)
UpdateVertexKick();
m_env.UpdateDIMX();
m_env.CTXT[0].UpdateScissor();
m_env.CTXT[1].UpdateScissor();

View File

@ -1,7 +1,7 @@
<?xml version="1.0" encoding="windows-1250"?>
<VisualStudioProject
ProjectType="Visual C++"
Version="9,00"
Version="9.00"
Name="GSdx"
ProjectGUID="{18E42F6F-3A62-41EE-B42F-79366C4F1E95}"
RootNamespace="GSdx"
@ -22,7 +22,7 @@
<Configuration
Name="Debug|Win32"
ConfigurationType="2"
InheritedPropertySheets="vsprops\common.vsprops;vsprops\debug.vsprops"
InheritedPropertySheets=".\vsprops\common.vsprops;.\vsprops\debug.vsprops"
UseOfMFC="1"
CharacterSet="2"
>
@ -86,7 +86,7 @@
<Configuration
Name="Debug|x64"
ConfigurationType="2"
InheritedPropertySheets="vsprops\common.vsprops;vsprops\debug.vsprops"
InheritedPropertySheets=".\vsprops\common.vsprops;.\vsprops\debug.vsprops"
UseOfMFC="1"
CharacterSet="2"
>
@ -150,7 +150,7 @@
<Configuration
Name="Release|Win32"
ConfigurationType="2"
InheritedPropertySheets="vsprops\common.vsprops;vsprops\release.vsprops"
InheritedPropertySheets=".\vsprops\common.vsprops;.\vsprops\release.vsprops"
UseOfMFC="1"
CharacterSet="2"
WholeProgramOptimization="1"
@ -215,7 +215,7 @@
<Configuration
Name="Release|x64"
ConfigurationType="2"
InheritedPropertySheets="vsprops\common.vsprops;vsprops\release.vsprops"
InheritedPropertySheets=".\vsprops\common.vsprops;.\vsprops\release.vsprops"
UseOfMFC="1"
CharacterSet="2"
WholeProgramOptimization="1"
@ -280,7 +280,7 @@
<Configuration
Name="Debug SSE2|Win32"
ConfigurationType="2"
InheritedPropertySheets="vsprops\common.vsprops;vsprops\debug.vsprops;vsprops\sse2.vsprops"
InheritedPropertySheets=".\vsprops\common.vsprops;.\vsprops\debug.vsprops;.\vsprops\sse2.vsprops"
UseOfMFC="1"
CharacterSet="2"
>
@ -344,7 +344,7 @@
<Configuration
Name="Debug SSE2|x64"
ConfigurationType="2"
InheritedPropertySheets="vsprops\common.vsprops;vsprops\debug.vsprops"
InheritedPropertySheets=".\vsprops\common.vsprops;.\vsprops\debug.vsprops"
UseOfMFC="1"
CharacterSet="2"
WholeProgramOptimization="0"
@ -407,7 +407,7 @@
<Configuration
Name="Release SSE2|Win32"
ConfigurationType="2"
InheritedPropertySheets="vsprops\common.vsprops;vsprops\release.vsprops;vsprops\sse2.vsprops"
InheritedPropertySheets=".\vsprops\common.vsprops;.\vsprops\release.vsprops;.\vsprops\sse2.vsprops"
UseOfMFC="1"
CharacterSet="2"
WholeProgramOptimization="1"
@ -472,7 +472,7 @@
<Configuration
Name="Release SSE2|x64"
ConfigurationType="2"
InheritedPropertySheets="vsprops\common.vsprops;vsprops\release.vsprops"
InheritedPropertySheets=".\vsprops\common.vsprops;.\vsprops\release.vsprops"
UseOfMFC="1"
CharacterSet="2"
WholeProgramOptimization="1"
@ -535,7 +535,7 @@
<Configuration
Name="Release SSSE3|Win32"
ConfigurationType="2"
InheritedPropertySheets="vsprops\common.vsprops;vsprops\release.vsprops;vsprops\ssse3.vsprops"
InheritedPropertySheets=".\vsprops\common.vsprops;.\vsprops\release.vsprops;.\vsprops\ssse3.vsprops"
UseOfMFC="1"
CharacterSet="2"
WholeProgramOptimization="1"
@ -600,7 +600,7 @@
<Configuration
Name="Release SSSE3|x64"
ConfigurationType="2"
InheritedPropertySheets="vsprops\common.vsprops;vsprops\release.vsprops;vsprops\ssse3.vsprops"
InheritedPropertySheets=".\vsprops\common.vsprops;.\vsprops\release.vsprops;.\vsprops\ssse3.vsprops"
UseOfMFC="1"
CharacterSet="2"
WholeProgramOptimization="1"
@ -663,7 +663,7 @@
<Configuration
Name="Debug SSSE3|Win32"
ConfigurationType="2"
InheritedPropertySheets="vsprops\common.vsprops;vsprops\debug.vsprops;vsprops\ssse3.vsprops"
InheritedPropertySheets=".\vsprops\common.vsprops;.\vsprops\debug.vsprops;.\vsprops\ssse3.vsprops"
UseOfMFC="1"
CharacterSet="2"
>
@ -727,7 +727,7 @@
<Configuration
Name="Debug SSSE3|x64"
ConfigurationType="2"
InheritedPropertySheets="vsprops\common.vsprops;vsprops\debug.vsprops;vsprops\ssse3.vsprops"
InheritedPropertySheets=".\vsprops\common.vsprops;.\vsprops\debug.vsprops;.\vsprops\ssse3.vsprops"
UseOfMFC="1"
CharacterSet="2"
>
@ -789,7 +789,7 @@
<Configuration
Name="Debug SSE4|Win32"
ConfigurationType="2"
InheritedPropertySheets="vsprops\common.vsprops;vsprops\debug.vsprops;vsprops\sse4.vsprops"
InheritedPropertySheets=".\vsprops\common.vsprops;.\vsprops\debug.vsprops;.\vsprops\sse4.vsprops"
UseOfMFC="1"
CharacterSet="2"
>
@ -853,7 +853,7 @@
<Configuration
Name="Debug SSE4|x64"
ConfigurationType="2"
InheritedPropertySheets="vsprops\common.vsprops;vsprops\debug.vsprops;vsprops\sse4.vsprops"
InheritedPropertySheets=".\vsprops\common.vsprops;.\vsprops\debug.vsprops;.\vsprops\sse4.vsprops"
UseOfMFC="1"
CharacterSet="2"
>
@ -916,7 +916,7 @@
<Configuration
Name="Release SSE4|Win32"
ConfigurationType="2"
InheritedPropertySheets="vsprops\common.vsprops;vsprops\release.vsprops;vsprops\sse4.vsprops"
InheritedPropertySheets=".\vsprops\common.vsprops;.\vsprops\release.vsprops;.\vsprops\sse4.vsprops"
UseOfMFC="1"
CharacterSet="2"
WholeProgramOptimization="1"
@ -981,7 +981,7 @@
<Configuration
Name="Release SSE4|x64"
ConfigurationType="2"
InheritedPropertySheets="vsprops\common.vsprops;vsprops\release.vsprops;vsprops\sse4.vsprops"
InheritedPropertySheets=".\vsprops\common.vsprops;.\vsprops\release.vsprops;.\vsprops\sse4.vsprops"
UseOfMFC="1"
CharacterSet="2"
WholeProgramOptimization="1"
@ -1159,6 +1159,10 @@
RelativePath=".\GSDrawScanline.cpp"
>
</File>
<File
RelativePath=".\GSDrawScanlineCodeGenerator.cpp"
>
</File>
<File
RelativePath=".\GSDump.cpp"
>
@ -1669,6 +1673,10 @@
RelativePath=".\GSDrawScanline.h"
>
</File>
<File
RelativePath=".\GSDrawScanlineCodeGenerator.h"
>
</File>
<File
RelativePath=".\GSDump.h"
>
@ -1717,6 +1725,10 @@
RelativePath=".\GSRendererSW.h"
>
</File>
<File
RelativePath=".\GSScanlineEnvironment.h"
>
</File>
<File
RelativePath=".\GSSetting.h"
>
@ -5044,6 +5056,26 @@
>
</File>
</Filter>
<Filter
Name="Xbyak"
>
<File
RelativePath=".\xbyak\xbyak.h"
>
</File>
<File
RelativePath=".\xbyak\xbyak_bin2hex.h"
>
</File>
<File
RelativePath=".\xbyak\xbyak_mnemonic.h"
>
</File>
<File
RelativePath=".\xbyak\xbyak_util.h"
>
</File>
</Filter>
</Files>
<Globals>
<Global

1338
plugins/GSdx/xbyak/xbyak.h Normal file

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,258 @@
enum {
B00000000= 0,
B00000001= 1,
B00000010= 2,
B00000011= 3,
B00000100= 4,
B00000101= 5,
B00000110= 6,
B00000111= 7,
B00001000= 8,
B00001001= 9,
B00001010= 10,
B00001011= 11,
B00001100= 12,
B00001101= 13,
B00001110= 14,
B00001111= 15,
B00010000= 16,
B00010001= 17,
B00010010= 18,
B00010011= 19,
B00010100= 20,
B00010101= 21,
B00010110= 22,
B00010111= 23,
B00011000= 24,
B00011001= 25,
B00011010= 26,
B00011011= 27,
B00011100= 28,
B00011101= 29,
B00011110= 30,
B00011111= 31,
B00100000= 32,
B00100001= 33,
B00100010= 34,
B00100011= 35,
B00100100= 36,
B00100101= 37,
B00100110= 38,
B00100111= 39,
B00101000= 40,
B00101001= 41,
B00101010= 42,
B00101011= 43,
B00101100= 44,
B00101101= 45,
B00101110= 46,
B00101111= 47,
B00110000= 48,
B00110001= 49,
B00110010= 50,
B00110011= 51,
B00110100= 52,
B00110101= 53,
B00110110= 54,
B00110111= 55,
B00111000= 56,
B00111001= 57,
B00111010= 58,
B00111011= 59,
B00111100= 60,
B00111101= 61,
B00111110= 62,
B00111111= 63,
B01000000= 64,
B01000001= 65,
B01000010= 66,
B01000011= 67,
B01000100= 68,
B01000101= 69,
B01000110= 70,
B01000111= 71,
B01001000= 72,
B01001001= 73,
B01001010= 74,
B01001011= 75,
B01001100= 76,
B01001101= 77,
B01001110= 78,
B01001111= 79,
B01010000= 80,
B01010001= 81,
B01010010= 82,
B01010011= 83,
B01010100= 84,
B01010101= 85,
B01010110= 86,
B01010111= 87,
B01011000= 88,
B01011001= 89,
B01011010= 90,
B01011011= 91,
B01011100= 92,
B01011101= 93,
B01011110= 94,
B01011111= 95,
B01100000= 96,
B01100001= 97,
B01100010= 98,
B01100011= 99,
B01100100= 100,
B01100101= 101,
B01100110= 102,
B01100111= 103,
B01101000= 104,
B01101001= 105,
B01101010= 106,
B01101011= 107,
B01101100= 108,
B01101101= 109,
B01101110= 110,
B01101111= 111,
B01110000= 112,
B01110001= 113,
B01110010= 114,
B01110011= 115,
B01110100= 116,
B01110101= 117,
B01110110= 118,
B01110111= 119,
B01111000= 120,
B01111001= 121,
B01111010= 122,
B01111011= 123,
B01111100= 124,
B01111101= 125,
B01111110= 126,
B01111111= 127,
B10000000= 128,
B10000001= 129,
B10000010= 130,
B10000011= 131,
B10000100= 132,
B10000101= 133,
B10000110= 134,
B10000111= 135,
B10001000= 136,
B10001001= 137,
B10001010= 138,
B10001011= 139,
B10001100= 140,
B10001101= 141,
B10001110= 142,
B10001111= 143,
B10010000= 144,
B10010001= 145,
B10010010= 146,
B10010011= 147,
B10010100= 148,
B10010101= 149,
B10010110= 150,
B10010111= 151,
B10011000= 152,
B10011001= 153,
B10011010= 154,
B10011011= 155,
B10011100= 156,
B10011101= 157,
B10011110= 158,
B10011111= 159,
B10100000= 160,
B10100001= 161,
B10100010= 162,
B10100011= 163,
B10100100= 164,
B10100101= 165,
B10100110= 166,
B10100111= 167,
B10101000= 168,
B10101001= 169,
B10101010= 170,
B10101011= 171,
B10101100= 172,
B10101101= 173,
B10101110= 174,
B10101111= 175,
B10110000= 176,
B10110001= 177,
B10110010= 178,
B10110011= 179,
B10110100= 180,
B10110101= 181,
B10110110= 182,
B10110111= 183,
B10111000= 184,
B10111001= 185,
B10111010= 186,
B10111011= 187,
B10111100= 188,
B10111101= 189,
B10111110= 190,
B10111111= 191,
B11000000= 192,
B11000001= 193,
B11000010= 194,
B11000011= 195,
B11000100= 196,
B11000101= 197,
B11000110= 198,
B11000111= 199,
B11001000= 200,
B11001001= 201,
B11001010= 202,
B11001011= 203,
B11001100= 204,
B11001101= 205,
B11001110= 206,
B11001111= 207,
B11010000= 208,
B11010001= 209,
B11010010= 210,
B11010011= 211,
B11010100= 212,
B11010101= 213,
B11010110= 214,
B11010111= 215,
B11011000= 216,
B11011001= 217,
B11011010= 218,
B11011011= 219,
B11011100= 220,
B11011101= 221,
B11011110= 222,
B11011111= 223,
B11100000= 224,
B11100001= 225,
B11100010= 226,
B11100011= 227,
B11100100= 228,
B11100101= 229,
B11100110= 230,
B11100111= 231,
B11101000= 232,
B11101001= 233,
B11101010= 234,
B11101011= 235,
B11101100= 236,
B11101101= 237,
B11101110= 238,
B11101111= 239,
B11110000= 240,
B11110001= 241,
B11110010= 242,
B11110011= 243,
B11110100= 244,
B11110101= 245,
B11110110= 246,
B11110111= 247,
B11111000= 248,
B11111001= 249,
B11111010= 250,
B11111011= 251,
B11111100= 252,
B11111101= 253,
B11111110= 254,
B11111111= 255
};

View File

@ -0,0 +1,429 @@
// Returns the version string of this mnemonic table.
const char *getVersionString() const
{
	return "2.07";
}
// Pack / logic / multiply / unpack emitters.
// Each method forwards its destination register, its source operand and one
// opcode byte to opMMX(); the actual byte emission happens there.
void packssdw(const Mmx& mmx, const Operand& op)
{
	opMMX(mmx, op, 0x6B);
}
void packsswb(const Mmx& mmx, const Operand& op)
{
	opMMX(mmx, op, 0x63);
}
void packuswb(const Mmx& mmx, const Operand& op)
{
	opMMX(mmx, op, 0x67);
}
void pand(const Mmx& mmx, const Operand& op)
{
	opMMX(mmx, op, 0xDB);
}
void pandn(const Mmx& mmx, const Operand& op)
{
	opMMX(mmx, op, 0xDF);
}
void pmaddwd(const Mmx& mmx, const Operand& op)
{
	opMMX(mmx, op, 0xF5);
}
void pmulhuw(const Mmx& mmx, const Operand& op)
{
	opMMX(mmx, op, 0xE4);
}
void pmulhw(const Mmx& mmx, const Operand& op)
{
	opMMX(mmx, op, 0xE5);
}
void pmullw(const Mmx& mmx, const Operand& op)
{
	opMMX(mmx, op, 0xD5);
}
void por(const Mmx& mmx, const Operand& op)
{
	opMMX(mmx, op, 0xEB);
}
void punpckhbw(const Mmx& mmx, const Operand& op)
{
	opMMX(mmx, op, 0x68);
}
void punpckhwd(const Mmx& mmx, const Operand& op)
{
	opMMX(mmx, op, 0x69);
}
void punpckhdq(const Mmx& mmx, const Operand& op)
{
	opMMX(mmx, op, 0x6A);
}
void punpcklbw(const Mmx& mmx, const Operand& op)
{
	opMMX(mmx, op, 0x60);
}
void punpcklwd(const Mmx& mmx, const Operand& op)
{
	opMMX(mmx, op, 0x61);
}
void punpckldq(const Mmx& mmx, const Operand& op)
{
	opMMX(mmx, op, 0x62);
}
void pxor(const Mmx& mmx, const Operand& op)
{
	opMMX(mmx, op, 0xEF);
}
// Average / min / max / sum-of-differences / 64-bit integer emitters.
// Same shape as the other packed-integer emitters: (register, operand, opcode
// byte) handed to opMMX().
void pavgb(const Mmx& mmx, const Operand& op)
{
	opMMX(mmx, op, 0xE0);
}
void pavgw(const Mmx& mmx, const Operand& op)
{
	opMMX(mmx, op, 0xE3);
}
void pmaxsw(const Mmx& mmx, const Operand& op)
{
	opMMX(mmx, op, 0xEE);
}
void pmaxub(const Mmx& mmx, const Operand& op)
{
	opMMX(mmx, op, 0xDE);
}
void pminsw(const Mmx& mmx, const Operand& op)
{
	opMMX(mmx, op, 0xEA);
}
void pminub(const Mmx& mmx, const Operand& op)
{
	opMMX(mmx, op, 0xDA);
}
void psadbw(const Mmx& mmx, const Operand& op)
{
	opMMX(mmx, op, 0xF6);
}
void paddq(const Mmx& mmx, const Operand& op)
{
	opMMX(mmx, op, 0xD4);
}
void pmuludq(const Mmx& mmx, const Operand& op)
{
	opMMX(mmx, op, 0xF4);
}
void psubq(const Mmx& mmx, const Operand& op)
{
	opMMX(mmx, op, 0xFB);
}
// Packed add and packed compare emitters; each forwards one opcode byte to
// opMMX() together with the destination register and source operand.
void paddb(const Mmx& mmx, const Operand& op)
{
	opMMX(mmx, op, 0xFC);
}
void paddw(const Mmx& mmx, const Operand& op)
{
	opMMX(mmx, op, 0xFD);
}
void paddd(const Mmx& mmx, const Operand& op)
{
	opMMX(mmx, op, 0xFE);
}
void paddsb(const Mmx& mmx, const Operand& op)
{
	opMMX(mmx, op, 0xEC);
}
void paddsw(const Mmx& mmx, const Operand& op)
{
	opMMX(mmx, op, 0xED);
}
void paddusb(const Mmx& mmx, const Operand& op)
{
	opMMX(mmx, op, 0xDC);
}
void paddusw(const Mmx& mmx, const Operand& op)
{
	opMMX(mmx, op, 0xDD);
}
void pcmpeqb(const Mmx& mmx, const Operand& op)
{
	opMMX(mmx, op, 0x74);
}
void pcmpeqw(const Mmx& mmx, const Operand& op)
{
	opMMX(mmx, op, 0x75);
}
void pcmpeqd(const Mmx& mmx, const Operand& op)
{
	opMMX(mmx, op, 0x76);
}
void pcmpgtb(const Mmx& mmx, const Operand& op)
{
	opMMX(mmx, op, 0x64);
}
void pcmpgtw(const Mmx& mmx, const Operand& op)
{
	opMMX(mmx, op, 0x65);
}
void pcmpgtd(const Mmx& mmx, const Operand& op)
{
	opMMX(mmx, op, 0x66);
}
// Packed shifts whose count comes from a register/memory operand, followed by
// the packed subtract family. All go through opMMX() with a single opcode byte.
void psllw(const Mmx& mmx, const Operand& op)
{
	opMMX(mmx, op, 0xF1);
}
void pslld(const Mmx& mmx, const Operand& op)
{
	opMMX(mmx, op, 0xF2);
}
void psllq(const Mmx& mmx, const Operand& op)
{
	opMMX(mmx, op, 0xF3);
}
void psraw(const Mmx& mmx, const Operand& op)
{
	opMMX(mmx, op, 0xE1);
}
void psrad(const Mmx& mmx, const Operand& op)
{
	opMMX(mmx, op, 0xE2);
}
void psrlw(const Mmx& mmx, const Operand& op)
{
	opMMX(mmx, op, 0xD1);
}
void psrld(const Mmx& mmx, const Operand& op)
{
	opMMX(mmx, op, 0xD2);
}
void psrlq(const Mmx& mmx, const Operand& op)
{
	opMMX(mmx, op, 0xD3);
}
void psubb(const Mmx& mmx, const Operand& op)
{
	opMMX(mmx, op, 0xF8);
}
void psubw(const Mmx& mmx, const Operand& op)
{
	opMMX(mmx, op, 0xF9);
}
void psubd(const Mmx& mmx, const Operand& op)
{
	opMMX(mmx, op, 0xFA);
}
void psubsb(const Mmx& mmx, const Operand& op)
{
	opMMX(mmx, op, 0xE8);
}
void psubsw(const Mmx& mmx, const Operand& op)
{
	opMMX(mmx, op, 0xE9);
}
void psubusb(const Mmx& mmx, const Operand& op)
{
	opMMX(mmx, op, 0xD8);
}
void psubusw(const Mmx& mmx, const Operand& op)
{
	opMMX(mmx, op, 0xD9);
}
// Packed shifts with an immediate count.
// opMMX_IMM() receives (register, imm8, opcode byte, ext); ext is the small
// integer in the last argument (presumably the ModRM /digit -- confirm against
// opMMX_IMM). pslldq/psrldq are declared for Xmm registers only.
void psllw(const Mmx& mmx, int imm8)
{
	opMMX_IMM(mmx, imm8, 0x71, 6);
}
void pslld(const Mmx& mmx, int imm8)
{
	opMMX_IMM(mmx, imm8, 0x72, 6);
}
void psllq(const Mmx& mmx, int imm8)
{
	opMMX_IMM(mmx, imm8, 0x73, 6);
}
void psraw(const Mmx& mmx, int imm8)
{
	opMMX_IMM(mmx, imm8, 0x71, 4);
}
void psrad(const Mmx& mmx, int imm8)
{
	opMMX_IMM(mmx, imm8, 0x72, 4);
}
void psrlw(const Mmx& mmx, int imm8)
{
	opMMX_IMM(mmx, imm8, 0x71, 2);
}
void psrld(const Mmx& mmx, int imm8)
{
	opMMX_IMM(mmx, imm8, 0x72, 2);
}
void psrlq(const Mmx& mmx, int imm8)
{
	opMMX_IMM(mmx, imm8, 0x73, 2);
}
void pslldq(const Xmm& xmm, int imm8)
{
	opMMX_IMM(xmm, imm8, 0x73, 7);
}
void psrldq(const Xmm& xmm, int imm8)
{
	opMMX_IMM(xmm, imm8, 0x73, 3);
}
// Shuffle emitters. All four share opcode byte 0x70 and differ only in the
// second value handed to opMMX() (0x00, 0xF2, 0xF3, 0x66); imm8 is passed last.
void pshufw(const Mmx& mmx, const Operand& op, uint8 imm8)
{
	opMMX(mmx, op, 0x70, 0x00, imm8);
}
void pshuflw(const Mmx& mmx, const Operand& op, uint8 imm8)
{
	opMMX(mmx, op, 0x70, 0xF2, imm8);
}
void pshufhw(const Mmx& mmx, const Operand& op, uint8 imm8)
{
	opMMX(mmx, op, 0x70, 0xF3, imm8);
}
void pshufd(const Mmx& mmx, const Operand& op, uint8 imm8)
{
	opMMX(mmx, op, 0x70, 0x66, imm8);
}
void movdqa(const Xmm& xmm, const Operand& op) { opMMX(xmm, op, 0x6F, 0x66); }
void movdqa(const Address& addr, const Xmm& xmm) { db(0x66); opModM(addr, xmm, 0x0F, 0x7F); }
void movdqu(const Xmm& xmm, const Operand& op) { opMMX(xmm, op, 0x6F, 0xF3); }
void movdqu(const Address& addr, const Xmm& xmm) { db(0xF3); opModM(addr, xmm, 0x0F, 0x7F); }
void movaps(const Xmm& xmm, const Operand& op) { opMMX(xmm, op, 0x28, 0x100); }
void movaps(const Address& addr, const Xmm& xmm) { opModM(addr, xmm, 0x0F, 0x29); }
void movss(const Xmm& xmm, const Operand& op) { opMMX(xmm, op, 0x10, 0xF3); }
void movss(const Address& addr, const Xmm& xmm) { db(0xF3); opModM(addr, xmm, 0x0F, 0x11); }
void movups(const Xmm& xmm, const Operand& op) { opMMX(xmm, op, 0x10, 0x100); }
void movups(const Address& addr, const Xmm& xmm) { opModM(addr, xmm, 0x0F, 0x11); }
void movapd(const Xmm& xmm, const Operand& op) { opMMX(xmm, op, 0x28, 0x66); }
void movapd(const Address& addr, const Xmm& xmm) { db(0x66); opModM(addr, xmm, 0x0F, 0x29); }
void movsd(const Xmm& xmm, const Operand& op) { opMMX(xmm, op, 0x10, 0xF2); }
void movsd(const Address& addr, const Xmm& xmm) { db(0xF2); opModM(addr, xmm, 0x0F, 0x11); }
void movupd(const Xmm& xmm, const Operand& op) { opMMX(xmm, op, 0x10, 0x66); }
void movupd(const Address& addr, const Xmm& xmm) { db(0x66); opModM(addr, xmm, 0x0F, 0x11); }
void addps(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x58, 0x100, isXMM_XMMorMEM); }
void addss(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x58, 0xF3, isXMM_XMMorMEM); }
void addpd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x58, 0x66, isXMM_XMMorMEM); }
void addsd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x58, 0xF2, isXMM_XMMorMEM); }
void andnps(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x55, 0x100, isXMM_XMMorMEM); }
void andnpd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x55, 0x66, isXMM_XMMorMEM); }
void andps(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x54, 0x100, isXMM_XMMorMEM); }
void andpd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x54, 0x66, isXMM_XMMorMEM); }
void cmpps(const Xmm& xmm, const Operand& op, uint8 imm8) { opGen(xmm, op, 0xC2, 0x100, isXMM_XMMorMEM, imm8); }
void cmpss(const Xmm& xmm, const Operand& op, uint8 imm8) { opGen(xmm, op, 0xC2, 0xF3, isXMM_XMMorMEM, imm8); }
void cmppd(const Xmm& xmm, const Operand& op, uint8 imm8) { opGen(xmm, op, 0xC2, 0x66, isXMM_XMMorMEM, imm8); }
void cmpsd(const Xmm& xmm, const Operand& op, uint8 imm8) { opGen(xmm, op, 0xC2, 0xF2, isXMM_XMMorMEM, imm8); }
// Floating-point divide / max / min / multiply / or emitters.
// Each forwards (xmm, op, opcode byte, prefix value, isXMM_XMMorMEM) to opGen();
// variants of one mnemonic share the opcode byte and differ in the prefix value.
void divps(const Xmm& xmm, const Operand& op)
{
	opGen(xmm, op, 0x5E, 0x100, isXMM_XMMorMEM);
}
void divss(const Xmm& xmm, const Operand& op)
{
	opGen(xmm, op, 0x5E, 0xF3, isXMM_XMMorMEM);
}
void divpd(const Xmm& xmm, const Operand& op)
{
	opGen(xmm, op, 0x5E, 0x66, isXMM_XMMorMEM);
}
void divsd(const Xmm& xmm, const Operand& op)
{
	opGen(xmm, op, 0x5E, 0xF2, isXMM_XMMorMEM);
}
void maxps(const Xmm& xmm, const Operand& op)
{
	opGen(xmm, op, 0x5F, 0x100, isXMM_XMMorMEM);
}
void maxss(const Xmm& xmm, const Operand& op)
{
	opGen(xmm, op, 0x5F, 0xF3, isXMM_XMMorMEM);
}
void maxpd(const Xmm& xmm, const Operand& op)
{
	opGen(xmm, op, 0x5F, 0x66, isXMM_XMMorMEM);
}
void maxsd(const Xmm& xmm, const Operand& op)
{
	opGen(xmm, op, 0x5F, 0xF2, isXMM_XMMorMEM);
}
void minps(const Xmm& xmm, const Operand& op)
{
	opGen(xmm, op, 0x5D, 0x100, isXMM_XMMorMEM);
}
void minss(const Xmm& xmm, const Operand& op)
{
	opGen(xmm, op, 0x5D, 0xF3, isXMM_XMMorMEM);
}
void minpd(const Xmm& xmm, const Operand& op)
{
	opGen(xmm, op, 0x5D, 0x66, isXMM_XMMorMEM);
}
void minsd(const Xmm& xmm, const Operand& op)
{
	opGen(xmm, op, 0x5D, 0xF2, isXMM_XMMorMEM);
}
void mulps(const Xmm& xmm, const Operand& op)
{
	opGen(xmm, op, 0x59, 0x100, isXMM_XMMorMEM);
}
void mulss(const Xmm& xmm, const Operand& op)
{
	opGen(xmm, op, 0x59, 0xF3, isXMM_XMMorMEM);
}
void mulpd(const Xmm& xmm, const Operand& op)
{
	opGen(xmm, op, 0x59, 0x66, isXMM_XMMorMEM);
}
void mulsd(const Xmm& xmm, const Operand& op)
{
	opGen(xmm, op, 0x59, 0xF2, isXMM_XMMorMEM);
}
void orps(const Xmm& xmm, const Operand& op)
{
	opGen(xmm, op, 0x56, 0x100, isXMM_XMMorMEM);
}
void orpd(const Xmm& xmm, const Operand& op)
{
	opGen(xmm, op, 0x56, 0x66, isXMM_XMMorMEM);
}
// Reciprocal / reciprocal-sqrt / shuffle / sqrt / subtract / unpack / xor
// emitters. Each forwards (xmm, op, opcode byte, prefix value, isXMM_XMMorMEM
// [, imm8]) to opGen().
void rcpps(const Xmm& xmm, const Operand& op)
{
	opGen(xmm, op, 0x53, 0x100, isXMM_XMMorMEM);
}
void rcpss(const Xmm& xmm, const Operand& op)
{
	opGen(xmm, op, 0x53, 0xF3, isXMM_XMMorMEM);
}
void rsqrtps(const Xmm& xmm, const Operand& op)
{
	opGen(xmm, op, 0x52, 0x100, isXMM_XMMorMEM);
}
void rsqrtss(const Xmm& xmm, const Operand& op)
{
	opGen(xmm, op, 0x52, 0xF3, isXMM_XMMorMEM);
}
void shufps(const Xmm& xmm, const Operand& op, uint8 imm8)
{
	opGen(xmm, op, 0xC6, 0x100, isXMM_XMMorMEM, imm8);
}
void shufpd(const Xmm& xmm, const Operand& op, uint8 imm8)
{
	opGen(xmm, op, 0xC6, 0x66, isXMM_XMMorMEM, imm8);
}
void sqrtps(const Xmm& xmm, const Operand& op)
{
	opGen(xmm, op, 0x51, 0x100, isXMM_XMMorMEM);
}
void sqrtss(const Xmm& xmm, const Operand& op)
{
	opGen(xmm, op, 0x51, 0xF3, isXMM_XMMorMEM);
}
void sqrtpd(const Xmm& xmm, const Operand& op)
{
	opGen(xmm, op, 0x51, 0x66, isXMM_XMMorMEM);
}
void sqrtsd(const Xmm& xmm, const Operand& op)
{
	opGen(xmm, op, 0x51, 0xF2, isXMM_XMMorMEM);
}
void subps(const Xmm& xmm, const Operand& op)
{
	opGen(xmm, op, 0x5C, 0x100, isXMM_XMMorMEM);
}
void subss(const Xmm& xmm, const Operand& op)
{
	opGen(xmm, op, 0x5C, 0xF3, isXMM_XMMorMEM);
}
void subpd(const Xmm& xmm, const Operand& op)
{
	opGen(xmm, op, 0x5C, 0x66, isXMM_XMMorMEM);
}
void subsd(const Xmm& xmm, const Operand& op)
{
	opGen(xmm, op, 0x5C, 0xF2, isXMM_XMMorMEM);
}
void unpckhps(const Xmm& xmm, const Operand& op)
{
	opGen(xmm, op, 0x15, 0x100, isXMM_XMMorMEM);
}
void unpckhpd(const Xmm& xmm, const Operand& op)
{
	opGen(xmm, op, 0x15, 0x66, isXMM_XMMorMEM);
}
void unpcklps(const Xmm& xmm, const Operand& op)
{
	opGen(xmm, op, 0x14, 0x100, isXMM_XMMorMEM);
}
void unpcklpd(const Xmm& xmm, const Operand& op)
{
	opGen(xmm, op, 0x14, 0x66, isXMM_XMMorMEM);
}
void xorps(const Xmm& xmm, const Operand& op)
{
	opGen(xmm, op, 0x57, 0x100, isXMM_XMMorMEM);
}
void xorpd(const Xmm& xmm, const Operand& op)
{
	opGen(xmm, op, 0x57, 0x66, isXMM_XMMorMEM);
}
// Register-to-register XMM emitters (maskmovdqu, movhlps, movlhps) go through
// opModR(reg1, reg2, 0x0F, opcode); maskmovdqu first emits 0x66 with db().
// The quadword unpack and ordered/unordered compare emitters go through opGen().
void maskmovdqu(const Xmm& reg1, const Xmm& reg2)
{
	db(0x66);
	opModR(reg1, reg2, 0x0F, 0xF7);
}
void movhlps(const Xmm& reg1, const Xmm& reg2)
{
	opModR(reg1, reg2, 0x0F, 0x12);
}
void movlhps(const Xmm& reg1, const Xmm& reg2)
{
	opModR(reg1, reg2, 0x0F, 0x16);
}
void punpckhqdq(const Xmm& xmm, const Operand& op)
{
	opGen(xmm, op, 0x6D, 0x66, isXMM_XMMorMEM);
}
void punpcklqdq(const Xmm& xmm, const Operand& op)
{
	opGen(xmm, op, 0x6C, 0x66, isXMM_XMMorMEM);
}
void comiss(const Xmm& xmm, const Operand& op)
{
	opGen(xmm, op, 0x2F, 0x100, isXMM_XMMorMEM);
}
void ucomiss(const Xmm& xmm, const Operand& op)
{
	opGen(xmm, op, 0x2E, 0x100, isXMM_XMMorMEM);
}
void comisd(const Xmm& xmm, const Operand& op)
{
	opGen(xmm, op, 0x2F, 0x66, isXMM_XMMorMEM);
}
void ucomisd(const Xmm& xmm, const Operand& op)
{
	opGen(xmm, op, 0x2E, 0x66, isXMM_XMMorMEM);
}
// XMM-to-XMM/memory conversion emitters. Three opcode bytes are used (0x5A,
// 0xE6, 0x5B); the prefix value handed to opGen() selects the specific
// conversion within each opcode.
void cvtpd2ps(const Xmm& xmm, const Operand& op)
{
	opGen(xmm, op, 0x5A, 0x66, isXMM_XMMorMEM);
}
void cvtps2pd(const Xmm& xmm, const Operand& op)
{
	opGen(xmm, op, 0x5A, 0x100, isXMM_XMMorMEM);
}
void cvtsd2ss(const Xmm& xmm, const Operand& op)
{
	opGen(xmm, op, 0x5A, 0xF2, isXMM_XMMorMEM);
}
void cvtss2sd(const Xmm& xmm, const Operand& op)
{
	opGen(xmm, op, 0x5A, 0xF3, isXMM_XMMorMEM);
}
void cvtpd2dq(const Xmm& xmm, const Operand& op)
{
	opGen(xmm, op, 0xE6, 0xF2, isXMM_XMMorMEM);
}
void cvttpd2dq(const Xmm& xmm, const Operand& op)
{
	opGen(xmm, op, 0xE6, 0x66, isXMM_XMMorMEM);
}
void cvtdq2pd(const Xmm& xmm, const Operand& op)
{
	opGen(xmm, op, 0xE6, 0xF3, isXMM_XMMorMEM);
}
void cvtps2dq(const Xmm& xmm, const Operand& op)
{
	opGen(xmm, op, 0x5B, 0x66, isXMM_XMMorMEM);
}
void cvttps2dq(const Xmm& xmm, const Operand& op)
{
	opGen(xmm, op, 0x5B, 0xF3, isXMM_XMMorMEM);
}
void cvtdq2ps(const Xmm& xmm, const Operand& op)
{
	opGen(xmm, op, 0x5B, 0x100, isXMM_XMMorMEM);
}
// Add/subtract, horizontal add/subtract and duplicate-move emitters.
// All take (Xmm, Operand) and forward opcode byte + prefix value to opGen()
// with the isXMM_XMMorMEM operand predicate.
void addsubpd(const Xmm& xmm, const Operand& op)
{
	opGen(xmm, op, 0xD0, 0x66, isXMM_XMMorMEM);
}
void addsubps(const Xmm& xmm, const Operand& op)
{
	opGen(xmm, op, 0xD0, 0xF2, isXMM_XMMorMEM);
}
void haddpd(const Xmm& xmm, const Operand& op)
{
	opGen(xmm, op, 0x7C, 0x66, isXMM_XMMorMEM);
}
void haddps(const Xmm& xmm, const Operand& op)
{
	opGen(xmm, op, 0x7C, 0xF2, isXMM_XMMorMEM);
}
void hsubpd(const Xmm& xmm, const Operand& op)
{
	opGen(xmm, op, 0x7D, 0x66, isXMM_XMMorMEM);
}
void hsubps(const Xmm& xmm, const Operand& op)
{
	opGen(xmm, op, 0x7D, 0xF2, isXMM_XMMorMEM);
}
void movddup(const Xmm& xmm, const Operand& op)
{
	opGen(xmm, op, 0x12, 0xF2, isXMM_XMMorMEM);
}
void movshdup(const Xmm& xmm, const Operand& op)
{
	opGen(xmm, op, 0x16, 0xF3, isXMM_XMMorMEM);
}
void movsldup(const Xmm& xmm, const Operand& op)
{
	opGen(xmm, op, 0x12, 0xF3, isXMM_XMMorMEM);
}
// Conversions between XMM and MMX / 32-bit general registers.
// Both arguments are plain Operands here; the predicate passed as the last
// opGen() argument (isXMM_MMXorMEM, isMMX_XMMorMEM, isXMM_REG32orMEM,
// isREG32_XMMorMEM) names the operand combination each mnemonic expects.
void cvtpi2ps(const Operand& reg, const Operand& op)
{
	opGen(reg, op, 0x2A, 0x100, isXMM_MMXorMEM);
}
void cvtps2pi(const Operand& reg, const Operand& op)
{
	opGen(reg, op, 0x2D, 0x100, isMMX_XMMorMEM);
}
void cvtsi2ss(const Operand& reg, const Operand& op)
{
	opGen(reg, op, 0x2A, 0xF3, isXMM_REG32orMEM);
}
void cvtss2si(const Operand& reg, const Operand& op)
{
	opGen(reg, op, 0x2D, 0xF3, isREG32_XMMorMEM);
}
void cvttps2pi(const Operand& reg, const Operand& op)
{
	opGen(reg, op, 0x2C, 0x100, isMMX_XMMorMEM);
}
void cvttss2si(const Operand& reg, const Operand& op)
{
	opGen(reg, op, 0x2C, 0xF3, isREG32_XMMorMEM);
}
void cvtpi2pd(const Operand& reg, const Operand& op)
{
	opGen(reg, op, 0x2A, 0x66, isXMM_MMXorMEM);
}
void cvtpd2pi(const Operand& reg, const Operand& op)
{
	opGen(reg, op, 0x2D, 0x66, isMMX_XMMorMEM);
}
void cvtsi2sd(const Operand& reg, const Operand& op)
{
	opGen(reg, op, 0x2A, 0xF2, isXMM_REG32orMEM);
}
void cvtsd2si(const Operand& reg, const Operand& op)
{
	opGen(reg, op, 0x2D, 0xF2, isREG32_XMMorMEM);
}
void cvttpd2pi(const Operand& reg, const Operand& op)
{
	opGen(reg, op, 0x2C, 0x66, isMMX_XMMorMEM);
}
void cvttsd2si(const Operand& reg, const Operand& op)
{
	opGen(reg, op, 0x2C, 0xF2, isREG32_XMMorMEM);
}
// Prefetch emitters. All four use the same opcode pair (0x0F, B00011000) and
// differ only in the Reg32 index (0..3) passed to opModM() alongside the address.
void prefetcht0(const Address& addr)
{
	opModM(addr, Reg32(1), 0x0F, B00011000);
}
void prefetcht1(const Address& addr)
{
	opModM(addr, Reg32(2), 0x0F, B00011000);
}
void prefetcht2(const Address& addr)
{
	opModM(addr, Reg32(3), 0x0F, B00011000);
}
void prefetchnta(const Address& addr)
{
	opModM(addr, Reg32(0), 0x0F, B00011000);
}
// High/low half moves. Operand order is resolved inside opMovXMM(); these
// wrappers only supply the opcode byte (0x16 high, 0x12 low) and the prefix
// value (0x100 for the ps forms, 0x66 for the pd forms).
void movhps(const Operand& op1, const Operand& op2)
{
	opMovXMM(op1, op2, 0x16, 0x100);
}
void movlps(const Operand& op1, const Operand& op2)
{
	opMovXMM(op1, op2, 0x12, 0x100);
}
void movhpd(const Operand& op1, const Operand& op2)
{
	opMovXMM(op1, op2, 0x16, 0x66);
}
void movlpd(const Operand& op1, const Operand& op2)
{
	opMovXMM(op1, op2, 0x12, 0x66);
}
// Conditional move / jump / set emitters, condition codes 0 through 4.
// For condition code cc the three forms pass:
//   cmovcc -> opModRM(..., 0x0F, B01000000 | cc)
//   jcc    -> opJmp(label, type, 0x70 + cc, 0x80 + cc, 0x0F)
//   setcc  -> opR_ModM(op, 8, 3, 0, 0x0F, B10010000 | cc)
// Synonymous mnemonics pass identical arguments.

// cc = 0
void cmovo(const Reg32e& reg, const Operand& op)
{
	opModRM(reg, op, op.isREG(i32e), op.isMEM(), 0x0F, B01000000 | 0);
}
void jo(const char *label, LabelType type = T_AUTO)
{
	opJmp(label, type, 0x70, 0x80, 0x0F);
}
void seto(const Operand& op)
{
	opR_ModM(op, 8, 3, 0, 0x0F, B10010000 | 0);
}

// cc = 1
void cmovno(const Reg32e& reg, const Operand& op)
{
	opModRM(reg, op, op.isREG(i32e), op.isMEM(), 0x0F, B01000000 | 1);
}
void jno(const char *label, LabelType type = T_AUTO)
{
	opJmp(label, type, 0x71, 0x81, 0x0F);
}
void setno(const Operand& op)
{
	opR_ModM(op, 8, 3, 0, 0x0F, B10010000 | 1);
}

// cc = 2 (b / nae)
void cmovb(const Reg32e& reg, const Operand& op)
{
	opModRM(reg, op, op.isREG(i32e), op.isMEM(), 0x0F, B01000000 | 2);
}
void jb(const char *label, LabelType type = T_AUTO)
{
	opJmp(label, type, 0x72, 0x82, 0x0F);
}
void setb(const Operand& op)
{
	opR_ModM(op, 8, 3, 0, 0x0F, B10010000 | 2);
}
void cmovnae(const Reg32e& reg, const Operand& op)
{
	opModRM(reg, op, op.isREG(i32e), op.isMEM(), 0x0F, B01000000 | 2);
}
void jnae(const char *label, LabelType type = T_AUTO)
{
	opJmp(label, type, 0x72, 0x82, 0x0F);
}
void setnae(const Operand& op)
{
	opR_ModM(op, 8, 3, 0, 0x0F, B10010000 | 2);
}

// cc = 3 (nb / ae)
void cmovnb(const Reg32e& reg, const Operand& op)
{
	opModRM(reg, op, op.isREG(i32e), op.isMEM(), 0x0F, B01000000 | 3);
}
void jnb(const char *label, LabelType type = T_AUTO)
{
	opJmp(label, type, 0x73, 0x83, 0x0F);
}
void setnb(const Operand& op)
{
	opR_ModM(op, 8, 3, 0, 0x0F, B10010000 | 3);
}
void cmovae(const Reg32e& reg, const Operand& op)
{
	opModRM(reg, op, op.isREG(i32e), op.isMEM(), 0x0F, B01000000 | 3);
}
void jae(const char *label, LabelType type = T_AUTO)
{
	opJmp(label, type, 0x73, 0x83, 0x0F);
}
void setae(const Operand& op)
{
	opR_ModM(op, 8, 3, 0, 0x0F, B10010000 | 3);
}

// cc = 4 (e / z)
void cmove(const Reg32e& reg, const Operand& op)
{
	opModRM(reg, op, op.isREG(i32e), op.isMEM(), 0x0F, B01000000 | 4);
}
void je(const char *label, LabelType type = T_AUTO)
{
	opJmp(label, type, 0x74, 0x84, 0x0F);
}
void sete(const Operand& op)
{
	opR_ModM(op, 8, 3, 0, 0x0F, B10010000 | 4);
}
void cmovz(const Reg32e& reg, const Operand& op)
{
	opModRM(reg, op, op.isREG(i32e), op.isMEM(), 0x0F, B01000000 | 4);
}
void jz(const char *label, LabelType type = T_AUTO)
{
	opJmp(label, type, 0x74, 0x84, 0x0F);
}
void setz(const Operand& op)
{
	opR_ModM(op, 8, 3, 0, 0x0F, B10010000 | 4);
}
// Conditional move / jump / set emitters, condition codes 5 through 9.
// For condition code cc the three forms pass:
//   cmovcc -> opModRM(..., 0x0F, B01000000 | cc)
//   jcc    -> opJmp(label, type, 0x70 + cc, 0x80 + cc, 0x0F)
//   setcc  -> opR_ModM(op, 8, 3, 0, 0x0F, B10010000 | cc)
// Synonymous mnemonics pass identical arguments.

// cc = 5 (ne / nz)
void cmovne(const Reg32e& reg, const Operand& op)
{
	opModRM(reg, op, op.isREG(i32e), op.isMEM(), 0x0F, B01000000 | 5);
}
void jne(const char *label, LabelType type = T_AUTO)
{
	opJmp(label, type, 0x75, 0x85, 0x0F);
}
void setne(const Operand& op)
{
	opR_ModM(op, 8, 3, 0, 0x0F, B10010000 | 5);
}
void cmovnz(const Reg32e& reg, const Operand& op)
{
	opModRM(reg, op, op.isREG(i32e), op.isMEM(), 0x0F, B01000000 | 5);
}
void jnz(const char *label, LabelType type = T_AUTO)
{
	opJmp(label, type, 0x75, 0x85, 0x0F);
}
void setnz(const Operand& op)
{
	opR_ModM(op, 8, 3, 0, 0x0F, B10010000 | 5);
}

// cc = 6 (be / na)
void cmovbe(const Reg32e& reg, const Operand& op)
{
	opModRM(reg, op, op.isREG(i32e), op.isMEM(), 0x0F, B01000000 | 6);
}
void jbe(const char *label, LabelType type = T_AUTO)
{
	opJmp(label, type, 0x76, 0x86, 0x0F);
}
void setbe(const Operand& op)
{
	opR_ModM(op, 8, 3, 0, 0x0F, B10010000 | 6);
}
void cmovna(const Reg32e& reg, const Operand& op)
{
	opModRM(reg, op, op.isREG(i32e), op.isMEM(), 0x0F, B01000000 | 6);
}
void jna(const char *label, LabelType type = T_AUTO)
{
	opJmp(label, type, 0x76, 0x86, 0x0F);
}
void setna(const Operand& op)
{
	opR_ModM(op, 8, 3, 0, 0x0F, B10010000 | 6);
}

// cc = 7 (nbe / a)
void cmovnbe(const Reg32e& reg, const Operand& op)
{
	opModRM(reg, op, op.isREG(i32e), op.isMEM(), 0x0F, B01000000 | 7);
}
void jnbe(const char *label, LabelType type = T_AUTO)
{
	opJmp(label, type, 0x77, 0x87, 0x0F);
}
void setnbe(const Operand& op)
{
	opR_ModM(op, 8, 3, 0, 0x0F, B10010000 | 7);
}
void cmova(const Reg32e& reg, const Operand& op)
{
	opModRM(reg, op, op.isREG(i32e), op.isMEM(), 0x0F, B01000000 | 7);
}
void ja(const char *label, LabelType type = T_AUTO)
{
	opJmp(label, type, 0x77, 0x87, 0x0F);
}
void seta(const Operand& op)
{
	opR_ModM(op, 8, 3, 0, 0x0F, B10010000 | 7);
}

// cc = 8
void cmovs(const Reg32e& reg, const Operand& op)
{
	opModRM(reg, op, op.isREG(i32e), op.isMEM(), 0x0F, B01000000 | 8);
}
void js(const char *label, LabelType type = T_AUTO)
{
	opJmp(label, type, 0x78, 0x88, 0x0F);
}
void sets(const Operand& op)
{
	opR_ModM(op, 8, 3, 0, 0x0F, B10010000 | 8);
}

// cc = 9
void cmovns(const Reg32e& reg, const Operand& op)
{
	opModRM(reg, op, op.isREG(i32e), op.isMEM(), 0x0F, B01000000 | 9);
}
void jns(const char *label, LabelType type = T_AUTO)
{
	opJmp(label, type, 0x79, 0x89, 0x0F);
}
void setns(const Operand& op)
{
	opR_ModM(op, 8, 3, 0, 0x0F, B10010000 | 9);
}
// Conditional move / jump / set emitters, condition codes 10 through 12.
// For condition code cc the three forms pass:
//   cmovcc -> opModRM(..., 0x0F, B01000000 | cc)
//   jcc    -> opJmp(label, type, 0x70 + cc, 0x80 + cc, 0x0F)
//   setcc  -> opR_ModM(op, 8, 3, 0, 0x0F, B10010000 | cc)
// Synonymous mnemonics pass identical arguments.

// cc = 10 (p / pe)
void cmovp(const Reg32e& reg, const Operand& op)
{
	opModRM(reg, op, op.isREG(i32e), op.isMEM(), 0x0F, B01000000 | 10);
}
void jp(const char *label, LabelType type = T_AUTO)
{
	opJmp(label, type, 0x7A, 0x8A, 0x0F);
}
void setp(const Operand& op)
{
	opR_ModM(op, 8, 3, 0, 0x0F, B10010000 | 10);
}
void cmovpe(const Reg32e& reg, const Operand& op)
{
	opModRM(reg, op, op.isREG(i32e), op.isMEM(), 0x0F, B01000000 | 10);
}
void jpe(const char *label, LabelType type = T_AUTO)
{
	opJmp(label, type, 0x7A, 0x8A, 0x0F);
}
void setpe(const Operand& op)
{
	opR_ModM(op, 8, 3, 0, 0x0F, B10010000 | 10);
}

// cc = 11 (np / po)
void cmovnp(const Reg32e& reg, const Operand& op)
{
	opModRM(reg, op, op.isREG(i32e), op.isMEM(), 0x0F, B01000000 | 11);
}
void jnp(const char *label, LabelType type = T_AUTO)
{
	opJmp(label, type, 0x7B, 0x8B, 0x0F);
}
void setnp(const Operand& op)
{
	opR_ModM(op, 8, 3, 0, 0x0F, B10010000 | 11);
}
void cmovpo(const Reg32e& reg, const Operand& op)
{
	opModRM(reg, op, op.isREG(i32e), op.isMEM(), 0x0F, B01000000 | 11);
}
void jpo(const char *label, LabelType type = T_AUTO)
{
	opJmp(label, type, 0x7B, 0x8B, 0x0F);
}
void setpo(const Operand& op)
{
	opR_ModM(op, 8, 3, 0, 0x0F, B10010000 | 11);
}

// cc = 12 (l / nge)
void cmovl(const Reg32e& reg, const Operand& op)
{
	opModRM(reg, op, op.isREG(i32e), op.isMEM(), 0x0F, B01000000 | 12);
}
void jl(const char *label, LabelType type = T_AUTO)
{
	opJmp(label, type, 0x7C, 0x8C, 0x0F);
}
void setl(const Operand& op)
{
	opR_ModM(op, 8, 3, 0, 0x0F, B10010000 | 12);
}
void cmovnge(const Reg32e& reg, const Operand& op)
{
	opModRM(reg, op, op.isREG(i32e), op.isMEM(), 0x0F, B01000000 | 12);
}
void jnge(const char *label, LabelType type = T_AUTO)
{
	opJmp(label, type, 0x7C, 0x8C, 0x0F);
}
void setnge(const Operand& op)
{
	opR_ModM(op, 8, 3, 0, 0x0F, B10010000 | 12);
}
// Conditional move / jump / set emitters, condition codes 13 through 15.
// For condition code cc the three forms pass:
//   cmovcc -> opModRM(..., 0x0F, B01000000 | cc)
//   jcc    -> opJmp(label, type, 0x70 + cc, 0x80 + cc, 0x0F)
//   setcc  -> opR_ModM(op, 8, 3, 0, 0x0F, B10010000 | cc)
// Synonymous mnemonics pass identical arguments.

// cc = 13 (nl / ge)
void cmovnl(const Reg32e& reg, const Operand& op)
{
	opModRM(reg, op, op.isREG(i32e), op.isMEM(), 0x0F, B01000000 | 13);
}
void jnl(const char *label, LabelType type = T_AUTO)
{
	opJmp(label, type, 0x7D, 0x8D, 0x0F);
}
void setnl(const Operand& op)
{
	opR_ModM(op, 8, 3, 0, 0x0F, B10010000 | 13);
}
void cmovge(const Reg32e& reg, const Operand& op)
{
	opModRM(reg, op, op.isREG(i32e), op.isMEM(), 0x0F, B01000000 | 13);
}
void jge(const char *label, LabelType type = T_AUTO)
{
	opJmp(label, type, 0x7D, 0x8D, 0x0F);
}
void setge(const Operand& op)
{
	opR_ModM(op, 8, 3, 0, 0x0F, B10010000 | 13);
}

// cc = 14 (le / ng)
void cmovle(const Reg32e& reg, const Operand& op)
{
	opModRM(reg, op, op.isREG(i32e), op.isMEM(), 0x0F, B01000000 | 14);
}
void jle(const char *label, LabelType type = T_AUTO)
{
	opJmp(label, type, 0x7E, 0x8E, 0x0F);
}
void setle(const Operand& op)
{
	opR_ModM(op, 8, 3, 0, 0x0F, B10010000 | 14);
}
void cmovng(const Reg32e& reg, const Operand& op)
{
	opModRM(reg, op, op.isREG(i32e), op.isMEM(), 0x0F, B01000000 | 14);
}
void jng(const char *label, LabelType type = T_AUTO)
{
	opJmp(label, type, 0x7E, 0x8E, 0x0F);
}
void setng(const Operand& op)
{
	opR_ModM(op, 8, 3, 0, 0x0F, B10010000 | 14);
}

// cc = 15 (nle / g)
void cmovnle(const Reg32e& reg, const Operand& op)
{
	opModRM(reg, op, op.isREG(i32e), op.isMEM(), 0x0F, B01000000 | 15);
}
void jnle(const char *label, LabelType type = T_AUTO)
{
	opJmp(label, type, 0x7F, 0x8F, 0x0F);
}
void setnle(const Operand& op)
{
	opR_ModM(op, 8, 3, 0, 0x0F, B10010000 | 15);
}
void cmovg(const Reg32e& reg, const Operand& op)
{
	opModRM(reg, op, op.isREG(i32e), op.isMEM(), 0x0F, B01000000 | 15);
}
void jg(const char *label, LabelType type = T_AUTO)
{
	opJmp(label, type, 0x7F, 0x8F, 0x0F);
}
void setg(const Operand& op)
{
	opR_ModM(op, 8, 3, 0, 0x0F, B10010000 | 15);
}
// Mode-dependent fixed-byte instructions: cdqe is only declared when XBYAK64
// is defined; the remaining mnemonics are only declared when it is not.
#ifdef XBYAK64
void cdqe()
{
	db(0x48);
	db(0x98);
}
#else
void aaa()
{
	db(0x37);
}
void aad()
{
	db(0xD5);
	db(0x0A);
}
void aam()
{
	db(0xD4);
	db(0x0A);
}
void aas()
{
	db(0x3F);
}
void daa()
{
	db(0x27);
}
void das()
{
	db(0x2F);
}
void popad()
{
	db(0x61);
}
void popfd()
{
	db(0x9D);
}
// pusha and pushad emit the same byte.
void pusha()
{
	db(0x60);
}
void pushad()
{
	db(0x60);
}
void pushfd()
{
	db(0x9C);
}
// popa emits the same byte as popad.
void popa()
{
	db(0x61);
}
#endif
// Operand-less instructions: each emitter writes its fixed byte sequence with
// db(), one call per byte, in encoding order.
void cbw()
{
	db(0x66);
	db(0x98);
}
void cdq()
{
	db(0x99);
}
void clc()
{
	db(0xF8);
}
void cld()
{
	db(0xFC);
}
void cli()
{
	db(0xFA);
}
void cmc()
{
	db(0xF5);
}
void cpuid()
{
	db(0x0F);
	db(0xA2);
}
void cwd()
{
	db(0x66);
	db(0x99);
}
void cwde()
{
	db(0x98);
}
void lahf()
{
	db(0x9F);
}
// Emits only the 0xF0 byte; the instruction it applies to is emitted separately.
void lock()
{
	db(0xF0);
}
void nop()
{
	db(0x90);
}
void sahf()
{
	db(0x9E);
}
void stc()
{
	db(0xF9);
}
void std()
{
	db(0xFD);
}
void sti()
{
	db(0xFB);
}
void emms()
{
	db(0x0F);
	db(0x77);
}
void pause()
{
	db(0xF3);
	db(0x90);
}
void sfence()
{
	db(0x0F);
	db(0xAE);
	db(0xF8);
}
void lfence()
{
	db(0x0F);
	db(0xAE);
	db(0xE8);
}
void mfence()
{
	db(0x0F);
	db(0xAE);
	db(0xF0);
}
void monitor()
{
	db(0x0F);
	db(0x01);
	db(0xC8);
}
void mwait()
{
	db(0x0F);
	db(0x01);
	db(0xC9);
}
void rdmsr()
{
	db(0x0F);
	db(0x32);
}
void rdpmc()
{
	db(0x0F);
	db(0x33);
}
void rdtsc()
{
	db(0x0F);
	db(0x31);
}
void wait()
{
	db(0x9B);
}
void wbinvd()
{
	db(0x0F);
	db(0x09);
}
void wrmsr()
{
	db(0x0F);
	db(0x30);
}
void xlatb()
{
	db(0xD7);
}
void popf()
{
	db(0x9D);
}
void pushf()
{
	db(0x9C);
}
void adc(const Operand& op1, const Operand& op2) { opRM_RM(op1, op2, 0x10); }
void adc(const Operand& op, uint32 imm) { opRM_I(op, imm, 0x10, 2); }
void add(const Operand& op1, const Operand& op2) { opRM_RM(op1, op2, 0x00); }
void add(const Operand& op, uint32 imm) { opRM_I(op, imm, 0x00, 0); }
void and(const Operand& op1, const Operand& op2) { opRM_RM(op1, op2, 0x20); }
void and(const Operand& op, uint32 imm) { opRM_I(op, imm, 0x20, 4); }
void cmp(const Operand& op1, const Operand& op2) { opRM_RM(op1, op2, 0x38); }
void cmp(const Operand& op, uint32 imm) { opRM_I(op, imm, 0x38, 7); }
void or(const Operand& op1, const Operand& op2) { opRM_RM(op1, op2, 0x08); }
void or(const Operand& op, uint32 imm) { opRM_I(op, imm, 0x08, 1); }
void sbb(const Operand& op1, const Operand& op2) { opRM_RM(op1, op2, 0x18); }
void sbb(const Operand& op, uint32 imm) { opRM_I(op, imm, 0x18, 3); }
void sub(const Operand& op1, const Operand& op2) { opRM_RM(op1, op2, 0x28); }
void sub(const Operand& op, uint32 imm) { opRM_I(op, imm, 0x28, 5); }
void xor(const Operand& op1, const Operand& op2) { opRM_RM(op1, op2, 0x30); }
void xor(const Operand& op, uint32 imm) { opRM_I(op, imm, 0x30, 6); }
void dec(const Operand& op) { opIncDec(op, 0x48, 1); }
void inc(const Operand& op) { opIncDec(op, 0x40, 0); }
void div(const Operand& op) { opR_ModM(op, 0, 3, 6, 0xF6); }
void idiv(const Operand& op) { opR_ModM(op, 0, 3, 7, 0xF6); }
void imul(const Operand& op) { opR_ModM(op, 0, 3, 5, 0xF6); }
void mul(const Operand& op) { opR_ModM(op, 0, 3, 4, 0xF6); }
void neg(const Operand& op) { opR_ModM(op, 0, 3, 3, 0xF6); }
void not(const Operand& op) { opR_ModM(op, 0, 3, 2, 0xF6); }
void rcl(const Operand& op, int imm) { opShift(op, imm, 2); }
void rcl(const Operand& op, const Reg8& cl) { opShift(op, cl, 2); }
void rcr(const Operand& op, int imm) { opShift(op, imm, 3); }
void rcr(const Operand& op, const Reg8& cl) { opShift(op, cl, 3); }
void rol(const Operand& op, int imm) { opShift(op, imm, 0); }
void rol(const Operand& op, const Reg8& cl) { opShift(op, cl, 0); }
void ror(const Operand& op, int imm) { opShift(op, imm, 1); }
void ror(const Operand& op, const Reg8& cl) { opShift(op, cl, 1); }
void sar(const Operand& op, int imm) { opShift(op, imm, 7); }
void sar(const Operand& op, const Reg8& cl) { opShift(op, cl, 7); }
void shl(const Operand& op, int imm) { opShift(op, imm, 4); }
void shl(const Operand& op, const Reg8& cl) { opShift(op, cl, 4); }
void shr(const Operand& op, int imm) { opShift(op, imm, 5); }
void shr(const Operand& op, const Reg8& cl) { opShift(op, cl, 5); }
void sal(const Operand& op, int imm) { opShift(op, imm, 4); }
void sal(const Operand& op, const Reg8& cl) { opShift(op, cl, 4); }
void shld(const Operand& op, const Reg& reg, uint8 imm) { opShxd(op, reg, imm, 0xA4); }
void shld(const Operand& op, const Reg& reg, const Reg8& cl) { opShxd(op, reg, 0, 0xA4, &cl); }
void shrd(const Operand& op, const Reg& reg, uint8 imm) { opShxd(op, reg, imm, 0xAC); }
void shrd(const Operand& op, const Reg& reg, const Reg8& cl) { opShxd(op, reg, 0, 0xAC, &cl); }
// Bit-scan emitters. The source must be a 16-bit or i32e-class register, or a
// memory operand; those two checks are evaluated here and handed to opModRM()
// together with the 0x0F-prefixed opcode.
void bsf(const Reg& reg, const Operand& op)
{
	opModRM(reg, op, op.isREG(16 | i32e), op.isMEM(), 0x0F, 0xBC);
}
void bsr(const Reg& reg, const Operand& op)
{
	opModRM(reg, op, op.isREG(16 | i32e), op.isMEM(), 0x0F, 0xBD);
}
// Emitters that use the extended opMMX() form:
//   opMMX(mmx, op, opcode, 0x66, imm, extra)
// Every method except palignr passes 256 for imm (presumably "no immediate" --
// confirm against opMMX) and 0x38 for extra. palignr passes its immediate
// truncated to uint8 and 0x3a for extra.
void pshufb(const Mmx& mmx, const Operand& op)
{
	opMMX(mmx, op, 0x00, 0x66, 256, 0x38);
}
void phaddw(const Mmx& mmx, const Operand& op)
{
	opMMX(mmx, op, 0x01, 0x66, 256, 0x38);
}
void phaddd(const Mmx& mmx, const Operand& op)
{
	opMMX(mmx, op, 0x02, 0x66, 256, 0x38);
}
void phaddsw(const Mmx& mmx, const Operand& op)
{
	opMMX(mmx, op, 0x03, 0x66, 256, 0x38);
}
void pmaddubsw(const Mmx& mmx, const Operand& op)
{
	opMMX(mmx, op, 0x04, 0x66, 256, 0x38);
}
void phsubw(const Mmx& mmx, const Operand& op)
{
	opMMX(mmx, op, 0x05, 0x66, 256, 0x38);
}
void phsubd(const Mmx& mmx, const Operand& op)
{
	opMMX(mmx, op, 0x06, 0x66, 256, 0x38);
}
void phsubsw(const Mmx& mmx, const Operand& op)
{
	opMMX(mmx, op, 0x07, 0x66, 256, 0x38);
}
void psignb(const Mmx& mmx, const Operand& op)
{
	opMMX(mmx, op, 0x08, 0x66, 256, 0x38);
}
void psignw(const Mmx& mmx, const Operand& op)
{
	opMMX(mmx, op, 0x09, 0x66, 256, 0x38);
}
void psignd(const Mmx& mmx, const Operand& op)
{
	opMMX(mmx, op, 0x0A, 0x66, 256, 0x38);
}
void pmulhrsw(const Mmx& mmx, const Operand& op)
{
	opMMX(mmx, op, 0x0B, 0x66, 256, 0x38);
}
void pabsb(const Mmx& mmx, const Operand& op)
{
	opMMX(mmx, op, 0x1C, 0x66, 256, 0x38);
}
void pabsw(const Mmx& mmx, const Operand& op)
{
	opMMX(mmx, op, 0x1D, 0x66, 256, 0x38);
}
void pabsd(const Mmx& mmx, const Operand& op)
{
	opMMX(mmx, op, 0x1E, 0x66, 256, 0x38);
}
void palignr(const Mmx& mmx, const Operand& op, int imm)
{
	opMMX(mmx, op, 0x0f, 0x66, static_cast<uint8>(imm), 0x3a);
}
// Variable blend, pack, 64-bit compare and test emitters, using the extended
// opGen() form: opGen(xmm, op, opcode, 0x66, isXMM_XMMorMEM, 256, 0x38).
// 256 sits in the immediate position (presumably "no immediate" -- confirm
// against opGen).
void blendvpd(const Xmm& xmm, const Operand& op)
{
	opGen(xmm, op, 0x15, 0x66, isXMM_XMMorMEM, 256, 0x38);
}
void blendvps(const Xmm& xmm, const Operand& op)
{
	opGen(xmm, op, 0x14, 0x66, isXMM_XMMorMEM, 256, 0x38);
}
void packusdw(const Xmm& xmm, const Operand& op)
{
	opGen(xmm, op, 0x2B, 0x66, isXMM_XMMorMEM, 256, 0x38);
}
void pblendvb(const Xmm& xmm, const Operand& op)
{
	opGen(xmm, op, 0x10, 0x66, isXMM_XMMorMEM, 256, 0x38);
}
void pcmpeqq(const Xmm& xmm, const Operand& op)
{
	opGen(xmm, op, 0x29, 0x66, isXMM_XMMorMEM, 256, 0x38);
}
void ptest(const Xmm& xmm, const Operand& op)
{
	opGen(xmm, op, 0x17, 0x66, isXMM_XMMorMEM, 256, 0x38);
}
// Packed move-with-extension emitters: the pmovsx* family uses opcode bytes
// 0x20..0x25 and the pmovzx* family uses 0x30..0x35. All go through
// opGen(xmm, op, opcode, 0x66, isXMM_XMMorMEM, 256, 0x38).
void pmovsxbw(const Xmm& xmm, const Operand& op)
{
	opGen(xmm, op, 0x20, 0x66, isXMM_XMMorMEM, 256, 0x38);
}
void pmovsxbd(const Xmm& xmm, const Operand& op)
{
	opGen(xmm, op, 0x21, 0x66, isXMM_XMMorMEM, 256, 0x38);
}
void pmovsxbq(const Xmm& xmm, const Operand& op)
{
	opGen(xmm, op, 0x22, 0x66, isXMM_XMMorMEM, 256, 0x38);
}
void pmovsxwd(const Xmm& xmm, const Operand& op)
{
	opGen(xmm, op, 0x23, 0x66, isXMM_XMMorMEM, 256, 0x38);
}
void pmovsxwq(const Xmm& xmm, const Operand& op)
{
	opGen(xmm, op, 0x24, 0x66, isXMM_XMMorMEM, 256, 0x38);
}
void pmovsxdq(const Xmm& xmm, const Operand& op)
{
	opGen(xmm, op, 0x25, 0x66, isXMM_XMMorMEM, 256, 0x38);
}
void pmovzxbw(const Xmm& xmm, const Operand& op)
{
	opGen(xmm, op, 0x30, 0x66, isXMM_XMMorMEM, 256, 0x38);
}
void pmovzxbd(const Xmm& xmm, const Operand& op)
{
	opGen(xmm, op, 0x31, 0x66, isXMM_XMMorMEM, 256, 0x38);
}
void pmovzxbq(const Xmm& xmm, const Operand& op)
{
	opGen(xmm, op, 0x32, 0x66, isXMM_XMMorMEM, 256, 0x38);
}
void pmovzxwd(const Xmm& xmm, const Operand& op)
{
	opGen(xmm, op, 0x33, 0x66, isXMM_XMMorMEM, 256, 0x38);
}
void pmovzxwq(const Xmm& xmm, const Operand& op)
{
	opGen(xmm, op, 0x34, 0x66, isXMM_XMMorMEM, 256, 0x38);
}
void pmovzxdq(const Xmm& xmm, const Operand& op)
{
	opGen(xmm, op, 0x35, 0x66, isXMM_XMMorMEM, 256, 0x38);
}
void pminsb(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x38, 0x66, isXMM_XMMorMEM, 256, 0x38); }
void pminsd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x39, 0x66, isXMM_XMMorMEM, 256, 0x38); }
void pminuw(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x3A, 0x66, isXMM_XMMorMEM, 256, 0x38); }
void pminud(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x3B, 0x66, isXMM_XMMorMEM, 256, 0x38); }
void pmaxsb(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x3C, 0x66, isXMM_XMMorMEM, 256, 0x38); }
void pmaxsd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x3D, 0x66, isXMM_XMMorMEM, 256, 0x38); }
void pmaxuw(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x3E, 0x66, isXMM_XMMorMEM, 256, 0x38); }
void pmaxud(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x3F, 0x66, isXMM_XMMorMEM, 256, 0x38); }
void pmuldq(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x28, 0x66, isXMM_XMMorMEM, 256, 0x38); }
void pmulld(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x40, 0x66, isXMM_XMMorMEM, 256, 0x38); }
void phminposuw(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x41, 0x66, isXMM_XMMorMEM, 256, 0x38); }
void pcmpgtq(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x37, 0x66, isXMM_XMMorMEM, 256, 0x38); }
// SSE4 instructions that take an 8-bit immediate. Each wrapper hands opGen its
// opcode byte, the 0x66 prefix value, the isXMM_XMMorMEM operand check, the
// immediate and the 0x3A escape byte. imm is narrowed with static_cast<uint8>,
// so only its low 8 bits are passed on.

// Immediate-controlled blends, dot products and multiple sum of absolute differences.
void blendpd(const Xmm& xmm, const Operand& op, int imm) { opGen(xmm, op, 0x0D, 0x66, isXMM_XMMorMEM, static_cast<uint8>(imm), 0x3A); }
void blendps(const Xmm& xmm, const Operand& op, int imm) { opGen(xmm, op, 0x0C, 0x66, isXMM_XMMorMEM, static_cast<uint8>(imm), 0x3A); }
void dppd(const Xmm& xmm, const Operand& op, int imm) { opGen(xmm, op, 0x41, 0x66, isXMM_XMMorMEM, static_cast<uint8>(imm), 0x3A); }
void dpps(const Xmm& xmm, const Operand& op, int imm) { opGen(xmm, op, 0x40, 0x66, isXMM_XMMorMEM, static_cast<uint8>(imm), 0x3A); }
void mpsadbw(const Xmm& xmm, const Operand& op, int imm) { opGen(xmm, op, 0x42, 0x66, isXMM_XMMorMEM, static_cast<uint8>(imm), 0x3A); }
void pblendw(const Xmm& xmm, const Operand& op, int imm) { opGen(xmm, op, 0x0E, 0x66, isXMM_XMMorMEM, static_cast<uint8>(imm), 0x3A); }
// Floating-point rounding, opcodes 0x08..0x0B (packed/scalar, single/double).
void roundps(const Xmm& xmm, const Operand& op, int imm) { opGen(xmm, op, 0x08, 0x66, isXMM_XMMorMEM, static_cast<uint8>(imm), 0x3A); }
void roundpd(const Xmm& xmm, const Operand& op, int imm) { opGen(xmm, op, 0x09, 0x66, isXMM_XMMorMEM, static_cast<uint8>(imm), 0x3A); }
void roundss(const Xmm& xmm, const Operand& op, int imm) { opGen(xmm, op, 0x0A, 0x66, isXMM_XMMorMEM, static_cast<uint8>(imm), 0x3A); }
void roundsd(const Xmm& xmm, const Operand& op, int imm) { opGen(xmm, op, 0x0B, 0x66, isXMM_XMMorMEM, static_cast<uint8>(imm), 0x3A); }
// String compares, opcodes 0x60..0x63. NOTE(review): per the Intel SDM these
// write their result to XMM0 (the ...m forms) or ECX (the ...i forms) implicitly.
void pcmpestrm(const Xmm& xmm, const Operand& op, int imm) { opGen(xmm, op, 0x60, 0x66, isXMM_XMMorMEM, static_cast<uint8>(imm), 0x3A); }
void pcmpestri(const Xmm& xmm, const Operand& op, int imm) { opGen(xmm, op, 0x61, 0x66, isXMM_XMMorMEM, static_cast<uint8>(imm), 0x3A); }
void pcmpistrm(const Xmm& xmm, const Operand& op, int imm) { opGen(xmm, op, 0x62, 0x66, isXMM_XMMorMEM, static_cast<uint8>(imm), 0x3A); }
void pcmpistri(const Xmm& xmm, const Operand& op, int imm) { opGen(xmm, op, 0x63, 0x66, isXMM_XMMorMEM, static_cast<uint8>(imm), 0x3A); }
// Memory-only instructions sharing opcode 0F AE. The Reg32(n) argument carries no
// real register: its index (2, 3, 7) is presumably what opModM places in the
// ModRM reg field to select the instruction (/2 ldmxcsr, /3 stmxcsr, /7 clflush).
void ldmxcsr(const Address& addr) { opModM(addr, Reg32(2), 0x0F, 0xAE); }
void stmxcsr(const Address& addr) { opModM(addr, Reg32(3), 0x0F, 0xAE); }
void clflush(const Address& addr) { opModM(addr, Reg32(7), 0x0F, 0xAE); }
// Non-temporal stores of an xmm register (0F 2B and 0F E7). The xmm index is
// re-wrapped as a Reg16 -- presumably so that opModM emits the 0x66 prefix these
// encodings require; confirm in opModM.
void movntpd(const Address& addr, const Xmm& reg) { opModM(addr, Reg16(reg.getIdx()), 0x0F, 0x2B); }
void movntdq(const Address& addr, const Xmm& reg) { opModM(addr, Reg16(reg.getIdx()), 0x0F, 0xE7); }
// Move with sign extension (base opcode 0xBE) / zero extension (base opcode 0xB6);
// operand validation and size handling are delegated to opMovxx.
void movsx(const Reg& reg, const Operand& op) { opMovxx(reg, op, 0xBE); }
void movzx(const Reg& reg, const Operand& op) { opMovxx(reg, op, 0xB6); }

View File

@ -0,0 +1,185 @@
#ifndef XBYAK_XBYAK_UTIL_H_
#define XBYAK_XBYAK_UTIL_H_
/**
utility class for Xbyak
@note this header is under construction
*/
#ifdef _WIN32
#if (_MSC_VER < 1400) && defined(XBYAK32)
// Fallback __cpuid for 32-bit builds with MSVC older than _MSC_VER 1400, selected
// by the surrounding #if in place of <intrin.h>.
// First argument: pointer to four ints that receive EAX, EBX, ECX, EDX (in that
// order). Second argument: the value loaded into EAX before CPUID executes.
// The function is naked (no compiler-generated prologue/epilogue), so both
// arguments are read directly from the stack relative to esp.
static inline __declspec(naked) void __cpuid(int[4], int)
{
__asm {
push ebx // preserved across the call: cpuid overwrites ebx
push esi // preserved: used below as the output pointer
mov eax, dword ptr [esp + 4 * 2 + 8] // eaxIn: skip 2 pushed regs, return address and first argument
cpuid
mov esi, dword ptr [esp + 4 * 2 + 4] // data: skip 2 pushed regs and return address
mov dword ptr [esi], eax
mov dword ptr [esi + 4], ebx
mov dword ptr [esi + 8], ecx
mov dword ptr [esi + 12], edx
pop esi
pop ebx
ret // plain ret: the caller removes the arguments
}
}
#else
#include <intrin.h> // for __cpuid
#endif
#else
// Select the CPUID primitive for non-Windows (GCC-compatible) compilers.
// __GNUC_PREREQ is a glibc macro from <features.h>; it is undefined on non-glibc
// toolchains, or when no libc header has been included before this one, and
// "#if __GNUC_PREREQ(4, 3)" then fails to preprocess. The version test is
// therefore done with a local macro built only on compiler-predefined symbols.
#if defined(__GNUC__) && defined(__GNUC_MINOR__)
#define XBYAK_UTIL_GNUC_PREREQ(major, minor) ((((__GNUC__) << 16) + (__GNUC_MINOR__)) >= (((major) << 16) + (minor)))
#else
#define XBYAK_UTIL_GNUC_PREREQ(major, minor) 0
#endif
#if XBYAK_UTIL_GNUC_PREREQ(4, 3)
#include <cpuid.h> // supplies __cpuid(level, a, b, c, d)
#else
// Older or other compilers: same five-argument shape as the <cpuid.h> macro.
// eaxIn is tied to the EAX output operand ("0"), so it is loaded into EAX first.
#define __cpuid(eaxIn, a, b, c, d) __asm__("cpuid\n" : "=a"(a), "=b"(b), "=c"(c), "=d"(d) : "0"(eaxIn))
#endif
#undef XBYAK_UTIL_GNUC_PREREQ
#endif
namespace Xbyak { namespace util {
/**
    CPU detection class
    The constructor runs CPUID once and stores the detected vendor and
    instruction-set extensions as a bit set of Type flags; query it with has().
*/
class Cpu {
    unsigned int type_;
    /**
        Pack a 4-character tag into the 32-bit value CPUID returns for those
        characters (first character in the lowest byte). Built arithmetically so
        no char array is read through an unsigned int pointer.
    */
    static inline unsigned int pack4(const char *s)
    {
        return static_cast<unsigned int>(static_cast<unsigned char>(s[0]))
            | (static_cast<unsigned int>(static_cast<unsigned char>(s[1])) << 8)
            | (static_cast<unsigned int>(static_cast<unsigned char>(s[2])) << 16)
            | (static_cast<unsigned int>(static_cast<unsigned char>(s[3])) << 24);
    }
public:
    /**
        Execute CPUID
        @param eaxIn leaf number placed in EAX
        @param data receives EAX, EBX, ECX, EDX in data[0]..data[3]
    */
    static inline void getCpuid(unsigned int eaxIn, unsigned int data[4])
    {
#ifdef _WIN32
        __cpuid(reinterpret_cast<int*>(data), eaxIn);
#else
        __cpuid(eaxIn, data[0], data[1], data[2], data[3]);
#endif
    }
    /**
        Feature / vendor flags; values are single bits and may be OR-ed.
        @note tSSE4a and tSSE5 are declared but never set by the constructor.
    */
    enum Type {
        NONE = 0,
        tMMX = 1 << 0,
        tMMX2 = 1 << 1,
        tCMOV = 1 << 2,
        tSSE = 1 << 3,
        tSSE2 = 1 << 4,
        tSSE3 = 1 << 5,
        tSSSE3 = 1 << 6,
        tSSE41 = 1 << 7,
        tSSE42 = 1 << 8,
        tPOPCNT = 1 << 9,
        t3DN = 1 << 16,
        tE3DN = 1 << 17,
        tSSE4a = 1 << 18,
        tSSE5 = 1 << 11,
        tINTEL = 1 << 24,
        tAMD = 1 << 25
    };
    Cpu()
        : type_(NONE)
    {
        unsigned int data[4];
        getCpuid(0, data);
        // Leaf 0 returns the vendor string in EBX:EDX:ECX; ECX holds its last four
        // characters. Keep a copy: data[] is overwritten by the queries below, and
        // the vendor must be decided from leaf 0 only.
        const unsigned int vendorTail = data[2];
        if (vendorTail == pack4("cAMD")) { // "AuthenticAMD"
            type_ |= tAMD;
            // AMD extended feature bits (EDX of leaf 0x80000001).
            getCpuid(0x80000001, data);
            if (data[3] & (1U << 31)) type_ |= t3DN;
            if (data[3] & (1U << 15)) type_ |= tCMOV;
            if (data[3] & (1U << 30)) type_ |= tE3DN;
            if (data[3] & (1U << 22)) type_ |= tMMX2;
        } else if (vendorTail == pack4("ntel")) { // "GenuineIntel"
            type_ |= tINTEL;
        }
        // Standard feature bits: leaf 1, ECX in data[2] and EDX in data[3].
        getCpuid(1, data);
        if (data[2] & (1U << 0)) type_ |= tSSE3;
        if (data[2] & (1U << 9)) type_ |= tSSSE3;
        if (data[2] & (1U << 19)) type_ |= tSSE41;
        if (data[2] & (1U << 20)) type_ |= tSSE42;
        if (data[2] & (1U << 23)) type_ |= tPOPCNT;
        if (data[3] & (1U << 15)) type_ |= tCMOV;
        if (data[3] & (1U << 23)) type_ |= tMMX;
        if (data[3] & (1U << 25)) type_ |= tMMX2 | tSSE; // the SSE bit also implies tMMX2 here
        if (data[3] & (1U << 26)) type_ |= tSSE2;
    }
    /**
        @param type one Type flag (or an OR of several)
        @return true if at least one of the requested flags was detected
    */
    bool has(Type type) const
    {
        return (type & type_) != 0;
    }
};
#ifdef XBYAK32
namespace local {
// XBYAK_LOCAL_DEFINE_SET_EIP_TO_REG(x) defines a helper function set_eip_to_<x>()
// whose only job is to copy the dword at [esp] into register x. When the helper
// is entered via a call instruction, that dword is the return address, i.e. the
// address of the instruction following the call.
#ifdef _WIN32
// MSVC variant: the function is naked (no prologue), so [esp] is still the return
// address when the mov executes; the explicit ret returns to the caller.
#define XBYAK_LOCAL_DEFINE_SET_EIP_TO_REG(x) static inline __declspec(naked) void set_eip_to_ ## x() { \
__asm { mov x, dword ptr [esp] } __asm { ret } \
}
#else
// GCC variant. NOTE(review): this function is not naked, so (%esp) holds the
// return address only if the compiler emits no prologue that changes esp before
// the asm statement -- confirm for the compiler flags in use.
#define XBYAK_LOCAL_DEFINE_SET_EIP_TO_REG(x) static inline void set_eip_to_ ## x() { \
__asm__ volatile("movl (%esp), %" #x); \
}
#endif
// One helper per supported destination register (every 32-bit GPR except esp).
XBYAK_LOCAL_DEFINE_SET_EIP_TO_REG(eax)
XBYAK_LOCAL_DEFINE_SET_EIP_TO_REG(ecx)
XBYAK_LOCAL_DEFINE_SET_EIP_TO_REG(edx)
XBYAK_LOCAL_DEFINE_SET_EIP_TO_REG(ebx)
XBYAK_LOCAL_DEFINE_SET_EIP_TO_REG(esi)
XBYAK_LOCAL_DEFINE_SET_EIP_TO_REG(edi)
XBYAK_LOCAL_DEFINE_SET_EIP_TO_REG(ebp)
#undef XBYAK_LOCAL_DEFINE_SET_EIP_TO_REG
} // end of local
/**
    Adds setEipTo() to a code generator.
    @tparam Gen generator base class; must be constructible from (maxSize, userPtr)
    and provide call() (plus getCurr()/pop() for the disabled code path)
*/
template<class Gen>
struct EnableSetEip : public Gen {
    EnableSetEip(size_t maxSize = DEFAULT_MAX_CODE_SIZE, void *userPtr = 0)
        : Gen(maxSize, userPtr)
    {
    }
    /**
        get eip to out register
        Emits a call to the matching local::set_eip_to_<reg> helper, which copies
        its own return address (the address right after the emitted call) into
        the register.
        @param out destination register
        @note out = eax, ecx, edx, ebx, esi, edi or ebp; any other register
        (e.g. esp) hits assert(0)
    */
    void setEipTo(const Xbyak::Reg32& out)
    {
#if 0
        // Disabled alternative: call the next instruction, then pop the pushed
        // return address into out.
        Gen::call(Gen::getCurr() + 5);
        Gen::pop(out);
#else
        switch (out.getIdx()) {
        case Xbyak::Operand::EAX:
            Gen::call((void*)local::set_eip_to_eax);
            break;
        case Xbyak::Operand::ECX:
            Gen::call((void*)local::set_eip_to_ecx);
            break;
        case Xbyak::Operand::EDX:
            Gen::call((void*)local::set_eip_to_edx);
            break;
        case Xbyak::Operand::EBX:
            Gen::call((void*)local::set_eip_to_ebx);
            break;
        case Xbyak::Operand::ESI:
            Gen::call((void*)local::set_eip_to_esi);
            break;
        case Xbyak::Operand::EDI:
            // Must target the edi helper; calling the esi helper here would
            // clobber esi and leave edi unset.
            Gen::call((void*)local::set_eip_to_edi);
            break;
        case Xbyak::Operand::EBP:
            Gen::call((void*)local::set_eip_to_ebp);
            break;
        default:
            assert(0); // no helper exists for this register
        }
#endif
    }
};
#endif
} } // end of util
#endif