mirror of https://github.com/PCSX2/pcsx2.git
GSdx software renderer speed-up, using xbyak to JIT compile a few things, more to follow.
git-svn-id: http://pcsx2.googlecode.com/svn/trunk@469 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
parent
5249b67a56
commit
9ee9d817c4
|
@ -871,11 +871,11 @@ GPUDrawScanline::GPUDrawScanlineMap::GPUDrawScanlineMap()
|
|||
#endif
|
||||
}
|
||||
|
||||
IDrawScanline::DrawScanlinePtr GPUDrawScanline::GPUDrawScanlineMap::GetDefaultFunction(DWORD dw)
|
||||
IDrawScanline::DrawScanlinePtr GPUDrawScanline::GPUDrawScanlineMap::GetDefaultFunction(DWORD key)
|
||||
{
|
||||
GPUScanlineSelector sel;
|
||||
|
||||
sel.dw = dw;
|
||||
sel.key = key;
|
||||
|
||||
return m_default[sel];
|
||||
}
|
||||
|
@ -899,11 +899,11 @@ GPUDrawScanline::GPUSetupPrimMap::GPUSetupPrimMap()
|
|||
InitSP_SPRITE(1);
|
||||
}
|
||||
|
||||
IDrawScanline::SetupPrimPtr GPUDrawScanline::GPUSetupPrimMap::GetDefaultFunction(DWORD dw)
|
||||
IDrawScanline::SetupPrimPtr GPUDrawScanline::GPUSetupPrimMap::GetDefaultFunction(DWORD key)
|
||||
{
|
||||
DWORD sprite = (dw >> 0) & 1;
|
||||
DWORD tme = (dw >> 1) & 1;
|
||||
DWORD iip = (dw >> 2) & 1;
|
||||
DWORD sprite = (key >> 0) & 1;
|
||||
DWORD tme = (key >> 1) & 1;
|
||||
DWORD iip = (key >> 2) & 1;
|
||||
|
||||
return m_default[sprite][tme][iip];
|
||||
}
|
||||
|
|
|
@ -50,9 +50,9 @@ union GPUScanlineSelector
|
|||
DWORD tfx:2; // 5
|
||||
};
|
||||
|
||||
DWORD dw;
|
||||
DWORD key;
|
||||
|
||||
operator DWORD() {return dw & 0xff;}
|
||||
operator DWORD() {return key & 0xff;}
|
||||
};
|
||||
|
||||
__declspec(align(16)) struct GPUScanlineEnvironment
|
||||
|
@ -87,28 +87,28 @@ class GPUDrawScanline : public GSAlignedClass<16>, public IDrawScanline
|
|||
|
||||
//
|
||||
|
||||
class GPUDrawScanlineMap : public GSFunctionMap<DrawScanlinePtr>
|
||||
class GPUDrawScanlineMap : public GSFunctionMap<DWORD, DrawScanlinePtr>
|
||||
{
|
||||
DrawScanlinePtr m_default[256];
|
||||
|
||||
public:
|
||||
GPUDrawScanlineMap();
|
||||
|
||||
DrawScanlinePtr GetDefaultFunction(DWORD dw);
|
||||
DrawScanlinePtr GetDefaultFunction(DWORD key);
|
||||
};
|
||||
|
||||
GPUDrawScanlineMap m_ds;
|
||||
|
||||
//
|
||||
|
||||
class GPUSetupPrimMap : public GSFunctionMap<SetupPrimPtr>
|
||||
class GPUSetupPrimMap : public GSFunctionMap<DWORD, SetupPrimPtr>
|
||||
{
|
||||
SetupPrimPtr m_default[2][2][2];
|
||||
|
||||
public:
|
||||
GPUSetupPrimMap();
|
||||
|
||||
SetupPrimPtr GetDefaultFunction(DWORD dw);
|
||||
SetupPrimPtr GetDefaultFunction(DWORD key);
|
||||
};
|
||||
|
||||
GPUSetupPrimMap m_sp;
|
||||
|
|
|
@ -125,7 +125,7 @@ protected:
|
|||
|
||||
GPUScanlineParam p;
|
||||
|
||||
p.sel.dw = 0;
|
||||
p.sel.key = 0;
|
||||
p.sel.iip = env.PRIM.IIP;
|
||||
p.sel.me = env.STATUS.ME;
|
||||
p.sel.abe = env.PRIM.ABE;
|
||||
|
|
|
@ -567,41 +567,38 @@ REG64_(GIFReg, COLCLAMP)
|
|||
REG_END
|
||||
|
||||
REG64_(GIFReg, DIMX)
|
||||
UINT32 DM00:3;
|
||||
UINT32 _PAD00:1;
|
||||
UINT32 DM01:3;
|
||||
UINT32 _PAD01:1;
|
||||
UINT32 DM02:3;
|
||||
UINT32 _PAD02:1;
|
||||
UINT32 DM03:3;
|
||||
UINT32 _PAD03:1;
|
||||
|
||||
UINT32 DM10:3;
|
||||
UINT32 _PAD10:1;
|
||||
UINT32 DM11:3;
|
||||
UINT32 _PAD11:1;
|
||||
UINT32 DM12:3;
|
||||
UINT32 _PAD12:1;
|
||||
UINT32 DM13:3;
|
||||
UINT32 _PAD13:1;
|
||||
|
||||
UINT32 DM20:3;
|
||||
UINT32 _PAD20:1;
|
||||
UINT32 DM21:3;
|
||||
UINT32 _PAD21:1;
|
||||
UINT32 DM22:3;
|
||||
UINT32 _PAD22:1;
|
||||
UINT32 DM23:3;
|
||||
UINT32 _PAD23:1;
|
||||
|
||||
UINT32 DM30:3;
|
||||
UINT32 _PAD30:1;
|
||||
UINT32 DM31:3;
|
||||
UINT32 _PAD31:1;
|
||||
UINT32 DM32:3;
|
||||
UINT32 _PAD32:1;
|
||||
UINT32 DM33:3;
|
||||
UINT32 _PAD33:1;
|
||||
INT32 DM00:3;
|
||||
INT32 _PAD00:1;
|
||||
INT32 DM01:3;
|
||||
INT32 _PAD01:1;
|
||||
INT32 DM02:3;
|
||||
INT32 _PAD02:1;
|
||||
INT32 DM03:3;
|
||||
INT32 _PAD03:1;
|
||||
INT32 DM10:3;
|
||||
INT32 _PAD10:1;
|
||||
INT32 DM11:3;
|
||||
INT32 _PAD11:1;
|
||||
INT32 DM12:3;
|
||||
INT32 _PAD12:1;
|
||||
INT32 DM13:3;
|
||||
INT32 _PAD13:1;
|
||||
INT32 DM20:3;
|
||||
INT32 _PAD20:1;
|
||||
INT32 DM21:3;
|
||||
INT32 _PAD21:1;
|
||||
INT32 DM22:3;
|
||||
INT32 _PAD22:1;
|
||||
INT32 DM23:3;
|
||||
INT32 _PAD23:1;
|
||||
INT32 DM30:3;
|
||||
INT32 _PAD30:1;
|
||||
INT32 DM31:3;
|
||||
INT32 _PAD31:1;
|
||||
INT32 DM32:3;
|
||||
INT32 _PAD32:1;
|
||||
INT32 DM33:3;
|
||||
INT32 _PAD33:1;
|
||||
REG_END
|
||||
|
||||
REG64_(GIFReg, DTHE)
|
||||
|
|
|
@ -32,6 +32,12 @@ GSDrawScanline::GSDrawScanline(GSState* state, int id)
|
|||
|
||||
// Releases every code generator stored in m_dscg. The map holds raw pointers
// (BeginDraw() allocates them with new and inserts them), so they have to be
// deleted by hand here.
GSDrawScanline::~GSDrawScanline()
{
	for(POSITION cursor = m_dscg.GetHeadPosition(); cursor; )
	{
		// GetNextValue() yields the current value and advances the cursor.
		GSDrawScanlineCodeGenerator* generator = m_dscg.GetNextValue(cursor);

		delete generator;
	}
}
|
||||
|
||||
void GSDrawScanline::BeginDraw(const GSRasterizerData* data, Functions* f)
|
||||
|
@ -60,6 +66,7 @@ void GSDrawScanline::BeginDraw(const GSRasterizerData* data, Functions* f)
|
|||
m_env.afix2 = m_env.afix.yywwlh().sll16(7);
|
||||
m_env.frb = GSVector4i((int)env.FOGCOL.ai32[0] & 0x00ff00ff);
|
||||
m_env.fga = GSVector4i((int)(env.FOGCOL.ai32[0] >> 8) & 0x00ff00ff);
|
||||
m_env.dimx = env.dimx;
|
||||
|
||||
if(m_env.sel.fpsm == 1)
|
||||
{
|
||||
|
@ -163,7 +170,7 @@ void GSDrawScanline::BeginDraw(const GSRasterizerData* data, Functions* f)
|
|||
|
||||
//
|
||||
|
||||
f->sl = m_ds.Lookup(m_env.sel);
|
||||
f->sl = (DrawScanlinePtr)&GSDrawScanline::DrawScanline; // m_ds.Lookup(m_env.sel);
|
||||
|
||||
//
|
||||
|
||||
|
@ -186,16 +193,31 @@ void GSDrawScanline::BeginDraw(const GSRasterizerData* data, Functions* f)
|
|||
}
|
||||
|
||||
f->sp = m_sp.Lookup(sel);
|
||||
|
||||
//
|
||||
|
||||
GSDrawScanlineCodeGenerator* cg = NULL;
|
||||
|
||||
if(!m_dscg.Lookup(m_env.sel, cg))
|
||||
{
|
||||
cg = new GSDrawScanlineCodeGenerator(m_env);
|
||||
|
||||
m_dscg.SetAt(m_env.sel, cg);
|
||||
}
|
||||
|
||||
m_dsf = (DrawScanlineStaticPtr)cg->getCode();
|
||||
}
|
||||
|
||||
void GSDrawScanline::EndDraw(const GSRasterizerStats& stats)
|
||||
{
|
||||
m_ds.UpdateStats(stats, m_state->m_perfmon.GetFrame());
|
||||
// m_ds.UpdateStats(stats, m_state->m_perfmon.GetFrame());
|
||||
}
|
||||
|
||||
template<DWORD zbe, DWORD fge, DWORD tme, DWORD fst, DWORD iip>
|
||||
void GSDrawScanline::SetupPrim(const GSVertexSW* vertices, const GSVertexSW& dscan)
|
||||
{
|
||||
// TODO: clean up unused parts
|
||||
|
||||
// p
|
||||
|
||||
GSVector4 p = dscan.p;
|
||||
|
@ -311,47 +333,28 @@ void GSDrawScanline::SetupPrim(const GSVertexSW* vertices, const GSVertexSW& dsc
|
|||
}
|
||||
else
|
||||
{
|
||||
GSVector4i rgba = GSVector4i(vertices[0].c);
|
||||
GSVector4i c = GSVector4i(vertices[0].c);
|
||||
|
||||
GSVector4i rbga = rgba.upl16(rgba.zwxy());
|
||||
c = c.upl16(c.zwxy());
|
||||
|
||||
if(tme == 0)
|
||||
{
|
||||
rbga = rbga.srl16(7);
|
||||
|
||||
DWORD abe = m_env.sel.abe & 0x3f; // a, b, c
|
||||
|
||||
DWORD abea = m_env.sel.abea;
|
||||
DWORD abeb = m_env.sel.abeb;
|
||||
DWORD abec = m_env.sel.abec;
|
||||
DWORD abed = m_env.sel.abed;
|
||||
|
||||
if(fge == 0 && abe != 0x3f && !(abe & 0x15) && abea != abeb) // 0x15 = 010101b => a, b, c != 1
|
||||
{
|
||||
GSVector4i c[4];
|
||||
|
||||
c[0] = rbga;
|
||||
c[1] = rgba.zzzzh().zzzz();
|
||||
c[2] = GSVector4i::zero();
|
||||
c[3] = m_env.afix2;
|
||||
|
||||
GSVector4i cc = GSVector4i::lerp16<1>(c[abea], c[abeb], c[abec + 1]);
|
||||
|
||||
if(abed == 0)
|
||||
{
|
||||
cc = cc.add16(c[0]);
|
||||
}
|
||||
|
||||
m_env.c2.rb = cc.xxxx();
|
||||
m_env.c2.ga = cc.zzzz().mix16(c[1].srl16(7));
|
||||
}
|
||||
c = c.srl16(7);
|
||||
}
|
||||
|
||||
m_env.c.rb = rbga.xxxx();
|
||||
m_env.c.ga = rbga.zzzz();
|
||||
m_env.c.rb = c.xxxx();
|
||||
m_env.c.ga = c.zzzz();
|
||||
}
|
||||
}
|
||||
|
||||
// Member-function entry point handed to the rasterizer: BeginDraw() stores
// this function's address in f->sl. It only forwards its arguments to m_dsf,
// the plain function pointer that BeginDraw() obtains from the cached code
// generator via getCode().
void GSDrawScanline::DrawScanline(int top, int left, int right, const GSVertexSW& v)
|
||||
{
|
||||
// TODO: call this directly from rasterizer
|
||||
|
||||
m_dsf(top, left, right, v);
|
||||
}
|
||||
|
||||
/*
|
||||
GSVector4i GSDrawScanline::Wrap(const GSVector4i& t)
|
||||
{
|
||||
GSVector4i clamp = t.sat_i16(m_env.t.min, m_env.t.max);
|
||||
|
@ -1133,7 +1136,7 @@ void GSDrawScanline::DrawScanlineEx(int top, int left, int right, const GSVertex
|
|||
|
||||
GSVector4i rb, ga;
|
||||
|
||||
if(tfx == TFX_NONE && fge == 0 && abea != 1 && abeb != 1 && abec != 1 && abea != abeb)
|
||||
if(!iip && tfx == TFX_NONE && !fge && abea != 1 && abeb != 1 && abec != 1 && abea != abeb)
|
||||
{
|
||||
c[0] = m_env.c2.rb;
|
||||
c[1] = m_env.c2.ga;
|
||||
|
@ -1236,15 +1239,9 @@ void GSDrawScanline::DrawScanlineEx(int top, int left, int right, const GSVertex
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
*/
|
||||
void GSDrawScanline::DrawSolidRect(const GSVector4i& r, const GSVertexSW& v)
|
||||
{
|
||||
/*
|
||||
static FILE* s_fp = NULL;
|
||||
if(!s_fp) s_fp = fopen("c:\\log2.txt", "w");
|
||||
__int64 start = __rdtsc();
|
||||
int size = (r.z - r.x) * (r.w - r.y);
|
||||
*/
|
||||
ASSERT(r.y >= 0);
|
||||
ASSERT(r.w >= 0);
|
||||
|
||||
|
@ -1318,10 +1315,6 @@ int size = (r.z - r.x) * (r.w - r.y);
|
|||
}
|
||||
}
|
||||
}
|
||||
/*
|
||||
__int64 stop = __rdtsc();
|
||||
fprintf(s_fp, "%I64d => %I64d = %I64d (%d,%d - %d,%d) %d\n", start, stop, stop - start, r.x, r.y, r.z, r.w, size);
|
||||
*/
|
||||
}
|
||||
|
||||
template<class T, bool masked>
|
||||
|
@ -1396,7 +1389,7 @@ void GSDrawScanline::FillBlock(const GSVector4i* row, int* col, const GSVector4i
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
//
|
||||
|
||||
GSDrawScanline::GSDrawScanlineMap::GSDrawScanlineMap()
|
||||
|
@ -2736,11 +2729,11 @@ GSDrawScanline::GSDrawScanlineMap::GSDrawScanlineMap()
|
|||
#endif
|
||||
}
|
||||
|
||||
IDrawScanline::DrawScanlinePtr GSDrawScanline::GSDrawScanlineMap::GetDefaultFunction(DWORD dw)
|
||||
IDrawScanline::DrawScanlinePtr GSDrawScanline::GSDrawScanlineMap::GetDefaultFunction(DWORD key)
|
||||
{
|
||||
GSScanlineSelector sel;
|
||||
|
||||
sel.dw = dw;
|
||||
sel.key = key;
|
||||
|
||||
return m_default[sel.fpsm][sel.zpsm][sel.ztst][sel.iip];
|
||||
}
|
||||
|
@ -2755,19 +2748,19 @@ void GSDrawScanline::GSDrawScanlineMap::PrintStats()
|
|||
|
||||
while(pos)
|
||||
{
|
||||
DWORD dw;
|
||||
DWORD key;
|
||||
ActivePtr* p;
|
||||
|
||||
m_map_active.GetNextAssoc(pos, dw, p);
|
||||
m_map_active.GetNextAssoc(pos, key, p);
|
||||
|
||||
if(m_map.Lookup(dw))
|
||||
if(m_map.Lookup(key))
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
GSScanlineSelector sel;
|
||||
|
||||
sel.dw = dw;
|
||||
sel.key = key;
|
||||
|
||||
if(p->frames > 30 && !sel.IsSolidRect())
|
||||
{
|
||||
|
@ -2775,7 +2768,7 @@ void GSDrawScanline::GSDrawScanlineMap::PrintStats()
|
|||
|
||||
if(tpf >= 500)
|
||||
{
|
||||
_ftprintf(fp, _T("InitDS_Sel(0x%08x); // %6.2f%%\n"), sel.dw, (float)tpf / 100);
|
||||
_ftprintf(fp, _T("InitDS_Sel(0x%08x); // %6.2f%%\n"), (DWORD)sel, (float)tpf / 100);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -2785,7 +2778,7 @@ void GSDrawScanline::GSDrawScanlineMap::PrintStats()
|
|||
}
|
||||
|
||||
//
|
||||
|
||||
*/
|
||||
GSDrawScanline::GSSetupPrimMap::GSSetupPrimMap()
|
||||
{
|
||||
#define InitSP_IIP(zbe, fge, tme, fst, iip) \
|
||||
|
@ -2811,13 +2804,13 @@ GSDrawScanline::GSSetupPrimMap::GSSetupPrimMap()
|
|||
InitSP_ZBE(1);
|
||||
}
|
||||
|
||||
IDrawScanline::SetupPrimPtr GSDrawScanline::GSSetupPrimMap::GetDefaultFunction(DWORD dw)
|
||||
IDrawScanline::SetupPrimPtr GSDrawScanline::GSSetupPrimMap::GetDefaultFunction(DWORD key)
|
||||
{
|
||||
DWORD zbe = (dw >> 0) & 1;
|
||||
DWORD fge = (dw >> 1) & 1;
|
||||
DWORD tme = (dw >> 2) & 1;
|
||||
DWORD fst = (dw >> 3) & 1;
|
||||
DWORD iip = (dw >> 4) & 1;
|
||||
DWORD zbe = (key >> 0) & 1;
|
||||
DWORD fge = (key >> 1) & 1;
|
||||
DWORD tme = (key >> 2) & 1;
|
||||
DWORD fst = (key >> 3) & 1;
|
||||
DWORD iip = (key >> 4) & 1;
|
||||
|
||||
return m_default[zbe][fge][tme][fst][iip];
|
||||
}
|
||||
|
@ -2831,7 +2824,7 @@ const GSVector4 GSDrawScanline::m_shift[4] =
|
|||
GSVector4(-2.0f, -1.0f, 0.0f, 1.0f),
|
||||
GSVector4(-3.0f, -2.0f, -1.0f, 0.0f),
|
||||
};
|
||||
|
||||
/*
|
||||
const GSVector4i GSDrawScanline::m_test[8] =
|
||||
{
|
||||
GSVector4i::zero(),
|
||||
|
@ -2843,3 +2836,4 @@ const GSVector4i GSDrawScanline::m_test[8] =
|
|||
GSVector4i(0x00000000, 0x00000000, 0x00000000, 0xffffffff),
|
||||
GSVector4i::zero(),
|
||||
};
|
||||
*/
|
||||
|
|
|
@ -23,150 +23,61 @@
|
|||
|
||||
#include "GSState.h"
|
||||
#include "GSRasterizer.h"
|
||||
#include "GSScanlineEnvironment.h"
|
||||
#include "GSDrawScanlineCodeGenerator.h"
|
||||
#include "GSAlignedClass.h"
|
||||
|
||||
union GSScanlineSelector
|
||||
{
|
||||
struct
|
||||
{
|
||||
DWORD fpsm:2; // 0
|
||||
DWORD zpsm:2; // 2
|
||||
DWORD ztst:2; // 4 (0: off, 1: write, 2: test (ge), 3: test (g))
|
||||
DWORD atst:3; // 6
|
||||
DWORD afail:2; // 9
|
||||
DWORD iip:1; // 11
|
||||
DWORD tfx:3; // 12
|
||||
DWORD tcc:1; // 15
|
||||
DWORD fst:1; // 16
|
||||
DWORD ltf:1; // 17
|
||||
DWORD tlu:1; // 18
|
||||
DWORD fge:1; // 19
|
||||
DWORD date:1; // 20
|
||||
DWORD abea:2; // 21
|
||||
DWORD abeb:2; // 23
|
||||
DWORD abec:2; // 25
|
||||
DWORD abed:2; // 27
|
||||
DWORD pabe:1; // 29
|
||||
DWORD rfb:1; // 30
|
||||
DWORD sprite:1; // 31
|
||||
};
|
||||
|
||||
struct
|
||||
{
|
||||
DWORD _pad1:21;
|
||||
DWORD abe:8;
|
||||
DWORD _pad2:3;
|
||||
};
|
||||
|
||||
DWORD dw;
|
||||
|
||||
operator DWORD() {return dw;}
|
||||
|
||||
bool IsSolidRect()
|
||||
{
|
||||
return sprite
|
||||
&& iip == 0
|
||||
&& tfx == TFX_NONE
|
||||
&& abe == 255
|
||||
&& ztst <= 1
|
||||
&& atst <= 1
|
||||
&& date == 0
|
||||
&& fge == 0;
|
||||
}
|
||||
};
|
||||
|
||||
__declspec(align(16)) struct GSScanlineEnvironment
|
||||
{
|
||||
GSScanlineSelector sel;
|
||||
|
||||
void* vm;
|
||||
const void* tex;
|
||||
const DWORD* clut;
|
||||
DWORD tw;
|
||||
|
||||
GSVector4i* fbr;
|
||||
GSVector4i* zbr;
|
||||
int** fbc;
|
||||
int** zbc;
|
||||
GSVector2i* fzbr;
|
||||
GSVector2i* fzbc;
|
||||
|
||||
GSVector4i fm, zm;
|
||||
struct {GSVector4i min, max, mask;} t; // [u] x 4 [v] x 4
|
||||
GSVector4i datm;
|
||||
GSVector4i colclamp;
|
||||
GSVector4i fba;
|
||||
GSVector4i aref;
|
||||
GSVector4i afix, afix2;
|
||||
GSVector4i frb, fga;
|
||||
|
||||
struct {GSVector4 z, s, t, q; GSVector4i rb, ga, f, si, ti, _pad[3];} d[4];
|
||||
struct {GSVector4 z, stq; GSVector4i c, f, st;} d4;
|
||||
struct {GSVector4i rb, ga;} c;
|
||||
struct {GSVector4i z, f;} p;
|
||||
struct {GSVector4i rb, ga;} c2;
|
||||
};
|
||||
|
||||
__declspec(align(16)) struct GSScanlineParam
|
||||
{
|
||||
GSScanlineSelector sel;
|
||||
|
||||
void* vm;
|
||||
const void* tex;
|
||||
const DWORD* clut;
|
||||
DWORD tw;
|
||||
|
||||
GSLocalMemory::Offset* fbo;
|
||||
GSLocalMemory::Offset* zbo;
|
||||
GSLocalMemory::Offset4* fzbo;
|
||||
|
||||
DWORD fm, zm;
|
||||
};
|
||||
|
||||
class GSDrawScanline : public GSAlignedClass<16>, public IDrawScanline
|
||||
{
|
||||
GSScanlineEnvironment m_env;
|
||||
|
||||
static const GSVector4 m_shift[4];
|
||||
static const GSVector4i m_test[8];
|
||||
/* static const GSVector4i m_test[8];
|
||||
|
||||
//
|
||||
|
||||
class GSDrawScanlineMap : public GSFunctionMap<DrawScanlinePtr>
|
||||
class GSDrawScanlineMap : public GSFunctionMap<DWORD, DrawScanlinePtr>
|
||||
{
|
||||
DrawScanlinePtr m_default[4][4][4][2];
|
||||
|
||||
public:
|
||||
GSDrawScanlineMap();
|
||||
|
||||
DrawScanlinePtr GetDefaultFunction(DWORD dw);
|
||||
DrawScanlinePtr GetDefaultFunction(DWORD key);
|
||||
|
||||
void PrintStats();
|
||||
};
|
||||
|
||||
GSDrawScanlineMap m_ds;
|
||||
|
||||
*/
|
||||
//
|
||||
|
||||
class GSSetupPrimMap : public GSFunctionMap<SetupPrimPtr>
|
||||
class GSSetupPrimMap : public GSFunctionMap<DWORD, SetupPrimPtr>
|
||||
{
|
||||
SetupPrimPtr m_default[2][2][2][2][2];
|
||||
|
||||
public:
|
||||
GSSetupPrimMap();
|
||||
|
||||
SetupPrimPtr GetDefaultFunction(DWORD dw);
|
||||
SetupPrimPtr GetDefaultFunction(DWORD key);
|
||||
};
|
||||
|
||||
GSSetupPrimMap m_sp;
|
||||
|
||||
//
|
||||
|
||||
template<DWORD zbe, DWORD fge, DWORD tme, DWORD fst, DWORD iip>
|
||||
void SetupPrim(const GSVertexSW* vertices, const GSVertexSW& dscan);
|
||||
|
||||
//
|
||||
|
||||
CRBMap<UINT64, GSDrawScanlineCodeGenerator*> m_dscg;
|
||||
|
||||
DrawScanlineStaticPtr m_dsf;
|
||||
|
||||
void DrawScanline(int top, int left, int right, const GSVertexSW& v);
|
||||
|
||||
/*
|
||||
//
|
||||
|
||||
__forceinline GSVector4i Wrap(const GSVector4i& t);
|
||||
|
||||
__forceinline void SampleTexture(DWORD ltf, DWORD tlu, const GSVector4i& u, const GSVector4i& v, GSVector4i* c);
|
||||
|
@ -187,7 +98,7 @@ class GSDrawScanline : public GSAlignedClass<16>, public IDrawScanline
|
|||
|
||||
template<DWORD sel>
|
||||
void DrawScanlineEx(int top, int left, int right, const GSVertexSW& v);
|
||||
|
||||
*/
|
||||
//
|
||||
|
||||
void DrawSolidRect(const GSVector4i& r, const GSVertexSW& v);
|
||||
|
@ -213,5 +124,5 @@ public:
|
|||
|
||||
void BeginDraw(const GSRasterizerData* data, Functions* f);
|
||||
void EndDraw(const GSRasterizerStats& stats);
|
||||
void PrintStats() {m_ds.PrintStats();}
|
||||
void PrintStats() {/*m_ds.PrintStats();*/}
|
||||
};
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,74 @@
|
|||
/*
|
||||
* Copyright (C) 2007-2009 Gabest
|
||||
* http://www.gabest.org
|
||||
*
|
||||
* This Program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2, or (at your option)
|
||||
* any later version.
|
||||
*
|
||||
* This Program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with GNU Make; see the file COPYING. If not, write to
|
||||
* the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
|
||||
* http://www.gnu.org/copyleft/gpl.html
|
||||
*
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "GSScanlineEnvironment.h"
|
||||
#include "xbyak/xbyak.h"
|
||||
#include "xbyak/xbyak_util.h"
|
||||
|
||||
using namespace Xbyak;
|
||||
|
||||
// Code generator for the software renderer's scanline drawing routine, built
// on Xbyak's CodeGenerator. GSDrawScanline::BeginDraw() creates one instance
// per distinct selector key, caches it, and calls getCode() on it to obtain
// the function it then invokes for every scanline.
class GSDrawScanlineCodeGenerator : public CodeGenerator
|
||||
{
|
||||
// Declared private so instances cannot be copy-assigned from outside.
// NOTE(review): no definition is visible in this header - confirm it is
// intentionally left undefined.
void operator = (const GSDrawScanlineCodeGenerator&);
|
||||
|
||||
// Constant table shared by all instances; its contents are defined outside
// this header.
static const GSVector4i m_test[8];
|
||||
|
||||
// Xbyak CPU helper. NOTE(review): presumably used to pick instruction
// variants by CPU feature - confirm in the implementation.
util::Cpu m_cpu;
|
||||
|
||||
// Environment passed to the constructor. Held by reference, so the referenced
// object must outlive this generator.
GSScanlineEnvironment& m_env;
|
||||
|
||||
// NOTE(review): presumably the top-level routine that emits the whole
// function by calling the helpers below - confirm in the implementation.
void Generate();
|
||||
|
||||
// Per-stage emitters. Only the declarations are visible here; the order in
// which they are used is determined by the implementation file.
void Init(int params);
|
||||
void Step();
|
||||
void TestZ(const Xmm& temp1, const Xmm& temp2);
|
||||
void SampleTexture();
|
||||
void AlphaTFX();
|
||||
void TestAlpha();
|
||||
void ColorTFX();
|
||||
void Fog();
|
||||
void ReadFrame();
|
||||
void TestDestAlpha();
|
||||
void WriteZBuf();
|
||||
void AlphaBlend();
|
||||
void WriteFrame(int params);
|
||||
|
||||
// Memory access and texture addressing helpers operating on the given
// registers.
void ReadPixel(const Xmm& dst, const Reg32& addr);
|
||||
void WritePixel(const Xmm& src, const Xmm& temp, const Reg32& addr, uint8 i, int psm);
|
||||
void ReadTexel(const Xmm& dst, const Xmm& addr, const Reg32& base, const Xmm& temp1, const Xmm& temp2);
|
||||
void ReadTexel(const Xmm& dst, const Xmm& addr, const Reg32& base, uint8 i);
|
||||
void Wrap(const Xmm& uv, const Xmm& temp);
|
||||
|
||||
// Small arithmetic/blend building blocks.
// NOTE(review): the names mirror GSVector4i operations used elsewhere in the
// renderer (lerp16, mix16, ...) - presumably these emit the equivalent
// instruction sequences; confirm in the implementation.
template<int shift> void modulate16(const Xmm& a, const Operand& f);
|
||||
template<int shift> void lerp16(const Xmm& a, const Xmm& b, const Xmm& f);
|
||||
void mix16(const Xmm& a, const Xmm& b, const Xmm& temp);
|
||||
void clamp16(const Xmm& a, const Xmm& temp);
|
||||
void alltrue(const Xmm& a, const Reg32& temp, LPCTSTR label);
|
||||
void blend8(const Xmm& a, const Xmm& b);
|
||||
void blend(const Xmm& a, const Xmm& b, const Xmm& mask);
|
||||
void blend8r(const Xmm& b, const Xmm& a);
|
||||
void blendr(const Xmm& b, const Xmm& a, const Xmm& mask);
|
||||
|
||||
public:
|
||||
// env is stored by reference in m_env (see the lifetime note on that member).
GSDrawScanlineCodeGenerator(GSScanlineEnvironment& env);
|
||||
};
|
|
@ -71,6 +71,22 @@ public:
|
|||
|
||||
CTXT[0].Reset();
|
||||
CTXT[1].Reset();
|
||||
|
||||
memset(dimx, 0, sizeof(dimx));
|
||||
}
|
||||
|
||||
GSVector4i dimx[8];
|
||||
|
||||
void UpdateDIMX()
|
||||
{
|
||||
dimx[1] = GSVector4i(DIMX.DM00, 0, DIMX.DM01, 0, DIMX.DM02, 0, DIMX.DM03, 0);
|
||||
dimx[0] = dimx[1].xxzzlh();
|
||||
dimx[3] = GSVector4i(DIMX.DM10, 0, DIMX.DM11, 0, DIMX.DM12, 0, DIMX.DM13, 0),
|
||||
dimx[2] = dimx[2].xxzzlh();
|
||||
dimx[5] = GSVector4i(DIMX.DM20, 0, DIMX.DM21, 0, DIMX.DM22, 0, DIMX.DM23, 0),
|
||||
dimx[4] = dimx[4].xxzzlh();
|
||||
dimx[7] = GSVector4i(DIMX.DM30, 0, DIMX.DM31, 0, DIMX.DM32, 0, DIMX.DM33, 0),
|
||||
dimx[6] = dimx[7].xxzzlh();
|
||||
}
|
||||
};
|
||||
|
||||
|
|
|
@ -40,21 +40,21 @@ struct GSRasterizerStats
|
|||
}
|
||||
};
|
||||
|
||||
template<class T> class GSFunctionMap
|
||||
template<class KEY, class VALUE> class GSFunctionMap
|
||||
{
|
||||
protected:
|
||||
struct ActivePtr
|
||||
{
|
||||
UINT64 frame, frames;
|
||||
__int64 ticks, pixels;
|
||||
T f;
|
||||
VALUE f;
|
||||
};
|
||||
|
||||
CRBMap<DWORD, T> m_map;
|
||||
CRBMap<DWORD, ActivePtr*> m_map_active;
|
||||
CRBMap<KEY, VALUE> m_map;
|
||||
CRBMap<KEY, ActivePtr*> m_map_active;
|
||||
ActivePtr* m_active;
|
||||
|
||||
virtual T GetDefaultFunction(DWORD sel) = 0;
|
||||
virtual VALUE GetDefaultFunction(KEY key) = 0;
|
||||
|
||||
public:
|
||||
GSFunctionMap()
|
||||
|
@ -74,18 +74,18 @@ public:
|
|||
m_map_active.RemoveAll();
|
||||
}
|
||||
|
||||
void SetAt(DWORD sel, T f)
|
||||
void SetAt(KEY key, VALUE f)
|
||||
{
|
||||
m_map.SetAt(sel, f);
|
||||
m_map.SetAt(key, f);
|
||||
}
|
||||
|
||||
T Lookup(DWORD sel)
|
||||
VALUE Lookup(KEY key)
|
||||
{
|
||||
m_active = NULL;
|
||||
|
||||
if(!m_map_active.Lookup(sel, m_active))
|
||||
if(!m_map_active.Lookup(key, m_active))
|
||||
{
|
||||
CRBMap<DWORD, T>::CPair* pair = m_map.Lookup(sel);
|
||||
CRBMap<KEY, VALUE>::CPair* pair = m_map.Lookup(key);
|
||||
|
||||
ActivePtr* p = new ActivePtr();
|
||||
|
||||
|
@ -93,9 +93,9 @@ public:
|
|||
|
||||
p->frame = (UINT64)-1;
|
||||
|
||||
p->f = pair ? pair->m_value : GetDefaultFunction(sel);
|
||||
p->f = pair ? pair->m_value : GetDefaultFunction(key);
|
||||
|
||||
m_map_active.SetAt(sel, p);
|
||||
m_map_active.SetAt(key, p);
|
||||
|
||||
m_active = p;
|
||||
}
|
||||
|
@ -138,10 +138,10 @@ public:
|
|||
|
||||
while(pos)
|
||||
{
|
||||
DWORD sel;
|
||||
KEY key;
|
||||
ActivePtr* p;
|
||||
|
||||
m_map_active.GetNextAssoc(pos, sel, p);
|
||||
m_map_active.GetNextAssoc(pos, key, p);
|
||||
|
||||
if(p->frames > 0)
|
||||
{
|
||||
|
@ -150,7 +150,7 @@ public:
|
|||
__int64 ppf = p->frames > 0 ? p->pixels / p->frames : 0;
|
||||
|
||||
printf("[%08x]%c %6.2f%% | %5.2f%% | f %4I64d | p %10I64d | tpp %4I64d | tpf %9I64d | ppf %7I64d\n",
|
||||
sel, !m_map.Lookup(sel) ? '*' : ' ',
|
||||
key, !m_map.Lookup(key) ? '*' : ' ',
|
||||
(float)(tpf * 10000 / 50000000) / 100,
|
||||
(float)(tpf * 10000 / ttpf) / 100,
|
||||
p->frames, p->pixels,
|
||||
|
|
|
@ -54,6 +54,7 @@ public:
|
|||
typedef void (IDrawScanline::*DrawScanlinePtr)(int top, int left, int right, const GSVertexSW& v);
|
||||
typedef void (IDrawScanline::*DrawSolidRectPtr)(const GSVector4i& r, const GSVertexSW& v);
|
||||
typedef void (IDrawScanline::*SetupPrimPtr)(const GSVertexSW* vertices, const GSVertexSW& dscan);
|
||||
typedef void (*DrawScanlineStaticPtr)(int top, int left, int right, const GSVertexSW& v);
|
||||
|
||||
struct Functions
|
||||
{
|
||||
|
|
|
@ -256,7 +256,7 @@ protected:
|
|||
p.zbo = m_mem.GetOffset(context->ZBUF.Block(), context->FRAME.FBW, context->ZBUF.PSM);
|
||||
p.fzbo = m_mem.GetOffset4(context->FRAME, context->ZBUF);
|
||||
|
||||
p.sel.dw = 0;
|
||||
p.sel.key = 0;
|
||||
|
||||
p.sel.fpsm = 3;
|
||||
p.sel.zpsm = 3;
|
||||
|
@ -291,6 +291,9 @@ protected:
|
|||
bool fwrite = p.fm != 0xffffffff;
|
||||
bool ftest = p.sel.atst != ATST_ALWAYS || context->TEST.DATE && context->FRAME.PSM != PSM_PSMCT24;
|
||||
|
||||
p.sel.fwrite = fwrite;
|
||||
p.sel.ftest = ftest;
|
||||
|
||||
if(fwrite || ftest)
|
||||
{
|
||||
p.sel.fpsm = GSUtil::EncodePSM(context->FRAME.PSM);
|
||||
|
@ -307,6 +310,8 @@ protected:
|
|||
p.sel.fst = PRIM->FST;
|
||||
p.sel.ltf = context->TEX1.IsLinear();
|
||||
p.sel.tlu = GSLocalMemory::m_psm[context->TEX0.PSM].pal > 0;
|
||||
p.sel.wms = ((context->CLAMP.WMS + 1) >> 1) & 1;
|
||||
p.sel.wmt = ((context->CLAMP.WMT + 1) >> 1) & 1;
|
||||
|
||||
if(p.sel.iip == 0 && p.sel.tfx == TFX_MODULATE && p.sel.tcc)
|
||||
{
|
||||
|
@ -466,11 +471,18 @@ protected:
|
|||
{
|
||||
p.sel.rfb = 1;
|
||||
}
|
||||
|
||||
p.sel.colclamp = env.COLCLAMP.CLAMP;
|
||||
p.sel.fba = context->FBA.FBA;
|
||||
p.sel.dthe = env.DTHE.DTHE;
|
||||
}
|
||||
|
||||
bool zwrite = p.zm != 0xffffffff;
|
||||
bool ztest = context->TEST.ZTE && context->TEST.ZTST > 1;
|
||||
|
||||
p.sel.zwrite = zwrite;
|
||||
p.sel.ztest = ztest;
|
||||
|
||||
if(zwrite || ztest)
|
||||
{
|
||||
p.sel.zpsm = GSUtil::EncodePSM(context->ZBUF.PSM);
|
||||
|
|
|
@ -0,0 +1,144 @@
|
|||
/*
|
||||
* Copyright (C) 2007-2009 Gabest
|
||||
* http://www.gabest.org
|
||||
*
|
||||
* This Program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2, or (at your option)
|
||||
* any later version.
|
||||
*
|
||||
* This Program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with GNU Make; see the file COPYING. If not, write to
|
||||
* the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
|
||||
* http://www.gnu.org/copyleft/gpl.html
|
||||
*
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "GSLocalMemory.h"
|
||||
#include "GSVector.h"
|
||||
|
||||
// Packed description of the scanline pipeline state. The whole value is
// available as a 64-bit `key` (used by GSDrawScanline to look up a cached
// code generator) and as two 32-bit halves `lo` / `hi`.
// The trailing numbers on the bit-fields are the intended starting bit of
// each field. NOTE(review): the actual layout depends on the compiler's
// bit-field packing (32 bits of DWORD fields per storage unit is assumed).
union GSScanlineSelector
|
||||
{
|
||||
// View 1: individual state fields.
struct
|
||||
{
|
||||
DWORD fpsm:2; // 0
|
||||
DWORD zpsm:2; // 2
|
||||
DWORD ztst:2; // 4 (0: off, 1: write, 2: test (ge), 3: test (g))
|
||||
DWORD atst:3; // 6
|
||||
DWORD afail:2; // 9
|
||||
DWORD iip:1; // 11
|
||||
DWORD tfx:3; // 12
|
||||
DWORD tcc:1; // 15
|
||||
DWORD fst:1; // 16
|
||||
DWORD ltf:1; // 17
|
||||
DWORD tlu:1; // 18
|
||||
DWORD fge:1; // 19
|
||||
DWORD date:1; // 20
|
||||
DWORD abea:2; // 21
|
||||
DWORD abeb:2; // 23
|
||||
DWORD abec:2; // 25
|
||||
DWORD abed:2; // 27
|
||||
DWORD pabe:1; // 29
|
||||
DWORD rfb:1; // 30
|
||||
DWORD sprite:1; // 31
|
||||
|
||||
// Fields past the first 32 bits (they land in `hi`).
DWORD fwrite:1; // 32
|
||||
DWORD ftest:1; // 33
|
||||
DWORD zwrite:1; // 34
|
||||
DWORD ztest:1; // 35
|
||||
DWORD wms:1; // 36 (0: repeat, 1: clamp)
|
||||
DWORD wmt:1; // 37
|
||||
DWORD colclamp:1; // 38
|
||||
DWORD fba:1; // 39
|
||||
DWORD dthe:1; // 40
|
||||
};
|
||||
|
||||
// View 2: grouped aliases of the fields above.
//   abe - the eight bits starting at bit 21, i.e. abea/abeb/abec/abed together
//   fb  - the two bits at 32..33 (fwrite + ftest)
//   zb  - the two bits at 34..35 (zwrite + ztest)
struct
|
||||
{
|
||||
DWORD _pad1:21;
|
||||
DWORD abe:8;
|
||||
DWORD _pad2:3;
|
||||
DWORD fb:2;
|
||||
DWORD zb:2;
|
||||
};
|
||||
|
||||
// View 3: the two 32-bit halves of the key.
struct
|
||||
{
|
||||
DWORD lo;
|
||||
DWORD hi;
|
||||
};
|
||||
|
||||
// View 4: the complete selector as one integer. Callers zero it (sel.key = 0)
// before filling in individual fields.
UINT64 key;
|
||||
|
||||
// Converts to the low 32 bits only; fields at bit 32 and above are dropped.
operator DWORD() {return lo;}
|
||||
// Converts to the complete 64-bit key.
operator UINT64() {return key;}
|
||||
|
||||
// True when the selector describes an untextured, flat-shaded sprite with no
// per-pixel work: sprite set, iip == 0, tfx == TFX_NONE, all four abe fields
// equal to 3 (abe == 255), ztst and atst both <= 1, and neither date nor fge
// set.
bool IsSolidRect()
|
||||
{
|
||||
return sprite
|
||||
&& iip == 0
|
||||
&& tfx == TFX_NONE
|
||||
&& abe == 255
|
||||
&& ztst <= 1
|
||||
&& atst <= 1
|
||||
&& date == 0
|
||||
&& fge == 0;
|
||||
}
|
||||
};
|
||||
|
||||
// Per-draw parameters filled in by the renderer and handed to the scanline
// drawer. Declared with 16-byte alignment.
__declspec(align(16)) struct GSScanlineParam
|
||||
{
|
||||
// Pipeline state selector for this draw.
GSScanlineSelector sel;
|
||||
|
||||
// NOTE(review): presumably local memory base (vm), source texture data (tex),
// palette (clut) and a texture width parameter (tw) - confirm against the
// code that fills and consumes them.
void* vm;
|
||||
const void* tex;
|
||||
const DWORD* clut;
|
||||
DWORD tw;
|
||||
|
||||
// Offset tables for the frame buffer, the z buffer, and both combined; the
// renderer obtains zbo from GSLocalMemory::GetOffset() and fzbo from
// GetOffset4().
GSLocalMemory::Offset* fbo;
|
||||
GSLocalMemory::Offset* zbo;
|
||||
GSLocalMemory::Offset4* fzbo;
|
||||
|
||||
// Frame and z masks. The renderer treats a value of 0xffffffff as "no write"
// (fwrite = fm != 0xffffffff, zwrite = zm != 0xffffffff).
DWORD fm, zm;
|
||||
};
|
||||
|
||||
// Working state of the scanline drawer for the current draw call. It is
// filled in by GSDrawScanline::BeginDraw() / SetupPrim() and passed by
// reference to GSDrawScanlineCodeGenerator. Declared with 16-byte alignment.
// NOTE(review): generated code presumably addresses members by offset, so the
// member order and sizes should be treated as fixed - confirm before changing.
__declspec(align(16)) struct GSScanlineEnvironment
|
||||
{
|
||||
// Pipeline state selector; its 64-bit key identifies the generated function.
GSScanlineSelector sel;
|
||||
|
||||
// Same-named counterparts of the GSScanlineParam members.
void* vm;
|
||||
const void* tex;
|
||||
const DWORD* clut;
|
||||
DWORD tw;
|
||||
|
||||
// NOTE(review): presumably row/column address tables for the frame buffer
// (fb*), z buffer (zb*) and the combined variant (fzb*) - confirm in
// BeginDraw().
GSVector4i* fbr;
|
||||
GSVector4i* zbr;
|
||||
int** fbc;
|
||||
int** zbc;
|
||||
GSVector2i* fzbr;
|
||||
GSVector2i* fzbc;
|
||||
|
||||
// Points at the drawing environment's dimx[] table (assigned in BeginDraw()).
GSVector4i* dimx;
|
||||
|
||||
// Vector forms of the per-draw constants.
GSVector4i fm, zm;
|
||||
struct {GSVector4i min, max, mask;} t; // [u] x 4 [v] x 4
|
||||
GSVector4i datm;
|
||||
GSVector4i colclamp;
|
||||
GSVector4i fba;
|
||||
GSVector4i aref;
|
||||
// afix2 is derived from afix in BeginDraw() (afix.yywwlh().sll16(7)).
GSVector4i afix, afix2;
|
||||
// Fog colour split into two masked halves of FOGCOL: frb takes the value
// masked with 0x00ff00ff, fga takes the value shifted right by 8 and masked
// the same way.
GSVector4i frb, fga;
|
||||
|
||||
// Per-primitive values. SetupPrim() writes c.rb / c.ga in its flat-colour
// path; the remaining groups are set outside the visible code.
struct {GSVector4 z, s, t, q; GSVector4i rb, ga, f, si, ti, _pad[7];} d[4];
|
||||
struct {GSVector4 z, stq; GSVector4i c, f, st;} d4;
|
||||
struct {GSVector4i rb, ga;} c;
|
||||
struct {GSVector4i z, f;} p;
|
||||
// NOTE(review): presumably scratch storage for the generated code - confirm.
struct {GSVector4i z, f, s, t, q, rb, ga, zs, zd, uf, vf;} temp;
|
||||
};
|
|
@ -798,12 +798,21 @@ template<int i> void GSState::GIFRegHandlerALPHA(GIFReg* r)
|
|||
|
||||
// Handler for writes to the DIMX register.
// When the incoming value differs from the stored one, Flush() is called
// before the register is overwritten, and the derived dimx[] table is rebuilt
// afterwards. The register itself is stored unconditionally.
void GSState::GIFRegHandlerDIMX(GIFReg* r)
{
	// Decide once, before m_env.DIMX is replaced, whether the value changes.
	const bool changed = !(m_env.DIMX == (GSVector4i)r->DIMX).alltrue();

	if(changed)
	{
		Flush();
	}

	m_env.DIMX = (GSVector4i)r->DIMX;

	if(changed)
	{
		m_env.UpdateDIMX();
	}
}
|
||||
|
||||
void GSState::GIFRegHandlerDTHE(GIFReg* r)
|
||||
|
@ -1573,6 +1582,8 @@ int GSState::Defrost(const GSFreezeData* fd)
|
|||
|
||||
UpdateVertexKick();
|
||||
|
||||
m_env.UpdateDIMX();
|
||||
|
||||
m_env.CTXT[0].UpdateScissor();
|
||||
m_env.CTXT[1].UpdateScissor();
|
||||
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
<?xml version="1.0" encoding="windows-1250"?>
|
||||
<VisualStudioProject
|
||||
ProjectType="Visual C++"
|
||||
Version="9,00"
|
||||
Version="9.00"
|
||||
Name="GSdx"
|
||||
ProjectGUID="{18E42F6F-3A62-41EE-B42F-79366C4F1E95}"
|
||||
RootNamespace="GSdx"
|
||||
|
@ -22,7 +22,7 @@
|
|||
<Configuration
|
||||
Name="Debug|Win32"
|
||||
ConfigurationType="2"
|
||||
InheritedPropertySheets="vsprops\common.vsprops;vsprops\debug.vsprops"
|
||||
InheritedPropertySheets=".\vsprops\common.vsprops;.\vsprops\debug.vsprops"
|
||||
UseOfMFC="1"
|
||||
CharacterSet="2"
|
||||
>
|
||||
|
@ -86,7 +86,7 @@
|
|||
<Configuration
|
||||
Name="Debug|x64"
|
||||
ConfigurationType="2"
|
||||
InheritedPropertySheets="vsprops\common.vsprops;vsprops\debug.vsprops"
|
||||
InheritedPropertySheets=".\vsprops\common.vsprops;.\vsprops\debug.vsprops"
|
||||
UseOfMFC="1"
|
||||
CharacterSet="2"
|
||||
>
|
||||
|
@ -150,7 +150,7 @@
|
|||
<Configuration
|
||||
Name="Release|Win32"
|
||||
ConfigurationType="2"
|
||||
InheritedPropertySheets="vsprops\common.vsprops;vsprops\release.vsprops"
|
||||
InheritedPropertySheets=".\vsprops\common.vsprops;.\vsprops\release.vsprops"
|
||||
UseOfMFC="1"
|
||||
CharacterSet="2"
|
||||
WholeProgramOptimization="1"
|
||||
|
@ -215,7 +215,7 @@
|
|||
<Configuration
|
||||
Name="Release|x64"
|
||||
ConfigurationType="2"
|
||||
InheritedPropertySheets="vsprops\common.vsprops;vsprops\release.vsprops"
|
||||
InheritedPropertySheets=".\vsprops\common.vsprops;.\vsprops\release.vsprops"
|
||||
UseOfMFC="1"
|
||||
CharacterSet="2"
|
||||
WholeProgramOptimization="1"
|
||||
|
@ -280,7 +280,7 @@
|
|||
<Configuration
|
||||
Name="Debug SSE2|Win32"
|
||||
ConfigurationType="2"
|
||||
InheritedPropertySheets="vsprops\common.vsprops;vsprops\debug.vsprops;vsprops\sse2.vsprops"
|
||||
InheritedPropertySheets=".\vsprops\common.vsprops;.\vsprops\debug.vsprops;.\vsprops\sse2.vsprops"
|
||||
UseOfMFC="1"
|
||||
CharacterSet="2"
|
||||
>
|
||||
|
@ -344,7 +344,7 @@
|
|||
<Configuration
|
||||
Name="Debug SSE2|x64"
|
||||
ConfigurationType="2"
|
||||
InheritedPropertySheets="vsprops\common.vsprops;vsprops\debug.vsprops"
|
||||
InheritedPropertySheets=".\vsprops\common.vsprops;.\vsprops\debug.vsprops"
|
||||
UseOfMFC="1"
|
||||
CharacterSet="2"
|
||||
WholeProgramOptimization="0"
|
||||
|
@ -407,7 +407,7 @@
|
|||
<Configuration
|
||||
Name="Release SSE2|Win32"
|
||||
ConfigurationType="2"
|
||||
InheritedPropertySheets="vsprops\common.vsprops;vsprops\release.vsprops;vsprops\sse2.vsprops"
|
||||
InheritedPropertySheets=".\vsprops\common.vsprops;.\vsprops\release.vsprops;.\vsprops\sse2.vsprops"
|
||||
UseOfMFC="1"
|
||||
CharacterSet="2"
|
||||
WholeProgramOptimization="1"
|
||||
|
@ -472,7 +472,7 @@
|
|||
<Configuration
|
||||
Name="Release SSE2|x64"
|
||||
ConfigurationType="2"
|
||||
InheritedPropertySheets="vsprops\common.vsprops;vsprops\release.vsprops"
|
||||
InheritedPropertySheets=".\vsprops\common.vsprops;.\vsprops\release.vsprops"
|
||||
UseOfMFC="1"
|
||||
CharacterSet="2"
|
||||
WholeProgramOptimization="1"
|
||||
|
@ -535,7 +535,7 @@
|
|||
<Configuration
|
||||
Name="Release SSSE3|Win32"
|
||||
ConfigurationType="2"
|
||||
InheritedPropertySheets="vsprops\common.vsprops;vsprops\release.vsprops;vsprops\ssse3.vsprops"
|
||||
InheritedPropertySheets=".\vsprops\common.vsprops;.\vsprops\release.vsprops;.\vsprops\ssse3.vsprops"
|
||||
UseOfMFC="1"
|
||||
CharacterSet="2"
|
||||
WholeProgramOptimization="1"
|
||||
|
@ -600,7 +600,7 @@
|
|||
<Configuration
|
||||
Name="Release SSSE3|x64"
|
||||
ConfigurationType="2"
|
||||
InheritedPropertySheets="vsprops\common.vsprops;vsprops\release.vsprops;vsprops\ssse3.vsprops"
|
||||
InheritedPropertySheets=".\vsprops\common.vsprops;.\vsprops\release.vsprops;.\vsprops\ssse3.vsprops"
|
||||
UseOfMFC="1"
|
||||
CharacterSet="2"
|
||||
WholeProgramOptimization="1"
|
||||
|
@ -663,7 +663,7 @@
|
|||
<Configuration
|
||||
Name="Debug SSSE3|Win32"
|
||||
ConfigurationType="2"
|
||||
InheritedPropertySheets="vsprops\common.vsprops;vsprops\debug.vsprops;vsprops\ssse3.vsprops"
|
||||
InheritedPropertySheets=".\vsprops\common.vsprops;.\vsprops\debug.vsprops;.\vsprops\ssse3.vsprops"
|
||||
UseOfMFC="1"
|
||||
CharacterSet="2"
|
||||
>
|
||||
|
@ -727,7 +727,7 @@
|
|||
<Configuration
|
||||
Name="Debug SSSE3|x64"
|
||||
ConfigurationType="2"
|
||||
InheritedPropertySheets="vsprops\common.vsprops;vsprops\debug.vsprops;vsprops\ssse3.vsprops"
|
||||
InheritedPropertySheets=".\vsprops\common.vsprops;.\vsprops\debug.vsprops;.\vsprops\ssse3.vsprops"
|
||||
UseOfMFC="1"
|
||||
CharacterSet="2"
|
||||
>
|
||||
|
@ -789,7 +789,7 @@
|
|||
<Configuration
|
||||
Name="Debug SSE4|Win32"
|
||||
ConfigurationType="2"
|
||||
InheritedPropertySheets="vsprops\common.vsprops;vsprops\debug.vsprops;vsprops\sse4.vsprops"
|
||||
InheritedPropertySheets=".\vsprops\common.vsprops;.\vsprops\debug.vsprops;.\vsprops\sse4.vsprops"
|
||||
UseOfMFC="1"
|
||||
CharacterSet="2"
|
||||
>
|
||||
|
@ -853,7 +853,7 @@
|
|||
<Configuration
|
||||
Name="Debug SSE4|x64"
|
||||
ConfigurationType="2"
|
||||
InheritedPropertySheets="vsprops\common.vsprops;vsprops\debug.vsprops;vsprops\sse4.vsprops"
|
||||
InheritedPropertySheets=".\vsprops\common.vsprops;.\vsprops\debug.vsprops;.\vsprops\sse4.vsprops"
|
||||
UseOfMFC="1"
|
||||
CharacterSet="2"
|
||||
>
|
||||
|
@ -916,7 +916,7 @@
|
|||
<Configuration
|
||||
Name="Release SSE4|Win32"
|
||||
ConfigurationType="2"
|
||||
InheritedPropertySheets="vsprops\common.vsprops;vsprops\release.vsprops;vsprops\sse4.vsprops"
|
||||
InheritedPropertySheets=".\vsprops\common.vsprops;.\vsprops\release.vsprops;.\vsprops\sse4.vsprops"
|
||||
UseOfMFC="1"
|
||||
CharacterSet="2"
|
||||
WholeProgramOptimization="1"
|
||||
|
@ -981,7 +981,7 @@
|
|||
<Configuration
|
||||
Name="Release SSE4|x64"
|
||||
ConfigurationType="2"
|
||||
InheritedPropertySheets="vsprops\common.vsprops;vsprops\release.vsprops;vsprops\sse4.vsprops"
|
||||
InheritedPropertySheets=".\vsprops\common.vsprops;.\vsprops\release.vsprops;.\vsprops\sse4.vsprops"
|
||||
UseOfMFC="1"
|
||||
CharacterSet="2"
|
||||
WholeProgramOptimization="1"
|
||||
|
@ -1159,6 +1159,10 @@
|
|||
RelativePath=".\GSDrawScanline.cpp"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\GSDrawScanlineCodeGenerator.cpp"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\GSDump.cpp"
|
||||
>
|
||||
|
@ -1669,6 +1673,10 @@
|
|||
RelativePath=".\GSDrawScanline.h"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\GSDrawScanlineCodeGenerator.h"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\GSDump.h"
|
||||
>
|
||||
|
@ -1717,6 +1725,10 @@
|
|||
RelativePath=".\GSRendererSW.h"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\GSScanlineEnvironment.h"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\GSSetting.h"
|
||||
>
|
||||
|
@ -5044,6 +5056,26 @@
|
|||
>
|
||||
</File>
|
||||
</Filter>
|
||||
<Filter
|
||||
Name="Xbyak"
|
||||
>
|
||||
<File
|
||||
RelativePath=".\xbyak\xbyak.h"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\xbyak\xbyak_bin2hex.h"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\xbyak\xbyak_mnemonic.h"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\xbyak\xbyak_util.h"
|
||||
>
|
||||
</File>
|
||||
</Filter>
|
||||
</Files>
|
||||
<Globals>
|
||||
<Global
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,258 @@
|
|||
// Binary-pattern constants. Each name is the letter B followed by eight binary
// digits, most significant bit first, and its value is the number those digits
// spell: B00000000 is 0, B00000001 is 1, ... B11111111 is 255.
// The mnemonic wrappers use them to write opcode bit fields literally, for
// example `B01000000 | 0` or `B00011000`.
// The enum is anonymous, so the enumerators are plain integer constants in the
// enclosing scope. All 256 entries are listed in ascending order; keep it that
// way so a missing or duplicated pattern is easy to spot.
enum {
|
||||
B00000000= 0,
|
||||
B00000001= 1,
|
||||
B00000010= 2,
|
||||
B00000011= 3,
|
||||
B00000100= 4,
|
||||
B00000101= 5,
|
||||
B00000110= 6,
|
||||
B00000111= 7,
|
||||
B00001000= 8,
|
||||
B00001001= 9,
|
||||
B00001010= 10,
|
||||
B00001011= 11,
|
||||
B00001100= 12,
|
||||
B00001101= 13,
|
||||
B00001110= 14,
|
||||
B00001111= 15,
|
||||
B00010000= 16,
|
||||
B00010001= 17,
|
||||
B00010010= 18,
|
||||
B00010011= 19,
|
||||
B00010100= 20,
|
||||
B00010101= 21,
|
||||
B00010110= 22,
|
||||
B00010111= 23,
|
||||
B00011000= 24,
|
||||
B00011001= 25,
|
||||
B00011010= 26,
|
||||
B00011011= 27,
|
||||
B00011100= 28,
|
||||
B00011101= 29,
|
||||
B00011110= 30,
|
||||
B00011111= 31,
|
||||
B00100000= 32,
|
||||
B00100001= 33,
|
||||
B00100010= 34,
|
||||
B00100011= 35,
|
||||
B00100100= 36,
|
||||
B00100101= 37,
|
||||
B00100110= 38,
|
||||
B00100111= 39,
|
||||
B00101000= 40,
|
||||
B00101001= 41,
|
||||
B00101010= 42,
|
||||
B00101011= 43,
|
||||
B00101100= 44,
|
||||
B00101101= 45,
|
||||
B00101110= 46,
|
||||
B00101111= 47,
|
||||
B00110000= 48,
|
||||
B00110001= 49,
|
||||
B00110010= 50,
|
||||
B00110011= 51,
|
||||
B00110100= 52,
|
||||
B00110101= 53,
|
||||
B00110110= 54,
|
||||
B00110111= 55,
|
||||
B00111000= 56,
|
||||
B00111001= 57,
|
||||
B00111010= 58,
|
||||
B00111011= 59,
|
||||
B00111100= 60,
|
||||
B00111101= 61,
|
||||
B00111110= 62,
|
||||
B00111111= 63,
|
||||
B01000000= 64,
|
||||
B01000001= 65,
|
||||
B01000010= 66,
|
||||
B01000011= 67,
|
||||
B01000100= 68,
|
||||
B01000101= 69,
|
||||
B01000110= 70,
|
||||
B01000111= 71,
|
||||
B01001000= 72,
|
||||
B01001001= 73,
|
||||
B01001010= 74,
|
||||
B01001011= 75,
|
||||
B01001100= 76,
|
||||
B01001101= 77,
|
||||
B01001110= 78,
|
||||
B01001111= 79,
|
||||
B01010000= 80,
|
||||
B01010001= 81,
|
||||
B01010010= 82,
|
||||
B01010011= 83,
|
||||
B01010100= 84,
|
||||
B01010101= 85,
|
||||
B01010110= 86,
|
||||
B01010111= 87,
|
||||
B01011000= 88,
|
||||
B01011001= 89,
|
||||
B01011010= 90,
|
||||
B01011011= 91,
|
||||
B01011100= 92,
|
||||
B01011101= 93,
|
||||
B01011110= 94,
|
||||
B01011111= 95,
|
||||
B01100000= 96,
|
||||
B01100001= 97,
|
||||
B01100010= 98,
|
||||
B01100011= 99,
|
||||
B01100100= 100,
|
||||
B01100101= 101,
|
||||
B01100110= 102,
|
||||
B01100111= 103,
|
||||
B01101000= 104,
|
||||
B01101001= 105,
|
||||
B01101010= 106,
|
||||
B01101011= 107,
|
||||
B01101100= 108,
|
||||
B01101101= 109,
|
||||
B01101110= 110,
|
||||
B01101111= 111,
|
||||
B01110000= 112,
|
||||
B01110001= 113,
|
||||
B01110010= 114,
|
||||
B01110011= 115,
|
||||
B01110100= 116,
|
||||
B01110101= 117,
|
||||
B01110110= 118,
|
||||
B01110111= 119,
|
||||
B01111000= 120,
|
||||
B01111001= 121,
|
||||
B01111010= 122,
|
||||
B01111011= 123,
|
||||
B01111100= 124,
|
||||
B01111101= 125,
|
||||
B01111110= 126,
|
||||
B01111111= 127,
|
||||
B10000000= 128,
|
||||
B10000001= 129,
|
||||
B10000010= 130,
|
||||
B10000011= 131,
|
||||
B10000100= 132,
|
||||
B10000101= 133,
|
||||
B10000110= 134,
|
||||
B10000111= 135,
|
||||
B10001000= 136,
|
||||
B10001001= 137,
|
||||
B10001010= 138,
|
||||
B10001011= 139,
|
||||
B10001100= 140,
|
||||
B10001101= 141,
|
||||
B10001110= 142,
|
||||
B10001111= 143,
|
||||
B10010000= 144,
|
||||
B10010001= 145,
|
||||
B10010010= 146,
|
||||
B10010011= 147,
|
||||
B10010100= 148,
|
||||
B10010101= 149,
|
||||
B10010110= 150,
|
||||
B10010111= 151,
|
||||
B10011000= 152,
|
||||
B10011001= 153,
|
||||
B10011010= 154,
|
||||
B10011011= 155,
|
||||
B10011100= 156,
|
||||
B10011101= 157,
|
||||
B10011110= 158,
|
||||
B10011111= 159,
|
||||
B10100000= 160,
|
||||
B10100001= 161,
|
||||
B10100010= 162,
|
||||
B10100011= 163,
|
||||
B10100100= 164,
|
||||
B10100101= 165,
|
||||
B10100110= 166,
|
||||
B10100111= 167,
|
||||
B10101000= 168,
|
||||
B10101001= 169,
|
||||
B10101010= 170,
|
||||
B10101011= 171,
|
||||
B10101100= 172,
|
||||
B10101101= 173,
|
||||
B10101110= 174,
|
||||
B10101111= 175,
|
||||
B10110000= 176,
|
||||
B10110001= 177,
|
||||
B10110010= 178,
|
||||
B10110011= 179,
|
||||
B10110100= 180,
|
||||
B10110101= 181,
|
||||
B10110110= 182,
|
||||
B10110111= 183,
|
||||
B10111000= 184,
|
||||
B10111001= 185,
|
||||
B10111010= 186,
|
||||
B10111011= 187,
|
||||
B10111100= 188,
|
||||
B10111101= 189,
|
||||
B10111110= 190,
|
||||
B10111111= 191,
|
||||
B11000000= 192,
|
||||
B11000001= 193,
|
||||
B11000010= 194,
|
||||
B11000011= 195,
|
||||
B11000100= 196,
|
||||
B11000101= 197,
|
||||
B11000110= 198,
|
||||
B11000111= 199,
|
||||
B11001000= 200,
|
||||
B11001001= 201,
|
||||
B11001010= 202,
|
||||
B11001011= 203,
|
||||
B11001100= 204,
|
||||
B11001101= 205,
|
||||
B11001110= 206,
|
||||
B11001111= 207,
|
||||
B11010000= 208,
|
||||
B11010001= 209,
|
||||
B11010010= 210,
|
||||
B11010011= 211,
|
||||
B11010100= 212,
|
||||
B11010101= 213,
|
||||
B11010110= 214,
|
||||
B11010111= 215,
|
||||
B11011000= 216,
|
||||
B11011001= 217,
|
||||
B11011010= 218,
|
||||
B11011011= 219,
|
||||
B11011100= 220,
|
||||
B11011101= 221,
|
||||
B11011110= 222,
|
||||
B11011111= 223,
|
||||
B11100000= 224,
|
||||
B11100001= 225,
|
||||
B11100010= 226,
|
||||
B11100011= 227,
|
||||
B11100100= 228,
|
||||
B11100101= 229,
|
||||
B11100110= 230,
|
||||
B11100111= 231,
|
||||
B11101000= 232,
|
||||
B11101001= 233,
|
||||
B11101010= 234,
|
||||
B11101011= 235,
|
||||
B11101100= 236,
|
||||
B11101101= 237,
|
||||
B11101110= 238,
|
||||
B11101111= 239,
|
||||
B11110000= 240,
|
||||
B11110001= 241,
|
||||
B11110010= 242,
|
||||
B11110011= 243,
|
||||
B11110100= 244,
|
||||
B11110101= 245,
|
||||
B11110110= 246,
|
||||
B11110111= 247,
|
||||
B11111000= 248,
|
||||
B11111001= 249,
|
||||
B11111010= 250,
|
||||
B11111011= 251,
|
||||
B11111100= 252,
|
||||
B11111101= 253,
|
||||
B11111110= 254,
|
||||
B11111111= 255
|
||||
};
|
|
@ -0,0 +1,429 @@
|
|||
// Returns the version of this mnemonic table as a string literal ("2.07").
// The pointer refers to static storage; callers must not free or modify it.
// NOTE(review): presumably this is the Xbyak library version — confirm and
// bump it together with any update of the generated mnemonic list.
const char *getVersionString() const { return "2.07"; }
|
||||
// Pack mnemonics (packssdw, packsswb, packuswb).
// Each wrapper takes a destination register and a source operand and only
// forwards them to opMMX() with a single opcode byte; the opcode byte is the
// sole difference between the three. opMMX() is defined outside this listing.
void packssdw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x6B); }
|
||||
void packsswb(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x63); }
|
||||
void packuswb(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x67); }
|
||||
// Bitwise (pand, pandn, por) and integer multiply (pmaddwd, pmulhuw, pmulhw,
// pmullw) mnemonics. Same shape as every opMMX() wrapper in this table:
// (destination register, source operand, opcode byte).
// pmulhuw and pmulhw differ by one in the opcode (0xE4 vs 0xE5), so take care
// when editing these constants by hand.
void pand(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xDB); }
|
||||
void pandn(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xDF); }
|
||||
void pmaddwd(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xF5); }
|
||||
void pmulhuw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xE4); }
|
||||
void pmulhw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xE5); }
|
||||
void pmullw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xD5); }
|
||||
void por(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xEB); }
|
||||
// Unpack/interleave mnemonics. The "h" variants use opcodes 0x68..0x6A and the
// "l" variants 0x60..0x62, in the order bw, wd, dq. Each forwards
// (destination, source, opcode) to opMMX().
void punpckhbw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x68); }
|
||||
void punpckhwd(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x69); }
|
||||
void punpckhdq(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x6A); }
|
||||
void punpcklbw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x60); }
|
||||
void punpcklwd(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x61); }
|
||||
void punpckldq(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x62); }
|
||||
// Miscellaneous packed-integer mnemonics: xor, average, min/max, sum of
// absolute differences, and the quadword add/multiply/subtract forms.
// All are plain (destination, source, opcode) forwards to opMMX(); there is no
// per-mnemonic logic beyond the opcode byte.
void pxor(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xEF); }
|
||||
void pavgb(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xE0); }
|
||||
void pavgw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xE3); }
|
||||
void pmaxsw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xEE); }
|
||||
void pmaxub(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xDE); }
|
||||
void pminsw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xEA); }
|
||||
void pminub(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xDA); }
|
||||
void psadbw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xF6); }
|
||||
void paddq(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xD4); }
|
||||
void pmuludq(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xF4); }
|
||||
void psubq(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xFB); }
|
||||
// Packed add mnemonics: the b/w/d forms use 0xFC..0xFE, followed by the
// paddsb/paddsw and paddusb/paddusw forms. (paddq is declared separately,
// with the other quadword operations.)
// Each forwards (destination, source, opcode) to opMMX().
void paddb(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xFC); }
|
||||
void paddw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xFD); }
|
||||
void paddd(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xFE); }
|
||||
void paddsb(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xEC); }
|
||||
void paddsw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xED); }
|
||||
void paddusb(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xDC); }
|
||||
void paddusw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xDD); }
|
||||
// Packed compare mnemonics. The "eq" forms use opcodes 0x74..0x76 and the "gt"
// forms 0x64..0x66, in the order b, w, d.
// Each forwards (destination, source, opcode) to opMMX().
void pcmpeqb(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x74); }
|
||||
void pcmpeqw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x75); }
|
||||
void pcmpeqd(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x76); }
|
||||
void pcmpgtb(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x64); }
|
||||
void pcmpgtw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x65); }
|
||||
void pcmpgtd(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x66); }
|
||||
// Packed shifts whose count comes from an operand (the second parameter is an
// Operand, not an integer). The same names are overloaded with an `int imm8`
// second parameter for constant shift counts; overload resolution picks
// between the two forms.
// Each forwards (destination, count operand, opcode) to opMMX().
void psllw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xF1); }
|
||||
void pslld(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xF2); }
|
||||
void psllq(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xF3); }
|
||||
void psraw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xE1); }
|
||||
void psrad(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xE2); }
|
||||
void psrlw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xD1); }
|
||||
void psrld(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xD2); }
|
||||
void psrlq(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xD3); }
|
||||
// Packed subtract mnemonics: the b/w/d forms use 0xF8..0xFA, followed by the
// psubsb/psubsw and psubusb/psubusw forms. (psubq is declared separately,
// with the other quadword operations.)
// Each forwards (destination, source, opcode) to opMMX().
void psubb(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xF8); }
|
||||
void psubw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xF9); }
|
||||
void psubd(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xFA); }
|
||||
void psubsb(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xE8); }
|
||||
void psubsw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xE9); }
|
||||
void psubusb(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xD8); }
|
||||
void psubusw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xD9); }
|
||||
// Packed shifts with a constant count. These overload the operand-count
// shifts of the same names and go through opMMX_IMM(reg, imm8, code, ext).
//   code: 0x71 for the *w forms, 0x72 for the *d forms, 0x73 for the *q forms
//         and for the two whole-register byte shifts (pslldq, psrldq).
//   ext:  6 for psll*, 4 for psra*, 2 for psrl*, 7 for pslldq, 3 for psrldq.
// NOTE(review): `ext` looks like the ModRM /digit opcode extension from the
// Intel encoding tables — confirm against opMMX_IMM(), which is not shown here.
// pslldq and psrldq accept only an Xmm register; the rest accept any Mmx.
// imm8 is declared as int; any range check would have to be in opMMX_IMM().
void psllw(const Mmx& mmx, int imm8) { opMMX_IMM(mmx, imm8, 0x71, 6); }
|
||||
void pslld(const Mmx& mmx, int imm8) { opMMX_IMM(mmx, imm8, 0x72, 6); }
|
||||
void psllq(const Mmx& mmx, int imm8) { opMMX_IMM(mmx, imm8, 0x73, 6); }
|
||||
void psraw(const Mmx& mmx, int imm8) { opMMX_IMM(mmx, imm8, 0x71, 4); }
|
||||
void psrad(const Mmx& mmx, int imm8) { opMMX_IMM(mmx, imm8, 0x72, 4); }
|
||||
void psrlw(const Mmx& mmx, int imm8) { opMMX_IMM(mmx, imm8, 0x71, 2); }
|
||||
void psrld(const Mmx& mmx, int imm8) { opMMX_IMM(mmx, imm8, 0x72, 2); }
|
||||
void psrlq(const Mmx& mmx, int imm8) { opMMX_IMM(mmx, imm8, 0x73, 2); }
|
||||
void pslldq(const Xmm& xmm, int imm8) { opMMX_IMM(xmm, imm8, 0x73, 7); }
|
||||
void psrldq(const Xmm& xmm, int imm8) { opMMX_IMM(xmm, imm8, 0x73, 3); }
|
||||
// Shuffle mnemonics taking an 8-bit selector. All four share opcode 0x70 and
// are told apart only by the fourth opMMX() argument: 0x00 (pshufw),
// 0xF2 (pshuflw), 0xF3 (pshufhw), 0x66 (pshufd). imm8 is passed last.
// NOTE(review): the fourth argument is presumably the instruction's prefix
// byte — confirm in opMMX(). Also note that all four are declared with an Mmx
// destination; whether a given register class is legal for each mnemonic is
// not checked in these wrappers.
void pshufw(const Mmx& mmx, const Operand& op, uint8 imm8) { opMMX(mmx, op, 0x70, 0x00, imm8); }
|
||||
void pshuflw(const Mmx& mmx, const Operand& op, uint8 imm8) { opMMX(mmx, op, 0x70, 0xF2, imm8); }
|
||||
void pshufhw(const Mmx& mmx, const Operand& op, uint8 imm8) { opMMX(mmx, op, 0x70, 0xF3, imm8); }
|
||||
void pshufd(const Mmx& mmx, const Operand& op, uint8 imm8) { opMMX(mmx, op, 0x70, 0x66, imm8); }
|
||||
// 128-bit and scalar move mnemonics. Every mnemonic has two overloads:
//   (Xmm, Operand)  - destination is a register; forwarded to opMMX() with the
//                     load opcode and a second constant (0x66, 0xF2, 0xF3 or 0x100).
//   (Address, Xmm)  - destination is memory; emits the same constant as a raw
//                     byte with db() when there is one, then calls
//                     opModM(addr, xmm, 0x0F, storeOpcode).
// The store opcode is the load opcode with a bit changed: 0x6F -> 0x7F,
// 0x28 -> 0x29, 0x10 -> 0x11.
// movaps and movups pass 0x100 to opMMX() and are exactly the two whose store
// overload emits no db() byte.
// NOTE(review): 0x100 therefore looks like a "no prefix" sentinel (it does not
// fit in a byte) — confirm in opMMX().
void movdqa(const Xmm& xmm, const Operand& op) { opMMX(xmm, op, 0x6F, 0x66); }
|
||||
void movdqa(const Address& addr, const Xmm& xmm) { db(0x66); opModM(addr, xmm, 0x0F, 0x7F); }
|
||||
void movdqu(const Xmm& xmm, const Operand& op) { opMMX(xmm, op, 0x6F, 0xF3); }
|
||||
void movdqu(const Address& addr, const Xmm& xmm) { db(0xF3); opModM(addr, xmm, 0x0F, 0x7F); }
|
||||
void movaps(const Xmm& xmm, const Operand& op) { opMMX(xmm, op, 0x28, 0x100); }
|
||||
void movaps(const Address& addr, const Xmm& xmm) { opModM(addr, xmm, 0x0F, 0x29); }
|
||||
void movss(const Xmm& xmm, const Operand& op) { opMMX(xmm, op, 0x10, 0xF3); }
|
||||
void movss(const Address& addr, const Xmm& xmm) { db(0xF3); opModM(addr, xmm, 0x0F, 0x11); }
|
||||
void movups(const Xmm& xmm, const Operand& op) { opMMX(xmm, op, 0x10, 0x100); }
|
||||
void movups(const Address& addr, const Xmm& xmm) { opModM(addr, xmm, 0x0F, 0x11); }
|
||||
void movapd(const Xmm& xmm, const Operand& op) { opMMX(xmm, op, 0x28, 0x66); }
|
||||
void movapd(const Address& addr, const Xmm& xmm) { db(0x66); opModM(addr, xmm, 0x0F, 0x29); }
|
||||
void movsd(const Xmm& xmm, const Operand& op) { opMMX(xmm, op, 0x10, 0xF2); }
|
||||
void movsd(const Address& addr, const Xmm& xmm) { db(0xF2); opModM(addr, xmm, 0x0F, 0x11); }
|
||||
void movupd(const Xmm& xmm, const Operand& op) { opMMX(xmm, op, 0x10, 0x66); }
|
||||
void movupd(const Address& addr, const Xmm& xmm) { db(0x66); opModM(addr, xmm, 0x0F, 0x11); }
|
||||
// Floating-point add / and-not / and / compare mnemonics, built on opGen().
// Arguments passed to opGen(): destination, source, opcode byte, a per-variant
// constant, an operand-check predicate (isXMM_XMMorMEM) and, for the cmp*
// family only, the imm8 comparison selector.
// Within one operation the opcode is shared and only the variant constant
// changes, keyed by the mnemonic suffix:
//   ps -> 0x100, ss -> 0xF3, pd -> 0x66, sd -> 0xF2
// NOTE(review): these look like the instruction prefix byte, with 0x100
// standing for "none" — confirm in opGen(), which is not shown here.
void addps(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x58, 0x100, isXMM_XMMorMEM); }
|
||||
void addss(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x58, 0xF3, isXMM_XMMorMEM); }
|
||||
void addpd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x58, 0x66, isXMM_XMMorMEM); }
|
||||
void addsd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x58, 0xF2, isXMM_XMMorMEM); }
|
||||
void andnps(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x55, 0x100, isXMM_XMMorMEM); }
|
||||
void andnpd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x55, 0x66, isXMM_XMMorMEM); }
|
||||
void andps(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x54, 0x100, isXMM_XMMorMEM); }
|
||||
void andpd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x54, 0x66, isXMM_XMMorMEM); }
|
||||
void cmpps(const Xmm& xmm, const Operand& op, uint8 imm8) { opGen(xmm, op, 0xC2, 0x100, isXMM_XMMorMEM, imm8); }
|
||||
void cmpss(const Xmm& xmm, const Operand& op, uint8 imm8) { opGen(xmm, op, 0xC2, 0xF3, isXMM_XMMorMEM, imm8); }
|
||||
void cmppd(const Xmm& xmm, const Operand& op, uint8 imm8) { opGen(xmm, op, 0xC2, 0x66, isXMM_XMMorMEM, imm8); }
|
||||
void cmpsd(const Xmm& xmm, const Operand& op, uint8 imm8) { opGen(xmm, op, 0xC2, 0xF2, isXMM_XMMorMEM, imm8); }
|
||||
// Floating-point divide / max / min / multiply mnemonics, built on opGen().
// One opcode per operation (div 0x5E, max 0x5F, min 0x5D, mul 0x59); the four
// variants of each operation differ only in the fourth argument, keyed by the
// suffix: ps -> 0x100, ss -> 0xF3, pd -> 0x66, sd -> 0xF2.
// isXMM_XMMorMEM is the operand-check predicate handed to opGen().
void divps(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x5E, 0x100, isXMM_XMMorMEM); }
|
||||
void divss(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x5E, 0xF3, isXMM_XMMorMEM); }
|
||||
void divpd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x5E, 0x66, isXMM_XMMorMEM); }
|
||||
void divsd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x5E, 0xF2, isXMM_XMMorMEM); }
|
||||
void maxps(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x5F, 0x100, isXMM_XMMorMEM); }
|
||||
void maxss(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x5F, 0xF3, isXMM_XMMorMEM); }
|
||||
void maxpd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x5F, 0x66, isXMM_XMMorMEM); }
|
||||
void maxsd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x5F, 0xF2, isXMM_XMMorMEM); }
|
||||
void minps(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x5D, 0x100, isXMM_XMMorMEM); }
|
||||
void minss(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x5D, 0xF3, isXMM_XMMorMEM); }
|
||||
void minpd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x5D, 0x66, isXMM_XMMorMEM); }
|
||||
void minsd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x5D, 0xF2, isXMM_XMMorMEM); }
|
||||
void mulps(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x59, 0x100, isXMM_XMMorMEM); }
|
||||
void mulss(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x59, 0xF3, isXMM_XMMorMEM); }
|
||||
void mulpd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x59, 0x66, isXMM_XMMorMEM); }
|
||||
void mulsd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x59, 0xF2, isXMM_XMMorMEM); }
|
||||
// Floating-point or / reciprocal / reciprocal-square-root / shuffle /
// square-root mnemonics, built on opGen().
// Not every operation has all four variants: or* and shuf* exist only as
// ps/pd, rcp* and rsqrt* only as ps/ss. Where a variant exists its fourth
// argument follows the suffix: ps -> 0x100, ss -> 0xF3, pd -> 0x66, sd -> 0xF2.
// shufps/shufpd additionally pass the imm8 selector as the last argument.
void orps(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x56, 0x100, isXMM_XMMorMEM); }
|
||||
void orpd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x56, 0x66, isXMM_XMMorMEM); }
|
||||
void rcpps(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x53, 0x100, isXMM_XMMorMEM); }
|
||||
void rcpss(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x53, 0xF3, isXMM_XMMorMEM); }
|
||||
void rsqrtps(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x52, 0x100, isXMM_XMMorMEM); }
|
||||
void rsqrtss(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x52, 0xF3, isXMM_XMMorMEM); }
|
||||
void shufps(const Xmm& xmm, const Operand& op, uint8 imm8) { opGen(xmm, op, 0xC6, 0x100, isXMM_XMMorMEM, imm8); }
|
||||
void shufpd(const Xmm& xmm, const Operand& op, uint8 imm8) { opGen(xmm, op, 0xC6, 0x66, isXMM_XMMorMEM, imm8); }
|
||||
void sqrtps(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x51, 0x100, isXMM_XMMorMEM); }
|
||||
void sqrtss(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x51, 0xF3, isXMM_XMMorMEM); }
|
||||
void sqrtpd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x51, 0x66, isXMM_XMMorMEM); }
|
||||
void sqrtsd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x51, 0xF2, isXMM_XMMorMEM); }
|
||||
// Floating-point subtract / unpack / xor mnemonics, built on opGen().
// sub* has all four variants (opcode 0x5C); unpckh* (0x15), unpckl* (0x14) and
// xor* (0x57) exist only as ps/pd. The fourth argument follows the suffix:
// ps -> 0x100, ss -> 0xF3, pd -> 0x66, sd -> 0xF2.
void subps(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x5C, 0x100, isXMM_XMMorMEM); }
|
||||
void subss(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x5C, 0xF3, isXMM_XMMorMEM); }
|
||||
void subpd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x5C, 0x66, isXMM_XMMorMEM); }
|
||||
void subsd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x5C, 0xF2, isXMM_XMMorMEM); }
|
||||
void unpckhps(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x15, 0x100, isXMM_XMMorMEM); }
|
||||
void unpckhpd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x15, 0x66, isXMM_XMMorMEM); }
|
||||
void unpcklps(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x14, 0x100, isXMM_XMMorMEM); }
|
||||
void unpcklpd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x14, 0x66, isXMM_XMMorMEM); }
|
||||
void xorps(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x57, 0x100, isXMM_XMMorMEM); }
|
||||
void xorpd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x57, 0x66, isXMM_XMMorMEM); }
|
||||
// Register-to-register-only mnemonics: both parameters are Xmm, so a memory
// operand is rejected at compile time rather than by a runtime predicate.
// Each calls opModR(reg1, reg2, 0x0F, opcode); maskmovdqu first emits a 0x66
// byte with db().
// movhlps (0x12) and movlhps (0x16) use the same opcode values that movlps and
// movhps pass to opMovXMM().
void maskmovdqu(const Xmm& reg1, const Xmm& reg2) { db(0x66); opModR(reg1, reg2, 0x0F, 0xF7); }
|
||||
void movhlps(const Xmm& reg1, const Xmm& reg2) { opModR(reg1, reg2, 0x0F, 0x12); }
|
||||
void movlhps(const Xmm& reg1, const Xmm& reg2) { opModR(reg1, reg2, 0x0F, 0x16); }
|
||||
// Quadword unpack and scalar compare mnemonics, built on opGen() with the
// isXMM_XMMorMEM predicate.
//   punpckhqdq / punpcklqdq: opcodes 0x6D / 0x6C, always with 0x66. Unlike the
//     narrower punpck* wrappers these take an Xmm destination, not Mmx.
//   comis* / ucomis*: opcodes 0x2F / 0x2E; the ss forms pass 0x100 and the sd
//     forms pass 0x66 as the fourth argument.
void punpckhqdq(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x6D, 0x66, isXMM_XMMorMEM); }
|
||||
void punpcklqdq(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x6C, 0x66, isXMM_XMMorMEM); }
|
||||
void comiss(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x2F, 0x100, isXMM_XMMorMEM); }
|
||||
void ucomiss(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x2E, 0x100, isXMM_XMMorMEM); }
|
||||
void comisd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x2F, 0x66, isXMM_XMMorMEM); }
|
||||
void ucomisd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x2E, 0x66, isXMM_XMMorMEM); }
|
||||
// Conversions whose destination is an Xmm register, built on opGen() with the
// isXMM_XMMorMEM predicate.
// Unlike the arithmetic wrappers, the fourth argument here cannot be derived
// from the mnemonic suffix. Three opcodes are shared and the fourth argument
// alone selects the conversion:
//   0x5A: cvtpd2ps 0x66, cvtps2pd 0x100, cvtsd2ss 0xF2, cvtss2sd 0xF3
//   0xE6: cvtpd2dq 0xF2, cvttpd2dq 0x66, cvtdq2pd 0xF3
//   0x5B: cvtps2dq 0x66, cvttps2dq 0xF3, cvtdq2ps 0x100
// A wrong constant here still assembles, but as a different conversion.
void cvtpd2ps(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x5A, 0x66, isXMM_XMMorMEM); }
|
||||
void cvtps2pd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x5A, 0x100, isXMM_XMMorMEM); }
|
||||
void cvtsd2ss(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x5A, 0xF2, isXMM_XMMorMEM); }
|
||||
void cvtss2sd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x5A, 0xF3, isXMM_XMMorMEM); }
|
||||
void cvtpd2dq(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0xE6, 0xF2, isXMM_XMMorMEM); }
|
||||
void cvttpd2dq(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0xE6, 0x66, isXMM_XMMorMEM); }
|
||||
void cvtdq2pd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0xE6, 0xF3, isXMM_XMMorMEM); }
|
||||
void cvtps2dq(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x5B, 0x66, isXMM_XMMorMEM); }
|
||||
void cvttps2dq(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x5B, 0xF3, isXMM_XMMorMEM); }
|
||||
void cvtdq2ps(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x5B, 0x100, isXMM_XMMorMEM); }
|
||||
// Add-subtract, horizontal add/subtract and duplicating-move mnemonics, built
// on opGen() with the isXMM_XMMorMEM predicate.
// In this group the ps variants pass 0xF2 (not 0x100) as the fourth argument,
// and the pd variants pass 0x66.
// movddup and movsldup share opcode 0x12 and differ only in 0xF2 vs 0xF3;
// movshdup uses 0x16 with 0xF3.
void addsubpd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0xD0, 0x66, isXMM_XMMorMEM); }
|
||||
void addsubps(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0xD0, 0xF2, isXMM_XMMorMEM); }
|
||||
void haddpd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x7C, 0x66, isXMM_XMMorMEM); }
|
||||
void haddps(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x7C, 0xF2, isXMM_XMMorMEM); }
|
||||
void hsubpd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x7D, 0x66, isXMM_XMMorMEM); }
|
||||
void hsubps(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x7D, 0xF2, isXMM_XMMorMEM); }
|
||||
void movddup(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x12, 0xF2, isXMM_XMMorMEM); }
|
||||
void movshdup(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x16, 0xF3, isXMM_XMMorMEM); }
|
||||
void movsldup(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x12, 0xF3, isXMM_XMMorMEM); }
|
||||
// Conversions that cross register classes. Both parameters are declared as
// plain Operand, so the C++ signature does not constrain the operand kinds;
// the only constraint visible here is the predicate handed to opGen().
// NOTE(review): judging by their names, the predicates mean
//   isXMM_MMXorMEM    destination xmm, source mmx or memory
//   isMMX_XMMorMEM    destination mmx, source xmm or memory
//   isXMM_REG32orMEM  destination xmm, source 32-bit register or memory
//   isREG32_XMMorMEM  destination 32-bit register, source xmm or memory
// — confirm against their definitions, which are not shown here.
// Opcodes: 0x2A converts from integer, 0x2D to integer, and 0x2C is used by the
// cvtt* forms. The fourth argument follows the floating-point type in the
// name: ps -> 0x100, ss -> 0xF3, pd -> 0x66, sd -> 0xF2.
void cvtpi2ps(const Operand& reg, const Operand& op) { opGen(reg, op, 0x2A, 0x100, isXMM_MMXorMEM); }
|
||||
void cvtps2pi(const Operand& reg, const Operand& op) { opGen(reg, op, 0x2D, 0x100, isMMX_XMMorMEM); }
|
||||
void cvtsi2ss(const Operand& reg, const Operand& op) { opGen(reg, op, 0x2A, 0xF3, isXMM_REG32orMEM); }
|
||||
void cvtss2si(const Operand& reg, const Operand& op) { opGen(reg, op, 0x2D, 0xF3, isREG32_XMMorMEM); }
|
||||
void cvttps2pi(const Operand& reg, const Operand& op) { opGen(reg, op, 0x2C, 0x100, isMMX_XMMorMEM); }
|
||||
void cvttss2si(const Operand& reg, const Operand& op) { opGen(reg, op, 0x2C, 0xF3, isREG32_XMMorMEM); }
|
||||
void cvtpi2pd(const Operand& reg, const Operand& op) { opGen(reg, op, 0x2A, 0x66, isXMM_MMXorMEM); }
|
||||
void cvtpd2pi(const Operand& reg, const Operand& op) { opGen(reg, op, 0x2D, 0x66, isMMX_XMMorMEM); }
|
||||
void cvtsi2sd(const Operand& reg, const Operand& op) { opGen(reg, op, 0x2A, 0xF2, isXMM_REG32orMEM); }
|
||||
void cvtsd2si(const Operand& reg, const Operand& op) { opGen(reg, op, 0x2D, 0xF2, isREG32_XMMorMEM); }
|
||||
void cvttpd2pi(const Operand& reg, const Operand& op) { opGen(reg, op, 0x2C, 0x66, isMMX_XMMorMEM); }
|
||||
void cvttsd2si(const Operand& reg, const Operand& op) { opGen(reg, op, 0x2C, 0xF2, isREG32_XMMorMEM); }
|
||||
// Prefetch hints. All four take only a memory address and call
// opModM(addr, Reg32(n), 0x0F, B00011000), where B00011000 is 24 (0x18).
// The hint is chosen solely by n: nta = 0, t0 = 1, t1 = 2, t2 = 3. Note that
// the declaration order (t0, t1, t2, nta) is not the numeric order.
// NOTE(review): Reg32(n) is presumably not a real register operand but a way
// to place n in the ModRM reg field — confirm in opModM().
void prefetcht0(const Address& addr) { opModM(addr, Reg32(1), 0x0F, B00011000); }
|
||||
void prefetcht1(const Address& addr) { opModM(addr, Reg32(2), 0x0F, B00011000); }
|
||||
void prefetcht2(const Address& addr) { opModM(addr, Reg32(3), 0x0F, B00011000); }
|
||||
void prefetchnta(const Address& addr) { opModM(addr, Reg32(0), 0x0F, B00011000); }
|
||||
// Half-register moves. Both parameters are plain Operand and are passed in the
// order given to opMovXMM(op1, op2, opcode, variant).
//   opcode:  0x16 for the "h" forms, 0x12 for the "l" forms
//   variant: 0x100 for ps, 0x66 for pd
// NOTE(review): since a single wrapper covers both directions, opMovXMM()
// presumably inspects the operand kinds to choose load vs store — confirm
// there; nothing in this listing shows it.
void movhps(const Operand& op1, const Operand& op2) { opMovXMM(op1, op2, 0x16, 0x100); }
|
||||
void movlps(const Operand& op1, const Operand& op2) { opMovXMM(op1, op2, 0x12, 0x100); }
|
||||
void movhpd(const Operand& op1, const Operand& op2) { opMovXMM(op1, op2, 0x16, 0x66); }
|
||||
void movlpd(const Operand& op1, const Operand& op2) { opMovXMM(op1, op2, 0x12, 0x66); }
|
||||
// Condition number 0 ("o"). Every condition gets the same three wrappers,
// parameterised only by its number N:
//   cmovCC: opModRM(reg, op, <op is an i32e register>, <op is memory>, 0x0F, B01000000 | N)
//   jCC:    opJmp(label, type, 0x70 + N, 0x80 + N, 0x0F)
//   setCC:  opR_ModM(op, 8, 3, 0, 0x0F, B10010000 | N)
// B01000000 is 64 (0x40) and B10010000 is 144 (0x90).
// The jump's `type` defaults to T_AUTO.
// NOTE(review): 0x70+N looks like the short-jump opcode and 0x0F,0x80+N the
// long form — confirm in opJmp().
void cmovo(const Reg32e& reg, const Operand& op) { opModRM(reg, op, op.isREG(i32e), op.isMEM(), 0x0F, B01000000 | 0); }
|
||||
void jo(const char *label, LabelType type = T_AUTO) { opJmp(label, type, 0x70, 0x80, 0x0F); }
|
||||
void seto(const Operand& op) { opR_ModM(op, 8, 3, 0, 0x0F, B10010000 | 0); }
|
||||
// Condition number 1 ("no"): conditional move, conditional jump and set-byte.
// cmov ORs the number into B01000000, set ORs it into B10010000, and the jump
// uses 0x71 / 0x81.
void cmovno(const Reg32e& reg, const Operand& op) { opModRM(reg, op, op.isREG(i32e), op.isMEM(), 0x0F, B01000000 | 1); }
|
||||
void jno(const char *label, LabelType type = T_AUTO) { opJmp(label, type, 0x71, 0x81, 0x0F); }
|
||||
void setno(const Operand& op) { opR_ModM(op, 8, 3, 0, 0x0F, B10010000 | 1); }
|
||||
// Condition number 2, available under two spellings: "b" and "nae".
// The two sets of wrappers pass identical constants (| 2, 0x72 / 0x82), so they
// are pure aliases; change both together.
void cmovb(const Reg32e& reg, const Operand& op) { opModRM(reg, op, op.isREG(i32e), op.isMEM(), 0x0F, B01000000 | 2); }
|
||||
void jb(const char *label, LabelType type = T_AUTO) { opJmp(label, type, 0x72, 0x82, 0x0F); }
|
||||
void setb(const Operand& op) { opR_ModM(op, 8, 3, 0, 0x0F, B10010000 | 2); }
|
||||
void cmovnae(const Reg32e& reg, const Operand& op) { opModRM(reg, op, op.isREG(i32e), op.isMEM(), 0x0F, B01000000 | 2); }
|
||||
void jnae(const char *label, LabelType type = T_AUTO) { opJmp(label, type, 0x72, 0x82, 0x0F); }
|
||||
void setnae(const Operand& op) { opR_ModM(op, 8, 3, 0, 0x0F, B10010000 | 2); }
|
||||
// Condition number 3, available under two spellings: "nb" and "ae".
// Both sets pass identical constants (| 3, 0x73 / 0x83); they are aliases.
void cmovnb(const Reg32e& reg, const Operand& op) { opModRM(reg, op, op.isREG(i32e), op.isMEM(), 0x0F, B01000000 | 3); }
|
||||
void jnb(const char *label, LabelType type = T_AUTO) { opJmp(label, type, 0x73, 0x83, 0x0F); }
|
||||
void setnb(const Operand& op) { opR_ModM(op, 8, 3, 0, 0x0F, B10010000 | 3); }
|
||||
void cmovae(const Reg32e& reg, const Operand& op) { opModRM(reg, op, op.isREG(i32e), op.isMEM(), 0x0F, B01000000 | 3); }
|
||||
void jae(const char *label, LabelType type = T_AUTO) { opJmp(label, type, 0x73, 0x83, 0x0F); }
|
||||
void setae(const Operand& op) { opR_ModM(op, 8, 3, 0, 0x0F, B10010000 | 3); }
|
||||
// Condition number 4, available under two spellings: "e" and "z".
// Both sets pass identical constants (| 4, 0x74 / 0x84); they are aliases.
void cmove(const Reg32e& reg, const Operand& op) { opModRM(reg, op, op.isREG(i32e), op.isMEM(), 0x0F, B01000000 | 4); }
|
||||
void je(const char *label, LabelType type = T_AUTO) { opJmp(label, type, 0x74, 0x84, 0x0F); }
|
||||
void sete(const Operand& op) { opR_ModM(op, 8, 3, 0, 0x0F, B10010000 | 4); }
|
||||
void cmovz(const Reg32e& reg, const Operand& op) { opModRM(reg, op, op.isREG(i32e), op.isMEM(), 0x0F, B01000000 | 4); }
|
||||
void jz(const char *label, LabelType type = T_AUTO) { opJmp(label, type, 0x74, 0x84, 0x0F); }
|
||||
void setz(const Operand& op) { opR_ModM(op, 8, 3, 0, 0x0F, B10010000 | 4); }
|
||||
// Condition number 5, available under two spellings: "ne" and "nz".
// Both sets pass identical constants (| 5, 0x75 / 0x85); they are aliases.
void cmovne(const Reg32e& reg, const Operand& op) { opModRM(reg, op, op.isREG(i32e), op.isMEM(), 0x0F, B01000000 | 5); }
|
||||
void jne(const char *label, LabelType type = T_AUTO) { opJmp(label, type, 0x75, 0x85, 0x0F); }
|
||||
void setne(const Operand& op) { opR_ModM(op, 8, 3, 0, 0x0F, B10010000 | 5); }
|
||||
void cmovnz(const Reg32e& reg, const Operand& op) { opModRM(reg, op, op.isREG(i32e), op.isMEM(), 0x0F, B01000000 | 5); }
|
||||
void jnz(const char *label, LabelType type = T_AUTO) { opJmp(label, type, 0x75, 0x85, 0x0F); }
|
||||
void setnz(const Operand& op) { opR_ModM(op, 8, 3, 0, 0x0F, B10010000 | 5); }
|
||||
// Condition number 6, available under two spellings: "be" and "na".
// Both sets pass identical constants (| 6, 0x76 / 0x86); they are aliases.
void cmovbe(const Reg32e& reg, const Operand& op) { opModRM(reg, op, op.isREG(i32e), op.isMEM(), 0x0F, B01000000 | 6); }
|
||||
void jbe(const char *label, LabelType type = T_AUTO) { opJmp(label, type, 0x76, 0x86, 0x0F); }
|
||||
void setbe(const Operand& op) { opR_ModM(op, 8, 3, 0, 0x0F, B10010000 | 6); }
|
||||
void cmovna(const Reg32e& reg, const Operand& op) { opModRM(reg, op, op.isREG(i32e), op.isMEM(), 0x0F, B01000000 | 6); }
|
||||
void jna(const char *label, LabelType type = T_AUTO) { opJmp(label, type, 0x76, 0x86, 0x0F); }
|
||||
void setna(const Operand& op) { opR_ModM(op, 8, 3, 0, 0x0F, B10010000 | 6); }
|
||||
void cmovnbe(const Reg32e& reg, const Operand& op) { opModRM(reg, op, op.isREG(i32e), op.isMEM(), 0x0F, B01000000 | 7); }
|
||||
void jnbe(const char *label, LabelType type = T_AUTO) { opJmp(label, type, 0x77, 0x87, 0x0F); }
|
||||
void setnbe(const Operand& op) { opR_ModM(op, 8, 3, 0, 0x0F, B10010000 | 7); }
|
||||
void cmova(const Reg32e& reg, const Operand& op) { opModRM(reg, op, op.isREG(i32e), op.isMEM(), 0x0F, B01000000 | 7); }
|
||||
void ja(const char *label, LabelType type = T_AUTO) { opJmp(label, type, 0x77, 0x87, 0x0F); }
|
||||
void seta(const Operand& op) { opR_ModM(op, 8, 3, 0, 0x0F, B10010000 | 7); }
|
||||
void cmovs(const Reg32e& reg, const Operand& op) { opModRM(reg, op, op.isREG(i32e), op.isMEM(), 0x0F, B01000000 | 8); }
|
||||
void js(const char *label, LabelType type = T_AUTO) { opJmp(label, type, 0x78, 0x88, 0x0F); }
|
||||
void sets(const Operand& op) { opR_ModM(op, 8, 3, 0, 0x0F, B10010000 | 8); }
|
||||
void cmovns(const Reg32e& reg, const Operand& op) { opModRM(reg, op, op.isREG(i32e), op.isMEM(), 0x0F, B01000000 | 9); }
|
||||
void jns(const char *label, LabelType type = T_AUTO) { opJmp(label, type, 0x79, 0x89, 0x0F); }
|
||||
void setns(const Operand& op) { opR_ModM(op, 8, 3, 0, 0x0F, B10010000 | 9); }
|
||||
void cmovp(const Reg32e& reg, const Operand& op) { opModRM(reg, op, op.isREG(i32e), op.isMEM(), 0x0F, B01000000 | 10); }
|
||||
void jp(const char *label, LabelType type = T_AUTO) { opJmp(label, type, 0x7A, 0x8A, 0x0F); }
|
||||
void setp(const Operand& op) { opR_ModM(op, 8, 3, 0, 0x0F, B10010000 | 10); }
|
||||
void cmovpe(const Reg32e& reg, const Operand& op) { opModRM(reg, op, op.isREG(i32e), op.isMEM(), 0x0F, B01000000 | 10); }
|
||||
void jpe(const char *label, LabelType type = T_AUTO) { opJmp(label, type, 0x7A, 0x8A, 0x0F); }
|
||||
void setpe(const Operand& op) { opR_ModM(op, 8, 3, 0, 0x0F, B10010000 | 10); }
|
||||
void cmovnp(const Reg32e& reg, const Operand& op) { opModRM(reg, op, op.isREG(i32e), op.isMEM(), 0x0F, B01000000 | 11); }
|
||||
void jnp(const char *label, LabelType type = T_AUTO) { opJmp(label, type, 0x7B, 0x8B, 0x0F); }
|
||||
void setnp(const Operand& op) { opR_ModM(op, 8, 3, 0, 0x0F, B10010000 | 11); }
|
||||
void cmovpo(const Reg32e& reg, const Operand& op) { opModRM(reg, op, op.isREG(i32e), op.isMEM(), 0x0F, B01000000 | 11); }
|
||||
void jpo(const char *label, LabelType type = T_AUTO) { opJmp(label, type, 0x7B, 0x8B, 0x0F); }
|
||||
void setpo(const Operand& op) { opR_ModM(op, 8, 3, 0, 0x0F, B10010000 | 11); }
|
||||
void cmovl(const Reg32e& reg, const Operand& op) { opModRM(reg, op, op.isREG(i32e), op.isMEM(), 0x0F, B01000000 | 12); }
|
||||
void jl(const char *label, LabelType type = T_AUTO) { opJmp(label, type, 0x7C, 0x8C, 0x0F); }
|
||||
void setl(const Operand& op) { opR_ModM(op, 8, 3, 0, 0x0F, B10010000 | 12); }
|
||||
void cmovnge(const Reg32e& reg, const Operand& op) { opModRM(reg, op, op.isREG(i32e), op.isMEM(), 0x0F, B01000000 | 12); }
|
||||
void jnge(const char *label, LabelType type = T_AUTO) { opJmp(label, type, 0x7C, 0x8C, 0x0F); }
|
||||
void setnge(const Operand& op) { opR_ModM(op, 8, 3, 0, 0x0F, B10010000 | 12); }
|
||||
void cmovnl(const Reg32e& reg, const Operand& op) { opModRM(reg, op, op.isREG(i32e), op.isMEM(), 0x0F, B01000000 | 13); }
|
||||
void jnl(const char *label, LabelType type = T_AUTO) { opJmp(label, type, 0x7D, 0x8D, 0x0F); }
|
||||
void setnl(const Operand& op) { opR_ModM(op, 8, 3, 0, 0x0F, B10010000 | 13); }
|
||||
void cmovge(const Reg32e& reg, const Operand& op) { opModRM(reg, op, op.isREG(i32e), op.isMEM(), 0x0F, B01000000 | 13); }
|
||||
void jge(const char *label, LabelType type = T_AUTO) { opJmp(label, type, 0x7D, 0x8D, 0x0F); }
|
||||
void setge(const Operand& op) { opR_ModM(op, 8, 3, 0, 0x0F, B10010000 | 13); }
|
||||
void cmovle(const Reg32e& reg, const Operand& op) { opModRM(reg, op, op.isREG(i32e), op.isMEM(), 0x0F, B01000000 | 14); }
|
||||
void jle(const char *label, LabelType type = T_AUTO) { opJmp(label, type, 0x7E, 0x8E, 0x0F); }
|
||||
void setle(const Operand& op) { opR_ModM(op, 8, 3, 0, 0x0F, B10010000 | 14); }
|
||||
void cmovng(const Reg32e& reg, const Operand& op) { opModRM(reg, op, op.isREG(i32e), op.isMEM(), 0x0F, B01000000 | 14); }
|
||||
void jng(const char *label, LabelType type = T_AUTO) { opJmp(label, type, 0x7E, 0x8E, 0x0F); }
|
||||
void setng(const Operand& op) { opR_ModM(op, 8, 3, 0, 0x0F, B10010000 | 14); }
|
||||
void cmovnle(const Reg32e& reg, const Operand& op) { opModRM(reg, op, op.isREG(i32e), op.isMEM(), 0x0F, B01000000 | 15); }
|
||||
void jnle(const char *label, LabelType type = T_AUTO) { opJmp(label, type, 0x7F, 0x8F, 0x0F); }
|
||||
void setnle(const Operand& op) { opR_ModM(op, 8, 3, 0, 0x0F, B10010000 | 15); }
|
||||
void cmovg(const Reg32e& reg, const Operand& op) { opModRM(reg, op, op.isREG(i32e), op.isMEM(), 0x0F, B01000000 | 15); }
|
||||
void jg(const char *label, LabelType type = T_AUTO) { opJmp(label, type, 0x7F, 0x8F, 0x0F); }
|
||||
void setg(const Operand& op) { opR_ModM(op, 8, 3, 0, 0x0F, B10010000 | 15); }
|
||||
#ifdef XBYAK64
|
||||
void cdqe() { db(0x48); db(0x98); }
|
||||
#else
|
||||
void aaa() { db(0x37); }
|
||||
void aad() { db(0xD5); db(0x0A); }
|
||||
void aam() { db(0xD4); db(0x0A); }
|
||||
void aas() { db(0x3F); }
|
||||
void daa() { db(0x27); }
|
||||
void das() { db(0x2F); }
|
||||
void popad() { db(0x61); }
|
||||
void popfd() { db(0x9D); }
|
||||
void pusha() { db(0x60); }
|
||||
void pushad() { db(0x60); }
|
||||
void pushfd() { db(0x9C); }
|
||||
void popa() { db(0x61); }
|
||||
#endif
|
||||
void cbw() { db(0x66); db(0x98); }
|
||||
void cdq() { db(0x99); }
|
||||
void clc() { db(0xF8); }
|
||||
void cld() { db(0xFC); }
|
||||
void cli() { db(0xFA); }
|
||||
void cmc() { db(0xF5); }
|
||||
void cpuid() { db(0x0F); db(0xA2); }
|
||||
void cwd() { db(0x66); db(0x99); }
|
||||
void cwde() { db(0x98); }
|
||||
void lahf() { db(0x9F); }
|
||||
void lock() { db(0xF0); }
|
||||
void nop() { db(0x90); }
|
||||
void sahf() { db(0x9E); }
|
||||
void stc() { db(0xF9); }
|
||||
void std() { db(0xFD); }
|
||||
void sti() { db(0xFB); }
|
||||
// More operand-less instructions: each function writes its fixed opcode bytes via db().
void emms() {
	db(0x0F);
	db(0x77);
}

void pause() {
	db(0xF3);
	db(0x90);
}

// The three fences share the 0x0F 0xAE prefix and differ in the last byte.
void sfence() {
	db(0x0F);
	db(0xAE);
	db(0xF8);
}

void lfence() {
	db(0x0F);
	db(0xAE);
	db(0xE8);
}

void mfence() {
	db(0x0F);
	db(0xAE);
	db(0xF0);
}

void monitor() {
	db(0x0F);
	db(0x01);
	db(0xC8);
}

void mwait() {
	db(0x0F);
	db(0x01);
	db(0xC9);
}

void rdmsr() {
	db(0x0F);
	db(0x32);
}

void rdpmc() {
	db(0x0F);
	db(0x33);
}

void rdtsc() {
	db(0x0F);
	db(0x31);
}

void wait() {
	db(0x9B);
}

void wbinvd() {
	db(0x0F);
	db(0x09);
}

void wrmsr() {
	db(0x0F);
	db(0x30);
}

void xlatb() {
	db(0xD7);
}

void popf() {
	db(0x9D);
}

void pushf() {
	db(0x9C);
}
|
||||
void adc(const Operand& op1, const Operand& op2) { opRM_RM(op1, op2, 0x10); }
|
||||
void adc(const Operand& op, uint32 imm) { opRM_I(op, imm, 0x10, 2); }
|
||||
void add(const Operand& op1, const Operand& op2) { opRM_RM(op1, op2, 0x00); }
|
||||
void add(const Operand& op, uint32 imm) { opRM_I(op, imm, 0x00, 0); }
|
||||
void and(const Operand& op1, const Operand& op2) { opRM_RM(op1, op2, 0x20); }
|
||||
void and(const Operand& op, uint32 imm) { opRM_I(op, imm, 0x20, 4); }
|
||||
void cmp(const Operand& op1, const Operand& op2) { opRM_RM(op1, op2, 0x38); }
|
||||
void cmp(const Operand& op, uint32 imm) { opRM_I(op, imm, 0x38, 7); }
|
||||
void or(const Operand& op1, const Operand& op2) { opRM_RM(op1, op2, 0x08); }
|
||||
void or(const Operand& op, uint32 imm) { opRM_I(op, imm, 0x08, 1); }
|
||||
void sbb(const Operand& op1, const Operand& op2) { opRM_RM(op1, op2, 0x18); }
|
||||
void sbb(const Operand& op, uint32 imm) { opRM_I(op, imm, 0x18, 3); }
|
||||
void sub(const Operand& op1, const Operand& op2) { opRM_RM(op1, op2, 0x28); }
|
||||
void sub(const Operand& op, uint32 imm) { opRM_I(op, imm, 0x28, 5); }
|
||||
void xor(const Operand& op1, const Operand& op2) { opRM_RM(op1, op2, 0x30); }
|
||||
void xor(const Operand& op, uint32 imm) { opRM_I(op, imm, 0x30, 6); }
|
||||
void dec(const Operand& op) { opIncDec(op, 0x48, 1); }
|
||||
void inc(const Operand& op) { opIncDec(op, 0x40, 0); }
|
||||
void div(const Operand& op) { opR_ModM(op, 0, 3, 6, 0xF6); }
|
||||
void idiv(const Operand& op) { opR_ModM(op, 0, 3, 7, 0xF6); }
|
||||
void imul(const Operand& op) { opR_ModM(op, 0, 3, 5, 0xF6); }
|
||||
void mul(const Operand& op) { opR_ModM(op, 0, 3, 4, 0xF6); }
|
||||
void neg(const Operand& op) { opR_ModM(op, 0, 3, 3, 0xF6); }
|
||||
void not(const Operand& op) { opR_ModM(op, 0, 3, 2, 0xF6); }
|
||||
void rcl(const Operand& op, int imm) { opShift(op, imm, 2); }
|
||||
void rcl(const Operand& op, const Reg8& cl) { opShift(op, cl, 2); }
|
||||
void rcr(const Operand& op, int imm) { opShift(op, imm, 3); }
|
||||
void rcr(const Operand& op, const Reg8& cl) { opShift(op, cl, 3); }
|
||||
void rol(const Operand& op, int imm) { opShift(op, imm, 0); }
|
||||
void rol(const Operand& op, const Reg8& cl) { opShift(op, cl, 0); }
|
||||
void ror(const Operand& op, int imm) { opShift(op, imm, 1); }
|
||||
void ror(const Operand& op, const Reg8& cl) { opShift(op, cl, 1); }
|
||||
void sar(const Operand& op, int imm) { opShift(op, imm, 7); }
|
||||
void sar(const Operand& op, const Reg8& cl) { opShift(op, cl, 7); }
|
||||
void shl(const Operand& op, int imm) { opShift(op, imm, 4); }
|
||||
void shl(const Operand& op, const Reg8& cl) { opShift(op, cl, 4); }
|
||||
void shr(const Operand& op, int imm) { opShift(op, imm, 5); }
|
||||
void shr(const Operand& op, const Reg8& cl) { opShift(op, cl, 5); }
|
||||
void sal(const Operand& op, int imm) { opShift(op, imm, 4); }
|
||||
void sal(const Operand& op, const Reg8& cl) { opShift(op, cl, 4); }
|
||||
void shld(const Operand& op, const Reg& reg, uint8 imm) { opShxd(op, reg, imm, 0xA4); }
|
||||
void shld(const Operand& op, const Reg& reg, const Reg8& cl) { opShxd(op, reg, 0, 0xA4, &cl); }
|
||||
void shrd(const Operand& op, const Reg& reg, uint8 imm) { opShxd(op, reg, imm, 0xAC); }
|
||||
void shrd(const Operand& op, const Reg& reg, const Reg8& cl) { opShxd(op, reg, 0, 0xAC, &cl); }
|
||||
void bsf(const Reg®, const Operand& op) { opModRM(reg, op, op.isREG(16 | i32e), op.isMEM(), 0x0F, 0xBC); }
|
||||
void bsr(const Reg®, const Operand& op) { opModRM(reg, op, op.isREG(16 | i32e), op.isMEM(), 0x0F, 0xBD); }
|
||||
// Instructions emitted through opMMX with prefix argument 0x66.
// All but palignr pass 256 and 0x38 as the last two arguments and differ only
// in the opcode byte (third argument).
void pshufb(const Mmx& mmx, const Operand& op) {
	opMMX(mmx, op, 0x00, 0x66, 256, 0x38);
}

void phaddw(const Mmx& mmx, const Operand& op) {
	opMMX(mmx, op, 0x01, 0x66, 256, 0x38);
}

void phaddd(const Mmx& mmx, const Operand& op) {
	opMMX(mmx, op, 0x02, 0x66, 256, 0x38);
}

void phaddsw(const Mmx& mmx, const Operand& op) {
	opMMX(mmx, op, 0x03, 0x66, 256, 0x38);
}

void pmaddubsw(const Mmx& mmx, const Operand& op) {
	opMMX(mmx, op, 0x04, 0x66, 256, 0x38);
}

void phsubw(const Mmx& mmx, const Operand& op) {
	opMMX(mmx, op, 0x05, 0x66, 256, 0x38);
}

void phsubd(const Mmx& mmx, const Operand& op) {
	opMMX(mmx, op, 0x06, 0x66, 256, 0x38);
}

void phsubsw(const Mmx& mmx, const Operand& op) {
	opMMX(mmx, op, 0x07, 0x66, 256, 0x38);
}

void psignb(const Mmx& mmx, const Operand& op) {
	opMMX(mmx, op, 0x08, 0x66, 256, 0x38);
}

void psignw(const Mmx& mmx, const Operand& op) {
	opMMX(mmx, op, 0x09, 0x66, 256, 0x38);
}

void psignd(const Mmx& mmx, const Operand& op) {
	opMMX(mmx, op, 0x0A, 0x66, 256, 0x38);
}

void pmulhrsw(const Mmx& mmx, const Operand& op) {
	opMMX(mmx, op, 0x0B, 0x66, 256, 0x38);
}

void pabsb(const Mmx& mmx, const Operand& op) {
	opMMX(mmx, op, 0x1C, 0x66, 256, 0x38);
}

void pabsw(const Mmx& mmx, const Operand& op) {
	opMMX(mmx, op, 0x1D, 0x66, 256, 0x38);
}

void pabsd(const Mmx& mmx, const Operand& op) {
	opMMX(mmx, op, 0x1E, 0x66, 256, 0x38);
}

// palignr carries an immediate, truncated to uint8, and uses 0x3a as the last argument.
void palignr(const Mmx& mmx, const Operand& op, int imm) {
	const uint8 imm8 = static_cast<uint8>(imm);
	opMMX(mmx, op, 0x0f, 0x66, imm8, 0x3a);
}
|
||||
void blendvpd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x15, 0x66, isXMM_XMMorMEM, 256, 0x38); }
|
||||
void blendvps(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x14, 0x66, isXMM_XMMorMEM, 256, 0x38); }
|
||||
void packusdw(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x2B, 0x66, isXMM_XMMorMEM, 256, 0x38); }
|
||||
void pblendvb(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x10, 0x66, isXMM_XMMorMEM, 256, 0x38); }
|
||||
void pcmpeqq(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x29, 0x66, isXMM_XMMorMEM, 256, 0x38); }
|
||||
void ptest(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x17, 0x66, isXMM_XMMorMEM, 256, 0x38); }
|
||||
void pmovsxbw(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x20, 0x66, isXMM_XMMorMEM, 256, 0x38); }
|
||||
void pmovsxbd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x21, 0x66, isXMM_XMMorMEM, 256, 0x38); }
|
||||
void pmovsxbq(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x22, 0x66, isXMM_XMMorMEM, 256, 0x38); }
|
||||
void pmovsxwd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x23, 0x66, isXMM_XMMorMEM, 256, 0x38); }
|
||||
void pmovsxwq(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x24, 0x66, isXMM_XMMorMEM, 256, 0x38); }
|
||||
void pmovsxdq(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x25, 0x66, isXMM_XMMorMEM, 256, 0x38); }
|
||||
void pmovzxbw(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x30, 0x66, isXMM_XMMorMEM, 256, 0x38); }
|
||||
void pmovzxbd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x31, 0x66, isXMM_XMMorMEM, 256, 0x38); }
|
||||
void pmovzxbq(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x32, 0x66, isXMM_XMMorMEM, 256, 0x38); }
|
||||
void pmovzxwd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x33, 0x66, isXMM_XMMorMEM, 256, 0x38); }
|
||||
void pmovzxwq(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x34, 0x66, isXMM_XMMorMEM, 256, 0x38); }
|
||||
void pmovzxdq(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x35, 0x66, isXMM_XMMorMEM, 256, 0x38); }
|
||||
void pminsb(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x38, 0x66, isXMM_XMMorMEM, 256, 0x38); }
|
||||
void pminsd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x39, 0x66, isXMM_XMMorMEM, 256, 0x38); }
|
||||
void pminuw(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x3A, 0x66, isXMM_XMMorMEM, 256, 0x38); }
|
||||
void pminud(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x3B, 0x66, isXMM_XMMorMEM, 256, 0x38); }
|
||||
void pmaxsb(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x3C, 0x66, isXMM_XMMorMEM, 256, 0x38); }
|
||||
void pmaxsd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x3D, 0x66, isXMM_XMMorMEM, 256, 0x38); }
|
||||
void pmaxuw(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x3E, 0x66, isXMM_XMMorMEM, 256, 0x38); }
|
||||
void pmaxud(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x3F, 0x66, isXMM_XMMorMEM, 256, 0x38); }
|
||||
void pmuldq(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x28, 0x66, isXMM_XMMorMEM, 256, 0x38); }
|
||||
void pmulld(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x40, 0x66, isXMM_XMMorMEM, 256, 0x38); }
|
||||
void phminposuw(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x41, 0x66, isXMM_XMMorMEM, 256, 0x38); }
|
||||
void pcmpgtq(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x37, 0x66, isXMM_XMMorMEM, 256, 0x38); }
|
||||
void blendpd(const Xmm& xmm, const Operand& op, int imm) { opGen(xmm, op, 0x0D, 0x66, isXMM_XMMorMEM, static_cast<uint8>(imm), 0x3A); }
|
||||
void blendps(const Xmm& xmm, const Operand& op, int imm) { opGen(xmm, op, 0x0C, 0x66, isXMM_XMMorMEM, static_cast<uint8>(imm), 0x3A); }
|
||||
void dppd(const Xmm& xmm, const Operand& op, int imm) { opGen(xmm, op, 0x41, 0x66, isXMM_XMMorMEM, static_cast<uint8>(imm), 0x3A); }
|
||||
void dpps(const Xmm& xmm, const Operand& op, int imm) { opGen(xmm, op, 0x40, 0x66, isXMM_XMMorMEM, static_cast<uint8>(imm), 0x3A); }
|
||||
void mpsadbw(const Xmm& xmm, const Operand& op, int imm) { opGen(xmm, op, 0x42, 0x66, isXMM_XMMorMEM, static_cast<uint8>(imm), 0x3A); }
|
||||
void pblendw(const Xmm& xmm, const Operand& op, int imm) { opGen(xmm, op, 0x0E, 0x66, isXMM_XMMorMEM, static_cast<uint8>(imm), 0x3A); }
|
||||
void roundps(const Xmm& xmm, const Operand& op, int imm) { opGen(xmm, op, 0x08, 0x66, isXMM_XMMorMEM, static_cast<uint8>(imm), 0x3A); }
|
||||
void roundpd(const Xmm& xmm, const Operand& op, int imm) { opGen(xmm, op, 0x09, 0x66, isXMM_XMMorMEM, static_cast<uint8>(imm), 0x3A); }
|
||||
void roundss(const Xmm& xmm, const Operand& op, int imm) { opGen(xmm, op, 0x0A, 0x66, isXMM_XMMorMEM, static_cast<uint8>(imm), 0x3A); }
|
||||
void roundsd(const Xmm& xmm, const Operand& op, int imm) { opGen(xmm, op, 0x0B, 0x66, isXMM_XMMorMEM, static_cast<uint8>(imm), 0x3A); }
|
||||
void pcmpestrm(const Xmm& xmm, const Operand& op, int imm) { opGen(xmm, op, 0x60, 0x66, isXMM_XMMorMEM, static_cast<uint8>(imm), 0x3A); }
|
||||
void pcmpestri(const Xmm& xmm, const Operand& op, int imm) { opGen(xmm, op, 0x61, 0x66, isXMM_XMMorMEM, static_cast<uint8>(imm), 0x3A); }
|
||||
void pcmpistrm(const Xmm& xmm, const Operand& op, int imm) { opGen(xmm, op, 0x62, 0x66, isXMM_XMMorMEM, static_cast<uint8>(imm), 0x3A); }
|
||||
void pcmpistri(const Xmm& xmm, const Operand& op, int imm) { opGen(xmm, op, 0x63, 0x66, isXMM_XMMorMEM, static_cast<uint8>(imm), 0x3A); }
|
||||
void ldmxcsr(const Address& addr) { opModM(addr, Reg32(2), 0x0F, 0xAE); }
|
||||
void stmxcsr(const Address& addr) { opModM(addr, Reg32(3), 0x0F, 0xAE); }
|
||||
void clflush(const Address& addr) { opModM(addr, Reg32(7), 0x0F, 0xAE); }
|
||||
void movntpd(const Address& addr, const Xmm& reg) { opModM(addr, Reg16(reg.getIdx()), 0x0F, 0x2B); }
|
||||
void movntdq(const Address& addr, const Xmm& reg) { opModM(addr, Reg16(reg.getIdx()), 0x0F, 0xE7); }
|
||||
// Extending moves: both go through opMovxx and differ only in the opcode byte.
void movsx(const Reg& reg, const Operand& op) {
	opMovxx(reg, op, 0xBE);
}

void movzx(const Reg& reg, const Operand& op) {
	opMovxx(reg, op, 0xB6);
}
|
|
@ -0,0 +1,185 @@
|
|||
#ifndef XBYAK_XBYAK_UTIL_H_
|
||||
#define XBYAK_XBYAK_UTIL_H_
|
||||
|
||||
/**
|
||||
utility class for Xbyak
|
||||
@note this header is under construction
|
||||
*/
|
||||
|
||||
#ifdef _WIN32
|
||||
#if (_MSC_VER < 1400) && defined(XBYAK32)
|
||||
// Hand-written stand-in for the __cpuid intrinsic, used (per the enclosing #if)
// when _MSC_VER < 1400 and XBYAK32 is defined.
// Parameters are unnamed and read directly from the stack:
//   1st (int[4]) - receives eax, ebx, ecx, edx in that order
//   2nd (int)    - value loaded into eax before executing cpuid
// The function is naked, so it saves/restores ebx and esi itself and ends with
// an explicit ret. The offsets below add 4 * 2 for the two pushes.
// NOTE(review): offsets assume a cdecl-style layout (return address at [esp],
// arguments above it) - confirm the calling convention used for this build.
static inline __declspec(naked) void __cpuid(int[4], int)
|
||||
{
|
||||
__asm {
|
||||
push ebx
|
||||
push esi
|
||||
mov eax, dword ptr [esp + 4 * 2 + 8] // eaxIn
|
||||
cpuid
|
||||
mov esi, dword ptr [esp + 4 * 2 + 4] // data
|
||||
mov dword ptr [esi], eax
|
||||
mov dword ptr [esi + 4], ebx
|
||||
mov dword ptr [esi + 8], ecx
|
||||
mov dword ptr [esi + 12], edx
|
||||
pop esi
|
||||
pop ebx
|
||||
ret
|
||||
}
|
||||
}
|
||||
#else
|
||||
#include <intrin.h> // for __cpuid
|
||||
#endif
|
||||
#else
|
||||
#if __GNUC_PREREQ(4, 3)
|
||||
#include <cpuid.h>
|
||||
#else
|
||||
// Fallback used when __GNUC_PREREQ(4, 3) is false (the other branch includes <cpuid.h>).
// Executes cpuid with eax = eaxIn and stores eax/ebx/ecx/edx into a/b/c/d.
// NOTE(review): eaxIn is the only input operand, so ecx is not set before cpuid - confirm no caller needs a sub-leaf.
#define __cpuid(eaxIn, a, b, c, d) __asm__("cpuid\n" : "=a"(a), "=b"(b), "=c"(c), "=d"(d) : "0"(eaxIn))
|
||||
#endif
|
||||
#endif
|
||||
|
||||
namespace Xbyak { namespace util {
|
||||
|
||||
/**
|
||||
CPU detection class
|
||||
*/
|
||||
class Cpu {
|
||||
unsigned int type_;
|
||||
public:
|
||||
static inline void getCpuid(unsigned int eaxIn, unsigned int data[4])
|
||||
{
|
||||
#ifdef _WIN32
|
||||
__cpuid(reinterpret_cast<int*>(data), eaxIn);
|
||||
#else
|
||||
__cpuid(eaxIn, data[0], data[1], data[2], data[3]);
|
||||
#endif
|
||||
}
|
||||
enum Type {
|
||||
NONE = 0,
|
||||
tMMX = 1 << 0,
|
||||
tMMX2 = 1 << 1,
|
||||
tCMOV = 1 << 2,
|
||||
tSSE = 1 << 3,
|
||||
tSSE2 = 1 << 4,
|
||||
tSSE3 = 1 << 5,
|
||||
tSSSE3 = 1 << 6,
|
||||
tSSE41 = 1 << 7,
|
||||
tSSE42 = 1 << 8,
|
||||
tPOPCNT = 1 << 9,
|
||||
|
||||
t3DN = 1 << 16,
|
||||
tE3DN = 1 << 17,
|
||||
tSSE4a = 1 << 18,
|
||||
tSSE5 = 1 << 11,
|
||||
|
||||
tINTEL = 1 << 24,
|
||||
tAMD = 1 << 25
|
||||
};
|
||||
Cpu()
|
||||
: type_(NONE)
|
||||
{
|
||||
unsigned int data[4];
|
||||
getCpuid(0, data);
|
||||
static const char intel[] = "ntel";
|
||||
static const char amd[] = "cAMD";
|
||||
if (data[2] == *reinterpret_cast<const unsigned int*>(amd)) {
|
||||
type_ |= tAMD;
|
||||
getCpuid(0x80000001, data);
|
||||
if (data[3] & (1 << 31)) type_ |= t3DN;
|
||||
if (data[3] & (1 << 15)) type_ |= tCMOV;
|
||||
if (data[3] & (1 << 30)) type_ |= tE3DN;
|
||||
if (data[3] & (1 << 22)) type_ |= tMMX2;
|
||||
}
|
||||
if (data[2] == *reinterpret_cast<const unsigned int*>(intel)) {
|
||||
type_ |= tINTEL;
|
||||
}
|
||||
getCpuid(1, data);
|
||||
if (data[2] & (1 << 0)) type_ |= tSSE3;
|
||||
if (data[2] & (1 << 9)) type_ |= tSSSE3;
|
||||
if (data[2] & (1 << 19)) type_ |= tSSE41;
|
||||
if (data[2] & (1 << 20)) type_ |= tSSE42;
|
||||
if (data[2] & (1 << 23)) type_ |= tPOPCNT;
|
||||
|
||||
if (data[3] & (1 << 15)) type_ |= tCMOV;
|
||||
if (data[3] & (1 << 23)) type_ |= tMMX;
|
||||
if (data[3] & (1 << 25)) type_ |= tMMX2 | tSSE;
|
||||
if (data[3] & (1 << 26)) type_ |= tSSE2;
|
||||
}
|
||||
bool has(Type type) const
|
||||
{
|
||||
return (type & type_) != 0;
|
||||
}
|
||||
};
|
||||
|
||||
#ifdef XBYAK32
|
||||
|
||||
// Helpers used by EnableSetEip::setEipTo: one function set_eip_to_<reg> per
// register, each copying the dword at [esp] into that register.
// When reached through a call instruction, [esp] holds the return address.
namespace local {
|
||||
// MSVC variant: naked function, so nothing is pushed between the call and the mov.
#ifdef _WIN32
|
||||
#define XBYAK_LOCAL_DEFINE_SET_EIP_TO_REG(x) static inline __declspec(naked) void set_eip_to_ ## x() { \
|
||||
__asm { mov x, dword ptr [esp] } __asm { ret } \
|
||||
}
|
||||
#else
|
||||
// GCC variant: ordinary (non-naked) function containing a single asm statement.
// NOTE(review): this reads (%esp) correctly only if the compiler emits no
// prologue that moves esp first - confirm for the build flags in use.
#define XBYAK_LOCAL_DEFINE_SET_EIP_TO_REG(x) static inline void set_eip_to_ ## x() { \
|
||||
__asm__ volatile("movl (%esp), %" #x); \
|
||||
}
|
||||
#endif
|
||||
|
||||
// Instantiate the helper for each supported register.
XBYAK_LOCAL_DEFINE_SET_EIP_TO_REG(eax)
|
||||
XBYAK_LOCAL_DEFINE_SET_EIP_TO_REG(ecx)
|
||||
XBYAK_LOCAL_DEFINE_SET_EIP_TO_REG(edx)
|
||||
XBYAK_LOCAL_DEFINE_SET_EIP_TO_REG(ebx)
|
||||
XBYAK_LOCAL_DEFINE_SET_EIP_TO_REG(esi)
|
||||
XBYAK_LOCAL_DEFINE_SET_EIP_TO_REG(edi)
|
||||
XBYAK_LOCAL_DEFINE_SET_EIP_TO_REG(ebp)
|
||||
|
||||
#undef XBYAK_LOCAL_DEFINE_SET_EIP_TO_REG
|
||||
} // end of local
|
||||
|
||||
template<class Gen>
|
||||
struct EnableSetEip : public Gen {
|
||||
EnableSetEip(size_t maxSize = DEFAULT_MAX_CODE_SIZE, void *userPtr = 0)
|
||||
: Gen(maxSize, userPtr)
|
||||
{
|
||||
}
|
||||
/**
|
||||
get pid to out register
|
||||
@note out = eax or ecx or edx
|
||||
*/
|
||||
void setEipTo(const Xbyak::Reg32& out)
|
||||
{
|
||||
#if 0
|
||||
Gen::call(Gen::getCurr() + 5);
|
||||
Gen::pop(out);
|
||||
#else
|
||||
int idx = out.getIdx();
|
||||
switch (idx) {
|
||||
case Xbyak::Operand::EAX:
|
||||
Gen::call((void*)local::set_eip_to_eax);
|
||||
break;
|
||||
case Xbyak::Operand::ECX:
|
||||
Gen::call((void*)local::set_eip_to_ecx);
|
||||
break;
|
||||
case Xbyak::Operand::EDX:
|
||||
Gen::call((void*)local::set_eip_to_edx);
|
||||
break;
|
||||
case Xbyak::Operand::EBX:
|
||||
Gen::call((void*)local::set_eip_to_ebx);
|
||||
break;
|
||||
case Xbyak::Operand::ESI:
|
||||
Gen::call((void*)local::set_eip_to_esi);
|
||||
break;
|
||||
case Xbyak::Operand::EDI:
|
||||
Gen::call((void*)local::set_eip_to_esi);
|
||||
break;
|
||||
case Xbyak::Operand::EBP:
|
||||
Gen::call((void*)local::set_eip_to_ebp);
|
||||
break;
|
||||
default:
|
||||
assert(0);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
};
|
||||
#endif
|
||||
|
||||
} } // end of util
|
||||
#endif
|
||||
|
Loading…
Reference in New Issue